diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 736869bf0..f37d8aa42 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.12.10*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.12.10** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.12.14*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.12.14** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.12.10 +[debug] youtube-dl version 2017.12.14 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index a1fdcab99..03d2defb7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +version 2017.12.14 + +Core +* [postprocessor/xattr] Clarify NO_SPACE message (#14970) +* [downloader/http] Return actual download 
result from real_download (#14971) + +Extractors ++ [itv] Extract more subtitles and duration +* [itv] Improve extraction (#14944) ++ [byutv] Add support for geo restricted videos +* [byutv] Fix extraction (#14966, #14967) ++ [bbccouk] Fix extraction for 320k HLS streams ++ [toutv] Add support for special video URLs (#14179) +* [discovery] Fix free videos extraction (#14157, #14954) +* [tvnow] Fix extraction (#7831) ++ [nickelodeon:br] Add support for nickelodeon brazil websites (#14893) +* [nick] Improve extraction (#14876) +* [tbs] Fix extraction (#13658) + + version 2017.12.10 Core diff --git a/README.md b/README.md index cd30d147a..47b0640ab 100644 --- a/README.md +++ b/README.md @@ -539,6 +539,8 @@ The basic usage is not to set any template arguments when downloading a single f - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist - `playlist_id` (string): Playlist identifier - `playlist_title` (string): Playlist title + - `playlist_uploader` (string): Full name of the playlist uploader + - `playlist_uploader_id` (string): Nickname or id of the playlist uploader Available for the video that belongs to some logical chapter or section: diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d5e3a8a67..ebddd5b9d 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -122,7 +122,6 @@ - **bt:vestlendingen**: Bergens Tidende - Vestlendingen - **BuzzFeed** - **BYUtv** - - **BYUtvEvent** - **Camdemy** - **CamdemyFolder** - **CamWithHer** @@ -538,6 +537,7 @@ - **nhl.com:videocenter:category**: NHL videocenter category - **nick.com** - **nick.de** + - **nickelodeon:br** - **nickelodeonru** - **nicknight** - **niconico**: ニコニコ動画 @@ -556,8 +556,6 @@ - **nowness** - **nowness:playlist** - **nowness:series** - - **NowTV** (Currently broken) - - **NowTVList** - **nowvideo**: NowVideo - **Noz** - **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl @@ -794,7 +792,7 
@@ - **tagesschau:player** - **Tass** - **TastyTrade** - - **TBS** (Currently broken) + - **TBS** - **TDSLifeway** - **teachertube**: teachertube.com videos - **teachertube:user:collection**: teachertube.com user and collection videos @@ -865,6 +863,8 @@ - **tvland.com** - **TVN24** - **TVNoe** + - **TVNow** + - **TVNowList** - **tvp**: Telewizja Polska - **tvp:embed**: Telewizja Polska - **tvp:series** diff --git a/test/test_utils.py b/test/test_utils.py index cc13f795c..0857c0fc0 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -343,6 +343,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100) self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361) self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540) + self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140) def test_determine_ext(self): self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 68721e9ab..ace80f14b 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -975,6 +975,8 @@ class YoutubeDL(object): 'playlist': playlist, 'playlist_id': ie_result.get('id'), 'playlist_title': ie_result.get('title'), + 'playlist_uploader': ie_result.get('uploader'), + 'playlist_uploader_id': ie_result.get('uploader_id'), 'playlist_index': i + playliststart, 'extractor': ie_result['extractor'], 'webpage_url': ie_result['webpage_url'], diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 7bb61a541..ea5e3a4b5 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -112,7 +112,7 @@ class FragmentFD(FileDownloader): if self.__do_ytdl_file(ctx): self._write_ytdl_file(ctx) if not self.params.get('keep_fragments', False): - os.remove(ctx['fragment_filename_sanitized']) + 
os.remove(encodeFilename(ctx['fragment_filename_sanitized'])) del ctx['fragment_filename_sanitized'] def _prepare_frag_download(self, ctx): diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 8a6638cc2..3ff26ff70 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -284,8 +284,7 @@ class HttpFD(FileDownloader): while count <= retries: try: establish_connection() - download() - return True + return download() except RetryDownload as e: count += 1 if count <= retries: diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index e6513c7a4..513dd81df 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -228,10 +228,19 @@ class AfreecaTVIE(InfoExtractor): r'^(\d{8})_', key, 'upload date', default=None) file_duration = int_or_none(file_element.get('duration')) format_id = key if key else '%s_%s' % (video_id, file_num) - formats = self._extract_m3u8_formats( - file_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', - note='Downloading part %d m3u8 information' % file_num) + if determine_ext(file_url) == 'm3u8': + formats = self._extract_m3u8_formats( + file_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', + note='Downloading part %d m3u8 information' % file_num) + else: + formats = [{ + 'url': file_url, + 'format_id': 'http', + }] + if not formats: + continue + self._sort_formats(formats) file_info = common_entry.copy() file_info.update({ 'id': format_id, diff --git a/youtube_dl/extractor/aws.py b/youtube_dl/extractor/aws.py new file mode 100644 index 000000000..670abce0c --- /dev/null +++ b/youtube_dl/extractor/aws.py @@ -0,0 +1,78 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import datetime +import hashlib +import hmac + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_urlencode + + +class AWSIE(InfoExtractor): + _AWS_ALGORITHM = 'AWS4-HMAC-SHA256' + _AWS_REGION = 
'us-east-1' + + def _aws_execute_api(self, aws_dict, video_id, query=None): + query = query or {} + amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ') + date = amz_date[:8] + headers = { + 'Accept': 'application/json', + 'Host': self._AWS_PROXY_HOST, + 'X-Amz-Date': amz_date, + } + session_token = aws_dict.get('session_token') + if session_token: + headers['X-Amz-Security-Token'] = session_token + headers['X-Api-Key'] = self._AWS_API_KEY + + def aws_hash(s): + return hashlib.sha256(s.encode('utf-8')).hexdigest() + + # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html + canonical_querystring = compat_urllib_parse_urlencode(query) + canonical_headers = '' + for header_name, header_value in headers.items(): + canonical_headers += '%s:%s\n' % (header_name.lower(), header_value) + signed_headers = ';'.join([header.lower() for header in headers.keys()]) + canonical_request = '\n'.join([ + 'GET', + aws_dict['uri'], + canonical_querystring, + canonical_headers, + signed_headers, + aws_hash('') + ]) + + # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html + credential_scope_list = [date, self._AWS_REGION, 'execute-api', 'aws4_request'] + credential_scope = '/'.join(credential_scope_list) + string_to_sign = '\n'.join([self._AWS_ALGORITHM, amz_date, credential_scope, aws_hash(canonical_request)]) + + # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html + def aws_hmac(key, msg): + return hmac.new(key, msg.encode('utf-8'), hashlib.sha256) + + def aws_hmac_digest(key, msg): + return aws_hmac(key, msg).digest() + + def aws_hmac_hexdigest(key, msg): + return aws_hmac(key, msg).hexdigest() + + k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8') + for value in credential_scope_list: + k_signing = aws_hmac_digest(k_signing, value) + + signature = aws_hmac_hexdigest(k_signing, string_to_sign) + + # Task 4: 
http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html + headers['Authorization'] = ', '.join([ + '%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope), + 'SignedHeaders=%s' % signed_headers, + 'Signature=%s' % signature, + ]) + + return self._download_json( + 'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''), + video_id, headers=headers) diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 8ef089653..4bf4efe1f 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -3,20 +3,19 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ExtractorError class BYUtvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/(?!event/)(?P[0-9a-f-]+)(?:/(?P[^/?#&]+))?' + _VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P[0-9a-f-]+)(?:/(?P[^/?#&]+))?' 
_TESTS = [{ 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', 'info_dict': { - 'id': '6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', + 'id': 'ZvanRocTpW-G5_yZFeltTAMv6jxOU9KH', 'display_id': 'studio-c-season-5-episode-5', 'ext': 'mp4', 'title': 'Season 5 Episode 5', - 'description': 'md5:e07269172baff037f8e8bf9956bc9747', - 'thumbnail': r're:^https?://.*\.jpg$', + 'description': 'md5:1d31dc18ef4f075b28f6a65937d22c65', + 'thumbnail': r're:^https?://.*', 'duration': 1486.486, }, 'params': { @@ -26,6 +25,9 @@ class BYUtvIE(InfoExtractor): }, { 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', 'only_matching': True, + }, { + 'url': 'https://www.byutv.org/player/27741493-dc83-40b0-8420-e7ae38a2ae98/byu-football-toledo-vs-byu-93016?listid=4fe0fee5-0d3c-4a29-b725-e4948627f472&listindex=0&q=toledo', + 'only_matching': True, }] def _real_extract(self, url): @@ -33,16 +35,16 @@ class BYUtvIE(InfoExtractor): video_id = mobj.group('id') display_id = mobj.group('display_id') or video_id - webpage = self._download_webpage(url, display_id) - episode_code = self._search_regex( - r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information') - - ep = self._parse_json( - episode_code, display_id, transform_source=lambda s: - re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s)) - - if ep['providerType'] != 'Ooyala': - raise ExtractorError('Unsupported provider %s' % ep['provider']) + ep = self._download_json( + 'https://api.byutv.org/api3/catalog/getvideosforcontent', video_id, + query={ + 'contentid': video_id, + 'channel': 'byutv', + 'x-byutv-context': 'web$US', + }, headers={ + 'x-byutv-context': 'web$US', + 'x-byutv-platformkey': 'xsaaw9c7y5', + })['ooyalaVOD'] return { '_type': 'url_transparent', @@ -50,44 +52,7 @@ class BYUtvIE(InfoExtractor): 'url': 'ooyala:%s' % ep['providerId'], 'id': video_id, 'display_id': display_id, - 'title': ep['title'], + 'title': ep.get('title'), 'description': 
ep.get('description'), 'thumbnail': ep.get('imageThumbnail'), } - - -class BYUtvEventIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/event/(?P[0-9a-f-]+)' - _TEST = { - 'url': 'http://www.byutv.org/watch/event/29941b9b-8bf6-48d2-aebf-7a87add9e34b', - 'info_dict': { - 'id': '29941b9b-8bf6-48d2-aebf-7a87add9e34b', - 'ext': 'mp4', - 'title': 'Toledo vs. BYU (9/30/16)', - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['Ooyala'], - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - ooyala_id = self._search_regex( - r'providerId\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'ooyala id', group='id') - - title = self._search_regex( - r'class=["\']description["\'][^>]*>\s*

([^<]+)

', webpage, - 'title').strip() - - return { - '_type': 'url_transparent', - 'ie_key': 'Ooyala', - 'url': 'ooyala:%s' % ooyala_id, - 'id': video_id, - 'title': title, - } diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py index 7d78e3aae..90852a9ef 100644 --- a/youtube_dl/extractor/cbslocal.py +++ b/youtube_dl/extractor/cbslocal.py @@ -91,12 +91,10 @@ class CBSLocalIE(AnvatoIE): info_dict = self._extract_anvato_videos(webpage, display_id) - time_str = self._html_search_regex( - r'class="entry-date">([^<]+)<', webpage, 'released date', default=None) - if time_str: - timestamp = unified_timestamp(time_str) - else: - timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage)) + timestamp = unified_timestamp(self._html_search_regex( + r'class="(?:entry|post)-date"[^>]*>([^<]+)', webpage, + 'released date', default=None)) or parse_iso8601( + self._html_search_meta('uploadDate', webpage)) info_dict.update({ 'display_id': display_id, diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 80a9c982f..e5ef5e490 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -301,8 +301,9 @@ class InfoExtractor(object): There must be a key "entries", which is a list, an iterable, or a PagedList object, each element of which is a valid dictionary by this specification. - Additionally, playlists can have "title", "description" and "id" attributes - with the same semantics as videos (see above). + Additionally, playlists can have "id", "title", "description", "uploader", + "uploader_id", "uploader_url" attributes with the same semantics as videos + (see above). 
_type "multi_video" indicates that there are multiple videos that diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index b53f2d705..b92f25447 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -392,7 +392,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'Downloading subtitles for ' + sub_name, data={ 'subtitle_script_id': sub_id, }) - if not sub_doc: + if sub_doc is None: continue sid = sub_doc.get('id') iv = xpath_text(sub_doc, 'iv', 'subtitle iv') @@ -479,9 +479,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'video_quality': stream_quality, 'current_page': url, }) - if streamdata: + if streamdata is not None: stream_info = streamdata.find('./{default}preload/stream_info') - if stream_info: + if stream_info is not None: stream_infos.append(stream_info) stream_info = self._call_rpc_api( 'VideoEncode_GetStreamInfo', video_id, @@ -490,7 +490,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'video_format': stream_format, 'video_encode_quality': stream_quality, }) - if stream_info: + if stream_info is not None: stream_infos.append(stream_info) for stream_info in stream_infos: video_encode_id = xpath_text(stream_info, './video_encode_id') diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 171820e27..67d6df4b0 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -4,13 +4,14 @@ import re from .common import InfoExtractor from ..utils import ( - int_or_none, - unescapeHTML, - find_xpath_attr, - smuggle_url, determine_ext, ExtractorError, extract_attributes, + find_xpath_attr, + get_element_by_class, + int_or_none, + smuggle_url, + unescapeHTML, ) from .senateisvp import SenateISVPIE from .ustream import UstreamIE @@ -68,6 +69,10 @@ class CSpanIE(InfoExtractor): 'uploader': 'HouseCommittee', 'uploader_id': '12987475', }, + }, { 
+ # Audio Only + 'url': 'https://www.c-span.org/video/?437336-1/judiciary-antitrust-competition-policy-consumer-rights', + 'only_matching': True, }] BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' @@ -111,7 +116,15 @@ class CSpanIE(InfoExtractor): title = self._og_search_title(webpage) surl = smuggle_url(senate_isvp_url, {'force_title': title}) return self.url_result(surl, 'SenateISVP', video_id, title) + video_id = self._search_regex( + r'jwsetup\.clipprog\s*=\s*(\d+);', + webpage, 'jwsetup program id', default=None) + if video_id: + video_type = 'program' if video_type is None or video_id is None: + error_message = get_element_by_class('VLplayer-error-message', webpage) + if error_message: + raise ExtractorError(error_message) raise ExtractorError('unable to find video id and type') def get_text_attr(d, attr): @@ -138,7 +151,7 @@ class CSpanIE(InfoExtractor): entries = [] for partnum, f in enumerate(files): formats = [] - for quality in f['qualities']: + for quality in f.get('qualities', []): formats.append({ 'format_id': '%s-%sp' % (get_text_attr(quality, 'bitrate'), get_text_attr(quality, 'height')), 'url': unescapeHTML(get_text_attr(quality, 'file')), diff --git a/youtube_dl/extractor/discoverygo.py b/youtube_dl/extractor/discoverygo.py index 99376454b..3368c4c07 100644 --- a/youtube_dl/extractor/discoverygo.py +++ b/youtube_dl/extractor/discoverygo.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + determine_ext, extract_attributes, ExtractorError, int_or_none, @@ -73,7 +74,11 @@ class DiscoveryGoBaseIE(InfoExtractor): not subtitle_url.startswith('http')): continue lang = caption.get('fileLang', 'en') - subtitles.setdefault(lang, []).append({'url': subtitle_url}) + ext = determine_ext(subtitle_url) + subtitles.setdefault(lang, []).append({ + 'url': subtitle_url, + 'ext': 'ttml' if ext == 'xml' else ext, + }) return { 'id': video_id, diff --git 
a/youtube_dl/extractor/disney.py b/youtube_dl/extractor/disney.py index 968c4c7fd..0eee82fd6 100644 --- a/youtube_dl/extractor/disney.py +++ b/youtube_dl/extractor/disney.py @@ -10,6 +10,7 @@ from ..utils import ( compat_str, determine_ext, ExtractorError, + update_url_query, ) @@ -108,9 +109,16 @@ class DisneyIE(InfoExtractor): continue tbr = int_or_none(flavor.get('bitrate')) if tbr == 99999: - formats.extend(self._extract_m3u8_formats( + # wrong ks(Kaltura Signature) causes 404 Error + flavor_url = update_url_query(flavor_url, {'ks': ''}) + m3u8_formats = self._extract_m3u8_formats( flavor_url, video_id, 'mp4', - m3u8_id=flavor_format, fatal=False)) + m3u8_id=flavor_format, fatal=False) + for f in m3u8_formats: + # Apple FairPlay + if '/fpshls/' in f['url']: + continue + formats.append(f) continue format_id = [] if flavor_format: diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 88851f5f8..e90ab4415 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -138,10 +138,7 @@ from .brightcove import ( BrightcoveNewIE, ) from .buzzfeed import BuzzFeedIE -from .byutv import ( - BYUtvIE, - BYUtvEventIE, -) +from .byutv import BYUtvIE from .c56 import C56IE from .camdemy import ( CamdemyIE, @@ -934,7 +931,10 @@ from .seznamzpravy import ( SeznamZpravyIE, SeznamZpravyArticleIE, ) -from .shahid import ShahidIE +from .shahid import ( + ShahidIE, + ShahidShowIE, +) from .shared import ( SharedIE, VivoIE, diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py index 413a219dc..18a7d7f8c 100644 --- a/youtube_dl/extractor/itv.py +++ b/youtube_dl/extractor/itv.py @@ -26,7 +26,7 @@ from ..utils import ( class ITVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P[0-9a-zA-Z]+)' _GEO_COUNTRIES = ['GB'] - _TEST = { + _TESTS = [{ 'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053', 'info_dict': { 'id': '2a2936a0053', @@ -37,7 +37,11 @@ class 
ITVIE(InfoExtractor): # rtmp download 'skip_download': True, }, - } + }, { + # unavailable via data-playlist-url + 'url': 'https://www.itv.com/hub/through-the-keyhole/2a2271a0033', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -101,6 +105,18 @@ class ITVIE(InfoExtractor): 'Content-Type': 'text/xml; charset=utf-8', 'SOAPAction': 'http://tempuri.org/PlaylistService/GetPlaylist', }) + + info = self._search_json_ld(webpage, video_id, default={}) + formats = [] + subtitles = {} + + def extract_subtitle(sub_url): + ext = determine_ext(sub_url, 'ttml') + subtitles.setdefault('en', []).append({ + 'url': sub_url, + 'ext': 'ttml' if ext == 'xml' else ext, + }) + resp_env = self._download_xml( params['data-playlist-url'], video_id, headers=headers, data=etree.tostring(req_env)) @@ -111,37 +127,55 @@ class ITVIE(InfoExtractor): if fault_code == 'InvalidGeoRegion': self.raise_geo_restricted( msg=fault_string, countries=self._GEO_COUNTRIES) - raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string)) - title = xpath_text(playlist, 'EpisodeTitle', fatal=True) - video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True) - media_files = xpath_element(video_element, 'MediaFiles', fatal=True) - rtmp_url = media_files.attrib['base'] + elif fault_code != 'InvalidEntity': + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, fault_string), expected=True) + info.update({ + 'title': self._og_search_title(webpage), + 'episode_title': params.get('data-video-episode'), + 'series': params.get('data-video-title'), + }) + else: + title = xpath_text(playlist, 'EpisodeTitle', default=None) + info.update({ + 'title': title, + 'episode_title': title, + 'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')), + 'series': xpath_text(playlist, 'ProgrammeTitle'), + 'duration': parse_duration(xpath_text(playlist, 'Duration')), + }) + video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True) + 
media_files = xpath_element(video_element, 'MediaFiles', fatal=True) + rtmp_url = media_files.attrib['base'] - formats = [] - for media_file in media_files.findall('MediaFile'): - play_path = xpath_text(media_file, 'URL') - if not play_path: - continue - tbr = int_or_none(media_file.get('bitrate'), 1000) - f = { - 'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''), - 'play_path': play_path, - # Providing this swfVfy allows to avoid truncated downloads - 'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf', - 'page_url': url, - 'tbr': tbr, - 'ext': 'flv', - } - app = self._search_regex( - 'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None) - if app: - f.update({ - 'url': rtmp_url.split('?', 1)[0], - 'app': app, - }) - else: - f['url'] = rtmp_url - formats.append(f) + for media_file in media_files.findall('MediaFile'): + play_path = xpath_text(media_file, 'URL') + if not play_path: + continue + tbr = int_or_none(media_file.get('bitrate'), 1000) + f = { + 'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''), + 'play_path': play_path, + # Providing this swfVfy allows to avoid truncated downloads + 'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf', + 'page_url': url, + 'tbr': tbr, + 'ext': 'flv', + } + app = self._search_regex( + 'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None) + if app: + f.update({ + 'url': rtmp_url.split('?', 1)[0], + 'app': app, + }) + else: + f['url'] = rtmp_url + formats.append(f) + + for caption_url in video_element.findall('ClosedCaptioningURIs/URL'): + if caption_url.text: + extract_subtitle(caption_url.text) ios_playlist_url = params.get('data-video-playlist') or params.get('data-video-id') hmac = params.get('data-video-hmac') @@ -198,27 +232,22 @@ class ITVIE(InfoExtractor): formats.append({ 'url': href, }) + subs = video_data.get('Subtitles') + if isinstance(subs, list): + for sub in subs: + if not isinstance(sub, dict): + continue + href = sub.get('Href') + if isinstance(href, compat_str): + 
extract_subtitle(href) + if not info.get('duration'): + info['duration'] = parse_duration(video_data.get('Duration')) + self._sort_formats(formats) - subtitles = {} - for caption_url in video_element.findall('ClosedCaptioningURIs/URL'): - if not caption_url.text: - continue - ext = determine_ext(caption_url.text, 'ttml') - subtitles.setdefault('en', []).append({ - 'url': caption_url.text, - 'ext': 'ttml' if ext == 'xml' else ext, - }) - - info = self._search_json_ld(webpage, video_id, default={}) info.update({ 'id': video_id, - 'title': title, 'formats': formats, 'subtitles': subtitles, - 'episode_title': title, - 'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')), - 'series': xpath_text(playlist, 'ProgrammeTitle'), - 'duartion': parse_duration(xpath_text(playlist, 'Duration')), }) return info diff --git a/youtube_dl/extractor/mailru.py b/youtube_dl/extractor/mailru.py index f7cc3c832..6b7c5e3e0 100644 --- a/youtube_dl/extractor/mailru.py +++ b/youtube_dl/extractor/mailru.py @@ -13,8 +13,15 @@ from ..utils import ( class MailRuIE(InfoExtractor): IE_NAME = 'mailru' IE_DESC = 'Видео@Mail.Ru' - _VALID_URL = r'https?://(?:(?:www|m)\.)?my\.mail\.ru/(?:video/.*#video=/?(?P(?:[^/]+/){3}\d+)|(?:(?P(?:[^/]+/){2})video/(?P[^/]+/\d+))\.html)' - + _VALID_URL = r'''(?x) + https?:// + (?:(?:www|m)\.)?my\.mail\.ru/ + (?: + video/.*\#video=/?(?P(?:[^/]+/){3}\d+)| + (?:(?P(?:[^/]+/){2})video/(?P[^/]+/\d+))\.html| + (?:video/embed|\+/video/meta)/(?P\d+) + ) + ''' _TESTS = [ { 'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76', @@ -23,7 +30,7 @@ class MailRuIE(InfoExtractor): 'id': '46301138_76', 'ext': 'mp4', 'title': 'Новый Человек-Паук. Высокое напряжение. 
Восстание Электро', - 'timestamp': 1393232740, + 'timestamp': 1393235077, 'upload_date': '20140224', 'uploader': 'sonypicturesrus', 'uploader_id': 'sonypicturesrus@mail.ru', @@ -40,7 +47,7 @@ class MailRuIE(InfoExtractor): 'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion', 'timestamp': 1397039888, 'upload_date': '20140409', - 'uploader': 'hitech@corp.mail.ru', + 'uploader': 'hitech', 'uploader_id': 'hitech@corp.mail.ru', 'duration': 245, }, @@ -65,28 +72,42 @@ class MailRuIE(InfoExtractor): { 'url': 'http://m.my.mail.ru/mail/3sktvtr/video/_myvideo/138.html', 'only_matching': True, + }, + { + 'url': 'https://my.mail.ru/video/embed/7949340477499637815', + 'only_matching': True, + }, + { + 'url': 'http://my.mail.ru/+/video/meta/7949340477499637815', + 'only_matching': True, } ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('idv1') + meta_id = mobj.group('metaid') - if not video_id: - video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix') - - webpage = self._download_webpage(url, video_id) + video_id = None + if meta_id: + meta_url = 'https://my.mail.ru/+/video/meta/%s' % meta_id + else: + video_id = mobj.group('idv1') + if not video_id: + video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix') + webpage = self._download_webpage(url, video_id) + page_config = self._parse_json(self._search_regex( + r'(?s)]+class="sp-video__page-config"[^>]*>(.+?)', + webpage, 'page config', default='{}'), video_id, fatal=False) + if page_config: + meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl') + else: + meta_url = None video_data = None - - page_config = self._parse_json(self._search_regex( - r'(?s)]+class="sp-video__page-config"[^>]*>(.+?)', - webpage, 'page config', default='{}'), video_id, fatal=False) - if page_config: - meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl') - if meta_url: - video_data = self._download_json( - meta_url, 
video_id, 'Downloading video meta JSON', fatal=False) + if meta_url: + video_data = self._download_json( + meta_url, video_id or meta_id, 'Downloading video meta JSON', + fatal=not video_id) # Fallback old approach if not video_data: diff --git a/youtube_dl/extractor/scrippsnetworks.py b/youtube_dl/extractor/scrippsnetworks.py index b446a02ba..4023aeef8 100644 --- a/youtube_dl/extractor/scrippsnetworks.py +++ b/youtube_dl/extractor/scrippsnetworks.py @@ -1,13 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals -import datetime import json import hashlib -import hmac import re -from .common import InfoExtractor +from .aws import AWSIE from .anvato import AnvatoIE from ..utils import ( smuggle_url, @@ -16,7 +14,7 @@ from ..utils import ( ) -class ScrippsNetworksWatchIE(InfoExtractor): +class ScrippsNetworksWatchIE(AWSIE): IE_NAME = 'scrippsnetworks:watch' _VALID_URL = r'''(?x) https?:// @@ -64,44 +62,27 @@ class ScrippsNetworksWatchIE(InfoExtractor): 'travelchannel': 'trav', 'geniuskitchen': 'genius', } - _SNI_HOST = 'web.api.video.snidigital.com' - _AWS_REGION = 'us-east-1' - _AWS_IDENTITY_ID_JSON = json.dumps({ - 'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % _AWS_REGION - }) - _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback' _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1' - _AWS_SERVICE = 'execute-api' - _AWS_REQUEST = 'aws4_request' - _AWS_SIGNED_HEADERS = ';'.join([ - 'host', 'x-amz-date', 'x-amz-security-token', 'x-api-key']) - _AWS_CANONICAL_REQUEST_TEMPLATE = '''GET -%(uri)s + _AWS_PROXY_HOST = 'web.api.video.snidigital.com' -host:%(host)s -x-amz-date:%(date)s -x-amz-security-token:%(token)s -x-api-key:%(key)s - -%(signed_headers)s -%(payload_hash)s''' + _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) site_id, video_id = mobj.group('site', 'id') - def aws_hash(s): - return hashlib.sha256(s.encode('utf-8')).hexdigest() - + aws_identity_id_json = json.dumps({ + 
'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % self._AWS_REGION + }).encode('utf-8') token = self._download_json( - 'https://cognito-identity.us-east-1.amazonaws.com/', video_id, - data=self._AWS_IDENTITY_ID_JSON.encode('utf-8'), + 'https://cognito-identity.%s.amazonaws.com/' % self._AWS_REGION, video_id, + data=aws_identity_id_json, headers={ 'Accept': '*/*', 'Content-Type': 'application/x-amz-json-1.1', 'Referer': url, - 'X-Amz-Content-Sha256': aws_hash(self._AWS_IDENTITY_ID_JSON), + 'X-Amz-Content-Sha256': hashlib.sha256(aws_identity_id_json).hexdigest(), 'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken', 'X-Amz-User-Agent': self._AWS_USER_AGENT, })['Token'] @@ -124,64 +105,12 @@ x-api-key:%(key)s sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key, fatal=True) - access_key_id = get('AccessKeyId') - secret_access_key = get('SecretAccessKey') - session_token = get('SessionToken') - - # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html - uri = '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id) - datetime_now = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ') - date = datetime_now[:8] - canonical_string = self._AWS_CANONICAL_REQUEST_TEMPLATE % { - 'uri': uri, - 'host': self._SNI_HOST, - 'date': datetime_now, - 'token': session_token, - 'key': self._AWS_API_KEY, - 'signed_headers': self._AWS_SIGNED_HEADERS, - 'payload_hash': aws_hash(''), - } - - # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html - credential_string = '/'.join([date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST]) - string_to_sign = '\n'.join([ - 'AWS4-HMAC-SHA256', datetime_now, credential_string, - aws_hash(canonical_string)]) - - # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html - def aws_hmac(key, msg): - return hmac.new(key, msg.encode('utf-8'), hashlib.sha256) - - def aws_hmac_digest(key, msg): - return 
aws_hmac(key, msg).digest() - - def aws_hmac_hexdigest(key, msg): - return aws_hmac(key, msg).hexdigest() - - k_secret = 'AWS4' + secret_access_key - k_date = aws_hmac_digest(k_secret.encode('utf-8'), date) - k_region = aws_hmac_digest(k_date, self._AWS_REGION) - k_service = aws_hmac_digest(k_region, self._AWS_SERVICE) - k_signing = aws_hmac_digest(k_service, self._AWS_REQUEST) - - signature = aws_hmac_hexdigest(k_signing, string_to_sign) - - auth_header = ', '.join([ - 'AWS4-HMAC-SHA256 Credential=%s' % '/'.join( - [access_key_id, date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST]), - 'SignedHeaders=%s' % self._AWS_SIGNED_HEADERS, - 'Signature=%s' % signature, - ]) - - mcp_id = self._download_json( - 'https://%s%s' % (self._SNI_HOST, uri), video_id, headers={ - 'Accept': '*/*', - 'Referer': url, - 'Authorization': auth_header, - 'X-Amz-Date': datetime_now, - 'X-Amz-Security-Token': session_token, - 'X-Api-Key': self._AWS_API_KEY, - })['results'][0]['mcpId'] + mcp_id = self._aws_execute_api({ + 'uri': '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id), + 'access_key': get('AccessKeyId'), + 'secret_key': get('SecretAccessKey'), + 'session_token': get('SessionToken'), + }, video_id)['results'][0]['mcpId'] return self.url_result( smuggle_url( diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index 374f7faf9..5c2a6206b 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -1,22 +1,53 @@ # coding: utf-8 from __future__ import unicode_literals -import re import json +import math +import re -from .common import InfoExtractor +from .aws import AWSIE from ..compat import compat_HTTPError from ..utils import ( + clean_html, ExtractorError, + InAdvancePagedList, int_or_none, parse_iso8601, str_or_none, urlencode_postdata, - clean_html, ) -class ShahidIE(InfoExtractor): +class ShahidBaseIE(AWSIE): + _AWS_PROXY_HOST = 'api2.shahid.net' + _AWS_API_KEY = 
'2RRtuMHx95aNI1Kvtn2rChEuwsCogUd4samGPjLh' + + def _handle_error(self, e): + fail_data = self._parse_json( + e.cause.read().decode('utf-8'), None, fatal=False) + if fail_data: + faults = fail_data.get('faults', []) + faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')]) + if faults_message: + raise ExtractorError(faults_message, expected=True) + + def _call_api(self, path, video_id, request=None): + query = {} + if request: + query['request'] = json.dumps(request) + try: + return self._aws_execute_api({ + 'uri': '/proxy/v2/' + path, + 'access_key': 'AKIAI6X4TYCIXM2B7MUQ', + 'secret_key': '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', + }, video_id, query) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + self._handle_error(e) + raise + + +class ShahidIE(ShahidBaseIE): _NETRC_MACHINE = 'shahid' _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)' _TESTS = [{ @@ -41,34 +72,25 @@ class ShahidIE(InfoExtractor): 'only_matching': True }] - def _api2_request(self, *args, **kwargs): - try: - return self._download_json(*args, **kwargs) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError): - fail_data = self._parse_json( - e.cause.read().decode('utf-8'), None, fatal=False) - if fail_data: - faults = fail_data.get('faults', []) - faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')]) - if faults_message: - raise ExtractorError(faults_message, expected=True) - raise - def _real_initialize(self): email, password = self._get_login_info() if email is None: return - user_data = self._api2_request( - 'https://shahid.mbc.net/wd/service/users/login', - None, 'Logging in', data=json.dumps({ - 'email': email, - 'password': password, - 'basic': 'false', - }).encode('utf-8'), headers={ - 'Content-Type': 'application/json; charset=UTF-8', - })['user'] + try: + user_data = 
self._download_json( + 'https://shahid.mbc.net/wd/service/users/login', + None, 'Logging in', data=json.dumps({ + 'email': email, + 'password': password, + 'basic': 'false', + }).encode('utf-8'), headers={ + 'Content-Type': 'application/json; charset=UTF-8', + })['user'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + self._handle_error(e) + raise self._download_webpage( 'https://shahid.mbc.net/populateContext', @@ -81,25 +103,13 @@ class ShahidIE(InfoExtractor): 'sessionId': user_data['sessionId'], })) - def _get_api_data(self, response): - data = response.get('data', {}) - - error = data.get('error') - if error: - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())), - expected=True) - - return data - def _real_extract(self, url): page_type, video_id = re.match(self._VALID_URL, url).groups() if page_type == 'clip': page_type = 'episode' - playout = self._api2_request( - 'https://api2.shahid.net/proxy/v2/playout/url/' + video_id, - video_id, 'Downloading player JSON')['playout'] + playout = self._call_api( + 'playout/url/' + video_id, video_id)['playout'] if playout.get('drm'): raise ExtractorError('This video is DRM protected.', expected=True) @@ -107,13 +117,27 @@ class ShahidIE(InfoExtractor): formats = self._extract_m3u8_formats(playout['url'], video_id, 'mp4') self._sort_formats(formats) - video = self._get_api_data(self._download_json( + # video = self._call_api( + # 'product/id', video_id, { + # 'id': video_id, + # 'productType': 'ASSET', + # 'productSubType': page_type.upper() + # })['productModel'] + + response = self._download_json( 'http://api.shahid.net/api/v1_1/%s/%s' % (page_type, video_id), video_id, 'Downloading video JSON', query={ 'apiKey': 'sh@hid0nlin3', 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', - }))[page_type] + }) + data = response.get('data', {}) + error = data.get('error') + if error: + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, 
'\n'.join(error.values())), + expected=True) + video = data[page_type] title = video['title'] categories = [ category['name'] @@ -135,3 +159,57 @@ class ShahidIE(InfoExtractor): 'episode_id': video_id, 'formats': formats, } + + +class ShahidShowIE(ShahidBaseIE): + _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://shahid.mbc.net/ar/shows/%D8%B1%D8%A7%D9%85%D8%B2-%D9%82%D8%B1%D8%B4-%D8%A7%D9%84%D8%A8%D8%AD%D8%B1/show-79187', + 'info_dict': { + 'id': '79187', + 'title': 'رامز قرش البحر', + 'description': 'md5:c88fa7e0f02b0abd39d417aee0d046ff', + }, + 'playlist_mincount': 32, + }, { + 'url': 'https://shahid.mbc.net/ar/series/How-to-live-Longer-(The-Big-Think)/series-291861', + 'only_matching': True + }] + _PAGE_SIZE = 30 + + def _real_extract(self, url): + show_id = self._match_id(url) + + product = self._call_api( + 'playableAsset', show_id, {'showId': show_id})['productModel'] + playlist = product['playlist'] + playlist_id = playlist['id'] + show = product.get('show', {}) + + def page_func(page_num): + playlist = self._call_api( + 'product/playlist', show_id, { + 'playListId': playlist_id, + 'pageNumber': page_num, + 'pageSize': 30, + 'sorts': [{ + 'order': 'DESC', + 'type': 'SORTDATE' + }], + }) + for product in playlist.get('productList', {}).get('products', []): + product_url = product.get('productUrl', []).get('url') + if not product_url: + continue + yield self.url_result( + product_url, 'Shahid', + str_or_none(product.get('id')), + product.get('title')) + + entries = InAdvancePagedList( + page_func, + math.ceil(playlist['count'] / self._PAGE_SIZE), + self._PAGE_SIZE) + + return self.playlist_result( + entries, show_id, show.get('title'), show.get('description')) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 0d8376522..d4838b3e5 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -414,7 +414,7 @@ class VKIE(VKBaseIE): view_count = 
str_to_int(self._search_regex( r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)', - info_page, 'view count', fatal=False)) + info_page, 'view count', default=None)) formats = [] for format_id, format_url in data.items(): diff --git a/youtube_dl/extractor/voot.py b/youtube_dl/extractor/voot.py index 5de3deb8c..426754489 100644 --- a/youtube_dl/extractor/voot.py +++ b/youtube_dl/extractor/voot.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -from .kaltura import KalturaIE from ..utils import ( ExtractorError, int_or_none, @@ -17,11 +16,10 @@ class VootIE(InfoExtractor): _TESTS = [{ 'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/1/360558/is-this-the-end-of-kamini-/441353', 'info_dict': { - 'id': '0_8ledb18o', + 'id': '441353', 'ext': 'mp4', 'title': 'Ishq Ka Rang Safed - Season 01 - Episode 340', 'description': 'md5:06291fbbbc4dcbe21235c40c262507c1', - 'uploader_id': 'batchUser', 'timestamp': 1472162937, 'upload_date': '20160825', 'duration': 1146, @@ -61,8 +59,11 @@ class VootIE(InfoExtractor): media = media_info['assets'] - entry_id = media['EntryId'] title = media['MediaName'] + formats = self._extract_m3u8_formats( + 'https://cdnapisec.kaltura.com/p/1982551/playManifest/pt/https/f/applehttp/t/web/e/' + media['EntryId'], + video_id, 'mp4', m3u8_id='hls') + self._sort_formats(formats) description, series, season_number, episode, episode_number = [None] * 5 @@ -82,9 +83,7 @@ class VootIE(InfoExtractor): episode_number = int_or_none(value) return { - '_type': 'url_transparent', - 'url': 'kaltura:1982551:%s' % entry_id, - 'ie_key': KalturaIE.ie_key(), + 'id': video_id, 'title': title, 'description': description, 'series': series, @@ -95,4 +94,5 @@ class VootIE(InfoExtractor): 'duration': int_or_none(media.get('Duration')), 'view_count': int_or_none(media.get('ViewCounter')), 'like_count': int_or_none(media.get('like_counter')), + 'formats': formats, } diff --git a/youtube_dl/extractor/youtube.py 
b/youtube_dl/extractor/youtube.py index 9943dddc1..0919bef0e 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2270,6 +2270,19 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>', page, 'title', default=None) + _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref=' + uploader = self._search_regex( + r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE, + page, 'uploader', default=None) + mobj = re.search( + r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE, + page) + if mobj: + uploader_id = mobj.group('uploader_id') + uploader_url = compat_urlparse.urljoin(url, mobj.group('path')) + else: + uploader_id = uploader_url = None + has_videos = True if not playlist_title: @@ -2280,8 +2293,15 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): except StopIteration: has_videos = False - return has_videos, self.playlist_result( + playlist = self.playlist_result( self._entries(page, playlist_id), playlist_id, playlist_title) + playlist.update({ + 'uploader': uploader, + 'uploader_id': uploader_id, + 'uploader_url': uploader_url, + }) + + return has_videos, playlist def _check_download_just_video(self, url, playlist_id): # Check if it's a video-specific URL diff --git a/youtube_dl/postprocessor/xattrpp.py b/youtube_dl/postprocessor/xattrpp.py index fbdfa02ac..b0aed9ca7 100644 --- a/youtube_dl/postprocessor/xattrpp.py +++ b/youtube_dl/postprocessor/xattrpp.py @@ -42,6 +42,7 @@ class XAttrMetadataPP(PostProcessor): 'user.dublincore.format': 'format', } + num_written = 0 for xattrname, infoname in xattr_mapping.items(): value = info.get(infoname) @@ -52,6 +53,7 @@ class XAttrMetadataPP(PostProcessor): byte_value = value.encode('utf-8') write_xattr(filename, xattrname, byte_value) + num_written += 1 return [], info @@ -62,8 +64,8 @@ class XAttrMetadataPP(PostProcessor): except XAttrMetadataError as e: if e.reason == 'NO_SPACE': self._downloader.report_warning( - 'There\'s no disk space left or disk quota exceeded. ' + - 'Extended attributes are not written.') + 'There\'s no disk space left, disk quota exceeded or filesystem xattr limit exceeded. 
' + + (('Some ' if num_written else '') + 'extended attributes are not written.').capitalize()) elif e.reason == 'VALUE_TOO_LONG': self._downloader.report_warning( 'Unable to write extended attributes due to too long values.') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index eccbc0b1f..2843a3dc0 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -159,6 +159,8 @@ DATE_FORMATS = ( '%Y-%m-%dT%H:%M', '%b %d %Y at %H:%M', '%b %d %Y at %H:%M:%S', + '%B %d %Y at %H:%M', + '%B %d %Y at %H:%M:%S', ) DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d0c438317..2b5a63464 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.12.10' +__version__ = '2017.12.14'