From 114ed20e6435f9a68eb757f2d6e26e7ea941afbf Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 18 Jul 2015 22:10:57 +0100 Subject: [PATCH 01/21] [shahid] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/shahid.py | 56 ++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 youtube_dl/extractor/shahid.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 06f21064b..6c1d4f08a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -502,6 +502,7 @@ from .senateisvp import SenateISVPIE from .servingsys import ServingSysIE from .sexu import SexuIE from .sexykarma import SexyKarmaIE +from .shahid import ShahidIE from .shared import SharedIE from .sharesix import ShareSixIE from .sina import SinaIE diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py new file mode 100644 index 000000000..d492070da --- /dev/null +++ b/youtube_dl/extractor/shahid.py @@ -0,0 +1,56 @@ +from .common import InfoExtractor +from ..utils import get_element_by_id + +class ShahidIE(InfoExtractor): + _VALID_URL = r'https?://shahid\.mbc\.net/ar/episode/(?P\d+)/?' + _TESTS = [ + { + 'url': 'https://shahid.mbc.net/ar/episode/108084/%D8%AE%D9%88%D8%A7%D8%B7%D8%B1-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-11-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html', + 'info_dict': { + 'id': '108084', + 'ext': 'm3u8', + 'title': 'بسم الله', + 'description': 'بسم الله' + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, + { + #shahid plus subscriber only + 'url': 'https://shahid.mbc.net/ar/series/90497/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011.html', + 'only_matching': True + } + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + json_data = self._parse_json( + get_element_by_id('jsonld', webpage), + video_id + ) + title = json_data['name'] + thumbnail = json_data['image'] + categories = json_data['genre'] + description = json_data['description'] + player_json_data = self._download_json( + 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-'+video_id+'.type-player.html', + video_id + )['data'] + if 'url' in player_json_data: + m3u8_url = player_json_data['url'] + else: + for error in json_data['error'].values(): + self.report_warning(error) + return + formats = self._extract_m3u8_formats(m3u8_url, video_id) + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'categories': categories, + 'description': description, + 'formats': formats, + } From 02c126a7c2453b101505e3c7b8209e49e36fcd71 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 18 Jul 2015 22:55:40 +0100 Subject: [PATCH 02/21] [shahid] raise ExtractorError instead of warning --- youtube_dl/extractor/shahid.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index d492070da..6d76ef590 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -1,5 +1,8 @@ from .common import InfoExtractor -from ..utils import get_element_by_id +from ..utils import ( + get_element_by_id, + ExtractorError, +} class ShahidIE(InfoExtractor): _VALID_URL = r'https?://shahid\.mbc\.net/ar/episode/(?P\d+)/?' @@ -43,7 +46,7 @@ class ShahidIE(InfoExtractor): m3u8_url = player_json_data['url'] else: for error in json_data['error'].values(): - self.report_warning(error) + raise ExtractorError(error) return formats = self._extract_m3u8_formats(m3u8_url, video_id) return { From 84c0ed50a5491146a8a6b489c530c2335c023f8f Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 18 Jul 2015 23:10:20 +0100 Subject: [PATCH 03/21] [shahid] fix backets closing --- youtube_dl/extractor/shahid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index 6d76ef590..9846c052c 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -2,7 +2,7 @@ from .common import InfoExtractor from ..utils import ( get_element_by_id, ExtractorError, -} +) class ShahidIE(InfoExtractor): _VALID_URL = r'https?://shahid\.mbc\.net/ar/episode/(?P\d+)/?' From 3c07a729a652a25caf9aac6143a19d040ecf9a15 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 18 Jul 2015 23:16:57 +0100 Subject: [PATCH 04/21] [shahid] don't fail on missing info --- youtube_dl/extractor/shahid.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index 9846c052c..04de866f4 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -35,9 +35,9 @@ class ShahidIE(InfoExtractor): video_id ) title = json_data['name'] - thumbnail = json_data['image'] - categories = json_data['genre'] - description = json_data['description'] + thumbnail = json_data.get('image') + categories = json_data.get('genre') + description = json_data.get('description') player_json_data = self._download_json( 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-'+video_id+'.type-player.html', video_id From a62fd1af275a125b77d782e821724d6c6c896b69 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 25 Jul 2015 18:13:44 +0100 Subject: [PATCH 05/21] [shahid] improve info extraction --- youtube_dl/extractor/shahid.py | 42 +++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index 04de866f4..da3141f11 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -1,7 +1,8 @@ from .common import InfoExtractor from ..utils import ( - get_element_by_id, + js_to_json, ExtractorError, + int_or_none ) class ShahidIE(InfoExtractor): @@ -12,8 +13,9 @@ class ShahidIE(InfoExtractor): 'info_dict': { 'id': '108084', 'ext': 'm3u8', - 'title': 'بسم الله', - 'description': 'بسم الله' + 'title': 'خواطر الموسم 11 الحلقة 1', + 'description': 'بسم الله', + 'duration': 1166, }, 'params': { # m3u8 download @@ -30,16 +32,33 @@ class ShahidIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - json_data = self._parse_json( - get_element_by_id('jsonld', webpage), + player_info = '' + for line in self._search_regex( 'var flashvars = ({[^}]+})', webpage, 'flashvars').splitlines(): + if '+' not in line and '(' not in line and ')' not in line: + player_info += line + player_info = self._parse_json(js_to_json(player_info), video_id) + video_id = player_info['id'] + player_type = player_info['playerType'] + + video_info = self._download_json( + player_info['url'] + '/' + player_type + '/' + video_id + + '?apiKey=sh%40hid0nlin3&hash=b2wMCTHpSmyxGqQjJFOycRmLSex%2BBpTK%2Fooxy6vHaqs%3D', video_id - ) - title = json_data['name'] - thumbnail = json_data.get('image') - categories = json_data.get('genre') - description = json_data.get('description') + )['data'] + if video_info['error']: + for error in video_info['error']: + raise ExtractorError(error) + video_info = video_info[player_type] + if video_info.get('availabilities').get('plus'): + raise ExtractorError('plus members only') + title = video_info['title'] + thumbnail = video_info.get('thumbnailUrl') + categories = [category['name'] for category in video_info.get('genres')] + description = video_info.get('description') + duration = int_or_none(video_info.get('duration')) + player_json_data = self._download_json( - 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-'+video_id+'.type-player.html', + 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-' + video_id + '.type-' + player_info['type'] + '.html', video_id )['data'] if 'url' in player_json_data: @@ -55,5 +74,6 @@ class ShahidIE(InfoExtractor): 'thumbnail': thumbnail, 'categories': categories, 'description': description, + 'duration': duration, 'formats': formats, } From dfaba1ab9586a7c47017bd897ffda0b795a1d27e Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 25 Jul 2015 19:14:18 +0100 Subject: [PATCH 06/21] [shahid] fix variable name --- youtube_dl/extractor/shahid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index da3141f11..b3b45da24 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -64,7 +64,7 @@ class ShahidIE(InfoExtractor): if 'url' in player_json_data: m3u8_url = player_json_data['url'] else: - for error in json_data['error'].values(): + for error in player_json_data['error'].values(): raise ExtractorError(error) return formats = self._extract_m3u8_formats(m3u8_url, video_id) From 3be3c622dc1d3d7b92c5268a079d202a9f2b0a5a Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 6 Aug 2015 19:37:45 +0100 Subject: [PATCH 07/21] [shahid] generic errors handling and check with flake8 --- youtube_dl/extractor/shahid.py | 42 ++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index b3b45da24..57c159833 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -1,3 +1,6 @@ +# coding: utf-8 +from __future__ import unicode_literals + from .common import InfoExtractor from ..utils import ( js_to_json, @@ -5,6 +8,7 @@ from ..utils import ( int_or_none ) + class ShahidIE(InfoExtractor): _VALID_URL = r'https?://shahid\.mbc\.net/ar/episode/(?P\d+)/?' _TESTS = [ @@ -23,7 +27,7 @@ class ShahidIE(InfoExtractor): } }, { - #shahid plus subscriber only + # shahid plus subscriber only 'url': 'https://shahid.mbc.net/ar/series/90497/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011.html', 'only_matching': True } @@ -32,31 +36,15 @@ class ShahidIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + player_info = '' - for line in self._search_regex( 'var flashvars = ({[^}]+})', webpage, 'flashvars').splitlines(): + for line in self._search_regex('var flashvars = ({[^}]+})', webpage, 'flashvars').splitlines(): if '+' not in line and '(' not in line and ')' not in line: player_info += line player_info = self._parse_json(js_to_json(player_info), video_id) video_id = player_info['id'] player_type = player_info['playerType'] - video_info = self._download_json( - player_info['url'] + '/' + player_type + '/' + video_id + - '?apiKey=sh%40hid0nlin3&hash=b2wMCTHpSmyxGqQjJFOycRmLSex%2BBpTK%2Fooxy6vHaqs%3D', - video_id - )['data'] - if video_info['error']: - for error in video_info['error']: - raise ExtractorError(error) - video_info = video_info[player_type] - if video_info.get('availabilities').get('plus'): - raise ExtractorError('plus members only') - title = video_info['title'] - thumbnail = video_info.get('thumbnailUrl') - categories = [category['name'] for category in video_info.get('genres')] - description = video_info.get('description') - duration = int_or_none(video_info.get('duration')) - player_json_data = self._download_json( 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-' + video_id + '.type-' + player_info['type'] + '.html', video_id @@ -66,8 +54,22 @@ class ShahidIE(InfoExtractor): else: for error in player_json_data['error'].values(): raise ExtractorError(error) - return formats = self._extract_m3u8_formats(m3u8_url, video_id) + + video_info = self._download_json( + player_info['url'] + '/' + player_type + '/' + video_id + '?apiKey=sh%40hid0nlin3&hash=b2wMCTHpSmyxGqQjJFOycRmLSex%2BBpTK%2Fooxy6vHaqs%3D', + video_id + )['data'] + if video_info.get('error'): + for error in video_info['error']: + raise ExtractorError(error) + video_info = video_info[player_type] + title = video_info['title'] + thumbnail = video_info.get('thumbnailUrl') + categories = [category['name'] for category in video_info.get('genres')] + description = video_info.get('description') + duration = int_or_none(video_info.get('duration')) + return { 'id': video_id, 'title': title, From c29458f3ec77072e9c17169b78871bf4473134d6 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 7 Aug 2015 21:38:50 +0100 Subject: [PATCH 08/21] [shahid] change the tests --- youtube_dl/extractor/shahid.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index 57c159833..b2050525e 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -13,13 +13,13 @@ class ShahidIE(InfoExtractor): _VALID_URL = r'https?://shahid\.mbc\.net/ar/episode/(?P\d+)/?' _TESTS = [ { - 'url': 'https://shahid.mbc.net/ar/episode/108084/%D8%AE%D9%88%D8%A7%D8%B7%D8%B1-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-11-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html', + 'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html', 'info_dict': { - 'id': '108084', + 'id': '90574', 'ext': 'm3u8', - 'title': 'خواطر الموسم 11 الحلقة 1', - 'description': 'بسم الله', - 'duration': 1166, + 'title': 'الملك عبدالله الإنسان الموسم 1 كليب 3', + 'description': 'الفيلم الوثائقي - الملك عبد الله الإنسان', + 'duration': 2972, }, 'params': { # m3u8 download @@ -28,7 +28,7 @@ class ShahidIE(InfoExtractor): }, { # shahid plus subscriber only - 'url': 'https://shahid.mbc.net/ar/series/90497/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011.html', + 'url': 'https://shahid.mbc.net/ar/episode/90511/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html', 'only_matching': True } ] From 59e89e62d7b45554cef502dc4986f35618110679 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 8 Aug 2015 12:59:10 +0100 Subject: [PATCH 09/21] [shahid] add default fallbacks for extracting api vars --- youtube_dl/extractor/shahid.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index b2050525e..399140189 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -33,20 +33,30 @@ class ShahidIE(InfoExtractor): } ] + _api_vars = { + 'type': 'player', + 'url': 'http://api.shahid.net/api/v1_1', + 'playerType': 'episode', + } + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) player_info = '' - for line in self._search_regex('var flashvars = ({[^}]+})', webpage, 'flashvars').splitlines(): - if '+' not in line and '(' not in line and ')' not in line: - player_info += line - player_info = self._parse_json(js_to_json(player_info), video_id) - video_id = player_info['id'] - player_type = player_info['playerType'] + flash_vars = self._search_regex('var flashvars = ({[^}]+})', webpage, 'flashvars', None) + if flash_vars is not None: + for line in flash_vars.splitlines(): + if '+' not in line and '(' not in line and ')' not in line: + player_info += line + player_info = self._parse_json(player_info, video_id, js_to_json, False) + if player_info is not None: + for key in self._api_vars: + if key in player_info: + self._api_vars[key] = player_info[key] player_json_data = self._download_json( - 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-' + video_id + '.type-' + player_info['type'] + '.html', + 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-' + video_id + '.type-' + self._api_vars['type'] + '.html', video_id )['data'] if 'url' in player_json_data: @@ -57,13 +67,13 @@ class ShahidIE(InfoExtractor): formats = self._extract_m3u8_formats(m3u8_url, video_id) video_info = self._download_json( - player_info['url'] + '/' + player_type + '/' + video_id + '?apiKey=sh%40hid0nlin3&hash=b2wMCTHpSmyxGqQjJFOycRmLSex%2BBpTK%2Fooxy6vHaqs%3D', + self._api_vars['url'] + '/' + self._api_vars['playerType'] + '/' + video_id + '?apiKey=sh%40hid0nlin3&hash=b2wMCTHpSmyxGqQjJFOycRmLSex%2BBpTK%2Fooxy6vHaqs%3D', video_id )['data'] if video_info.get('error'): for error in video_info['error']: raise ExtractorError(error) - video_info = video_info[player_type] + video_info = video_info[self._api_vars['playerType']] title = video_info['title'] thumbnail = video_info.get('thumbnailUrl') categories = [category['name'] for category in video_info.get('genres')] From 3cafca04aaf2bfc4d31e8255b9cb75e8f1ad4b16 Mon Sep 17 00:00:00 2001 From: reddraggone9 Date: Fri, 14 Aug 2015 00:35:35 -0500 Subject: [PATCH 10/21] Updated line numbers in the fragment portion of README links. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 15baf75ce..e91119d84 100644 --- a/README.md +++ b/README.md @@ -544,7 +544,7 @@ If you want to add support for a new site, you can follow this quick list (assum ``` 5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will be then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. -7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want. +7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62). Add tests and code for as many as you want. 8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8). 9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: @@ -572,7 +572,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl: ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc']) ``` -Most likely, you'll want to use various options. For a list of what can be done, have a look at [youtube_dl/YoutubeDL.py](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L69). For a start, if you want to intercept youtube-dl's output, set a `logger` object. +Most likely, you'll want to use various options. For a list of what can be done, have a look at [youtube_dl/YoutubeDL.py](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L92). For a start, if you want to intercept youtube-dl's output, set a `logger` object. Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file: From 4d2ad866f347086d3a1cf4cb7e0a8cadd3c87748 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 14 Aug 2015 19:18:03 +0800 Subject: [PATCH 11/21] [README.md] Document format_id field in output template section (#6557) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 15baf75ce..8fa402ee2 100644 --- a/README.md +++ b/README.md @@ -272,6 +272,7 @@ The `-o` option allows users to indicate a template for the output file names. T - `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero. - `playlist`: The name or the id of the playlist that contains the video. - `playlist_index`: The index of the video in the playlist, a five-digit number. + - `format_id`: The sequence will be replaced by the format code specified by `--format`. The current default template is `%(title)s-%(id)s.%(ext)s`. From 41dbc50f9c7dfaad4084fbeac77192c7ac37daca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 14 Aug 2015 22:07:02 +0600 Subject: [PATCH 12/21] [lynda] Capture and output login error (Closes #6556) --- youtube_dl/extractor/lynda.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index deead220a..5b9157ed4 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -11,6 +11,7 @@ from ..compat import ( ) from ..utils import ( ExtractorError, + clean_html, int_or_none, ) @@ -70,6 +71,15 @@ class LyndaBaseIE(InfoExtractor): 'Confirming log in and log out from another device') if all(not re.search(p, login_page) for p in ('isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')): + if 'login error' in login_page: + mobj = re.search( + r'(?s)]+class="topmost">(?P[^<]+)</h1>\s*<div>(?P<description>.+?)</div>', + login_page) + if mobj: + raise ExtractorError( + 'lynda returned error: %s - %s' + % (mobj.group('title'), clean_html(mobj.group('description'))), + expected=True) raise ExtractorError('Unable to log in') From 6be5e46994ea5db76d7a2659260606898c265957 Mon Sep 17 00:00:00 2001 From: "Sergey M." <dstftw@gmail.com> Date: Fri, 14 Aug 2015 22:22:39 +0600 Subject: [PATCH 13/21] [README.md] Clarify line ranges --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e52cdb941..542a7c26a 100644 --- a/README.md +++ b/README.md @@ -545,7 +545,7 @@ If you want to add support for a new site, you can follow this quick list (assum ``` 5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will be then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. -7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62). Add tests and code for as many as you want. +7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want. 8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8). 9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: @@ -573,7 +573,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl: ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc']) ``` -Most likely, you'll want to use various options. For a list of what can be done, have a look at [youtube_dl/YoutubeDL.py](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L92). For a start, if you want to intercept youtube-dl's output, set a `logger` object. +Most likely, you'll want to use various options. For a list of what can be done, have a look at [youtube_dl/YoutubeDL.py](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L117-L265). For a start, if you want to intercept youtube-dl's output, set a `logger` object. Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file: From d0d6c097fc7859180f16a445536029c600b1e57f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 15 Aug 2015 15:17:27 +0800 Subject: [PATCH 14/21] [moniker] Support embed- URLs (#6450) --- youtube_dl/extractor/moniker.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/moniker.py b/youtube_dl/extractor/moniker.py index 88dcd4f73..69e4bcd1a 100644 --- a/youtube_dl/extractor/moniker.py +++ b/youtube_dl/extractor/moniker.py @@ -9,7 +9,10 @@ from ..compat import ( compat_urllib_parse, compat_urllib_request, ) -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + remove_start, +) class MonikerIE(InfoExtractor): @@ -24,6 +27,14 @@ class MonikerIE(InfoExtractor): 'ext': 'mp4', 'title': 'youtube-dl test video', }, + }, { + 'url': 'http://allmyvideos.net/embed-jih3nce3x6wn', + 'md5': '710883dee1bfc370ecf9fa6a89307c88', + 'info_dict': { + 'id': 'jih3nce3x6wn', + 'ext': 'mp4', + 'title': 'youtube-dl test video', + }, }, { 'url': 'http://vidspot.net/l2ngsmhs8ci5', 'md5': '710883dee1bfc370ecf9fa6a89307c88', @@ -38,7 +49,10 @@ class MonikerIE(InfoExtractor): }] def _real_extract(self, url): - video_id = self._match_id(url) + orig_video_id = self._match_id(url) + video_id = remove_start(orig_video_id, 'embed-') + url = url.replace(orig_video_id, video_id) + assert re.match(self._VALID_URL, url) is not None orig_webpage = self._download_webpage(url, video_id) if '>File Not Found<' in orig_webpage: From 8b8c1093b65ee02aad859ed8d82217312ed0d9d8 Mon Sep 17 00:00:00 2001 From: Shaun Walbridge <shaun.walbridge@gmail.com> Date: Sat, 18 Apr 2015 00:37:04 -0400 Subject: [PATCH 15/21] [EsriVideo] Add new extractor Add extractor for [videos.esri.com](https://videos.esri.com), a collection of videos relating to GIS. --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/videoesri.py | 90 +++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 youtube_dl/extractor/videoesri.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 83d21bd15..a4387636f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -695,6 +695,7 @@ from .vice import ViceIE from .viddler import ViddlerIE from .videobam import VideoBamIE from .videodetective import VideoDetectiveIE +from .videoesri import VideoEsriIE from .videolecturesnet import VideoLecturesNetIE from .videofyme import VideofyMeIE from .videomega import VideoMegaIE diff --git a/youtube_dl/extractor/videoesri.py b/youtube_dl/extractor/videoesri.py new file mode 100644 index 000000000..0f84323a4 --- /dev/null +++ b/youtube_dl/extractor/videoesri.py @@ -0,0 +1,90 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import os +import re + +from .common import InfoExtractor + +from ..utils import ( + unified_strdate +) + + +class VideoEsriIE(InfoExtractor): + _VALID_URL = r'https?://video\.esri\.com/watch/(?P<id>[0-9]+)' + _TEST = { + 'url': 'https://video.esri.com/watch/4228', + 'md5': '170b4d513c2466ed483c150a48384133', + 'info_dict': { + 'id': '4228', + 'ext': 'mp4', + 'title': 'AppStudio for ArcGIS', + 'thumbnail': 're:^https?://.*\.jpg$', + 'upload_date': '20150310', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title') + + upload_date_raw = self._search_regex( + r'http-equiv="last-modified" content="(.*)"', + webpage, 'upload date') + upload_date = unified_strdate(upload_date_raw) + + settings_info = self._search_regex( + r'evPlayerSettings = {(.*?);\s*$', + webpage, 'settings info', flags=re.MULTILINE | re.DOTALL) + + # thumbnail includes '_x' for large, also has {_m,_t,_s} or + # without size suffix returns full image + thumbnail_path = re.findall( + r'image\': \'(\/thumbs.*)\'', + settings_info)[0] + + if thumbnail_path: + thumbnail = '/'.join(['http://video.esri.com', thumbnail_path]) + + # note that this misses the (exceedly rare) webm files + video_paths = re.findall(r'mp4:(.*)\'', settings_info) + + # find possible http servers of the mp4 files (also has rtsp) + base_url = re.findall( + r'netstreambasepath\':\s\'(h.*)\'', settings_info)[0] + + # these are the numbers used internally, but really map + # to other resolutions, e.g. 960 is 720p. + heights = [480, 720, 960] + videos_by_res = {} + for video_path in video_paths: + url = "{base_url}{video_path}".format( + base_url=base_url, + video_path=video_path) + filename, ext = os.path.splitext(video_path) + height_label = int(filename.split('_')[1]) + videos_by_res[height_label] = { + 'url': url, + 'ext': ext[1:], + 'protocol': 'http', # http-only supported currently + } + + formats = [] + for height in heights: + if height in videos_by_res: + formats.append(videos_by_res[height]) + + result = { + 'id': video_id, + 'title': title, + 'upload_date': upload_date, + 'formats': formats, + } + + if thumbnail: + result['thumbnail'] = thumbnail + + return result From 8b9848ac5678356757f67a412f7ed89a0f559be7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Aug 2015 15:58:30 +0600 Subject: [PATCH 16/21] [extractor/common] Expand meta regex --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5982055be..16ae4b98f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -640,7 +640,7 @@ class InfoExtractor(object): @staticmethod def _meta_regex(prop): return r'''(?isx)<meta - (?=[^>]+(?:itemprop|name|property|id)=(["\']?)%s\1) + (?=[^>]+(?:itemprop|name|property|id|http-equiv)=(["\']?)%s\1) [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop) def _og_search_property(self, prop, html, name=None, **kargs): From 3aa697f993e3719cf032c5b1e192a034100b0534 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Aug 2015 15:58:56 +0600 Subject: [PATCH 17/21] [esri:video] Extract all formats and simplify --- youtube_dl/extractor/videoesri.py | 106 +++++++++++++----------------- 1 file changed, 45 insertions(+), 61 deletions(-) diff --git a/youtube_dl/extractor/videoesri.py b/youtube_dl/extractor/videoesri.py index 0f84323a4..84faba678 100644 --- a/youtube_dl/extractor/videoesri.py +++ b/youtube_dl/extractor/videoesri.py @@ -1,90 +1,74 @@ # coding: utf-8 from __future__ import unicode_literals -import os import re from .common import InfoExtractor - +from ..compat import compat_urlparse from ..utils import ( - unified_strdate + int_or_none, + parse_filesize, + unified_strdate, ) class VideoEsriIE(InfoExtractor): _VALID_URL = r'https?://video\.esri\.com/watch/(?P<id>[0-9]+)' _TEST = { - 'url': 'https://video.esri.com/watch/4228', - 'md5': '170b4d513c2466ed483c150a48384133', + 'url': 'https://video.esri.com/watch/1124/arcgis-online-_dash_-developing-applications', + 'md5': 'd4aaf1408b221f1b38227a9bbaeb95bc', 'info_dict': { - 'id': '4228', + 'id': '1124', 'ext': 'mp4', - 'title': 'AppStudio for ArcGIS', + 'title': 'ArcGIS Online - Developing Applications', + 'description': 'Jeremy Bartley demonstrates how to develop applications with ArcGIS Online.', 'thumbnail': 're:^https?://.*\.jpg$', - 'upload_date': '20150310', + 'duration': 185, + 'upload_date': '20120419', } } def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title') - - upload_date_raw = self._search_regex( - r'http-equiv="last-modified" content="(.*)"', - webpage, 'upload date') - upload_date = unified_strdate(upload_date_raw) - - settings_info = self._search_regex( - r'evPlayerSettings = {(.*?);\s*$', - webpage, 'settings info', flags=re.MULTILINE | re.DOTALL) - - # thumbnail includes '_x' for large, also has {_m,_t,_s} or - # without size suffix returns full image - thumbnail_path = re.findall( - r'image\': \'(\/thumbs.*)\'', - settings_info)[0] - - if thumbnail_path: - thumbnail = '/'.join(['http://video.esri.com', thumbnail_path]) - - # note that this misses the (exceedly rare) webm files - video_paths = re.findall(r'mp4:(.*)\'', settings_info) - - # find possible http servers of the mp4 files (also has rtsp) - base_url = re.findall( - r'netstreambasepath\':\s\'(h.*)\'', settings_info)[0] - - # these are the numbers used internally, but really map - # to other resolutions, e.g. 960 is 720p. - heights = [480, 720, 960] - videos_by_res = {} - for video_path in video_paths: - url = "{base_url}{video_path}".format( - base_url=base_url, - video_path=video_path) - filename, ext = os.path.splitext(video_path) - height_label = int(filename.split('_')[1]) - videos_by_res[height_label] = { - 'url': url, - 'ext': ext[1:], - 'protocol': 'http', # http-only supported currently - } - formats = [] - for height in heights: - if height in videos_by_res: - formats.append(videos_by_res[height]) + for width, height, content in re.findall( + r'(?s)<li><strong>(\d+)x(\d+):</strong>(.+?)</li>', webpage): + for video_url, ext, filesize in re.findall( + r'<a[^>]+href="([^"]+)">([^<]+) \(([^<]+)\)</a>', content): + formats.append({ + 'url': compat_urlparse.urljoin(url, video_url), + 'ext': ext.lower(), + 'format_id': '%s-%s' % (ext.lower(), height), + 'width': int(width), + 'height': int(height), + 'filesize_approx': parse_filesize(filesize), + }) + self._sort_formats(formats) - result = { + title = self._html_search_meta('title', webpage, 'title') + description = self._html_search_meta( + 'description', webpage, 'description', fatal=False) + + thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail', fatal=False) + if thumbnail: + thumbnail = re.sub(r'_[st]\.jpg$', '_x.jpg', thumbnail) + + duration = int_or_none(self._search_regex( + [r'var\s+videoSeconds\s*=\s*(\d+)', r"'duration'\s*:\s*(\d+)"], + webpage, 'duration', fatal=False)) + + upload_date = unified_strdate(self._html_search_meta( + 'last-modified', webpage, 'upload date', fatal=None)) + + return { 'id': video_id, 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, 'upload_date': upload_date, - 'formats': formats, + 'formats': formats } - - if thumbnail: - result['thumbnail'] = thumbnail - - return result From 9c21f229236c77a8865c857b43c6cbd95dcc6f23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Aug 2015 15:59:35 +0600 Subject: [PATCH 18/21] [esri:video] Rename extractor --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/{videoesri.py => esri.py} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename youtube_dl/extractor/{videoesri.py => esri.py} (98%) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a4387636f..760f73918 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -695,7 +695,7 @@ from .vice import ViceIE from .viddler import ViddlerIE from .videobam import VideoBamIE from .videodetective import VideoDetectiveIE -from .videoesri import VideoEsriIE +from .esri import EsriVideoIE from .videolecturesnet import VideoLecturesNetIE from .videofyme import VideofyMeIE from .videomega import VideoMegaIE diff --git a/youtube_dl/extractor/videoesri.py b/youtube_dl/extractor/esri.py similarity index 98% rename from youtube_dl/extractor/videoesri.py rename to youtube_dl/extractor/esri.py index 84faba678..bf5d2019f 100644 --- a/youtube_dl/extractor/videoesri.py +++ b/youtube_dl/extractor/esri.py @@ -12,7 +12,7 @@ from ..utils import ( ) -class VideoEsriIE(InfoExtractor): +class EsriVideoIE(InfoExtractor): _VALID_URL = r'https?://video\.esri\.com/watch/(?P<id>[0-9]+)' _TEST = { 'url': 'https://video.esri.com/watch/1124/arcgis-online-_dash_-developing-applications', From 1d25e9d173931da0d2cb65b114f44bbf24184f6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Aug 2015 16:00:24 +0600 Subject: [PATCH 19/21] [extractor/__init__] Fix order --- youtube_dl/extractor/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 760f73918..a8be63624 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -158,6 +158,7 @@ from .eporner import EpornerIE from .eroprofile import EroProfileIE from .escapist import EscapistIE from .espn import ESPNIE +from .esri import EsriVideoIE from .everyonesmixtape import EveryonesMixtapeIE from .exfm import ExfmIE from .expotv import ExpoTVIE @@ -695,7 +696,6 @@ from .vice import ViceIE from .viddler import ViddlerIE from .videobam import VideoBamIE from .videodetective import VideoDetectiveIE -from .esri import EsriVideoIE from .videolecturesnet import VideoLecturesNetIE from .videofyme import VideofyMeIE from .videomega import VideoMegaIE From fab83e24567226fa70e7f5076d961b83239ccfbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Aug 2015 16:10:20 +0600 Subject: [PATCH 20/21] Credit @scw for video.esri.com (#5459) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index ded9e87d2..d1693224e 100644 --- a/AUTHORS +++ b/AUTHORS @@ -139,3 +139,4 @@ slangangular Behrouz Abbasi ngld nyuszika7h +Shaun Walbridge From c576ef1e7cfd31ca94ca6025c054b3ae4f611b21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Aug 2015 18:13:37 +0600 Subject: [PATCH 21/21] [shahid] Improve and simplify --- youtube_dl/extractor/shahid.py | 140 ++++++++++++++++++--------------- 1 file changed, 78 insertions(+), 62 deletions(-) diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index 399140189..6e9903d5e 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -2,90 +2,106 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_urllib_parse from ..utils import ( - js_to_json, ExtractorError, - int_or_none + int_or_none, + parse_iso8601, ) class ShahidIE(InfoExtractor): _VALID_URL = r'https?://shahid\.mbc\.net/ar/episode/(?P<id>\d+)/?' - _TESTS = [ - { - 'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html', - 'info_dict': { - 'id': '90574', - 'ext': 'm3u8', - 'title': 'الملك عبدالله الإنسان الموسم 1 كليب 3', - 'description': 'الفيلم الوثائقي - الملك عبد الله الإنسان', - 'duration': 2972, - }, - 'params': { - # m3u8 download - 'skip_download': True, - } + _TESTS = [{ + 'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html', + 'info_dict': { + 'id': '90574', + 'ext': 'm3u8', + 'title': 'الملك عبدالله الإنسان الموسم 1 كليب 3', + 'description': 'الفيلم الوثائقي - الملك عبد الله الإنسان', + 'duration': 2972, + 'timestamp': 1422057420, + 'upload_date': '20150123', }, - { - # shahid plus subscriber only - 'url': 'https://shahid.mbc.net/ar/episode/90511/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html', - 'only_matching': True + 'params': { + # m3u8 download + 'skip_download': True, } - ] + }, { + # shahid plus subscriber only + 'url': 'https://shahid.mbc.net/ar/episode/90511/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html', + 'only_matching': True + }] - _api_vars = { - 'type': 'player', - 'url': 'http://api.shahid.net/api/v1_1', - 'playerType': 'episode', - } + def _handle_error(self, response): + if not isinstance(response, dict): + return + error = response.get('error') + if error: + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())), + expected=True) + + def _download_json(self, url, video_id, note='Downloading JSON metadata'): + response = super(ShahidIE, self)._download_json(url, video_id, note)['data'] + self._handle_error(response) + return response def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - player_info = '' - flash_vars = self._search_regex('var flashvars = ({[^}]+})', webpage, 'flashvars', None) - if flash_vars is not None: - for line in flash_vars.splitlines(): - if '+' not in line and '(' not in line and ')' not in line: - player_info += line - player_info = self._parse_json(player_info, video_id, js_to_json, False) - if player_info is not None: - for key in self._api_vars: - if key in player_info: - self._api_vars[key] = player_info[key] + api_vars = { + 'id': video_id, + 'type': 'player', + 'url': 'http://api.shahid.net/api/v1_1', + 'playerType': 'episode', + } - player_json_data = self._download_json( - 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-' + video_id + '.type-' + self._api_vars['type'] + '.html', - video_id - )['data'] - if 'url' in player_json_data: - m3u8_url = player_json_data['url'] - else: - for error in player_json_data['error'].values(): - raise ExtractorError(error) - formats = self._extract_m3u8_formats(m3u8_url, video_id) + flashvars = self._search_regex( + r'var\s+flashvars\s*=\s*({[^}]+})', webpage, 'flashvars', default=None) + if flashvars: + for key in api_vars.keys(): + value = self._search_regex( + r'\b%s\s*:\s*(?P<q>["\'])(?P<value>.+?)(?P=q)' % key, + flashvars, 'type', default=None, group='value') + if value: + api_vars[key] = value - video_info = self._download_json( - self._api_vars['url'] + '/' + self._api_vars['playerType'] + '/' + video_id + '?apiKey=sh%40hid0nlin3&hash=b2wMCTHpSmyxGqQjJFOycRmLSex%2BBpTK%2Fooxy6vHaqs%3D', - video_id - )['data'] - if video_info.get('error'): - for error in video_info['error']: - raise ExtractorError(error) - video_info = video_info[self._api_vars['playerType']] - title = video_info['title'] - thumbnail = video_info.get('thumbnailUrl') - categories = [category['name'] for category in video_info.get('genres')] - description = video_info.get('description') - duration = int_or_none(video_info.get('duration')) + player = self._download_json( + 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-%s.html' + % (video_id, api_vars['type']), video_id, 'Downloading player JSON') + + formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4') + + video = self._download_json( + '%s/%s/%s?%s' % ( + api_vars['url'], api_vars['playerType'], api_vars['id'], + compat_urllib_parse.urlencode({ + 'apiKey': 'sh@hid0nlin3', + 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', + }).encode('utf-8')), + video_id, 'Downloading video JSON') + + video = video[api_vars['playerType']] + + title = video['title'] + description = video.get('description') + thumbnail = video.get('thumbnailUrl') + duration = int_or_none(video.get('duration')) + timestamp = parse_iso8601(video.get('referenceDate')) + categories = [ + category['name'] + for category in video.get('genres', []) if 'name' in category] return { 'id': video_id, 'title': title, - 'thumbnail': thumbnail, - 'categories': categories, 'description': description, + 'thumbnail': thumbnail, 'duration': duration, + 'timestamp': timestamp, + 'categories': categories, 'formats': formats, }