mirror of
https://github.com/l1ving/youtube-dl
synced 2025-03-19 00:47:17 +08:00
Merge branch 'master' into Vimeo-issue-16717
This commit is contained in:
commit
1b4d8e8573
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.07.10*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.07.10**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.07.21*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.07.21**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2018.07.10
|
||||
[debug] youtube-dl version 2018.07.21
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
23
ChangeLog
23
ChangeLog
@ -1,3 +1,26 @@
|
||||
version 2018.07.21
|
||||
|
||||
Core
|
||||
+ [utils] Introduce url_or_none
|
||||
* [utils] Allow JSONP without function name (#17028)
|
||||
+ [extractor/common] Extract DASH and MSS formats from SMIL manifests
|
||||
|
||||
Extractors
|
||||
+ [bbc] Add support for BBC Radio Play pages (#17022)
|
||||
* [iwara] Fix download URLs (#17026)
|
||||
* [vrtnu] Relax title extraction and extract JSON-LD (#17018)
|
||||
+ [viu] Pass Referer and Origin headers and area id (#16992)
|
||||
+ [vimeo] Add another config regular expression (#17013)
|
||||
+ [facebook] Extract view count (#16942)
|
||||
* [dailymotion] Improve description extraction (#16984)
|
||||
* [slutload] Fix and improve extraction (#17001)
|
||||
* [mediaset] Fix extraction (#16977)
|
||||
+ [theplatform] Add support for theplatform TLD customization (#16977)
|
||||
* [imgur] Relax URL regular expression (#16987)
|
||||
* [pornhub] Improve extraction and extract all formats (#12166, #15891, #16262,
|
||||
#16959)
|
||||
|
||||
|
||||
version 2018.07.10
|
||||
|
||||
Core
|
||||
|
@ -78,6 +78,7 @@ from youtube_dl.utils import (
|
||||
uppercase_escape,
|
||||
lowercase_escape,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
base_url,
|
||||
urljoin,
|
||||
urlencode_postdata,
|
||||
@ -507,6 +508,16 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
|
||||
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
|
||||
|
||||
def test_url_or_none(self):
|
||||
self.assertEqual(url_or_none(None), None)
|
||||
self.assertEqual(url_or_none(''), None)
|
||||
self.assertEqual(url_or_none('foo'), None)
|
||||
self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
|
||||
self.assertEqual(url_or_none('https://foo.de'), 'https://foo.de')
|
||||
self.assertEqual(url_or_none('http$://foo.de'), None)
|
||||
self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
|
||||
self.assertEqual(url_or_none('//foo.de'), '//foo.de')
|
||||
|
||||
def test_parse_age_limit(self):
|
||||
self.assertEqual(parse_age_limit(None), None)
|
||||
self.assertEqual(parse_age_limit(False), None)
|
||||
@ -717,6 +728,10 @@ class TestUtil(unittest.TestCase):
|
||||
d = json.loads(stripped)
|
||||
self.assertEqual(d, {'status': 'success'})
|
||||
|
||||
stripped = strip_jsonp('({"status": "success"});')
|
||||
d = json.loads(stripped)
|
||||
self.assertEqual(d, {'status': 'success'})
|
||||
|
||||
def test_uppercase_escape(self):
|
||||
self.assertEqual(uppercase_escape('aä'), 'aä')
|
||||
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
|
||||
|
@ -7,6 +7,7 @@ from .turner import TurnerBaseIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -98,7 +99,7 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
if not video_id:
|
||||
entries = []
|
||||
for episode in video_data.get('archiveEpisodes', []):
|
||||
episode_url = episode.get('url')
|
||||
episode_url = url_or_none(episode.get('url'))
|
||||
if not episode_url:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
|
@ -9,6 +9,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
@ -304,7 +305,7 @@ class AfreecaTVIE(InfoExtractor):
|
||||
file_elements = video_element.findall(compat_xpath('./file'))
|
||||
one = len(file_elements) == 1
|
||||
for file_num, file_element in enumerate(file_elements, start=1):
|
||||
file_url = file_element.text
|
||||
file_url = url_or_none(file_element.text)
|
||||
if not file_url:
|
||||
continue
|
||||
key = file_element.get('key', '')
|
||||
|
@ -3,11 +3,12 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
mimetype2ext,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -35,7 +36,7 @@ class AMPIE(InfoExtractor):
|
||||
media_thumbnail = [media_thumbnail]
|
||||
for thumbnail_data in media_thumbnail:
|
||||
thumbnail = thumbnail_data.get('@attributes', {})
|
||||
thumbnail_url = thumbnail.get('url')
|
||||
thumbnail_url = url_or_none(thumbnail.get('url'))
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
@ -51,7 +52,7 @@ class AMPIE(InfoExtractor):
|
||||
media_subtitle = [media_subtitle]
|
||||
for subtitle_data in media_subtitle:
|
||||
subtitle = subtitle_data.get('@attributes', {})
|
||||
subtitle_href = subtitle.get('href')
|
||||
subtitle_href = url_or_none(subtitle.get('href'))
|
||||
if not subtitle_href:
|
||||
continue
|
||||
subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
|
||||
@ -65,7 +66,7 @@ class AMPIE(InfoExtractor):
|
||||
media_content = [media_content]
|
||||
for media_data in media_content:
|
||||
media = media_data.get('@attributes', {})
|
||||
media_url = media.get('url')
|
||||
media_url = url_or_none(media.get('url'))
|
||||
if not media_url:
|
||||
continue
|
||||
ext = mimetype2ext(media.get('type')) or determine_ext(media_url)
|
||||
@ -79,7 +80,7 @@ class AMPIE(InfoExtractor):
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
|
||||
'url': media['url'],
|
||||
'url': media_url,
|
||||
'tbr': int_or_none(media.get('bitrate')),
|
||||
'filesize': int_or_none(media.get('fileSize')),
|
||||
'ext': ext,
|
||||
|
@ -8,6 +8,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
@ -165,7 +166,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
}, fatal=False)
|
||||
if not playlist:
|
||||
continue
|
||||
stream_url = playlist.get('streamurl')
|
||||
stream_url = url_or_none(playlist.get('streamurl'))
|
||||
if stream_url:
|
||||
rtmp = re.search(
|
||||
r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
|
||||
|
@ -7,6 +7,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -77,7 +78,7 @@ class AolIE(InfoExtractor):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
for rendition in video_data.get('renditions', []):
|
||||
video_url = rendition.get('url')
|
||||
video_url = url_or_none(rendition.get('url'))
|
||||
if not video_url:
|
||||
continue
|
||||
ext = rendition.get('format')
|
||||
|
@ -4,10 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -68,8 +68,8 @@ class APAIE(InfoExtractor):
|
||||
for source in sources:
|
||||
if not isinstance(source, dict):
|
||||
continue
|
||||
source_url = source.get('file')
|
||||
if not source_url or not isinstance(source_url, compat_str):
|
||||
source_url = url_or_none(source.get('file'))
|
||||
if not source_url:
|
||||
continue
|
||||
ext = determine_ext(source_url)
|
||||
if ext == 'm3u8':
|
||||
|
@ -5,6 +5,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -43,7 +44,7 @@ class AparatIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for item in file_list[0]:
|
||||
file_url = item.get('file')
|
||||
file_url = url_or_none(item.get('file'))
|
||||
if not file_url:
|
||||
continue
|
||||
ext = mimetype2ext(item.get('type'))
|
||||
|
@ -5,7 +5,6 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .generic import GenericIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
@ -15,6 +14,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
xpath_text,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
from ..compat import compat_etree_fromstring
|
||||
|
||||
@ -100,7 +100,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
quality = stream.get('_quality')
|
||||
server = stream.get('_server')
|
||||
for stream_url in stream_urls:
|
||||
if not isinstance(stream_url, compat_str) or '//' not in stream_url:
|
||||
if not url_or_none(stream_url):
|
||||
continue
|
||||
ext = determine_ext(stream_url)
|
||||
if quality != 'auto' and ext in ('f4m', 'm3u8'):
|
||||
|
@ -19,6 +19,7 @@ from ..utils import (
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -131,8 +132,8 @@ class BandcampIE(InfoExtractor):
|
||||
fatal=False)
|
||||
if not stat:
|
||||
continue
|
||||
retry_url = stat.get('retry_url')
|
||||
if not isinstance(retry_url, compat_str):
|
||||
retry_url = url_or_none(stat.get('retry_url'))
|
||||
if not retry_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': self._proto_relative_url(retry_url, 'http:'),
|
||||
@ -306,7 +307,7 @@ class BandcampWeeklyIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in show['audio_stream'].items():
|
||||
if not isinstance(format_url, compat_str):
|
||||
if not url_or_none(format_url):
|
||||
continue
|
||||
for known_ext in KNOWN_EXTENSIONS:
|
||||
if known_ext in format_id:
|
||||
|
@ -778,6 +778,17 @@ class BBCIE(BBCCoUkIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# window.__PRELOADED_STATE__
|
||||
'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
|
||||
'info_dict': {
|
||||
'id': 'b0b9z4vz',
|
||||
'ext': 'mp4',
|
||||
'title': 'Prom 6: An American in Paris and Turangalila',
|
||||
'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
|
||||
'uploader': 'Radio 3',
|
||||
'uploader_id': 'bbc_radio_three',
|
||||
},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@ -1000,6 +1011,36 @@ class BBCIE(BBCCoUkIE):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
preload_state = self._parse_json(self._search_regex(
|
||||
r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
|
||||
'preload state', default='{}'), playlist_id, fatal=False)
|
||||
if preload_state:
|
||||
current_programme = preload_state.get('programmes', {}).get('current') or {}
|
||||
programme_id = current_programme.get('id')
|
||||
if current_programme and programme_id and current_programme.get('type') == 'playable_item':
|
||||
title = current_programme.get('titles', {}).get('tertiary') or playlist_title
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
self._sort_formats(formats)
|
||||
synopses = current_programme.get('synopses') or {}
|
||||
network = current_programme.get('network') or {}
|
||||
duration = int_or_none(
|
||||
current_programme.get('duration', {}).get('value'))
|
||||
thumbnail = None
|
||||
image_url = current_programme.get('image_url')
|
||||
if image_url:
|
||||
thumbnail = image_url.replace('{recipe}', '1920x1920')
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': dict_get(synopses, ('long', 'medium', 'short')),
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'uploader': network.get('short_title'),
|
||||
'uploader_id': network.get('id'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
bbc3_config = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
|
||||
|
@ -4,8 +4,10 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BreakIE(InfoExtractor):
|
||||
@ -55,8 +57,8 @@ class BreakIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for video in content:
|
||||
video_url = video.get('url')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
video_url = url_or_none(video.get('url'))
|
||||
if not video_url:
|
||||
continue
|
||||
bitrate = int_or_none(self._search_regex(
|
||||
r'(\d+)_kbps', video_url, 'tbr', default=None))
|
||||
|
@ -2,10 +2,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -56,8 +56,8 @@ class CamModelsIE(InfoExtractor):
|
||||
for media in encodings:
|
||||
if not isinstance(media, dict):
|
||||
continue
|
||||
media_url = media.get('location')
|
||||
if not media_url or not isinstance(media_url, compat_str):
|
||||
media_url = url_or_none(media.get('location'))
|
||||
if not media_url:
|
||||
continue
|
||||
|
||||
format_id_list = [format_id]
|
||||
|
@ -11,6 +11,7 @@ from ..utils import (
|
||||
strip_or_none,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
@ -248,9 +249,13 @@ class VrtNUIE(GigyaBaseIE):
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
info = self._search_json_ld(webpage, display_id, default={})
|
||||
|
||||
# title is optional here since it may be extracted by extractor
|
||||
# that is delegated from here
|
||||
title = strip_or_none(self._html_search_regex(
|
||||
r'(?ms)<h1 class="content__heading">(.+?)</h1>',
|
||||
webpage, 'title').strip()
|
||||
webpage, 'title', default=None))
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?ms)<div class="content__description">(.+?)</div>',
|
||||
@ -295,7 +300,7 @@ class VrtNUIE(GigyaBaseIE):
|
||||
# the first one
|
||||
video_id = list(video.values())[0].get('videoid')
|
||||
|
||||
return {
|
||||
return merge_dicts(info, {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
|
||||
'ie_key': CanvasIE.ie_key(),
|
||||
@ -307,4 +312,4 @@ class VrtNUIE(GigyaBaseIE):
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'release_date': release_date,
|
||||
}
|
||||
})
|
||||
|
@ -4,13 +4,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_resolution,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -53,8 +53,8 @@ class CCMAIE(InfoExtractor):
|
||||
media_url = media['media']['url']
|
||||
if isinstance(media_url, list):
|
||||
for format_ in media_url:
|
||||
format_url = format_.get('file')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(format_.get('file'))
|
||||
if not format_url:
|
||||
continue
|
||||
label = format_.get('label')
|
||||
f = parse_resolution(label)
|
||||
|
@ -4,16 +4,14 @@ from __future__ import unicode_literals, division
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
url_or_none,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
@ -86,8 +84,8 @@ class CrackleIE(InfoExtractor):
|
||||
for e in media['MediaURLs']:
|
||||
if e.get('UseDRM') is True:
|
||||
continue
|
||||
format_url = e.get('Path')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(e.get('Path'))
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'm3u8':
|
||||
@ -124,8 +122,8 @@ class CrackleIE(InfoExtractor):
|
||||
for cc_file in cc_files:
|
||||
if not isinstance(cc_file, dict):
|
||||
continue
|
||||
cc_url = cc_file.get('Path')
|
||||
if not cc_url or not isinstance(cc_url, compat_str):
|
||||
cc_url = url_or_none(cc_file.get('Path'))
|
||||
if not cc_url:
|
||||
continue
|
||||
lang = cc_file.get('Locale') or 'en'
|
||||
subtitles.setdefault(lang, []).append({'url': cc_url})
|
||||
|
@ -7,6 +7,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -69,7 +70,7 @@ class DctpTvIE(InfoExtractor):
|
||||
endpoint = next(
|
||||
server['endpoint']
|
||||
for server in servers
|
||||
if isinstance(server.get('endpoint'), compat_str) and
|
||||
if url_or_none(server.get('endpoint')) and
|
||||
'cloudfront' in server['endpoint'])
|
||||
else:
|
||||
endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
|
||||
@ -92,8 +93,8 @@ class DctpTvIE(InfoExtractor):
|
||||
for image in images:
|
||||
if not isinstance(image, dict):
|
||||
continue
|
||||
image_url = image.get('url')
|
||||
if not image_url or not isinstance(image_url, compat_str):
|
||||
image_url = url_or_none(image.get('url'))
|
||||
if not image_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': image_url,
|
||||
|
@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
@ -12,6 +11,7 @@ from ..utils import (
|
||||
parse_age_limit,
|
||||
remove_end,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -69,9 +69,8 @@ class DiscoveryGoBaseIE(InfoExtractor):
|
||||
captions = stream.get('captions')
|
||||
if isinstance(captions, list):
|
||||
for caption in captions:
|
||||
subtitle_url = caption.get('fileUrl')
|
||||
if (not subtitle_url or not isinstance(subtitle_url, compat_str) or
|
||||
not subtitle_url.startswith('http')):
|
||||
subtitle_url = url_or_none(caption.get('fileUrl'))
|
||||
if not subtitle_url or not subtitle_url.startswith('http'):
|
||||
continue
|
||||
lang = caption.get('fileLang', 'en')
|
||||
ext = determine_ext(subtitle_url)
|
||||
|
@ -7,7 +7,6 @@ import json
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@ -17,6 +16,7 @@ from ..utils import (
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -139,8 +139,8 @@ class DramaFeverIE(DramaFeverBaseIE):
|
||||
for sub in subs:
|
||||
if not isinstance(sub, dict):
|
||||
continue
|
||||
sub_url = sub.get('url')
|
||||
if not sub_url or not isinstance(sub_url, compat_str):
|
||||
sub_url = url_or_none(sub.get('url'))
|
||||
if not sub_url:
|
||||
continue
|
||||
subtitles.setdefault(
|
||||
sub.get('code') or sub.get('language') or 'en', []).append({
|
||||
@ -163,8 +163,8 @@ class DramaFeverIE(DramaFeverBaseIE):
|
||||
for format_id, format_dict in download_assets.items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
format_url = format_dict.get('url')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(format_dict.get('url'))
|
||||
if not format_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
|
@ -4,14 +4,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -177,7 +175,7 @@ class EaglePlatformIE(InfoExtractor):
|
||||
video_id, 'Downloading mp4 JSON', fatal=False)
|
||||
if mp4_data:
|
||||
for format_id, format_url in mp4_data.get('data', {}).items():
|
||||
if not isinstance(format_url, compat_str):
|
||||
if not url_or_none(format_url):
|
||||
continue
|
||||
height = int_or_none(format_id)
|
||||
if height is not None and m3u8_formats_dict.get(height):
|
||||
|
@ -8,6 +8,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -34,8 +35,8 @@ class EggheadCourseIE(InfoExtractor):
|
||||
|
||||
entries = []
|
||||
for lesson in lessons:
|
||||
lesson_url = lesson.get('http_url')
|
||||
if not lesson_url or not isinstance(lesson_url, compat_str):
|
||||
lesson_url = url_or_none(lesson.get('http_url'))
|
||||
if not lesson_url:
|
||||
continue
|
||||
lesson_id = lesson.get('id')
|
||||
if lesson_id:
|
||||
@ -95,7 +96,8 @@ class EggheadLessonIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for _, format_url in lesson['media_urls'].items():
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'm3u8':
|
||||
|
@ -11,6 +11,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -82,8 +83,8 @@ class EpornerIE(InfoExtractor):
|
||||
for format_id, format_dict in formats_dict.items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
src = format_dict.get('src')
|
||||
if not isinstance(src, compat_str) or not src.startswith('http'):
|
||||
src = url_or_none(format_dict.get('src'))
|
||||
if not src or not src.startswith('http'):
|
||||
continue
|
||||
if kind == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
|
@ -10,6 +10,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -88,8 +89,8 @@ class FirstTVIE(InfoExtractor):
|
||||
formats = []
|
||||
path = None
|
||||
for f in item.get('mbr', []):
|
||||
src = f.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
src = url_or_none(f.get('src'))
|
||||
if not src:
|
||||
continue
|
||||
tbr = int_or_none(self._search_regex(
|
||||
r'_(\d{3,})\.mp4', src, 'tbr', default=None))
|
||||
|
@ -16,6 +16,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
from .dailymotion import DailymotionIE
|
||||
|
||||
@ -115,14 +116,13 @@ class FranceTVIE(InfoExtractor):
|
||||
|
||||
def sign(manifest_url, manifest_id):
|
||||
for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'):
|
||||
signed_url = self._download_webpage(
|
||||
signed_url = url_or_none(self._download_webpage(
|
||||
'https://%s/esi/TA' % host, video_id,
|
||||
'Downloading signed %s manifest URL' % manifest_id,
|
||||
fatal=False, query={
|
||||
'url': manifest_url,
|
||||
})
|
||||
if (signed_url and isinstance(signed_url, compat_str) and
|
||||
re.search(r'^(?:https?:)?//', signed_url)):
|
||||
}))
|
||||
if signed_url:
|
||||
return signed_url
|
||||
return manifest_url
|
||||
|
||||
|
@ -11,6 +11,7 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@ -80,7 +81,7 @@ class FrontendMastersPageBaseIE(FrontendMastersBaseIE):
|
||||
chapters = []
|
||||
lesson_elements = course.get('lessonElements')
|
||||
if isinstance(lesson_elements, list):
|
||||
chapters = [e for e in lesson_elements if isinstance(e, compat_str)]
|
||||
chapters = [url_or_none(e) for e in lesson_elements if url_or_none(e)]
|
||||
return chapters
|
||||
|
||||
@staticmethod
|
||||
|
@ -32,6 +32,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
url_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
from .commonprotocols import RtmpIE
|
||||
@ -3130,8 +3131,8 @@ class GenericIE(InfoExtractor):
|
||||
sources = [sources]
|
||||
formats = []
|
||||
for source in sources:
|
||||
src = source.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
src = url_or_none(source.get('src'))
|
||||
if not src:
|
||||
continue
|
||||
src = compat_urlparse.urljoin(url, src)
|
||||
src_type = source.get('type')
|
||||
|
@ -8,6 +8,7 @@ from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@ -80,8 +81,8 @@ class HiDiveIE(InfoExtractor):
|
||||
bitrates = rendition.get('bitrates')
|
||||
if not isinstance(bitrates, dict):
|
||||
continue
|
||||
m3u8_url = bitrates.get('hls')
|
||||
if not isinstance(m3u8_url, compat_str):
|
||||
m3u8_url = url_or_none(bitrates.get('hls'))
|
||||
if not m3u8_url:
|
||||
continue
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
@ -93,9 +94,8 @@ class HiDiveIE(InfoExtractor):
|
||||
if not isinstance(cc_file, list) or len(cc_file) < 3:
|
||||
continue
|
||||
cc_lang = cc_file[0]
|
||||
cc_url = cc_file[2]
|
||||
if not isinstance(cc_lang, compat_str) or not isinstance(
|
||||
cc_url, compat_str):
|
||||
cc_url = url_or_none(cc_file[2])
|
||||
if not isinstance(cc_lang, compat_str) or not cc_url:
|
||||
continue
|
||||
subtitles.setdefault(cc_lang, []).append({
|
||||
'url': cc_url,
|
||||
|
@ -3,12 +3,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
mimetype2ext,
|
||||
parse_duration,
|
||||
qualities,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -61,8 +61,8 @@ class ImdbIE(InfoExtractor):
|
||||
for encoding in video_metadata.get('encodings', []):
|
||||
if not encoding or not isinstance(encoding, dict):
|
||||
continue
|
||||
video_url = encoding.get('videoUrl')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
video_url = url_or_none(encoding.get('videoUrl'))
|
||||
if not video_url:
|
||||
continue
|
||||
ext = determine_ext(video_url, mimetype2ext(encoding.get('mimeType')))
|
||||
if ext == 'm3u8':
|
||||
|
@ -17,6 +17,7 @@ from ..utils import (
|
||||
lowercase_escape,
|
||||
std_headers,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -170,7 +171,7 @@ class InstagramIE(InfoExtractor):
|
||||
node = try_get(edge, lambda x: x['node'], dict)
|
||||
if not node:
|
||||
continue
|
||||
node_video_url = try_get(node, lambda x: x['video_url'], compat_str)
|
||||
node_video_url = url_or_none(node.get('video_url'))
|
||||
if not node_video_url:
|
||||
continue
|
||||
entries.append({
|
||||
|
@ -20,6 +20,7 @@ from ..utils import (
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
smuggle_url,
|
||||
url_or_none,
|
||||
xpath_with_ns,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
@ -250,8 +251,8 @@ class ITVIE(InfoExtractor):
|
||||
for sub in subs:
|
||||
if not isinstance(sub, dict):
|
||||
continue
|
||||
href = sub.get('Href')
|
||||
if isinstance(href, compat_str):
|
||||
href = url_or_none(sub.get('Href'))
|
||||
if href:
|
||||
extract_subtitle(href)
|
||||
if not info.get('duration'):
|
||||
info['duration'] = parse_duration(video_data.get('Duration'))
|
||||
|
@ -7,6 +7,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
remove_end,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -73,11 +74,14 @@ class IwaraIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for a_format in video_data:
|
||||
format_uri = url_or_none(a_format.get('uri'))
|
||||
if not format_uri:
|
||||
continue
|
||||
format_id = a_format.get('resolution')
|
||||
height = int_or_none(self._search_regex(
|
||||
r'(\d+)p', format_id, 'height', default=None))
|
||||
formats.append({
|
||||
'url': a_format['uri'],
|
||||
'url': self._proto_relative_url(format_uri, 'https:'),
|
||||
'format_id': format_id,
|
||||
'ext': mimetype2ext(a_format.get('mime')) or 'mp4',
|
||||
'height': height,
|
||||
|
@ -4,16 +4,14 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_decrypt_text
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
strip_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -55,7 +53,8 @@ class KeezMoviesIE(InfoExtractor):
|
||||
encrypted = False
|
||||
|
||||
def extract_format(format_url, height=None):
|
||||
if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//')):
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url or not format_url.startswith(('http', '//')):
|
||||
return
|
||||
if format_url in format_urls:
|
||||
return
|
||||
|
@ -2,11 +2,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -109,7 +109,8 @@ class KonserthusetPlayIE(InfoExtractor):
|
||||
captions = source.get('captionsAvailableLanguages')
|
||||
if isinstance(captions, dict):
|
||||
for lang, subtitle_url in captions.items():
|
||||
if lang != 'none' and isinstance(subtitle_url, compat_str):
|
||||
subtitle_url = url_or_none(subtitle_url)
|
||||
if lang != 'none' and subtitle_url:
|
||||
subtitles.setdefault(lang, []).append({'url': subtitle_url})
|
||||
|
||||
return {
|
||||
|
@ -15,6 +15,7 @@ from ..utils import (
|
||||
mimetype2ext,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
@ -156,8 +157,8 @@ class MediasiteIE(InfoExtractor):
|
||||
|
||||
stream_formats = []
|
||||
for unum, VideoUrl in enumerate(video_urls):
|
||||
video_url = VideoUrl.get('Location')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
video_url = url_or_none(VideoUrl.get('Location'))
|
||||
if not video_url:
|
||||
continue
|
||||
# XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS
|
||||
|
||||
|
@ -10,6 +10,7 @@ from ..utils import (
|
||||
parse_resolution,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
@ -200,8 +201,8 @@ class PeerTubeIE(InfoExtractor):
|
||||
for file_ in video['files']:
|
||||
if not isinstance(file_, dict):
|
||||
continue
|
||||
file_url = file_.get('fileUrl')
|
||||
if not file_url or not isinstance(file_url, compat_str):
|
||||
file_url = url_or_none(file_.get('fileUrl'))
|
||||
if not file_url:
|
||||
continue
|
||||
file_size = int_or_none(file_.get('size'))
|
||||
format_id = try_get(
|
||||
|
@ -3,12 +3,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -71,8 +71,8 @@ class RedTubeIE(InfoExtractor):
|
||||
video_id, fatal=False)
|
||||
if medias and isinstance(medias, list):
|
||||
for media in medias:
|
||||
format_url = media.get('videoUrl')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(media.get('videoUrl'))
|
||||
if not format_url:
|
||||
continue
|
||||
format_id = media.get('quality')
|
||||
formats.append({
|
||||
|
@ -6,6 +6,7 @@ from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -37,8 +38,8 @@ class RENTVIE(InfoExtractor):
|
||||
title = config['title']
|
||||
formats = []
|
||||
for video in config['src']:
|
||||
src = video.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
src = url_or_none(video.get('src'))
|
||||
if not src:
|
||||
continue
|
||||
ext = determine_ext(src)
|
||||
if ext == 'm3u8':
|
||||
|
@ -16,6 +16,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -176,8 +177,8 @@ class RutubePlaylistBaseIE(RutubeBaseIE):
|
||||
break
|
||||
|
||||
for result in results:
|
||||
video_url = result.get('video_url')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
video_url = url_or_none(result.get('video_url'))
|
||||
if not video_url:
|
||||
continue
|
||||
entry = self._extract_video(result, require_title=False)
|
||||
entry.update({
|
||||
|
@ -33,6 +33,7 @@ _x = lambda p: xpath_with_ns(p, {'smil': default_ns})
|
||||
|
||||
class ThePlatformBaseIE(OnceIE):
|
||||
_TP_TLD = 'com'
|
||||
|
||||
def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
|
||||
meta = self._download_xml(
|
||||
smil_url, video_id, note=note, query={'format': 'SMIL'},
|
||||
|
@ -15,6 +15,7 @@ from ..utils import (
|
||||
update_url_query,
|
||||
ExtractorError,
|
||||
strip_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -154,8 +155,8 @@ class TurnerBaseIE(AdobePassIE):
|
||||
subtitles = {}
|
||||
for source in video_data.findall('closedCaptions/source'):
|
||||
for track in source.findall('track'):
|
||||
track_url = track.get('url')
|
||||
if not isinstance(track_url, compat_str) or track_url.endswith('/big'):
|
||||
track_url = url_or_none(track.get('url'))
|
||||
if not track_url or track_url.endswith('/big'):
|
||||
continue
|
||||
lang = track.get('lang') or track.get('label') or 'en'
|
||||
subtitles.setdefault(lang, []).append({
|
||||
|
@ -4,10 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -106,9 +106,8 @@ class TVNetIE(InfoExtractor):
|
||||
for stream in self._download_json(data_file, video_id):
|
||||
if not isinstance(stream, dict):
|
||||
continue
|
||||
stream_url = stream.get('url')
|
||||
if (stream_url in stream_urls or not stream_url or
|
||||
not isinstance(stream_url, compat_str)):
|
||||
stream_url = url_or_none(stream.get('url'))
|
||||
if stream_url in stream_urls or not stream_url:
|
||||
continue
|
||||
stream_urls.add(stream_url)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
|
@ -19,6 +19,7 @@ from ..utils import (
|
||||
try_get,
|
||||
unsmuggle_url,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -255,7 +256,8 @@ class TVPlayIE(InfoExtractor):
|
||||
quality = qualities(['hls', 'medium', 'high'])
|
||||
formats = []
|
||||
for format_id, video_url in streams.get('streams', {}).items():
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
video_url = url_or_none(video_url)
|
||||
if not video_url:
|
||||
continue
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'f4m':
|
||||
|
@ -27,6 +27,7 @@ from ..utils import (
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
@ -663,8 +664,8 @@ class TwitchClipsIE(TwitchBaseIE):
|
||||
for option in status['quality_options']:
|
||||
if not isinstance(option, dict):
|
||||
continue
|
||||
source = option.get('source')
|
||||
if not source or not isinstance(source, compat_str):
|
||||
source = url_or_none(option.get('source'))
|
||||
if not source:
|
||||
continue
|
||||
formats.append({
|
||||
'url': source,
|
||||
|
@ -20,6 +20,7 @@ from ..utils import (
|
||||
sanitized_Request,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@ -265,8 +266,8 @@ class UdemyIE(InfoExtractor):
|
||||
if not isinstance(source_list, list):
|
||||
return
|
||||
for source in source_list:
|
||||
video_url = source.get('file') or source.get('src')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
video_url = url_or_none(source.get('file') or source.get('src'))
|
||||
if not video_url:
|
||||
continue
|
||||
if source.get('type') == 'application/x-mpegURL' or determine_ext(video_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
@ -293,8 +294,8 @@ class UdemyIE(InfoExtractor):
|
||||
continue
|
||||
if track.get('kind') != 'captions':
|
||||
continue
|
||||
src = track.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
src = url_or_none(track.get('src'))
|
||||
if not src:
|
||||
continue
|
||||
lang = track.get('language') or track.get(
|
||||
'srclang') or track.get('label')
|
||||
@ -314,8 +315,8 @@ class UdemyIE(InfoExtractor):
|
||||
for cc in captions:
|
||||
if not isinstance(cc, dict):
|
||||
continue
|
||||
cc_url = cc.get('url')
|
||||
if not cc_url or not isinstance(cc_url, compat_str):
|
||||
cc_url = url_or_none(cc.get('url'))
|
||||
if not cc_url:
|
||||
continue
|
||||
lang = try_get(cc, lambda x: x['locale']['locale'], compat_str)
|
||||
sub_dict = (automatic_captions if cc.get('source') == 'auto'
|
||||
|
@ -3,15 +3,13 @@ from __future__ import unicode_literals
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -166,8 +164,8 @@ class VidmeIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for f in video.get('formats', []):
|
||||
format_url = f.get('uri')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(f.get('uri'))
|
||||
if not format_url:
|
||||
continue
|
||||
format_type = f.get('type')
|
||||
if format_type == 'dash':
|
||||
|
@ -539,9 +539,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
# We try to find out to which variable is assigned the config dic
|
||||
m_variable_name = re.search(r'(\w)\.video\.id', webpage)
|
||||
if m_variable_name is not None:
|
||||
config_re = r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))
|
||||
config_re = [r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))]
|
||||
else:
|
||||
config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
|
||||
config_re.append(r'\bvar\s+r\s*=\s*({.+?})\s*;')
|
||||
config = self._search_regex(config_re, webpage, 'info section',
|
||||
flags=re.DOTALL)
|
||||
config = json.loads(config)
|
||||
|
@ -195,16 +195,29 @@ class ViuOTTIE(InfoExtractor):
|
||||
'skip': 'Geo-restricted to Hong Kong',
|
||||
}]
|
||||
|
||||
_AREA_ID = {
|
||||
'HK': 1,
|
||||
'SG': 2,
|
||||
'TH': 4,
|
||||
'PH': 5,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
country_code, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
query = {
|
||||
'r': 'vod/ajax-detail',
|
||||
'platform_flag_label': 'web',
|
||||
'product_id': video_id,
|
||||
}
|
||||
|
||||
area_id = self._AREA_ID.get(country_code.upper())
|
||||
if area_id:
|
||||
query['area_id'] = area_id
|
||||
|
||||
product_data = self._download_json(
|
||||
'http://www.viu.com/ott/%s/index.php' % country_code, video_id,
|
||||
'Downloading video info', query={
|
||||
'r': 'vod/ajax-detail',
|
||||
'platform_flag_label': 'web',
|
||||
'product_id': video_id,
|
||||
})['data']
|
||||
'Downloading video info', query=query)['data']
|
||||
|
||||
video_data = product_data.get('current_product')
|
||||
if not video_data:
|
||||
@ -214,6 +227,9 @@ class ViuOTTIE(InfoExtractor):
|
||||
'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
|
||||
video_id, 'Downloading stream info', query={
|
||||
'ccs_product_id': video_data['ccs_product_id'],
|
||||
}, headers={
|
||||
'Referer': url,
|
||||
'Origin': re.search(r'https?://[^/]+', url).group(0),
|
||||
})['data']['stream']
|
||||
|
||||
stream_sizes = stream_data.get('size', {})
|
||||
|
@ -20,6 +20,7 @@ from ..utils import (
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from .dailymotion import DailymotionIE
|
||||
@ -423,7 +424,8 @@ class VKIE(VKBaseIE):
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in data.items():
|
||||
if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//', 'rtmp')):
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
|
||||
continue
|
||||
if (format_id.startswith(('url', 'cache')) or
|
||||
format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
|
||||
|
@ -13,6 +13,7 @@ from ..utils import (
|
||||
parse_duration,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -137,7 +138,8 @@ class XHamsterIE(InfoExtractor):
|
||||
else:
|
||||
format_url = format_item
|
||||
filesize = None
|
||||
if not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': '%s-%s' % (format_id, quality),
|
||||
@ -198,7 +200,8 @@ class XHamsterIE(InfoExtractor):
|
||||
default='{}'),
|
||||
video_id, fatal=False)
|
||||
for format_id, format_url in sources.items():
|
||||
if not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url:
|
||||
continue
|
||||
if format_url in format_urls:
|
||||
continue
|
||||
|
@ -4,12 +4,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -80,9 +80,9 @@ class YapFilesIE(InfoExtractor):
|
||||
formats = []
|
||||
for format_id in QUALITIES:
|
||||
is_hd = format_id == 'hd'
|
||||
format_url = playlist.get(
|
||||
'file%s' % ('_hd' if is_hd else ''))
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(playlist.get(
|
||||
'file%s' % ('_hd' if is_hd else '')))
|
||||
if not format_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
|
@ -3,11 +3,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -50,8 +50,8 @@ class YouJizzIE(InfoExtractor):
|
||||
for encoding in encodings:
|
||||
if not isinstance(encoding, dict):
|
||||
continue
|
||||
format_url = encoding.get('filename')
|
||||
if not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(encoding.get('filename'))
|
||||
if not format_url:
|
||||
continue
|
||||
if determine_ext(format_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
|
@ -3,13 +3,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
from ..aes import aes_decrypt_text
|
||||
|
||||
@ -88,8 +88,8 @@ class YouPornIE(InfoExtractor):
|
||||
for definition in definitions:
|
||||
if not isinstance(definition, dict):
|
||||
continue
|
||||
video_url = definition.get('videoUrl')
|
||||
if isinstance(video_url, compat_str) and video_url:
|
||||
video_url = url_or_none(definition.get('videoUrl'))
|
||||
if video_url:
|
||||
links.append(video_url)
|
||||
|
||||
# Fallback #1, this also contains extra low quality 180p format
|
||||
|
@ -13,6 +13,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
try_get,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@ -150,8 +151,8 @@ class ZattooBaseIE(InfoExtractor):
|
||||
for watch in watch_urls:
|
||||
if not isinstance(watch, dict):
|
||||
continue
|
||||
watch_url = watch.get('url')
|
||||
if not watch_url or not isinstance(watch_url, compat_str):
|
||||
watch_url = url_or_none(watch.get('url'))
|
||||
if not watch_url:
|
||||
continue
|
||||
format_id_list = [stream_type]
|
||||
maxrate = watch.get('maxrate')
|
||||
|
@ -15,6 +15,7 @@ from ..utils import (
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
@ -67,8 +68,8 @@ class ZDFIE(ZDFBaseIE):
|
||||
def _extract_subtitles(src):
|
||||
subtitles = {}
|
||||
for caption in try_get(src, lambda x: x['captions'], list) or []:
|
||||
subtitle_url = caption.get('uri')
|
||||
if subtitle_url and isinstance(subtitle_url, compat_str):
|
||||
subtitle_url = url_or_none(caption.get('uri'))
|
||||
if subtitle_url:
|
||||
lang = caption.get('language', 'deu')
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': subtitle_url,
|
||||
@ -76,8 +77,8 @@ class ZDFIE(ZDFBaseIE):
|
||||
return subtitles
|
||||
|
||||
def _extract_format(self, video_id, formats, format_urls, meta):
|
||||
format_url = meta.get('url')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(meta.get('url'))
|
||||
if not format_url:
|
||||
return
|
||||
if format_url in format_urls:
|
||||
return
|
||||
@ -152,7 +153,8 @@ class ZDFIE(ZDFBaseIE):
|
||||
content, lambda x: x['teaserImageRef']['layouts'], dict)
|
||||
if layouts:
|
||||
for layout_key, layout_url in layouts.items():
|
||||
if not isinstance(layout_url, compat_str):
|
||||
layout_url = url_or_none(layout_url)
|
||||
if not layout_url:
|
||||
continue
|
||||
thumbnail = {
|
||||
'url': layout_url,
|
||||
|
@ -1866,6 +1866,13 @@ def strip_or_none(v):
|
||||
return None if v is None else v.strip()
|
||||
|
||||
|
||||
def url_or_none(url):
|
||||
if not url or not isinstance(url, compat_str):
|
||||
return None
|
||||
url = url.strip()
|
||||
return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None
|
||||
|
||||
|
||||
def parse_duration(s):
|
||||
if not isinstance(s, compat_basestring):
|
||||
return None
|
||||
@ -2282,7 +2289,7 @@ def parse_age_limit(s):
|
||||
def strip_jsonp(code):
|
||||
return re.sub(
|
||||
r'''(?sx)^
|
||||
(?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]+)
|
||||
(?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
|
||||
(?:\s*&&\s*(?P=func_name))?
|
||||
\s*\(\s*(?P<callback_data>.*)\);?
|
||||
\s*?(?://[^\n]*)*$''',
|
||||
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2018.07.10'
|
||||
__version__ = '2018.07.21'
|
||||
|
Loading…
x
Reference in New Issue
Block a user