mirror of
https://github.com/l1ving/youtube-dl
synced 2025-02-09 05:52:53 +08:00
Merge changes
This commit is contained in:
commit
6b31024945
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.06.14*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.06.14**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.07.29*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.07.29**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2018.06.14
|
||||
[debug] youtube-dl version 2018.07.29
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
7
AUTHORS
7
AUTHORS
@ -239,3 +239,10 @@ Martin Weinelt
|
||||
Surya Oktafendri
|
||||
TingPing
|
||||
Alexandre Macabies
|
||||
Bastian de Groot
|
||||
Niklas Haas
|
||||
András Veres-Szentkirályi
|
||||
Enes Solak
|
||||
Nathan Rossi
|
||||
Thomas van der Berg
|
||||
Luca Cherubin
|
||||
|
127
ChangeLog
127
ChangeLog
@ -1,3 +1,130 @@
|
||||
version 2018.07.29
|
||||
|
||||
Extractors
|
||||
* [crunchyroll:playlist] Restrict URL regular expression (#17069, #17076)
|
||||
+ [pornhub] Add support for subtitles (#16924, #17088)
|
||||
* [ceskatelevize] Use https for API call (#16997, #16999)
|
||||
* [dailymotion:playlist] Fix extraction (#16894)
|
||||
* [ted] Improve extraction
|
||||
* [ted] Fix extraction for videos without nativeDownloads (#16756, #17085)
|
||||
* [telecinco] Fix extraction (#17080)
|
||||
* [mitele] Reduce number of requests
|
||||
* [rai] Return non HTTP relinker URL intact (#17055)
|
||||
* [vk] Fix extraction for inline only videos (#16923)
|
||||
* [streamcloud] Fix extraction (#17054)
|
||||
* [facebook] Fix tahoe player extraction with authentication (#16655)
|
||||
+ [puhutv] Add support for puhutv.com (#12712, #16010, #16269)
|
||||
|
||||
|
||||
version 2018.07.21
|
||||
|
||||
Core
|
||||
+ [utils] Introduce url_or_none
|
||||
* [utils] Allow JSONP without function name (#17028)
|
||||
+ [extractor/common] Extract DASH and MSS formats from SMIL manifests
|
||||
|
||||
Extractors
|
||||
+ [bbc] Add support for BBC Radio Play pages (#17022)
|
||||
* [iwara] Fix download URLs (#17026)
|
||||
* [vrtnu] Relax title extraction and extract JSON-LD (#17018)
|
||||
+ [viu] Pass Referer and Origin headers and area id (#16992)
|
||||
+ [vimeo] Add another config regular expression (#17013)
|
||||
+ [facebook] Extract view count (#16942)
|
||||
* [dailymotion] Improve description extraction (#16984)
|
||||
* [slutload] Fix and improve extraction (#17001)
|
||||
* [mediaset] Fix extraction (#16977)
|
||||
+ [theplatform] Add support for theplatform TLD customization (#16977)
|
||||
* [imgur] Relax URL regular expression (#16987)
|
||||
* [pornhub] Improve extraction and extract all formats (#12166, #15891, #16262,
|
||||
#16959)
|
||||
|
||||
|
||||
version 2018.07.10
|
||||
|
||||
Core
|
||||
* [utils] Share JSON-LD regular expression
|
||||
* [downloader/dash] Improve error handling (#16927)
|
||||
|
||||
Extractors
|
||||
+ [nrktv] Add support for new season and serie URL schema
|
||||
+ [nrktv] Add support for new episode URL schema (#16909)
|
||||
+ [frontendmasters] Add support for frontendmasters.com (#3661, #16328)
|
||||
* [funk] Fix extraction (#16918)
|
||||
* [watchbox] Fix extraction (#16904)
|
||||
* [dplayit] Sort formats
|
||||
* [dplayit] Fix extraction (#16901)
|
||||
* [youtube] Improve login error handling (#13822)
|
||||
|
||||
|
||||
version 2018.07.04
|
||||
|
||||
Core
|
||||
* [extractor/common] Properly escape % in MPD templates (#16867)
|
||||
* [extractor/common] Use source URL as Referer for HTML5 entries (16849)
|
||||
* Prefer ffmpeg over avconv by default (#8622)
|
||||
|
||||
Extractors
|
||||
* [pluralsight] Switch to graphql (#16889, #16895, #16896, #16899)
|
||||
* [lynda] Simplify login and improve error capturing (#16891)
|
||||
+ [go90] Add support for embed URLs (#16873)
|
||||
* [go90] Detect geo restriction error and pass geo verification headers
|
||||
(#16874)
|
||||
* [vlive] Fix live streams extraction (#16871)
|
||||
* [npo] Fix typo (#16872)
|
||||
+ [mediaset] Add support for new videos and extract all formats (#16568)
|
||||
* [dctptv] Restore extraction based on REST API (#16850)
|
||||
* [svt] Improve extraction and add support for pages (#16802)
|
||||
* [porncom] Fix extraction (#16808)
|
||||
|
||||
|
||||
version 2018.06.25
|
||||
|
||||
Extractors
|
||||
* [joj] Relax URL regular expression (#16771)
|
||||
* [brightcove] Workaround sonyliv DRM protected videos (#16807)
|
||||
* [motherless] Fix extraction (#16786)
|
||||
* [itv] Make SOAP request non fatal and extract metadata from webpage (#16780)
|
||||
- [foxnews:insider] Remove extractor (#15810)
|
||||
+ [foxnews] Add support for iframe embeds (#15810, #16711)
|
||||
|
||||
|
||||
version 2018.06.19
|
||||
|
||||
Core
|
||||
+ [extractor/common] Introduce expected_status in _download_* methods
|
||||
for convenient accept of HTTP requests failed with non 2xx status codes
|
||||
+ [compat] Introduce compat_integer_types
|
||||
|
||||
Extractors
|
||||
* [peertube] Improve generic support (#16733)
|
||||
+ [6play] Use geo verification headers
|
||||
* [rtbf] Fix extraction for python 3.2
|
||||
* [vgtv] Improve HLS formats extraction
|
||||
+ [vgtv] Add support for www.aftonbladet.se/tv URLs
|
||||
* [bbccouk] Use expected_status
|
||||
* [markiza] Expect 500 HTTP status code
|
||||
* [tvnow] Try all clear manifest URLs (#15361)
|
||||
|
||||
|
||||
version 2018.06.18
|
||||
|
||||
Core
|
||||
* [downloader/rtmp] Fix downloading in verbose mode (#16736)
|
||||
|
||||
Extractors
|
||||
+ [markiza] Add support for markiza.sk (#16750)
|
||||
* [wat] Try all supported adaptive URLs
|
||||
+ [6play] Add support for rtlplay.be and extract hd usp formats
|
||||
+ [rtbf] Add support for audio and live streams (#9638, #11923)
|
||||
+ [rtbf] Extract HLS, DASH and all HTTP formats
|
||||
+ [rtbf] Extract subtitles
|
||||
+ [rtbf] Fixup specific HTTP URLs (#16101)
|
||||
+ [expressen] Add support for expressen.se
|
||||
* [vidzi] Fix extraction (#16678)
|
||||
* [pbs] Improve extraction (#16623, #16684)
|
||||
* [bilibili] Restrict cid regular expression (#16638, #16734)
|
||||
|
||||
|
||||
version 2018.06.14
|
||||
|
||||
Core
|
||||
|
12
README.md
12
README.md
@ -17,7 +17,7 @@ youtube-dl - download videos from youtube.com or other video platforms
|
||||
|
||||
# INSTALLATION
|
||||
|
||||
To install it right away for all UNIX users (Linux, OS X, etc.), type:
|
||||
To install it right away for all UNIX users (Linux, macOS, etc.), type:
|
||||
|
||||
sudo curl -L https://yt-dl.org/downloads/latest/youtube-dl -o /usr/local/bin/youtube-dl
|
||||
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||
@ -35,7 +35,7 @@ You can also use pip:
|
||||
|
||||
This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
|
||||
|
||||
OS X users can install youtube-dl with [Homebrew](https://brew.sh/):
|
||||
macOS users can install youtube-dl with [Homebrew](https://brew.sh/):
|
||||
|
||||
brew install youtube-dl
|
||||
|
||||
@ -427,9 +427,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
default; fix file if we can, warn
|
||||
otherwise)
|
||||
--prefer-avconv Prefer avconv over ffmpeg for running the
|
||||
postprocessors (default)
|
||||
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
|
||||
postprocessors
|
||||
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
|
||||
postprocessors (default)
|
||||
--ffmpeg-location PATH Location of the ffmpeg/avconv binary;
|
||||
either the path to the binary or its
|
||||
containing directory.
|
||||
@ -442,7 +442,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default configuration file may not exist so you may need to create it yourself.
|
||||
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and macOS, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default configuration file may not exist so you may need to create it yourself.
|
||||
|
||||
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
|
||||
```
|
||||
@ -870,7 +870,7 @@ Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the op
|
||||
|
||||
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
|
||||
|
||||
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox).
|
||||
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
|
||||
|
||||
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||
|
||||
|
@ -266,6 +266,7 @@
|
||||
- **Europa**
|
||||
- **EveryonesMixtape**
|
||||
- **ExpoTV**
|
||||
- **Expressen**
|
||||
- **ExtremeTube**
|
||||
- **EyedoTV**
|
||||
- **facebook**
|
||||
@ -289,7 +290,6 @@
|
||||
- **Foxgay**
|
||||
- **foxnews**: Fox News and Fox Business Video
|
||||
- **foxnews:article**
|
||||
- **foxnews:insider**
|
||||
- **FoxSports**
|
||||
- **france2.fr:generation-what**
|
||||
- **FranceCulture**
|
||||
@ -302,6 +302,9 @@
|
||||
- **Freesound**
|
||||
- **freespeech.org**
|
||||
- **FreshLive**
|
||||
- **FrontendMasters**
|
||||
- **FrontendMastersCourse**
|
||||
- **FrontendMastersLesson**
|
||||
- **Funimation**
|
||||
- **FunkChannel**
|
||||
- **FunkMix**
|
||||
@ -455,6 +458,8 @@
|
||||
- **mangomolo:live**
|
||||
- **mangomolo:video**
|
||||
- **ManyVids**
|
||||
- **Markiza**
|
||||
- **MarkizaPage**
|
||||
- **massengeschmack.tv**
|
||||
- **MatchTV**
|
||||
- **MDR**: MDR.DE and KiKA
|
||||
@ -587,7 +592,9 @@
|
||||
- **NRKSkole**: NRK Skole
|
||||
- **NRKTV**: NRK TV and NRK Radio
|
||||
- **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte
|
||||
- **NRKTVEpisode**
|
||||
- **NRKTVEpisodes**
|
||||
- **NRKTVSeason**
|
||||
- **NRKTVSeries**
|
||||
- **ntv.ru**
|
||||
- **Nuvid**
|
||||
@ -665,6 +672,8 @@
|
||||
- **PrimeShareTV**
|
||||
- **PromptFile**
|
||||
- **prosiebensat1**: ProSiebenSat.1 Digital
|
||||
- **puhutv**
|
||||
- **puhutv:serie**
|
||||
- **Puls4**
|
||||
- **Pyvideo**
|
||||
- **qqmusic**: QQ音乐
|
||||
@ -811,6 +820,7 @@
|
||||
- **StretchInternet**
|
||||
- **SunPorno**
|
||||
- **SVT**
|
||||
- **SVTPage**
|
||||
- **SVTPlay**: SVT Play and Öppet arkiv
|
||||
- **SVTSeries**
|
||||
- **SWRMediathek**
|
||||
|
@ -78,6 +78,7 @@ from youtube_dl.utils import (
|
||||
uppercase_escape,
|
||||
lowercase_escape,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
base_url,
|
||||
urljoin,
|
||||
urlencode_postdata,
|
||||
@ -507,6 +508,16 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
|
||||
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
|
||||
|
||||
def test_url_or_none(self):
|
||||
self.assertEqual(url_or_none(None), None)
|
||||
self.assertEqual(url_or_none(''), None)
|
||||
self.assertEqual(url_or_none('foo'), None)
|
||||
self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
|
||||
self.assertEqual(url_or_none('https://foo.de'), 'https://foo.de')
|
||||
self.assertEqual(url_or_none('http$://foo.de'), None)
|
||||
self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
|
||||
self.assertEqual(url_or_none('//foo.de'), '//foo.de')
|
||||
|
||||
def test_parse_age_limit(self):
|
||||
self.assertEqual(parse_age_limit(None), None)
|
||||
self.assertEqual(parse_age_limit(False), None)
|
||||
@ -717,6 +728,10 @@ class TestUtil(unittest.TestCase):
|
||||
d = json.loads(stripped)
|
||||
self.assertEqual(d, {'status': 'success'})
|
||||
|
||||
stripped = strip_jsonp('({"status": "success"});')
|
||||
d = json.loads(stripped)
|
||||
self.assertEqual(d, {'status': 'success'})
|
||||
|
||||
def test_uppercase_escape(self):
|
||||
self.assertEqual(uppercase_escape('aä'), 'aä')
|
||||
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
|
||||
|
@ -305,8 +305,8 @@ class YoutubeDL(object):
|
||||
http_chunk_size.
|
||||
|
||||
The following options are used by the post processors:
|
||||
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
||||
otherwise prefer avconv.
|
||||
prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
|
||||
otherwise prefer ffmpeg.
|
||||
postprocessor_args: A list of additional command-line arguments for the
|
||||
postprocessor.
|
||||
|
||||
|
@ -2787,6 +2787,12 @@ except NameError: # Python 3
|
||||
compat_numeric_types = (int, float, complex)
|
||||
|
||||
|
||||
try:
|
||||
compat_integer_types = (int, long)
|
||||
except NameError: # Python 3
|
||||
compat_integer_types = (int, )
|
||||
|
||||
|
||||
if sys.version_info < (2, 7):
|
||||
def compat_socket_create_connection(address, timeout, source_address=None):
|
||||
host, port = address
|
||||
@ -2974,6 +2980,7 @@ __all__ = [
|
||||
'compat_http_client',
|
||||
'compat_http_server',
|
||||
'compat_input',
|
||||
'compat_integer_types',
|
||||
'compat_itertools_count',
|
||||
'compat_kwargs',
|
||||
'compat_numeric_types',
|
||||
|
@ -2,7 +2,10 @@ from __future__ import unicode_literals
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import compat_urllib_error
|
||||
from ..utils import urljoin
|
||||
from ..utils import (
|
||||
DownloadError,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class DashSegmentsFD(FragmentFD):
|
||||
@ -57,6 +60,14 @@ class DashSegmentsFD(FragmentFD):
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||
except DownloadError:
|
||||
# Don't retry fragment if error occurred during HTTP downloading
|
||||
# itself since it has own retry settings
|
||||
if not fatal:
|
||||
self.report_skip_fragment(frag_index)
|
||||
break
|
||||
raise
|
||||
|
||||
if count > fragment_retries:
|
||||
if not fatal:
|
||||
self.report_skip_fragment(frag_index)
|
||||
|
@ -7,6 +7,7 @@ from .turner import TurnerBaseIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -98,7 +99,7 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
if not video_id:
|
||||
entries = []
|
||||
for episode in video_data.get('archiveEpisodes', []):
|
||||
episode_url = episode.get('url')
|
||||
episode_url = url_or_none(episode.get('url'))
|
||||
if not episode_url:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
|
@ -9,6 +9,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
@ -304,7 +305,7 @@ class AfreecaTVIE(InfoExtractor):
|
||||
file_elements = video_element.findall(compat_xpath('./file'))
|
||||
one = len(file_elements) == 1
|
||||
for file_num, file_element in enumerate(file_elements, start=1):
|
||||
file_url = file_element.text
|
||||
file_url = url_or_none(file_element.text)
|
||||
if not file_url:
|
||||
continue
|
||||
key = file_element.get('key', '')
|
||||
|
@ -3,11 +3,12 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
mimetype2ext,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -35,7 +36,7 @@ class AMPIE(InfoExtractor):
|
||||
media_thumbnail = [media_thumbnail]
|
||||
for thumbnail_data in media_thumbnail:
|
||||
thumbnail = thumbnail_data.get('@attributes', {})
|
||||
thumbnail_url = thumbnail.get('url')
|
||||
thumbnail_url = url_or_none(thumbnail.get('url'))
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
@ -51,7 +52,7 @@ class AMPIE(InfoExtractor):
|
||||
media_subtitle = [media_subtitle]
|
||||
for subtitle_data in media_subtitle:
|
||||
subtitle = subtitle_data.get('@attributes', {})
|
||||
subtitle_href = subtitle.get('href')
|
||||
subtitle_href = url_or_none(subtitle.get('href'))
|
||||
if not subtitle_href:
|
||||
continue
|
||||
subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
|
||||
@ -65,7 +66,7 @@ class AMPIE(InfoExtractor):
|
||||
media_content = [media_content]
|
||||
for media_data in media_content:
|
||||
media = media_data.get('@attributes', {})
|
||||
media_url = media.get('url')
|
||||
media_url = url_or_none(media.get('url'))
|
||||
if not media_url:
|
||||
continue
|
||||
ext = mimetype2ext(media.get('type')) or determine_ext(media_url)
|
||||
@ -79,7 +80,7 @@ class AMPIE(InfoExtractor):
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
|
||||
'url': media['url'],
|
||||
'url': media_url,
|
||||
'tbr': int_or_none(media.get('bitrate')),
|
||||
'filesize': int_or_none(media.get('fileSize')),
|
||||
'ext': ext,
|
||||
|
@ -8,6 +8,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
@ -165,7 +166,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
}, fatal=False)
|
||||
if not playlist:
|
||||
continue
|
||||
stream_url = playlist.get('streamurl')
|
||||
stream_url = url_or_none(playlist.get('streamurl'))
|
||||
if stream_url:
|
||||
rtmp = re.search(
|
||||
r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
|
||||
|
@ -7,6 +7,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -77,7 +78,7 @@ class AolIE(InfoExtractor):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
for rendition in video_data.get('renditions', []):
|
||||
video_url = rendition.get('url')
|
||||
video_url = url_or_none(rendition.get('url'))
|
||||
if not video_url:
|
||||
continue
|
||||
ext = rendition.get('format')
|
||||
|
@ -4,10 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -68,8 +68,8 @@ class APAIE(InfoExtractor):
|
||||
for source in sources:
|
||||
if not isinstance(source, dict):
|
||||
continue
|
||||
source_url = source.get('file')
|
||||
if not source_url or not isinstance(source_url, compat_str):
|
||||
source_url = url_or_none(source.get('file'))
|
||||
if not source_url:
|
||||
continue
|
||||
ext = determine_ext(source_url)
|
||||
if ext == 'm3u8':
|
||||
|
@ -5,6 +5,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -43,7 +44,7 @@ class AparatIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for item in file_list[0]:
|
||||
file_url = item.get('file')
|
||||
file_url = url_or_none(item.get('file'))
|
||||
if not file_url:
|
||||
continue
|
||||
ext = mimetype2ext(item.get('type'))
|
||||
|
@ -5,7 +5,6 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .generic import GenericIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
@ -15,6 +14,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
xpath_text,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
from ..compat import compat_etree_fromstring
|
||||
|
||||
@ -100,7 +100,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
quality = stream.get('_quality')
|
||||
server = stream.get('_server')
|
||||
for stream_url in stream_urls:
|
||||
if not isinstance(stream_url, compat_str) or '//' not in stream_url:
|
||||
if not url_or_none(stream_url):
|
||||
continue
|
||||
ext = determine_ext(stream_url)
|
||||
if quality != 'auto' and ext in ('f4m', 'm3u8'):
|
||||
|
@ -19,6 +19,7 @@ from ..utils import (
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -131,8 +132,8 @@ class BandcampIE(InfoExtractor):
|
||||
fatal=False)
|
||||
if not stat:
|
||||
continue
|
||||
retry_url = stat.get('retry_url')
|
||||
if not isinstance(retry_url, compat_str):
|
||||
retry_url = url_or_none(stat.get('retry_url'))
|
||||
if not retry_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': self._proto_relative_url(retry_url, 'http:'),
|
||||
@ -306,7 +307,7 @@ class BandcampWeeklyIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in show['audio_stream'].items():
|
||||
if not isinstance(format_url, compat_str):
|
||||
if not url_or_none(format_url):
|
||||
continue
|
||||
for known_ext in KNOWN_EXTENSIONS:
|
||||
if known_ext in format_id:
|
||||
|
@ -21,7 +21,6 @@ from ..utils import (
|
||||
urljoin,
|
||||
)
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_HTTPError,
|
||||
compat_urlparse,
|
||||
)
|
||||
@ -334,14 +333,9 @@ class BBCCoUkIE(InfoExtractor):
|
||||
self._raise_extractor_error(last_exception)
|
||||
|
||||
def _download_media_selector_url(self, url, programme_id=None):
|
||||
try:
|
||||
media_selection = self._download_xml(
|
||||
url, programme_id, 'Downloading media selection XML')
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code in (403, 404):
|
||||
media_selection = compat_etree_fromstring(ee.cause.read().decode('utf-8'))
|
||||
else:
|
||||
raise
|
||||
media_selection = self._download_xml(
|
||||
url, programme_id, 'Downloading media selection XML',
|
||||
expected_status=(403, 404))
|
||||
return self._process_media_selector(media_selection, programme_id)
|
||||
|
||||
def _process_media_selector(self, media_selection, programme_id):
|
||||
@ -784,6 +778,17 @@ class BBCIE(BBCCoUkIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# window.__PRELOADED_STATE__
|
||||
'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
|
||||
'info_dict': {
|
||||
'id': 'b0b9z4vz',
|
||||
'ext': 'mp4',
|
||||
'title': 'Prom 6: An American in Paris and Turangalila',
|
||||
'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
|
||||
'uploader': 'Radio 3',
|
||||
'uploader_id': 'bbc_radio_three',
|
||||
},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@ -1006,6 +1011,36 @@ class BBCIE(BBCCoUkIE):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
preload_state = self._parse_json(self._search_regex(
|
||||
r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
|
||||
'preload state', default='{}'), playlist_id, fatal=False)
|
||||
if preload_state:
|
||||
current_programme = preload_state.get('programmes', {}).get('current') or {}
|
||||
programme_id = current_programme.get('id')
|
||||
if current_programme and programme_id and current_programme.get('type') == 'playable_item':
|
||||
title = current_programme.get('titles', {}).get('tertiary') or playlist_title
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
self._sort_formats(formats)
|
||||
synopses = current_programme.get('synopses') or {}
|
||||
network = current_programme.get('network') or {}
|
||||
duration = int_or_none(
|
||||
current_programme.get('duration', {}).get('value'))
|
||||
thumbnail = None
|
||||
image_url = current_programme.get('image_url')
|
||||
if image_url:
|
||||
thumbnail = image_url.replace('{recipe}', '1920x1920')
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': dict_get(synopses, ('long', 'medium', 'short')),
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'uploader': network.get('short_title'),
|
||||
'uploader_id': network.get('id'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
bbc3_config = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
|
||||
|
@ -4,8 +4,10 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BreakIE(InfoExtractor):
|
||||
@ -55,8 +57,8 @@ class BreakIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for video in content:
|
||||
video_url = video.get('url')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
video_url = url_or_none(video.get('url'))
|
||||
if not video_url:
|
||||
continue
|
||||
bitrate = int_or_none(self._search_regex(
|
||||
r'(\d+)_kbps', video_url, 'tbr', default=None))
|
||||
|
@ -572,7 +572,8 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
container = source.get('container')
|
||||
ext = mimetype2ext(source.get('type'))
|
||||
src = source.get('src')
|
||||
if ext == 'ism' or container == 'WVM':
|
||||
# https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
|
||||
if ext == 'ism' or container == 'WVM' or source.get('key_systems'):
|
||||
continue
|
||||
elif ext == 'm3u8' or container == 'M2TS':
|
||||
if not src:
|
||||
@ -629,6 +630,14 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
'format_id': build_format_id('rtmp'),
|
||||
})
|
||||
formats.append(f)
|
||||
if not formats:
|
||||
# for sonyliv.com DRM protected videos
|
||||
s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl')
|
||||
if s3_source_url:
|
||||
formats.append({
|
||||
'url': s3_source_url,
|
||||
'format_id': 'source',
|
||||
})
|
||||
|
||||
errors = json_data.get('errors')
|
||||
if not formats and errors:
|
||||
|
@ -2,10 +2,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -56,8 +56,8 @@ class CamModelsIE(InfoExtractor):
|
||||
for media in encodings:
|
||||
if not isinstance(media, dict):
|
||||
continue
|
||||
media_url = media.get('location')
|
||||
if not media_url or not isinstance(media_url, compat_str):
|
||||
media_url = url_or_none(media.get('location'))
|
||||
if not media_url:
|
||||
continue
|
||||
|
||||
format_id_list = [format_id]
|
||||
|
@ -11,6 +11,7 @@ from ..utils import (
|
||||
strip_or_none,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
@ -248,9 +249,13 @@ class VrtNUIE(GigyaBaseIE):
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
info = self._search_json_ld(webpage, display_id, default={})
|
||||
|
||||
# title is optional here since it may be extracted by extractor
|
||||
# that is delegated from here
|
||||
title = strip_or_none(self._html_search_regex(
|
||||
r'(?ms)<h1 class="content__heading">(.+?)</h1>',
|
||||
webpage, 'title').strip()
|
||||
webpage, 'title', default=None))
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?ms)<div class="content__description">(.+?)</div>',
|
||||
@ -295,7 +300,7 @@ class VrtNUIE(GigyaBaseIE):
|
||||
# the first one
|
||||
video_id = list(video.values())[0].get('videoid')
|
||||
|
||||
return {
|
||||
return merge_dicts(info, {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
|
||||
'ie_key': CanvasIE.ie_key(),
|
||||
@ -307,4 +312,4 @@ class VrtNUIE(GigyaBaseIE):
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'release_date': release_date,
|
||||
}
|
||||
})
|
||||
|
@ -4,13 +4,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_resolution,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -53,8 +53,8 @@ class CCMAIE(InfoExtractor):
|
||||
media_url = media['media']['url']
|
||||
if isinstance(media_url, list):
|
||||
for format_ in media_url:
|
||||
format_url = format_.get('file')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(format_.get('file'))
|
||||
if not format_url:
|
||||
continue
|
||||
label = format_.get('label')
|
||||
f = parse_resolution(label)
|
||||
|
@ -108,7 +108,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
|
||||
for user_agent in (None, USER_AGENTS['Safari']):
|
||||
req = sanitized_Request(
|
||||
'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
|
||||
'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
|
||||
data=urlencode_postdata(data))
|
||||
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
|
@ -19,6 +19,7 @@ from ..compat import (
|
||||
compat_cookies,
|
||||
compat_etree_fromstring,
|
||||
compat_getpass,
|
||||
compat_integer_types,
|
||||
compat_http_client,
|
||||
compat_os_name,
|
||||
compat_str,
|
||||
@ -51,6 +52,7 @@ from ..utils import (
|
||||
GeoUtils,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
JSON_LD_RE,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_codecs,
|
||||
@ -548,8 +550,26 @@ class InfoExtractor(object):
|
||||
def IE_NAME(self):
|
||||
return compat_str(type(self).__name__[:-2])
|
||||
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
|
||||
""" Returns the response handle """
|
||||
@staticmethod
|
||||
def __can_accept_status_code(err, expected_status):
|
||||
assert isinstance(err, compat_urllib_error.HTTPError)
|
||||
if expected_status is None:
|
||||
return False
|
||||
if isinstance(expected_status, compat_integer_types):
|
||||
return err.code == expected_status
|
||||
elif isinstance(expected_status, (list, tuple)):
|
||||
return err.code in expected_status
|
||||
elif callable(expected_status):
|
||||
return expected_status(err.code) is True
|
||||
else:
|
||||
assert False
|
||||
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None):
|
||||
"""
|
||||
Return the response handle.
|
||||
|
||||
See _download_webpage docstring for arguments specification.
|
||||
"""
|
||||
if note is None:
|
||||
self.report_download_webpage(video_id)
|
||||
elif note is not False:
|
||||
@ -578,6 +598,10 @@ class InfoExtractor(object):
|
||||
try:
|
||||
return self._downloader.urlopen(url_or_request)
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
if isinstance(err, compat_urllib_error.HTTPError):
|
||||
if self.__can_accept_status_code(err, expected_status):
|
||||
return err.fp
|
||||
|
||||
if errnote is False:
|
||||
return False
|
||||
if errnote is None:
|
||||
@ -590,13 +614,17 @@ class InfoExtractor(object):
|
||||
self._downloader.report_warning(errmsg)
|
||||
return False
|
||||
|
||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}):
|
||||
""" Returns a tuple (page content as string, URL handle) """
|
||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
|
||||
"""
|
||||
Return a tuple (page content as string, URL handle).
|
||||
|
||||
See _download_webpage docstring for arguments specification.
|
||||
"""
|
||||
# Strip hashes from the URL (#1038)
|
||||
if isinstance(url_or_request, (compat_str, str)):
|
||||
url_or_request = url_or_request.partition('#')[0]
|
||||
|
||||
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
|
||||
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
|
||||
if urlh is False:
|
||||
assert not fatal
|
||||
return False
|
||||
@ -685,13 +713,52 @@ class InfoExtractor(object):
|
||||
|
||||
return content
|
||||
|
||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers={}, query={}):
|
||||
""" Returns the data of the page as a string """
|
||||
def _download_webpage(
|
||||
self, url_or_request, video_id, note=None, errnote=None,
|
||||
fatal=True, tries=1, timeout=5, encoding=None, data=None,
|
||||
headers={}, query={}, expected_status=None):
|
||||
"""
|
||||
Return the data of the page as a string.
|
||||
|
||||
Arguments:
|
||||
url_or_request -- plain text URL as a string or
|
||||
a compat_urllib_request.Requestobject
|
||||
video_id -- Video/playlist/item identifier (string)
|
||||
|
||||
Keyword arguments:
|
||||
note -- note printed before downloading (string)
|
||||
errnote -- note printed in case of an error (string)
|
||||
fatal -- flag denoting whether error should be considered fatal,
|
||||
i.e. whether it should cause ExtractionError to be raised,
|
||||
otherwise a warning will be reported and extraction continued
|
||||
tries -- number of tries
|
||||
timeout -- sleep interval between tries
|
||||
encoding -- encoding for a page content decoding, guessed automatically
|
||||
when not explicitly specified
|
||||
data -- POST data (bytes)
|
||||
headers -- HTTP headers (dict)
|
||||
query -- URL query (dict)
|
||||
expected_status -- allows to accept failed HTTP requests (non 2xx
|
||||
status code) by explicitly specifying a set of accepted status
|
||||
codes. Can be any of the following entities:
|
||||
- an integer type specifying an exact failed status code to
|
||||
accept
|
||||
- a list or a tuple of integer types specifying a list of
|
||||
failed status codes to accept
|
||||
- a callable accepting an actual failed status code and
|
||||
returning True if it should be accepted
|
||||
Note that this argument does not affect success status codes (2xx)
|
||||
which are always accepted.
|
||||
"""
|
||||
|
||||
success = False
|
||||
try_count = 0
|
||||
while success is False:
|
||||
try:
|
||||
res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding, data=data, headers=headers, query=query)
|
||||
res = self._download_webpage_handle(
|
||||
url_or_request, video_id, note, errnote, fatal,
|
||||
encoding=encoding, data=data, headers=headers, query=query,
|
||||
expected_status=expected_status)
|
||||
success = True
|
||||
except compat_http_client.IncompleteRead as e:
|
||||
try_count += 1
|
||||
@ -707,11 +774,17 @@ class InfoExtractor(object):
|
||||
def _download_xml_handle(
|
||||
self, url_or_request, video_id, note='Downloading XML',
|
||||
errnote='Unable to download XML', transform_source=None,
|
||||
fatal=True, encoding=None, data=None, headers={}, query={}):
|
||||
"""Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle)"""
|
||||
fatal=True, encoding=None, data=None, headers={}, query={},
|
||||
expected_status=None):
|
||||
"""
|
||||
Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle).
|
||||
|
||||
See _download_webpage docstring for arguments specification.
|
||||
"""
|
||||
res = self._download_webpage_handle(
|
||||
url_or_request, video_id, note, errnote, fatal=fatal,
|
||||
encoding=encoding, data=data, headers=headers, query=query)
|
||||
encoding=encoding, data=data, headers=headers, query=query,
|
||||
expected_status=expected_status)
|
||||
if res is False:
|
||||
return res
|
||||
xml_string, urlh = res
|
||||
@ -719,15 +792,21 @@ class InfoExtractor(object):
|
||||
xml_string, video_id, transform_source=transform_source,
|
||||
fatal=fatal), urlh
|
||||
|
||||
def _download_xml(self, url_or_request, video_id,
|
||||
note='Downloading XML', errnote='Unable to download XML',
|
||||
transform_source=None, fatal=True, encoding=None,
|
||||
data=None, headers={}, query={}):
|
||||
"""Return the xml as an xml.etree.ElementTree.Element"""
|
||||
def _download_xml(
|
||||
self, url_or_request, video_id,
|
||||
note='Downloading XML', errnote='Unable to download XML',
|
||||
transform_source=None, fatal=True, encoding=None,
|
||||
data=None, headers={}, query={}, expected_status=None):
|
||||
"""
|
||||
Return the xml as an xml.etree.ElementTree.Element.
|
||||
|
||||
See _download_webpage docstring for arguments specification.
|
||||
"""
|
||||
res = self._download_xml_handle(
|
||||
url_or_request, video_id, note=note, errnote=errnote,
|
||||
transform_source=transform_source, fatal=fatal, encoding=encoding,
|
||||
data=data, headers=headers, query=query)
|
||||
data=data, headers=headers, query=query,
|
||||
expected_status=expected_status)
|
||||
return res if res is False else res[0]
|
||||
|
||||
def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
|
||||
@ -745,11 +824,17 @@ class InfoExtractor(object):
|
||||
def _download_json_handle(
|
||||
self, url_or_request, video_id, note='Downloading JSON metadata',
|
||||
errnote='Unable to download JSON metadata', transform_source=None,
|
||||
fatal=True, encoding=None, data=None, headers={}, query={}):
|
||||
"""Return a tuple (JSON object, URL handle)"""
|
||||
fatal=True, encoding=None, data=None, headers={}, query={},
|
||||
expected_status=None):
|
||||
"""
|
||||
Return a tuple (JSON object, URL handle).
|
||||
|
||||
See _download_webpage docstring for arguments specification.
|
||||
"""
|
||||
res = self._download_webpage_handle(
|
||||
url_or_request, video_id, note, errnote, fatal=fatal,
|
||||
encoding=encoding, data=data, headers=headers, query=query)
|
||||
encoding=encoding, data=data, headers=headers, query=query,
|
||||
expected_status=expected_status)
|
||||
if res is False:
|
||||
return res
|
||||
json_string, urlh = res
|
||||
@ -760,11 +845,18 @@ class InfoExtractor(object):
|
||||
def _download_json(
|
||||
self, url_or_request, video_id, note='Downloading JSON metadata',
|
||||
errnote='Unable to download JSON metadata', transform_source=None,
|
||||
fatal=True, encoding=None, data=None, headers={}, query={}):
|
||||
fatal=True, encoding=None, data=None, headers={}, query={},
|
||||
expected_status=None):
|
||||
"""
|
||||
Return the JSON object as a dict.
|
||||
|
||||
See _download_webpage docstring for arguments specification.
|
||||
"""
|
||||
res = self._download_json_handle(
|
||||
url_or_request, video_id, note=note, errnote=errnote,
|
||||
transform_source=transform_source, fatal=fatal, encoding=encoding,
|
||||
data=data, headers=headers, query=query)
|
||||
data=data, headers=headers, query=query,
|
||||
expected_status=expected_status)
|
||||
return res if res is False else res[0]
|
||||
|
||||
def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
|
||||
@ -1058,8 +1150,7 @@ class InfoExtractor(object):
|
||||
|
||||
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
|
||||
json_ld = self._search_regex(
|
||||
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
|
||||
html, 'JSON-LD', group='json_ld', **kwargs)
|
||||
JSON_LD_RE, html, 'JSON-LD', group='json_ld', **kwargs)
|
||||
default = kwargs.get('default', NO_DEFAULT)
|
||||
if not json_ld:
|
||||
return default if default is not NO_DEFAULT else {}
|
||||
@ -1768,9 +1859,7 @@ class InfoExtractor(object):
|
||||
'height': height,
|
||||
})
|
||||
formats.extend(m3u8_formats)
|
||||
continue
|
||||
|
||||
if src_ext == 'f4m':
|
||||
elif src_ext == 'f4m':
|
||||
f4m_url = src_url
|
||||
if not f4m_params:
|
||||
f4m_params = {
|
||||
@ -1780,9 +1869,13 @@ class InfoExtractor(object):
|
||||
f4m_url += '&' if '?' in f4m_url else '?'
|
||||
f4m_url += compat_urllib_parse_urlencode(f4m_params)
|
||||
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
|
||||
continue
|
||||
|
||||
if src_url.startswith('http') and self._is_valid_url(src, video_id):
|
||||
elif src_ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
src_url, video_id, mpd_id='dash', fatal=False))
|
||||
elif re.search(r'\.ism/[Mm]anifest', src_url):
|
||||
formats.extend(self._extract_ism_formats(
|
||||
src_url, video_id, ism_id='mss', fatal=False))
|
||||
elif src_url.startswith('http') and self._is_valid_url(src, video_id):
|
||||
http_count += 1
|
||||
formats.append({
|
||||
'url': src_url,
|
||||
@ -1793,7 +1886,6 @@ class InfoExtractor(object):
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
continue
|
||||
|
||||
return formats
|
||||
|
||||
@ -2015,7 +2107,21 @@ class InfoExtractor(object):
|
||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||
|
||||
def prepare_template(template_name, identifiers):
|
||||
t = representation_ms_info[template_name]
|
||||
tmpl = representation_ms_info[template_name]
|
||||
# First of, % characters outside $...$ templates
|
||||
# must be escaped by doubling for proper processing
|
||||
# by % operator string formatting used further (see
|
||||
# https://github.com/rg3/youtube-dl/issues/16867).
|
||||
t = ''
|
||||
in_template = False
|
||||
for c in tmpl:
|
||||
t += c
|
||||
if c == '$':
|
||||
in_template = not in_template
|
||||
elif c == '%' and not in_template:
|
||||
t += c
|
||||
# Next, $...$ templates are translated to their
|
||||
# %(...) counterparts to be used with % operator
|
||||
t = t.replace('$RepresentationID$', representation_id)
|
||||
t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
|
||||
t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
|
||||
@ -2346,6 +2452,8 @@ class InfoExtractor(object):
|
||||
media_info['subtitles'].setdefault(lang, []).append({
|
||||
'url': absolute_url(src),
|
||||
})
|
||||
for f in media_info['formats']:
|
||||
f.setdefault('http_headers', {})['Referer'] = base_url
|
||||
if media_info['formats'] or media_info['subtitles']:
|
||||
entries.append(media_info)
|
||||
return entries
|
||||
|
@ -4,16 +4,14 @@ from __future__ import unicode_literals, division
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
url_or_none,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
@ -86,8 +84,8 @@ class CrackleIE(InfoExtractor):
|
||||
for e in media['MediaURLs']:
|
||||
if e.get('UseDRM') is True:
|
||||
continue
|
||||
format_url = e.get('Path')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(e.get('Path'))
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'm3u8':
|
||||
@ -124,8 +122,8 @@ class CrackleIE(InfoExtractor):
|
||||
for cc_file in cc_files:
|
||||
if not isinstance(cc_file, dict):
|
||||
continue
|
||||
cc_url = cc_file.get('Path')
|
||||
if not cc_url or not isinstance(cc_url, compat_str):
|
||||
cc_url = url_or_none(cc_file.get('Path'))
|
||||
if not cc_url:
|
||||
continue
|
||||
lang = cc_file.get('Locale') or 'en'
|
||||
subtitles.setdefault(lang, []).append({'url': cc_url})
|
||||
|
@ -262,6 +262,9 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
# Just test metadata extraction
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.com/media-723735',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMAT_IDS = {
|
||||
@ -580,7 +583,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
|
||||
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||
IE_NAME = 'crunchyroll:playlist'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?(?:\?|$)'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
|
||||
|
@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import functools
|
||||
import hashlib
|
||||
import itertools
|
||||
import json
|
||||
@ -16,11 +17,13 @@ from ..utils import (
|
||||
error_to_compat_str,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
OnDemandPagedList,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
mimetype2ext,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@ -144,7 +147,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
description = self._og_search_description(webpage) or self._html_search_meta(
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
|
||||
view_count_str = self._search_regex(
|
||||
@ -342,17 +346,93 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
IE_NAME = 'dailymotion:playlist'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>[^/?#&]+)'
|
||||
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
|
||||
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
|
||||
'info_dict': {
|
||||
'title': 'SPORT',
|
||||
'id': 'xv4bw_nqtv_sport',
|
||||
'id': 'xv4bw',
|
||||
},
|
||||
'playlist_mincount': 20,
|
||||
}]
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
def _fetch_page(self, playlist_id, authorizaion, page):
|
||||
page += 1
|
||||
videos = self._download_json(
|
||||
'https://graphql.api.dailymotion.com',
|
||||
playlist_id, 'Downloading page %d' % page,
|
||||
data=json.dumps({
|
||||
'query': '''{
|
||||
collection(xid: "%s") {
|
||||
videos(first: %d, page: %d) {
|
||||
pageInfo {
|
||||
hasNextPage
|
||||
nextPage
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
xid
|
||||
url
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}''' % (playlist_id, self._PAGE_SIZE, page)
|
||||
}).encode(), headers={
|
||||
'Authorization': authorizaion,
|
||||
'Origin': 'https://www.dailymotion.com',
|
||||
})['data']['collection']['videos']
|
||||
for edge in videos['edges']:
|
||||
node = edge['node']
|
||||
yield self.url_result(
|
||||
node['url'], DailymotionIE.ie_key(), node['xid'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
api = self._parse_json(self._search_regex(
|
||||
r'__PLAYER_CONFIG__\s*=\s*({.+?});',
|
||||
webpage, 'player config'), playlist_id)['context']['api']
|
||||
auth = self._download_json(
|
||||
api.get('auth_url', 'https://graphql.api.dailymotion.com/oauth/token'),
|
||||
playlist_id, data=urlencode_postdata({
|
||||
'client_id': api.get('client_id', 'f1a362d288c1b98099c7'),
|
||||
'client_secret': api.get('client_secret', 'eea605b96e01c796ff369935357eca920c5da4c5'),
|
||||
'grant_type': 'client_credentials',
|
||||
}))
|
||||
authorizaion = '%s %s' % (auth.get('token_type', 'Bearer'), auth['access_token'])
|
||||
entries = OnDemandPagedList(functools.partial(
|
||||
self._fetch_page, playlist_id, authorizaion), self._PAGE_SIZE)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id,
|
||||
self._og_search_title(webpage))
|
||||
|
||||
|
||||
class DailymotionUserIE(DailymotionBaseInfoExtractor):
|
||||
IE_NAME = 'dailymotion:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
|
||||
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
|
||||
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||
'info_dict': {
|
||||
'id': 'nqtv',
|
||||
'title': 'Rémi Gaillard',
|
||||
},
|
||||
'playlist_mincount': 100,
|
||||
}, {
|
||||
'url': 'http://www.dailymotion.com/user/UnderProject',
|
||||
'info_dict': {
|
||||
'id': 'UnderProject',
|
||||
'title': 'UnderProject',
|
||||
},
|
||||
'playlist_mincount': 1800,
|
||||
'expected_warnings': [
|
||||
'Stopped at duplicated page',
|
||||
],
|
||||
'skip': 'Takes too long time',
|
||||
}]
|
||||
|
||||
def _extract_entries(self, id):
|
||||
video_ids = set()
|
||||
@ -378,43 +458,6 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'entries': self._extract_entries(playlist_id),
|
||||
}
|
||||
|
||||
|
||||
class DailymotionUserIE(DailymotionPlaylistIE):
|
||||
IE_NAME = 'dailymotion:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
|
||||
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||
'info_dict': {
|
||||
'id': 'nqtv',
|
||||
'title': 'Rémi Gaillard',
|
||||
},
|
||||
'playlist_mincount': 100,
|
||||
}, {
|
||||
'url': 'http://www.dailymotion.com/user/UnderProject',
|
||||
'info_dict': {
|
||||
'id': 'UnderProject',
|
||||
'title': 'UnderProject',
|
||||
},
|
||||
'playlist_mincount': 1800,
|
||||
'expected_warnings': [
|
||||
'Stopped at duplicated page',
|
||||
],
|
||||
'skip': 'Takes too long time',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
user = mobj.group('user')
|
||||
|
@ -5,13 +5,16 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
unified_strdate,
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class DctpTvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# 4x3
|
||||
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
|
||||
'info_dict': {
|
||||
'id': '95eaa4f33dad413aa17b4ee613cccc6c',
|
||||
@ -19,37 +22,55 @@ class DctpTvIE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
'title': 'Videoinstallation für eine Kaufhausfassade',
|
||||
'description': 'Kurzfilm',
|
||||
'upload_date': '20110407',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 71.24,
|
||||
'timestamp': 1302172322,
|
||||
'upload_date': '20110407',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# 16x9
|
||||
'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_BASE_URL = 'http://dctp-ivms2-restapi.s3.amazonaws.com'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
version = self._download_json(
|
||||
'%s/version.json' % self._BASE_URL, display_id,
|
||||
'Downloading version JSON')
|
||||
|
||||
video_id = self._html_search_meta(
|
||||
'DC.identifier', webpage, 'video id',
|
||||
default=None) or self._search_regex(
|
||||
r'id=["\']uuid[^>]+>([^<]+)<', webpage, 'video id')
|
||||
restapi_base = '%s/%s/restapi' % (
|
||||
self._BASE_URL, version['version_name'])
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
info = self._download_json(
|
||||
'%s/slugs/%s.json' % (restapi_base, display_id), display_id,
|
||||
'Downloading video info JSON')
|
||||
|
||||
media = self._download_json(
|
||||
'%s/media/%s.json' % (restapi_base, compat_str(info['object_id'])),
|
||||
display_id, 'Downloading media JSON')
|
||||
|
||||
uuid = media['uuid']
|
||||
title = media['title']
|
||||
ratio = '16x9' if media.get('is_wide') else '4x3'
|
||||
play_path = 'mp4:%s_dctp_0500_%s.m4v' % (uuid, ratio)
|
||||
|
||||
servers = self._download_json(
|
||||
'http://www.dctp.tv/streaming_servers/', display_id,
|
||||
note='Downloading server list', fatal=False)
|
||||
note='Downloading server list JSON', fatal=False)
|
||||
|
||||
if servers:
|
||||
endpoint = next(
|
||||
server['endpoint']
|
||||
for server in servers
|
||||
if isinstance(server.get('endpoint'), compat_str) and
|
||||
if url_or_none(server.get('endpoint')) and
|
||||
'cloudfront' in server['endpoint'])
|
||||
else:
|
||||
endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
|
||||
@ -60,27 +81,35 @@ class DctpTvIE(InfoExtractor):
|
||||
formats = [{
|
||||
'url': endpoint,
|
||||
'app': app,
|
||||
'play_path': 'mp4:%s_dctp_0500_4x3.m4v' % video_id,
|
||||
'play_path': play_path,
|
||||
'page_url': url,
|
||||
'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-109.swf',
|
||||
'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-110.swf',
|
||||
'ext': 'flv',
|
||||
}]
|
||||
|
||||
description = self._html_search_meta('DC.description', webpage)
|
||||
upload_date = unified_strdate(
|
||||
self._html_search_meta('DC.date.created', webpage))
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
duration = float_or_none(self._search_regex(
|
||||
r'id=["\']duration_in_ms[^+]>(\d+)', webpage, 'duration',
|
||||
default=None), scale=1000)
|
||||
thumbnails = []
|
||||
images = media.get('images')
|
||||
if isinstance(images, list):
|
||||
for image in images:
|
||||
if not isinstance(image, dict):
|
||||
continue
|
||||
image_url = url_or_none(image.get('url'))
|
||||
if not image_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': image_url,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'id': uuid,
|
||||
'display_id': display_id,
|
||||
'description': description,
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'title': title,
|
||||
'alt_title': media.get('subtitle'),
|
||||
'description': media.get('description') or media.get('teaser'),
|
||||
'timestamp': unified_timestamp(media.get('created')),
|
||||
'duration': float_or_none(media.get('duration_in_ms'), scale=1000),
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
@ -12,6 +11,7 @@ from ..utils import (
|
||||
parse_age_limit,
|
||||
remove_end,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -69,9 +69,8 @@ class DiscoveryGoBaseIE(InfoExtractor):
|
||||
captions = stream.get('captions')
|
||||
if isinstance(captions, list):
|
||||
for caption in captions:
|
||||
subtitle_url = caption.get('fileUrl')
|
||||
if (not subtitle_url or not isinstance(subtitle_url, compat_str) or
|
||||
not subtitle_url.startswith('http')):
|
||||
subtitle_url = url_or_none(caption.get('fileUrl'))
|
||||
if not subtitle_url or not subtitle_url.startswith('http'):
|
||||
continue
|
||||
lang = caption.get('fileLang', 'en')
|
||||
ext = determine_ext(subtitle_url)
|
||||
|
@ -21,6 +21,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
urljoin,
|
||||
USER_AGENTS,
|
||||
)
|
||||
|
||||
@ -310,9 +311,11 @@ class DPlayItIE(InfoExtractor):
|
||||
|
||||
if not info:
|
||||
info_url = self._search_regex(
|
||||
r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
|
||||
webpage, 'info url')
|
||||
(r'playback_json_url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
r'url\s*[:=]\s*["\'](?P<url>(?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)'),
|
||||
webpage, 'info url', group='url')
|
||||
|
||||
info_url = urljoin(url, info_url)
|
||||
video_id = info_url.rpartition('/')[-1]
|
||||
|
||||
try:
|
||||
@ -322,6 +325,8 @@ class DPlayItIE(InfoExtractor):
|
||||
'dplayit_token').value,
|
||||
'Referer': url,
|
||||
})
|
||||
if isinstance(info, compat_str):
|
||||
info = self._parse_json(info, display_id)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403):
|
||||
info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
|
||||
@ -337,6 +342,7 @@ class DPlayItIE(InfoExtractor):
|
||||
formats = self._extract_m3u8_formats(
|
||||
hls_url, display_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
series = self._html_search_regex(
|
||||
r'(?s)<h1[^>]+class=["\'].*?\bshow_title\b.*?["\'][^>]*>(.+?)</h1>',
|
||||
|
@ -7,7 +7,6 @@ import json
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@ -17,6 +16,7 @@ from ..utils import (
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -139,8 +139,8 @@ class DramaFeverIE(DramaFeverBaseIE):
|
||||
for sub in subs:
|
||||
if not isinstance(sub, dict):
|
||||
continue
|
||||
sub_url = sub.get('url')
|
||||
if not sub_url or not isinstance(sub_url, compat_str):
|
||||
sub_url = url_or_none(sub.get('url'))
|
||||
if not sub_url:
|
||||
continue
|
||||
subtitles.setdefault(
|
||||
sub.get('code') or sub.get('language') or 'en', []).append({
|
||||
@ -163,8 +163,8 @@ class DramaFeverIE(DramaFeverBaseIE):
|
||||
for format_id, format_dict in download_assets.items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
format_url = format_dict.get('url')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(format_dict.get('url'))
|
||||
if not format_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
|
@ -4,14 +4,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -177,7 +175,7 @@ class EaglePlatformIE(InfoExtractor):
|
||||
video_id, 'Downloading mp4 JSON', fatal=False)
|
||||
if mp4_data:
|
||||
for format_id, format_url in mp4_data.get('data', {}).items():
|
||||
if not isinstance(format_url, compat_str):
|
||||
if not url_or_none(format_url):
|
||||
continue
|
||||
height = int_or_none(format_id)
|
||||
if height is not None and m3u8_formats_dict.get(height):
|
||||
|
@ -8,6 +8,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -34,8 +35,8 @@ class EggheadCourseIE(InfoExtractor):
|
||||
|
||||
entries = []
|
||||
for lesson in lessons:
|
||||
lesson_url = lesson.get('http_url')
|
||||
if not lesson_url or not isinstance(lesson_url, compat_str):
|
||||
lesson_url = url_or_none(lesson.get('http_url'))
|
||||
if not lesson_url:
|
||||
continue
|
||||
lesson_id = lesson.get('id')
|
||||
if lesson_id:
|
||||
@ -95,7 +96,8 @@ class EggheadLessonIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for _, format_url in lesson['media_urls'].items():
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'm3u8':
|
||||
|
@ -11,6 +11,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -82,8 +83,8 @@ class EpornerIE(InfoExtractor):
|
||||
for format_id, format_dict in formats_dict.items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
src = format_dict.get('src')
|
||||
if not isinstance(src, compat_str) or not src.startswith('http'):
|
||||
src = url_or_none(format_dict.get('src'))
|
||||
if not src or not src.startswith('http'):
|
||||
continue
|
||||
if kind == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
|
77
youtube_dl/extractor/expressen.py
Normal file
77
youtube_dl/extractor/expressen.py
Normal file
@ -0,0 +1,77 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class ExpressenIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?expressen\.se/tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
|
||||
'md5': '2fbbe3ca14392a6b1b36941858d33a45',
|
||||
'info_dict': {
|
||||
'id': '8690962',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ledarsnack: Om arbetslösheten bland kvinnor i speciellt utsatta områden',
|
||||
'description': 'md5:f38c81ff69f3de4d269bbda012fcbbba',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 788,
|
||||
'timestamp': 1526639109,
|
||||
'upload_date': '20180518',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
def extract_data(name):
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1' % name,
|
||||
webpage, 'info', group='value'),
|
||||
display_id, transform_source=unescapeHTML)
|
||||
|
||||
info = extract_data('video-tracking-info')
|
||||
video_id = info['videoId']
|
||||
|
||||
data = extract_data('article-data')
|
||||
stream = data['stream']
|
||||
|
||||
if determine_ext(stream) == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
stream, display_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
else:
|
||||
formats = [{
|
||||
'url': stream,
|
||||
}]
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = info.get('titleRaw') or data['title']
|
||||
description = info.get('descriptionRaw')
|
||||
thumbnail = info.get('socialMediaImage') or data.get('image')
|
||||
duration = int_or_none(info.get('videoTotalSecondsDuration') or
|
||||
data.get('totalSecondsDuration'))
|
||||
timestamp = unified_timestamp(info.get('publishDate'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
}
|
@ -335,6 +335,7 @@ from .esri import EsriVideoIE
|
||||
from .europa import EuropaIE
|
||||
from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .expotv import ExpoTVIE
|
||||
from .expressen import ExpressenIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
from .eyedotv import EyedoTVIE
|
||||
from .facebook import (
|
||||
@ -372,7 +373,6 @@ from .foxgay import FoxgayIE
|
||||
from .foxnews import (
|
||||
FoxNewsIE,
|
||||
FoxNewsArticleIE,
|
||||
FoxNewsInsiderIE,
|
||||
)
|
||||
from .foxsports import FoxSportsIE
|
||||
from .franceculture import FranceCultureIE
|
||||
@ -390,6 +390,11 @@ from .francetv import (
|
||||
from .freesound import FreesoundIE
|
||||
from .freespeech import FreespeechIE
|
||||
from .freshlive import FreshLiveIE
|
||||
from .frontendmasters import (
|
||||
FrontendMastersIE,
|
||||
FrontendMastersLessonIE,
|
||||
FrontendMastersCourseIE
|
||||
)
|
||||
from .funimation import FunimationIE
|
||||
from .funk import (
|
||||
FunkMixIE,
|
||||
@ -589,6 +594,10 @@ from .mangomolo import (
|
||||
MangomoloLiveIE,
|
||||
)
|
||||
from .manyvids import ManyVidsIE
|
||||
from .markiza import (
|
||||
MarkizaIE,
|
||||
MarkizaPageIE,
|
||||
)
|
||||
from .massengeschmacktv import MassengeschmackTVIE
|
||||
from .matchtv import MatchTVIE
|
||||
from .mdr import MDRIE
|
||||
@ -759,7 +768,9 @@ from .nrk import (
|
||||
NRKSkoleIE,
|
||||
NRKTVIE,
|
||||
NRKTVDirekteIE,
|
||||
NRKTVEpisodeIE,
|
||||
NRKTVEpisodesIE,
|
||||
NRKTVSeasonIE,
|
||||
NRKTVSeriesIE,
|
||||
)
|
||||
from .ntvde import NTVDeIE
|
||||
@ -849,6 +860,10 @@ from .pornhub import (
|
||||
from .pornotube import PornotubeIE
|
||||
from .pornovoisines import PornoVoisinesIE
|
||||
from .pornoxo import PornoXOIE
|
||||
from .puhutv import (
|
||||
PuhuTVIE,
|
||||
PuhuTVSerieIE,
|
||||
)
|
||||
from .presstv import PressTVIE
|
||||
from .primesharetv import PrimeShareTVIE
|
||||
from .promptfile import PromptFileIE
|
||||
@ -1036,6 +1051,7 @@ from .stretchinternet import StretchInternetIE
|
||||
from .sunporno import SunPornoIE
|
||||
from .svt import (
|
||||
SVTIE,
|
||||
SVTPageIE,
|
||||
SVTPlayIE,
|
||||
SVTSeriesIE,
|
||||
)
|
||||
@ -1275,6 +1291,7 @@ from .viki import (
|
||||
VikiIE,
|
||||
VikiChannelIE,
|
||||
)
|
||||
from .viqeo import ViqeoIE
|
||||
from .viu import (
|
||||
ViuIE,
|
||||
ViuPlaylistIE,
|
||||
|
@ -20,6 +20,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
limit_length,
|
||||
parse_count,
|
||||
sanitized_Request,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
@ -75,7 +76,7 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '274175099429670',
|
||||
'ext': 'mp4',
|
||||
'title': 'Asif Nawab Butt posted a video to his Timeline.',
|
||||
'title': 're:^Asif Nawab Butt posted a video',
|
||||
'uploader': 'Asif Nawab Butt',
|
||||
'upload_date': '20140506',
|
||||
'timestamp': 1399398998,
|
||||
@ -133,7 +134,7 @@ class FacebookIE(InfoExtractor):
|
||||
}, {
|
||||
# have 1080P, but only up to 720p in swf params
|
||||
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
|
||||
'md5': '0d9813160b146b3bc8744e006027fcc6',
|
||||
'md5': '9571fae53d4165bbbadb17a94651dcdc',
|
||||
'info_dict': {
|
||||
'id': '10155529876156509',
|
||||
'ext': 'mp4',
|
||||
@ -142,6 +143,7 @@ class FacebookIE(InfoExtractor):
|
||||
'upload_date': '20161030',
|
||||
'uploader': 'CNN',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
|
||||
@ -149,7 +151,7 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '1417995061575415',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:a7b86ca673f51800cd54687b7f4012fe',
|
||||
'title': 'md5:1db063d6a8c13faa8da727817339c857',
|
||||
'timestamp': 1486648217,
|
||||
'upload_date': '20170209',
|
||||
'uploader': 'Yaroslav Korpan',
|
||||
@ -176,7 +178,7 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '1396382447100162',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:e2d2700afdf84e121f5d0f999bad13a3',
|
||||
'title': 'md5:19a428bbde91364e3de815383b54a235',
|
||||
'timestamp': 1486035494,
|
||||
'upload_date': '20170202',
|
||||
'uploader': 'Elisabeth Ahtn',
|
||||
@ -353,7 +355,6 @@ class FacebookIE(InfoExtractor):
|
||||
tahoe_data = self._download_webpage(
|
||||
self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
|
||||
data=urlencode_postdata({
|
||||
'__user': 0,
|
||||
'__a': 1,
|
||||
'__pc': self._search_regex(
|
||||
r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
|
||||
@ -361,6 +362,9 @@ class FacebookIE(InfoExtractor):
|
||||
'__rev': self._search_regex(
|
||||
r'client_revision["\']\s*:\s*(\d+),', webpage,
|
||||
'client revision', default='3944515'),
|
||||
'fb_dtsg': self._search_regex(
|
||||
r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"',
|
||||
webpage, 'dtsg token', default=''),
|
||||
}),
|
||||
headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
@ -426,6 +430,10 @@ class FacebookIE(InfoExtractor):
|
||||
'timestamp', default=None))
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
view_count = parse_count(self._search_regex(
|
||||
r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
|
||||
default=None))
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
@ -433,6 +441,7 @@ class FacebookIE(InfoExtractor):
|
||||
'uploader': uploader,
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': thumbnail,
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
||||
return webpage, info_dict
|
||||
|
@ -10,6 +10,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -88,8 +89,8 @@ class FirstTVIE(InfoExtractor):
|
||||
formats = []
|
||||
path = None
|
||||
for f in item.get('mbr', []):
|
||||
src = f.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
src = url_or_none(f.get('src'))
|
||||
if not src:
|
||||
continue
|
||||
tbr = int_or_none(self._search_regex(
|
||||
r'_(\d{3,})\.mp4', src, 'tbr', default=None))
|
||||
|
@ -58,6 +58,14 @@ class FoxNewsIE(AMPIE):
|
||||
},
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [
|
||||
mobj.group('url')
|
||||
for mobj in re.finditer(
|
||||
r'<(?:amp-)?iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.foxnews\.com/v/video-embed\.html?.*?\bvideo_id=\d+.*?)\1',
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
host, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
@ -68,21 +76,41 @@ class FoxNewsIE(AMPIE):
|
||||
|
||||
|
||||
class FoxNewsArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:insider\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
|
||||
IE_NAME = 'foxnews:article'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# data-video-id
|
||||
'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
|
||||
'md5': '62aa5a781b308fdee212ebb6f33ae7ef',
|
||||
'md5': '83d44e1aff1433e7a29a7b537d1700b5',
|
||||
'info_dict': {
|
||||
'id': '5116295019001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Trump and Clinton asked to defend positions on Iraq War',
|
||||
'description': 'Veterans react on \'The Kelly File\'',
|
||||
'timestamp': 1473299755,
|
||||
'timestamp': 1473301045,
|
||||
'upload_date': '20160908',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# iframe embed
|
||||
'url': 'http://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true',
|
||||
'info_dict': {
|
||||
'id': '5748266721001',
|
||||
'ext': 'flv',
|
||||
'title': 'Kyle Kashuv has a positive message for the Trump White House',
|
||||
'description': 'Marjory Stoneman Douglas student disagrees with classmates.',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 229,
|
||||
'timestamp': 1520594670,
|
||||
'upload_date': '20180309',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
@ -90,51 +118,10 @@ class FoxNewsArticleIE(InfoExtractor):
|
||||
|
||||
video_id = self._html_search_regex(
|
||||
r'data-video-id=([\'"])(?P<id>[^\'"]+)\1',
|
||||
webpage, 'video ID', group='id')
|
||||
webpage, 'video ID', group='id', default=None)
|
||||
if video_id:
|
||||
return self.url_result(
|
||||
'http://video.foxnews.com/v/' + video_id, FoxNewsIE.ie_key())
|
||||
|
||||
return self.url_result(
|
||||
'http://video.foxnews.com/v/' + video_id,
|
||||
FoxNewsIE.ie_key())
|
||||
|
||||
|
||||
class FoxNewsInsiderIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P<id>[a-z-]+)'
|
||||
IE_NAME = 'foxnews:insider'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
|
||||
'md5': 'a10c755e582d28120c62749b4feb4c0c',
|
||||
'info_dict': {
|
||||
'id': '5099377331001',
|
||||
'display_id': 'univ-wisconsin-student-group-pushing-silence-certain-words',
|
||||
'ext': 'mp4',
|
||||
'title': 'Student Group: Saying \'Politically Correct,\' \'Trash\' and \'Lame\' Is Offensive',
|
||||
'description': 'Is campus censorship getting out of control?',
|
||||
'timestamp': 1472168725,
|
||||
'upload_date': '20160825',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [FoxNewsIE.ie_key()],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
embed_url = self._html_search_meta('embedUrl', webpage, 'embed URL')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': FoxNewsIE.ie_key(),
|
||||
'url': embed_url,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
}
|
||||
FoxNewsIE._extract_urls(webpage)[0], FoxNewsIE.ie_key())
|
||||
|
@ -16,6 +16,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
from .dailymotion import DailymotionIE
|
||||
|
||||
@ -115,14 +116,13 @@ class FranceTVIE(InfoExtractor):
|
||||
|
||||
def sign(manifest_url, manifest_id):
|
||||
for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'):
|
||||
signed_url = self._download_webpage(
|
||||
signed_url = url_or_none(self._download_webpage(
|
||||
'https://%s/esi/TA' % host, video_id,
|
||||
'Downloading signed %s manifest URL' % manifest_id,
|
||||
fatal=False, query={
|
||||
'url': manifest_url,
|
||||
})
|
||||
if (signed_url and isinstance(signed_url, compat_str) and
|
||||
re.search(r'^(?:https?:)?//', signed_url)):
|
||||
}))
|
||||
if signed_url:
|
||||
return signed_url
|
||||
return manifest_url
|
||||
|
||||
|
263
youtube_dl/extractor/frontendmasters.py
Normal file
263
youtube_dl/extractor/frontendmasters.py
Normal file
@ -0,0 +1,263 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class FrontendMastersBaseIE(InfoExtractor):
|
||||
_API_BASE = 'https://api.frontendmasters.com/v1/kabuki'
|
||||
_LOGIN_URL = 'https://frontendmasters.com/login/'
|
||||
|
||||
_NETRC_MACHINE = 'frontendmasters'
|
||||
|
||||
_QUALITIES = {
|
||||
'low': {'width': 480, 'height': 360},
|
||||
'mid': {'width': 1280, 'height': 720},
|
||||
'high': {'width': 1920, 'height': 1080}
|
||||
}
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
|
||||
login_form = self._hidden_inputs(login_page)
|
||||
|
||||
login_form.update({
|
||||
'username': username,
|
||||
'password': password
|
||||
})
|
||||
|
||||
post_url = self._search_regex(
|
||||
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
|
||||
'post_url', default=self._LOGIN_URL, group='url')
|
||||
|
||||
if not post_url.startswith('http'):
|
||||
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
|
||||
|
||||
response = self._download_webpage(
|
||||
post_url, None, 'Logging in', data=urlencode_postdata(login_form),
|
||||
headers={'Content-Type': 'application/x-www-form-urlencoded'})
|
||||
|
||||
# Successful login
|
||||
if any(p in response for p in (
|
||||
'wp-login.php?action=logout', '>Logout')):
|
||||
return
|
||||
|
||||
error = self._html_search_regex(
|
||||
r'class=(["\'])(?:(?!\1).)*\bMessageAlert\b(?:(?!\1).)*\1[^>]*>(?P<error>[^<]+)<',
|
||||
response, 'error message', default=None, group='error')
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
|
||||
class FrontendMastersPageBaseIE(FrontendMastersBaseIE):
|
||||
def _download_course(self, course_name, url):
|
||||
return self._download_json(
|
||||
'%s/courses/%s' % (self._API_BASE, course_name), course_name,
|
||||
'Downloading course JSON', headers={'Referer': url})
|
||||
|
||||
@staticmethod
|
||||
def _extract_chapters(course):
|
||||
chapters = []
|
||||
lesson_elements = course.get('lessonElements')
|
||||
if isinstance(lesson_elements, list):
|
||||
chapters = [url_or_none(e) for e in lesson_elements if url_or_none(e)]
|
||||
return chapters
|
||||
|
||||
@staticmethod
|
||||
def _extract_lesson(chapters, lesson_id, lesson):
|
||||
title = lesson.get('title') or lesson_id
|
||||
display_id = lesson.get('slug')
|
||||
description = lesson.get('description')
|
||||
thumbnail = lesson.get('thumbnail')
|
||||
|
||||
chapter_number = None
|
||||
index = lesson.get('index')
|
||||
element_index = lesson.get('elementIndex')
|
||||
if (isinstance(index, int) and isinstance(element_index, int) and
|
||||
index < element_index):
|
||||
chapter_number = element_index - index
|
||||
chapter = (chapters[chapter_number - 1]
|
||||
if chapter_number - 1 < len(chapters) else None)
|
||||
|
||||
duration = None
|
||||
timestamp = lesson.get('timestamp')
|
||||
if isinstance(timestamp, compat_str):
|
||||
mobj = re.search(
|
||||
r'(?P<start>\d{1,2}:\d{1,2}:\d{1,2})\s*-(?P<end>\s*\d{1,2}:\d{1,2}:\d{1,2})',
|
||||
timestamp)
|
||||
if mobj:
|
||||
duration = parse_duration(mobj.group('end')) - parse_duration(
|
||||
mobj.group('start'))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'frontendmasters:%s' % lesson_id,
|
||||
'ie_key': FrontendMastersIE.ie_key(),
|
||||
'id': lesson_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'chapter': chapter,
|
||||
'chapter_number': chapter_number,
|
||||
}
|
||||
|
||||
|
||||
class FrontendMastersIE(FrontendMastersBaseIE):
|
||||
_VALID_URL = r'(?:frontendmasters:|https?://api\.frontendmasters\.com/v\d+/kabuki/video/)(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://api.frontendmasters.com/v1/kabuki/video/a2qogef6ba',
|
||||
'md5': '7f161159710d6b7016a4f4af6fcb05e2',
|
||||
'info_dict': {
|
||||
'id': 'a2qogef6ba',
|
||||
'ext': 'mp4',
|
||||
'title': 'a2qogef6ba',
|
||||
},
|
||||
'skip': 'Requires FrontendMasters account credentials',
|
||||
}, {
|
||||
'url': 'frontendmasters:a2qogef6ba',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
lesson_id = self._match_id(url)
|
||||
|
||||
source_url = '%s/video/%s/source' % (self._API_BASE, lesson_id)
|
||||
|
||||
formats = []
|
||||
for ext in ('webm', 'mp4'):
|
||||
for quality in ('low', 'mid', 'high'):
|
||||
resolution = self._QUALITIES[quality].copy()
|
||||
format_id = '%s-%s' % (ext, quality)
|
||||
format_url = self._download_json(
|
||||
source_url, lesson_id,
|
||||
'Downloading %s source JSON' % format_id, query={
|
||||
'f': ext,
|
||||
'r': resolution['height'],
|
||||
}, headers={
|
||||
'Referer': url,
|
||||
}, fatal=False)['url']
|
||||
|
||||
if not format_url:
|
||||
continue
|
||||
|
||||
f = resolution.copy()
|
||||
f.update({
|
||||
'url': format_url,
|
||||
'ext': ext,
|
||||
'format_id': format_id,
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {
|
||||
'en': [{
|
||||
'url': '%s/transcripts/%s.vtt' % (self._API_BASE, lesson_id),
|
||||
}]
|
||||
}
|
||||
|
||||
return {
|
||||
'id': lesson_id,
|
||||
'title': lesson_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles
|
||||
}
|
||||
|
||||
|
||||
class FrontendMastersLessonIE(FrontendMastersPageBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/(?P<course_name>[^/]+)/(?P<lesson_name>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'https://frontendmasters.com/courses/web-development/tools',
|
||||
'info_dict': {
|
||||
'id': 'a2qogef6ba',
|
||||
'display_id': 'tools',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tools',
|
||||
'description': 'md5:82c1ea6472e88ed5acd1829fe992e4f7',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'chapter': 'Introduction',
|
||||
'chapter_number': 1,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Requires FrontendMasters account credentials',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
course_name, lesson_name = mobj.group('course_name', 'lesson_name')
|
||||
|
||||
course = self._download_course(course_name, url)
|
||||
|
||||
lesson_id, lesson = next(
|
||||
(video_id, data)
|
||||
for video_id, data in course['lessonData'].items()
|
||||
if data.get('slug') == lesson_name)
|
||||
|
||||
chapters = self._extract_chapters(course)
|
||||
return self._extract_lesson(chapters, lesson_id, lesson)
|
||||
|
||||
|
||||
class FrontendMastersCourseIE(FrontendMastersPageBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'https://frontendmasters.com/courses/web-development/',
|
||||
'info_dict': {
|
||||
'id': 'web-development',
|
||||
'title': 'Introduction to Web Development',
|
||||
'description': 'md5:9317e6e842098bf725d62360e52d49a6',
|
||||
},
|
||||
'playlist_count': 81,
|
||||
'skip': 'Requires FrontendMasters account credentials',
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if FrontendMastersLessonIE.suitable(url) else super(
|
||||
FrontendMastersBaseIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
course_name = self._match_id(url)
|
||||
|
||||
course = self._download_course(course_name, url)
|
||||
|
||||
chapters = self._extract_chapters(course)
|
||||
|
||||
lessons = sorted(
|
||||
course['lessonData'].values(), key=lambda data: data['index'])
|
||||
|
||||
entries = []
|
||||
for lesson in lessons:
|
||||
lesson_name = lesson.get('slug')
|
||||
if not lesson_name:
|
||||
continue
|
||||
lesson_id = lesson.get('hash') or lesson.get('statsId')
|
||||
entries.append(self._extract_lesson(chapters, lesson_id, lesson))
|
||||
|
||||
title = course.get('title')
|
||||
description = course.get('description')
|
||||
|
||||
return self.playlist_result(entries, course_name, title, description)
|
@ -5,6 +5,7 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .nexx import NexxIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
@ -12,6 +13,19 @@ from ..utils import (
|
||||
|
||||
|
||||
class FunkBaseIE(InfoExtractor):
|
||||
_HEADERS = {
|
||||
'Accept': '*/*',
|
||||
'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8',
|
||||
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4',
|
||||
}
|
||||
_AUTH = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4'
|
||||
|
||||
@staticmethod
|
||||
def _make_headers(referer):
|
||||
headers = FunkBaseIE._HEADERS.copy()
|
||||
headers['Referer'] = referer
|
||||
return headers
|
||||
|
||||
def _make_url_result(self, video):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
@ -48,19 +62,19 @@ class FunkMixIE(FunkBaseIE):
|
||||
|
||||
lists = self._download_json(
|
||||
'https://www.funk.net/api/v3.1/curation/curatedLists/',
|
||||
mix_id, headers={
|
||||
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbC12Mi4wIiwic2NvcGUiOiJzdGF0aWMtY29udGVudC1hcGksY3VyYXRpb24tc2VydmljZSxzZWFyY2gtYXBpIn0.SGCC1IXHLtZYoo8PvRKlU2gXH1su8YSu47sB3S4iXBI',
|
||||
'Referer': url,
|
||||
}, query={
|
||||
mix_id, headers=self._make_headers(url), query={
|
||||
'size': 100,
|
||||
})['result']['lists']
|
||||
})['_embedded']['curatedListList']
|
||||
|
||||
metas = next(
|
||||
l for l in lists
|
||||
if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas']
|
||||
video = next(
|
||||
meta['videoDataDelegate']
|
||||
for meta in metas if meta.get('alias') == alias)
|
||||
for meta in metas
|
||||
if try_get(
|
||||
meta, lambda x: x['videoDataDelegate']['alias'],
|
||||
compat_str) == alias)
|
||||
|
||||
return self._make_url_result(video)
|
||||
|
||||
@ -104,25 +118,39 @@ class FunkChannelIE(FunkBaseIE):
|
||||
channel_id = mobj.group('id')
|
||||
alias = mobj.group('alias')
|
||||
|
||||
headers = {
|
||||
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbCIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxzZWFyY2gtYXBpIn0.q4Y2xZG8PFHai24-4Pjx2gym9RmJejtmK6lMXP5wAgc',
|
||||
'Referer': url,
|
||||
}
|
||||
headers = self._make_headers(url)
|
||||
|
||||
video = None
|
||||
|
||||
by_id_list = self._download_json(
|
||||
'https://www.funk.net/api/v3.0/content/videos/byIdList', channel_id,
|
||||
headers=headers, query={
|
||||
'ids': alias,
|
||||
# Id-based channels are currently broken on their side: webplayer
|
||||
# tries to process them via byChannelAlias endpoint and fails
|
||||
# predictably.
|
||||
by_channel_alias = self._download_json(
|
||||
'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
|
||||
% channel_id,
|
||||
'Downloading byChannelAlias JSON', headers=headers, query={
|
||||
'size': 100,
|
||||
}, fatal=False)
|
||||
if by_id_list:
|
||||
video = try_get(by_id_list, lambda x: x['result'][0], dict)
|
||||
if by_channel_alias:
|
||||
video_list = try_get(
|
||||
by_channel_alias, lambda x: x['_embedded']['videoList'], list)
|
||||
if video_list:
|
||||
video = next(r for r in video_list if r.get('alias') == alias)
|
||||
|
||||
if not video:
|
||||
by_id_list = self._download_json(
|
||||
'https://www.funk.net/api/v3.0/content/videos/byIdList',
|
||||
channel_id, 'Downloading byIdList JSON', headers=headers,
|
||||
query={
|
||||
'ids': alias,
|
||||
}, fatal=False)
|
||||
if by_id_list:
|
||||
video = try_get(by_id_list, lambda x: x['result'][0], dict)
|
||||
|
||||
if not video:
|
||||
results = self._download_json(
|
||||
'https://www.funk.net/api/v3.0/content/videos/filter', channel_id,
|
||||
headers=headers, query={
|
||||
'https://www.funk.net/api/v3.0/content/videos/filter',
|
||||
channel_id, 'Downloading filter JSON', headers=headers, query={
|
||||
'channelId': channel_id,
|
||||
'size': 100,
|
||||
})['result']
|
||||
|
@ -32,6 +32,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
url_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
from .commonprotocols import RtmpIE
|
||||
@ -111,6 +112,8 @@ from .cloudflarestream import CloudflareStreamIE
|
||||
from .peertube import PeerTubeIE
|
||||
from .indavideo import IndavideoEmbedIE
|
||||
from .apa import APAIE
|
||||
from .foxnews import FoxNewsIE
|
||||
from .viqeo import ViqeoIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@ -1394,17 +1397,6 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# SVT embed
|
||||
{
|
||||
'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
|
||||
'info_dict': {
|
||||
'id': '2900353',
|
||||
'ext': 'flv',
|
||||
'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
|
||||
'duration': 27,
|
||||
'age_limit': 0,
|
||||
},
|
||||
},
|
||||
# Crooks and Liars embed
|
||||
{
|
||||
'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
|
||||
@ -2069,6 +2061,15 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'skip': 'TODO: fix nested playlists processing in tests',
|
||||
},
|
||||
{
|
||||
# Viqeo embeds
|
||||
'url': 'https://viqeo.tv/',
|
||||
'info_dict': {
|
||||
'id': 'viqeo',
|
||||
'title': 'All-new video platform',
|
||||
},
|
||||
'playlist_count': 6,
|
||||
},
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
@ -3076,7 +3077,7 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
cloudflarestream_urls, video_id, video_title, ie=CloudflareStreamIE.ie_key())
|
||||
|
||||
peertube_urls = PeerTubeIE._extract_urls(webpage)
|
||||
peertube_urls = PeerTubeIE._extract_urls(webpage, url)
|
||||
if peertube_urls:
|
||||
return self.playlist_from_matches(
|
||||
peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
|
||||
@ -3091,6 +3092,11 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
apa_urls, video_id, video_title, ie=APAIE.ie_key())
|
||||
|
||||
foxnews_urls = FoxNewsIE._extract_urls(webpage)
|
||||
if foxnews_urls:
|
||||
return self.playlist_from_matches(
|
||||
foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
|
||||
|
||||
sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
|
||||
r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
|
||||
webpage)]
|
||||
@ -3098,6 +3104,11 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
sharevideos_urls, video_id, video_title)
|
||||
|
||||
viqeo_urls = ViqeoIE._extract_urls(webpage)
|
||||
if viqeo_urls:
|
||||
return self.playlist_from_matches(
|
||||
viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key())
|
||||
|
||||
# Look for HTML5 media
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||
if entries:
|
||||
@ -3135,8 +3146,8 @@ class GenericIE(InfoExtractor):
|
||||
sources = [sources]
|
||||
formats = []
|
||||
for source in sources:
|
||||
src = source.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
src = url_or_none(source.get('src'))
|
||||
if not src:
|
||||
continue
|
||||
src = compat_urlparse.urljoin(url, src)
|
||||
src_type = source.get('type')
|
||||
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
@ -14,8 +15,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class Go90IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?go90\.com/videos/(?P<id>[0-9a-zA-Z]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?go90\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.go90.com/videos/84BUqjLpf9D',
|
||||
'md5': 'efa7670dbbbf21a7b07b360652b24a32',
|
||||
'info_dict': {
|
||||
@ -27,15 +28,31 @@ class Go90IE(InfoExtractor):
|
||||
'upload_date': '20170411',
|
||||
'age_limit': 14,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.go90.com/embed/261MflWkD3N',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'https://www.go90.com/api/view/items/' + video_id,
|
||||
video_id, headers={
|
||||
|
||||
try:
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update({
|
||||
'Content-Type': 'application/json; charset=utf-8',
|
||||
}, data=b'{"client":"web","device_type":"pc"}')
|
||||
})
|
||||
video_data = self._download_json(
|
||||
'https://www.go90.com/api/view/items/' + video_id, video_id,
|
||||
headers=headers, data=b'{"client":"web","device_type":"pc"}')
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||
message = self._parse_json(e.cause.read().decode(), None)['error']['message']
|
||||
if 'region unavailable' in message:
|
||||
self.raise_geo_restricted(countries=['US'])
|
||||
raise ExtractorError(message, expected=True)
|
||||
raise
|
||||
|
||||
if video_data.get('requires_drm'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
main_video_asset = video_data['main_video_asset']
|
||||
|
@ -8,6 +8,7 @@ from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@ -80,8 +81,8 @@ class HiDiveIE(InfoExtractor):
|
||||
bitrates = rendition.get('bitrates')
|
||||
if not isinstance(bitrates, dict):
|
||||
continue
|
||||
m3u8_url = bitrates.get('hls')
|
||||
if not isinstance(m3u8_url, compat_str):
|
||||
m3u8_url = url_or_none(bitrates.get('hls'))
|
||||
if not m3u8_url:
|
||||
continue
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
@ -93,9 +94,8 @@ class HiDiveIE(InfoExtractor):
|
||||
if not isinstance(cc_file, list) or len(cc_file) < 3:
|
||||
continue
|
||||
cc_lang = cc_file[0]
|
||||
cc_url = cc_file[2]
|
||||
if not isinstance(cc_lang, compat_str) or not isinstance(
|
||||
cc_url, compat_str):
|
||||
cc_url = url_or_none(cc_file[2])
|
||||
if not isinstance(cc_lang, compat_str) or not cc_url:
|
||||
continue
|
||||
subtitles.setdefault(cc_lang, []).append({
|
||||
'url': cc_url,
|
||||
|
@ -3,12 +3,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
mimetype2ext,
|
||||
parse_duration,
|
||||
qualities,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -61,8 +61,8 @@ class ImdbIE(InfoExtractor):
|
||||
for encoding in video_metadata.get('encodings', []):
|
||||
if not encoding or not isinstance(encoding, dict):
|
||||
continue
|
||||
video_url = encoding.get('videoUrl')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
video_url = url_or_none(encoding.get('videoUrl'))
|
||||
if not video_url:
|
||||
continue
|
||||
ext = determine_ext(video_url, mimetype2ext(encoding.get('mimeType')))
|
||||
if ext == 'm3u8':
|
||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ImgurIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z0-9]+)?$'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
||||
@ -43,6 +43,9 @@ class ImgurIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://imgur.com/r/aww/VQcQPhM',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://i.imgur.com/crGpqCV.mp4',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -17,6 +17,7 @@ from ..utils import (
|
||||
lowercase_escape,
|
||||
std_headers,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -170,7 +171,7 @@ class InstagramIE(InfoExtractor):
|
||||
node = try_get(edge, lambda x: x['node'], dict)
|
||||
if not node:
|
||||
continue
|
||||
node_video_url = try_get(node, lambda x: x['video_url'], compat_str)
|
||||
node_video_url = url_or_none(node.get('video_url'))
|
||||
if not node_video_url:
|
||||
continue
|
||||
entries.append({
|
||||
|
@ -13,15 +13,17 @@ from ..compat import (
|
||||
compat_etree_register_namespace,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
smuggle_url,
|
||||
url_or_none,
|
||||
xpath_with_ns,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
smuggle_url,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
@ -129,64 +131,65 @@ class ITVIE(InfoExtractor):
|
||||
|
||||
resp_env = self._download_xml(
|
||||
params['data-playlist-url'], video_id,
|
||||
headers=headers, data=etree.tostring(req_env))
|
||||
playlist = xpath_element(resp_env, './/Playlist')
|
||||
if playlist is None:
|
||||
fault_code = xpath_text(resp_env, './/faultcode')
|
||||
fault_string = xpath_text(resp_env, './/faultstring')
|
||||
if fault_code == 'InvalidGeoRegion':
|
||||
self.raise_geo_restricted(
|
||||
msg=fault_string, countries=self._GEO_COUNTRIES)
|
||||
elif fault_code not in (
|
||||
'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'):
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, fault_string), expected=True)
|
||||
info.update({
|
||||
'title': self._og_search_title(webpage),
|
||||
'episode_title': params.get('data-video-episode'),
|
||||
'series': params.get('data-video-title'),
|
||||
})
|
||||
else:
|
||||
title = xpath_text(playlist, 'EpisodeTitle', default=None)
|
||||
info.update({
|
||||
'title': title,
|
||||
'episode_title': title,
|
||||
'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
|
||||
'series': xpath_text(playlist, 'ProgrammeTitle'),
|
||||
'duration': parse_duration(xpath_text(playlist, 'Duration')),
|
||||
})
|
||||
video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
|
||||
media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
|
||||
rtmp_url = media_files.attrib['base']
|
||||
headers=headers, data=etree.tostring(req_env), fatal=False)
|
||||
if resp_env:
|
||||
playlist = xpath_element(resp_env, './/Playlist')
|
||||
if playlist is None:
|
||||
fault_code = xpath_text(resp_env, './/faultcode')
|
||||
fault_string = xpath_text(resp_env, './/faultstring')
|
||||
if fault_code == 'InvalidGeoRegion':
|
||||
self.raise_geo_restricted(
|
||||
msg=fault_string, countries=self._GEO_COUNTRIES)
|
||||
elif fault_code not in (
|
||||
'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'):
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, fault_string), expected=True)
|
||||
info.update({
|
||||
'title': self._og_search_title(webpage),
|
||||
'episode_title': params.get('data-video-episode'),
|
||||
'series': params.get('data-video-title'),
|
||||
})
|
||||
else:
|
||||
title = xpath_text(playlist, 'EpisodeTitle', default=None)
|
||||
info.update({
|
||||
'title': title,
|
||||
'episode_title': title,
|
||||
'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
|
||||
'series': xpath_text(playlist, 'ProgrammeTitle'),
|
||||
'duration': parse_duration(xpath_text(playlist, 'Duration')),
|
||||
})
|
||||
video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
|
||||
media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
|
||||
rtmp_url = media_files.attrib['base']
|
||||
|
||||
for media_file in media_files.findall('MediaFile'):
|
||||
play_path = xpath_text(media_file, 'URL')
|
||||
if not play_path:
|
||||
continue
|
||||
tbr = int_or_none(media_file.get('bitrate'), 1000)
|
||||
f = {
|
||||
'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
|
||||
'play_path': play_path,
|
||||
# Providing this swfVfy allows to avoid truncated downloads
|
||||
'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
|
||||
'page_url': url,
|
||||
'tbr': tbr,
|
||||
'ext': 'flv',
|
||||
}
|
||||
app = self._search_regex(
|
||||
'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
|
||||
if app:
|
||||
f.update({
|
||||
'url': rtmp_url.split('?', 1)[0],
|
||||
'app': app,
|
||||
})
|
||||
else:
|
||||
f['url'] = rtmp_url
|
||||
formats.append(f)
|
||||
for media_file in media_files.findall('MediaFile'):
|
||||
play_path = xpath_text(media_file, 'URL')
|
||||
if not play_path:
|
||||
continue
|
||||
tbr = int_or_none(media_file.get('bitrate'), 1000)
|
||||
f = {
|
||||
'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
|
||||
'play_path': play_path,
|
||||
# Providing this swfVfy allows to avoid truncated downloads
|
||||
'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
|
||||
'page_url': url,
|
||||
'tbr': tbr,
|
||||
'ext': 'flv',
|
||||
}
|
||||
app = self._search_regex(
|
||||
'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
|
||||
if app:
|
||||
f.update({
|
||||
'url': rtmp_url.split('?', 1)[0],
|
||||
'app': app,
|
||||
})
|
||||
else:
|
||||
f['url'] = rtmp_url
|
||||
formats.append(f)
|
||||
|
||||
for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
|
||||
if caption_url.text:
|
||||
extract_subtitle(caption_url.text)
|
||||
for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
|
||||
if caption_url.text:
|
||||
extract_subtitle(caption_url.text)
|
||||
|
||||
ios_playlist_url = params.get('data-video-playlist') or params.get('data-video-id')
|
||||
hmac = params.get('data-video-hmac')
|
||||
@ -248,8 +251,8 @@ class ITVIE(InfoExtractor):
|
||||
for sub in subs:
|
||||
if not isinstance(sub, dict):
|
||||
continue
|
||||
href = sub.get('Href')
|
||||
if isinstance(href, compat_str):
|
||||
href = url_or_none(sub.get('Href'))
|
||||
if href:
|
||||
extract_subtitle(href)
|
||||
if not info.get('duration'):
|
||||
info['duration'] = parse_duration(video_data.get('Duration'))
|
||||
@ -261,7 +264,17 @@ class ITVIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
return info
|
||||
|
||||
webpage_info = self._search_json_ld(webpage, video_id, default={})
|
||||
if not webpage_info.get('title'):
|
||||
webpage_info['title'] = self._html_search_regex(
|
||||
r'(?s)<h\d+[^>]+\bclass=["\'][^>]*episode-title["\'][^>]*>([^<]+)<',
|
||||
webpage, 'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title',
|
||||
default=None) or webpage_info['episode']
|
||||
|
||||
return merge_dicts(info, webpage_info)
|
||||
|
||||
|
||||
class ITVBTCCIE(InfoExtractor):
|
||||
|
@ -12,6 +12,7 @@ from ..utils import (
|
||||
get_element_by_class,
|
||||
get_element_by_id,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -108,11 +109,14 @@ class IwaraIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for a_format in video_data:
|
||||
format_uri = url_or_none(a_format.get('uri'))
|
||||
if not format_uri:
|
||||
continue
|
||||
format_id = a_format.get('resolution')
|
||||
height = int_or_none(self._search_regex(
|
||||
r'(\d+)p', format_id, 'height', default=None))
|
||||
formats.append({
|
||||
'url': a_format['uri'],
|
||||
'url': self._proto_relative_url(format_uri, 'https:'),
|
||||
'format_id': format_id,
|
||||
'ext': mimetype2ext(a_format.get('mime')) or 'mp4',
|
||||
'height': height,
|
||||
|
@ -18,7 +18,7 @@ class JojIE(InfoExtractor):
|
||||
joj:|
|
||||
https?://media\.joj\.sk/embed/
|
||||
)
|
||||
(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})
|
||||
(?P<id>[^/?#^]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
|
||||
@ -29,16 +29,24 @@ class JojIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 3118,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://media.joj.sk/embed/9i1cxv',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'joj:9i1cxv',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//media\.joj\.sk/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
|
||||
webpage)
|
||||
return [
|
||||
mobj.group('url')
|
||||
for mobj in re.finditer(
|
||||
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//media\.joj\.sk/embed/(?:(?!\1).)+)\1',
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@ -4,16 +4,14 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_decrypt_text
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
strip_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -55,7 +53,8 @@ class KeezMoviesIE(InfoExtractor):
|
||||
encrypted = False
|
||||
|
||||
def extract_format(format_url, height=None):
|
||||
if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//')):
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url or not format_url.startswith(('http', '//')):
|
||||
return
|
||||
if format_url in format_urls:
|
||||
return
|
||||
|
@ -2,11 +2,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -109,7 +109,8 @@ class KonserthusetPlayIE(InfoExtractor):
|
||||
captions = source.get('captionsAvailableLanguages')
|
||||
if isinstance(captions, dict):
|
||||
for lang, subtitle_url in captions.items():
|
||||
if lang != 'none' and isinstance(subtitle_url, compat_str):
|
||||
subtitle_url = url_or_none(subtitle_url)
|
||||
if lang != 'none' and subtitle_url:
|
||||
subtitles.setdefault(lang, []).append({'url': subtitle_url})
|
||||
|
||||
return {
|
||||
|
@ -4,7 +4,6 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
@ -44,21 +43,15 @@ class LyndaBaseIE(InfoExtractor):
|
||||
form_data = self._hidden_inputs(form_html)
|
||||
form_data.update(extra_form_data)
|
||||
|
||||
try:
|
||||
response = self._download_json(
|
||||
action_url, None, note,
|
||||
data=urlencode_postdata(form_data),
|
||||
headers={
|
||||
'Referer': referrer_url,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
|
||||
response = self._parse_json(e.cause.read().decode('utf-8'), None)
|
||||
self._check_error(response, ('email', 'password'))
|
||||
raise
|
||||
response = self._download_json(
|
||||
action_url, None, note,
|
||||
data=urlencode_postdata(form_data),
|
||||
headers={
|
||||
'Referer': referrer_url,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
}, expected_status=(418, 500, ))
|
||||
|
||||
self._check_error(response, 'ErrorMessage')
|
||||
self._check_error(response, ('email', 'password', 'ErrorMessage'))
|
||||
|
||||
return response, action_url
|
||||
|
||||
|
125
youtube_dl/extractor/markiza.py
Normal file
125
youtube_dl/extractor/markiza.py
Normal file
@ -0,0 +1,125 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class MarkizaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?videoarchiv\.markiza\.sk/(?:video/(?:[^/]+/)*|embed/)(?P<id>\d+)(?:[_/]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723_oteckovia-109',
|
||||
'md5': 'ada4e9fad038abeed971843aa028c7b0',
|
||||
'info_dict': {
|
||||
'id': '139078',
|
||||
'ext': 'mp4',
|
||||
'title': 'Oteckovia 109',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 2760,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://videoarchiv.markiza.sk/video/televizne-noviny/televizne-noviny/85430_televizne-noviny',
|
||||
'info_dict': {
|
||||
'id': '85430',
|
||||
'title': 'Televízne noviny',
|
||||
},
|
||||
'playlist_count': 23,
|
||||
}, {
|
||||
'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://videoarchiv.markiza.sk/video/84723',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://videoarchiv.markiza.sk/video/filmy/85190_kamenak',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://videoarchiv.markiza.sk/video/reflex/zo-zakulisia/84651_pribeh-alzbetky',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://videoarchiv.markiza.sk/embed/85295',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
data = self._download_json(
|
||||
'http://videoarchiv.markiza.sk/json/video_jwplayer7.json',
|
||||
video_id, query={'id': video_id})
|
||||
|
||||
info = self._parse_jwplayer_data(data, m3u8_id='hls', mpd_id='dash')
|
||||
|
||||
if info.get('_type') == 'playlist':
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': try_get(
|
||||
data, lambda x: x['details']['name'], compat_str),
|
||||
})
|
||||
else:
|
||||
info['duration'] = parse_duration(
|
||||
try_get(data, lambda x: x['details']['duration'], compat_str))
|
||||
return info
|
||||
|
||||
|
||||
class MarkizaPageIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:(?:[^/]+\.)?markiza|tvnoviny)\.sk/(?:[^/]+/)*(?P<id>\d+)_'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.markiza.sk/soubiz/zahranicny/1923705_oteckovia-maju-svoj-den-ti-slavni-nie-su-o-nic-menej-rozkosni',
|
||||
'md5': 'ada4e9fad038abeed971843aa028c7b0',
|
||||
'info_dict': {
|
||||
'id': '139355',
|
||||
'ext': 'mp4',
|
||||
'title': 'Oteckovia 110',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 2604,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://dajto.markiza.sk/filmy-a-serialy/1774695_frajeri-vo-vegas',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://superstar.markiza.sk/aktualne/1923870_to-je-ale-telo-spevacka-ukazala-sexy-postavicku-v-bikinach',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://hybsa.markiza.sk/aktualne/1923790_uzasna-atmosfera-na-hybsa-v-poprade-superstaristi-si-prve-koncerty-pred-davom-ludi-poriadne-uzili',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://doma.markiza.sk/filmy/1885250_moja-vysnivana-svadba',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tvnoviny.sk/domace/1923887_po-smrti-manzela-ju-cakalo-poriadne-prekvapenie',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if MarkizaIE.suitable(url) else super(MarkizaPageIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
# Downloading for some hosts (e.g. dajto, doma) fails with 500
|
||||
# although everything seems to be OK, so considering 500
|
||||
# status code to be expected.
|
||||
url, playlist_id, expected_status=500)
|
||||
|
||||
entries = [
|
||||
self.url_result('http://videoarchiv.markiza.sk/video/%s' % video_id)
|
||||
for video_id in orderedSet(re.findall(
|
||||
r'(?:initPlayer_|data-entity=["\']|id=["\']player_)(\d+)',
|
||||
webpage))]
|
||||
|
||||
return self.playlist_result(entries, playlist_id)
|
@ -3,59 +3,75 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from .theplatform import ThePlatformBaseIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
parse_duration,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class MediasetIE(InfoExtractor):
|
||||
class MediasetIE(ThePlatformBaseIE):
|
||||
_TP_TLD = 'eu'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
mediaset:|
|
||||
https?://
|
||||
(?:www\.)?video\.mediaset\.it/
|
||||
(?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
|
||||
(?:
|
||||
(?:video|on-demand)/(?:[^/]+/)+[^/]+_|
|
||||
player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=
|
||||
player/index\.html\?.*?\bprogramGuid=
|
||||
)
|
||||
)(?P<id>[0-9]+)
|
||||
)(?P<id>[0-9A-Z]{16})
|
||||
'''
|
||||
_TESTS = [{
|
||||
# full episode
|
||||
'url': 'http://www.video.mediaset.it/video/hello_goodbye/full/quarta-puntata_661824.html',
|
||||
'url': 'https://www.mediasetplay.mediaset.it/video/hellogoodbye/quarta-puntata_FAFU000000661824',
|
||||
'md5': '9b75534d42c44ecef7bf1ffeacb7f85d',
|
||||
'info_dict': {
|
||||
'id': '661824',
|
||||
'id': 'FAFU000000661824',
|
||||
'ext': 'mp4',
|
||||
'title': 'Quarta puntata',
|
||||
'description': 'md5:7183696d6df570e3412a5ef74b27c5e2',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1414,
|
||||
'creator': 'mediaset',
|
||||
'duration': 1414.26,
|
||||
'upload_date': '20161107',
|
||||
'series': 'Hello Goodbye',
|
||||
'categories': ['reality'],
|
||||
'timestamp': 1478532900,
|
||||
'uploader': 'Rete 4',
|
||||
'uploader_id': 'R4',
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec'],
|
||||
}, {
|
||||
'url': 'https://www.mediasetplay.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501',
|
||||
'md5': '288532f0ad18307705b01e581304cd7b',
|
||||
'info_dict': {
|
||||
'id': 'F309013801000501',
|
||||
'ext': 'mp4',
|
||||
'title': 'Puntata del 25 maggio',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 6565.007,
|
||||
'upload_date': '20180526',
|
||||
'series': 'Matrix',
|
||||
'timestamp': 1527326245,
|
||||
'uploader': 'Canale 5',
|
||||
'uploader_id': 'C5',
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
}, {
|
||||
# clip
|
||||
'url': 'http://www.video.mediaset.it/video/gogglebox/clip/un-grande-classico-della-commedia-sexy_661680.html',
|
||||
'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# iframe simple
|
||||
'url': 'http://www.video.mediaset.it/player/playerIFrame.shtml?id=665924&autoplay=true',
|
||||
'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665924&id=665924',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/)
|
||||
'url': 'https://www.video.mediaset.it/player/playerIFrameTwitter.shtml?id=665104&playrelated=false&autoplay=false&related=true&hidesocial=true',
|
||||
'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665104&id=665104',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'mediaset:661824',
|
||||
'url': 'mediaset:FAFU000000665924',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@ -68,51 +84,54 @@ class MediasetIE(InfoExtractor):
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video_list = self._download_json(
|
||||
'http://cdnsel01.mediaset.net/GetCdn.aspx',
|
||||
video_id, 'Downloading video CDN JSON', query={
|
||||
'streamid': video_id,
|
||||
'format': 'json',
|
||||
})['videoList']
|
||||
guid = self._match_id(url)
|
||||
tp_path = 'PR1GhC/media/guid/2702976343/' + guid
|
||||
info = self._extract_theplatform_metadata(tp_path, guid)
|
||||
|
||||
formats = []
|
||||
for format_url in video_list:
|
||||
if '.ism' in format_url:
|
||||
formats.extend(self._extract_ism_formats(
|
||||
format_url, video_id, ism_id='mss', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': determine_ext(format_url),
|
||||
})
|
||||
subtitles = {}
|
||||
first_e = None
|
||||
for asset_type in ('SD', 'HD'):
|
||||
for f in ('MPEG4', 'MPEG-DASH', 'M3U', 'ISM'):
|
||||
try:
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), {
|
||||
'mbr': 'true',
|
||||
'formats': f,
|
||||
'assetTypes': asset_type,
|
||||
}), guid, 'Downloading %s %s SMIL data' % (f, asset_type))
|
||||
except ExtractorError as e:
|
||||
if not first_e:
|
||||
first_e = e
|
||||
break
|
||||
for tp_f in tp_formats:
|
||||
tp_f['quality'] = 1 if asset_type == 'HD' else 0
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
if first_e and not formats:
|
||||
raise first_e
|
||||
self._sort_formats(formats)
|
||||
|
||||
mediainfo = self._download_json(
|
||||
'http://plr.video.mediaset.it/html/metainfo.sjson',
|
||||
video_id, 'Downloading video info JSON', query={
|
||||
'id': video_id,
|
||||
})['video']
|
||||
fields = []
|
||||
for templ, repls in (('tvSeason%sNumber', ('', 'Episode')), ('mediasetprogram$%s', ('brandTitle', 'numberOfViews', 'publishInfo'))):
|
||||
fields.extend(templ % repl for repl in repls)
|
||||
feed_data = self._download_json(
|
||||
'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs/guid/-/' + guid,
|
||||
guid, fatal=False, query={'fields': ','.join(fields)})
|
||||
if feed_data:
|
||||
publish_info = feed_data.get('mediasetprogram$publishInfo') or {}
|
||||
info.update({
|
||||
'episode_number': int_or_none(feed_data.get('tvSeasonEpisodeNumber')),
|
||||
'season_number': int_or_none(feed_data.get('tvSeasonNumber')),
|
||||
'series': feed_data.get('mediasetprogram$brandTitle'),
|
||||
'uploader': publish_info.get('description'),
|
||||
'uploader_id': publish_info.get('channel'),
|
||||
'view_count': int_or_none(feed_data.get('mediasetprogram$numberOfViews')),
|
||||
})
|
||||
|
||||
title = mediainfo['title']
|
||||
|
||||
creator = try_get(
|
||||
mediainfo, lambda x: x['brand-info']['publisher'], compat_str)
|
||||
category = try_get(
|
||||
mediainfo, lambda x: x['brand-info']['category'], compat_str)
|
||||
categories = [category] if category else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': mediainfo.get('short-description'),
|
||||
'thumbnail': mediainfo.get('thumbnail'),
|
||||
'duration': parse_duration(mediainfo.get('duration')),
|
||||
'creator': creator,
|
||||
'upload_date': unified_strdate(mediainfo.get('production-date')),
|
||||
'webpage_url': mediainfo.get('url'),
|
||||
'series': mediainfo.get('brand-value'),
|
||||
'categories': categories,
|
||||
info.update({
|
||||
'id': guid,
|
||||
'formats': formats,
|
||||
}
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
return info
|
||||
|
@ -15,6 +15,7 @@ from ..utils import (
|
||||
mimetype2ext,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
@ -156,8 +157,8 @@ class MediasiteIE(InfoExtractor):
|
||||
|
||||
stream_formats = []
|
||||
for unum, VideoUrl in enumerate(video_urls):
|
||||
video_url = VideoUrl.get('Location')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
video_url = url_or_none(VideoUrl.get('Location'))
|
||||
if not video_url:
|
||||
continue
|
||||
# XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS
|
||||
|
||||
|
@ -1,84 +1,14 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
extract_attributes,
|
||||
determine_ext,
|
||||
smuggle_url,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class MiTeleBaseIE(InfoExtractor):
|
||||
def _get_player_info(self, url, webpage):
|
||||
player_data = extract_attributes(self._search_regex(
|
||||
r'(?s)(<ms-video-player.+?</ms-video-player>)',
|
||||
webpage, 'ms video player'))
|
||||
video_id = player_data['data-media-id']
|
||||
if player_data.get('data-cms-id') == 'ooyala':
|
||||
return self.url_result(
|
||||
'ooyala:%s' % video_id, ie=OoyalaIE.ie_key(), video_id=video_id)
|
||||
config_url = compat_urlparse.urljoin(url, player_data['data-config'])
|
||||
config = self._download_json(
|
||||
config_url, video_id, 'Downloading config JSON')
|
||||
mmc_url = config['services']['mmc']
|
||||
|
||||
duration = None
|
||||
formats = []
|
||||
for m_url in (mmc_url, mmc_url.replace('/flash.json', '/html5.json')):
|
||||
mmc = self._download_json(
|
||||
m_url, video_id, 'Downloading mmc JSON')
|
||||
if not duration:
|
||||
duration = int_or_none(mmc.get('duration'))
|
||||
for location in mmc['locations']:
|
||||
gat = self._proto_relative_url(location.get('gat'), 'http:')
|
||||
gcp = location.get('gcp')
|
||||
ogn = location.get('ogn')
|
||||
if None in (gat, gcp, ogn):
|
||||
continue
|
||||
token_data = {
|
||||
'gcp': gcp,
|
||||
'ogn': ogn,
|
||||
'sta': 0,
|
||||
}
|
||||
media = self._download_json(
|
||||
gat, video_id, data=json.dumps(token_data).encode('utf-8'),
|
||||
headers={
|
||||
'Content-Type': 'application/json;charset=utf-8',
|
||||
'Referer': url,
|
||||
})
|
||||
stream = media.get('stream') or media.get('file')
|
||||
if not stream:
|
||||
continue
|
||||
ext = determine_ext(stream)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'thumbnail': player_data.get('data-poster') or config.get('poster', {}).get('imageUrl'),
|
||||
'duration': duration,
|
||||
}
|
||||
|
||||
|
||||
class MiTeleIE(InfoExtractor):
|
||||
IE_DESC = 'mitele.es'
|
||||
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
|
||||
@ -86,7 +16,7 @@ class MiTeleIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
|
||||
'info_dict': {
|
||||
'id': '57b0dfb9c715da65618b4afa',
|
||||
'id': 'FhYW1iNTE6J6H7NkQRIEzfne6t2quqPg',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tor, la web invisible',
|
||||
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
|
||||
@ -104,7 +34,7 @@ class MiTeleIE(InfoExtractor):
|
||||
# no explicit title
|
||||
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
|
||||
'info_dict': {
|
||||
'id': '57b0de3dc915da14058b4876',
|
||||
'id': 'oyNG1iNTE6TAPP-JmCjbwfwJqqMMX3Vq',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cuarto Milenio Temporada 6 Programa 226',
|
||||
'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
|
||||
@ -128,40 +58,21 @@ class MiTeleIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
gigya_url = self._search_regex(
|
||||
r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s+src="([^"]*)">[^>]*</script>',
|
||||
webpage, 'gigya', default=None)
|
||||
gigya_sc = self._download_webpage(
|
||||
compat_urlparse.urljoin('http://www.mitele.es/', gigya_url),
|
||||
video_id, 'Downloading gigya script')
|
||||
|
||||
# Get a appKey/uuid for getting the session key
|
||||
appKey = self._search_regex(
|
||||
r'constant\s*\(\s*["\']_appGridApplicationKey["\']\s*,\s*["\']([0-9a-f]+)',
|
||||
gigya_sc, 'appKey')
|
||||
|
||||
session_json = self._download_json(
|
||||
'https://appgrid-api.cloud.accedo.tv/session',
|
||||
video_id, 'Downloading session keys', query={
|
||||
'appKey': appKey,
|
||||
'uuid': compat_str(uuid.uuid4()),
|
||||
})
|
||||
|
||||
paths = self._download_json(
|
||||
'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration',
|
||||
video_id, 'Downloading paths JSON',
|
||||
query={'sessionKey': compat_str(session_json['sessionKey'])})
|
||||
'https://www.mitele.es/amd/agp/web/metadata/general_configuration',
|
||||
video_id, 'Downloading paths JSON')
|
||||
|
||||
ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search']
|
||||
base_url = ooyala_s.get('base_url', 'cdn-search-mediaset.carbyne.ps.ooyala.com')
|
||||
full_path = ooyala_s.get('full_path', '/search/v1/full/providers/')
|
||||
source = self._download_json(
|
||||
'http://%s%s%s/docs/%s' % (
|
||||
ooyala_s['base_url'], ooyala_s['full_path'],
|
||||
ooyala_s['provider_id'], video_id),
|
||||
'%s://%s%s%s/docs/%s' % (
|
||||
ooyala_s.get('protocol', 'https'), base_url, full_path,
|
||||
ooyala_s.get('provider_id', '104951'), video_id),
|
||||
video_id, 'Downloading data JSON', query={
|
||||
'include_titles': 'Series,Season',
|
||||
'product_name': 'test',
|
||||
'product_name': ooyala_s.get('product_name', 'test'),
|
||||
'format': 'full',
|
||||
})['hits']['hits'][0]['_source']
|
||||
|
||||
|
@ -77,8 +77,11 @@ class MotherlessIE(InfoExtractor):
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
|
||||
video_url = self._html_search_regex(
|
||||
r'setup\(\{\s+"file".+: "([^"]+)",', webpage, 'video URL')
|
||||
video_url = (self._html_search_regex(
|
||||
(r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
|
||||
webpage, 'video URL', default=None, group='url') or
|
||||
'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
|
||||
age_limit = self._rta_search(webpage)
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r'<strong>Views</strong>\s+([^<]+)<',
|
||||
@ -120,7 +123,7 @@ class MotherlessIE(InfoExtractor):
|
||||
|
||||
|
||||
class MotherlessGroupIE(InfoExtractor):
|
||||
_VALID_URL = 'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://motherless.com/g/movie_scenes',
|
||||
'info_dict': {
|
||||
|
@ -282,7 +282,7 @@ class NPOIE(NPOBaseIE):
|
||||
video_url = stream_info.get('url')
|
||||
if not video_url or video_url in urls:
|
||||
continue
|
||||
urls.add(item_url)
|
||||
urls.add(video_url)
|
||||
if determine_ext(video_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, ext='mp4',
|
||||
|
@ -4,12 +4,18 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
JSON_LD_RE,
|
||||
NO_DEFAULT,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@ -359,6 +365,182 @@ class NRKTVIE(NRKBaseIE):
|
||||
}]
|
||||
|
||||
|
||||
class NRKTVEpisodeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/\d+/episode/\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://tv.nrk.no/serie/backstage/sesong/1/episode/8',
|
||||
'info_dict': {
|
||||
'id': 'MSUI14000816AA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Backstage 8:30',
|
||||
'description': 'md5:de6ca5d5a2d56849e4021f2bf2850df4',
|
||||
'duration': 1320,
|
||||
'series': 'Backstage',
|
||||
'season_number': 1,
|
||||
'episode_number': 8,
|
||||
'episode': '8:30',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
nrk_id = self._parse_json(
|
||||
self._search_regex(JSON_LD_RE, webpage, 'JSON-LD', group='json_ld'),
|
||||
display_id)['@id']
|
||||
|
||||
assert re.match(NRKTVIE._EPISODE_RE, nrk_id)
|
||||
return self.url_result(
|
||||
'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id)
|
||||
|
||||
|
||||
class NRKTVSerieBaseIE(InfoExtractor):
|
||||
def _extract_series(self, webpage, display_id, fatal=True):
|
||||
config = self._parse_json(
|
||||
self._search_regex(
|
||||
r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>', webpage, 'config',
|
||||
default='{}' if not fatal else NO_DEFAULT),
|
||||
display_id, fatal=False)
|
||||
if not config:
|
||||
return
|
||||
return try_get(config, lambda x: x['series'], dict)
|
||||
|
||||
def _extract_episodes(self, season):
|
||||
entries = []
|
||||
if not isinstance(season, dict):
|
||||
return entries
|
||||
episodes = season.get('episodes')
|
||||
if not isinstance(episodes, list):
|
||||
return entries
|
||||
for episode in episodes:
|
||||
nrk_id = episode.get('prfId')
|
||||
if not nrk_id or not isinstance(nrk_id, compat_str):
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id))
|
||||
return entries
|
||||
|
||||
|
||||
class NRKTVSeasonIE(NRKTVSerieBaseIE):
|
||||
_VALID_URL = r'https?://tv\.nrk\.no/serie/[^/]+/sesong/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://tv.nrk.no/serie/backstage/sesong/1',
|
||||
'info_dict': {
|
||||
'id': '1',
|
||||
'title': 'Sesong 1',
|
||||
},
|
||||
'playlist_mincount': 30,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (False if NRKTVIE.suitable(url) or NRKTVEpisodeIE.suitable(url)
|
||||
else super(NRKTVSeasonIE, cls).suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
series = self._extract_series(webpage, display_id)
|
||||
|
||||
season = next(
|
||||
s for s in series['seasons']
|
||||
if int(display_id) == s.get('seasonNumber'))
|
||||
|
||||
title = try_get(season, lambda x: x['titles']['title'], compat_str)
|
||||
return self.playlist_result(
|
||||
self._extract_episodes(season), display_id, title)
|
||||
|
||||
|
||||
class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
|
||||
_ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# new layout
|
||||
'url': 'https://tv.nrk.no/serie/backstage',
|
||||
'info_dict': {
|
||||
'id': 'backstage',
|
||||
'title': 'Backstage',
|
||||
'description': 'md5:c3ec3a35736fca0f9e1207b5511143d3',
|
||||
},
|
||||
'playlist_mincount': 60,
|
||||
}, {
|
||||
# old layout
|
||||
'url': 'https://tv.nrk.no/serie/groenn-glede',
|
||||
'info_dict': {
|
||||
'id': 'groenn-glede',
|
||||
'title': 'Grønn glede',
|
||||
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
}, {
|
||||
'url': 'http://tv.nrksuper.no/serie/labyrint',
|
||||
'info_dict': {
|
||||
'id': 'labyrint',
|
||||
'title': 'Labyrint',
|
||||
'description': 'md5:58afd450974c89e27d5a19212eee7115',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/saving-the-human-race',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/postmann-pat',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (
|
||||
False if any(ie.suitable(url)
|
||||
for ie in (NRKTVIE, NRKTVEpisodeIE, NRKTVSeasonIE))
|
||||
else super(NRKTVSeriesIE, cls).suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, series_id)
|
||||
|
||||
# New layout (e.g. https://tv.nrk.no/serie/backstage)
|
||||
series = self._extract_series(webpage, series_id, fatal=False)
|
||||
if series:
|
||||
title = try_get(series, lambda x: x['titles']['title'], compat_str)
|
||||
description = try_get(
|
||||
series, lambda x: x['titles']['subtitle'], compat_str)
|
||||
entries = []
|
||||
for season in series['seasons']:
|
||||
entries.extend(self._extract_episodes(season))
|
||||
return self.playlist_result(entries, series_id, title, description)
|
||||
|
||||
# Old layout (e.g. https://tv.nrk.no/serie/groenn-glede)
|
||||
entries = [
|
||||
self.url_result(
|
||||
'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
|
||||
series=series_id, season=season_id))
|
||||
for season_id in re.findall(self._ITEM_RE, webpage)
|
||||
]
|
||||
|
||||
title = self._html_search_meta(
|
||||
'seriestitle', webpage,
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, fatal=False)
|
||||
|
||||
description = self._html_search_meta(
|
||||
'series_description', webpage,
|
||||
'description', default=None) or self._og_search_description(webpage)
|
||||
|
||||
return self.playlist_result(entries, series_id, title, description)
|
||||
|
||||
|
||||
class NRKTVDirekteIE(NRKTVIE):
|
||||
IE_DESC = 'NRK TV Direkte and NRK Radio Direkte'
|
||||
_VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P<id>[^/?#&]+)'
|
||||
@ -438,64 +620,6 @@ class NRKTVEpisodesIE(NRKPlaylistBaseIE):
|
||||
r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
|
||||
|
||||
|
||||
class NRKTVSeriesIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
|
||||
_ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.nrk.no/serie/groenn-glede',
|
||||
'info_dict': {
|
||||
'id': 'groenn-glede',
|
||||
'title': 'Grønn glede',
|
||||
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
}, {
|
||||
'url': 'http://tv.nrksuper.no/serie/labyrint',
|
||||
'info_dict': {
|
||||
'id': 'labyrint',
|
||||
'title': 'Labyrint',
|
||||
'description': 'md5:58afd450974c89e27d5a19212eee7115',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/saving-the-human-race',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/postmann-pat',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if NRKTVIE.suitable(url) else super(NRKTVSeriesIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, series_id)
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
|
||||
series=series_id, season=season_id))
|
||||
for season_id in re.findall(self._ITEM_RE, webpage)
|
||||
]
|
||||
|
||||
title = self._html_search_meta(
|
||||
'seriestitle', webpage,
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, fatal=False)
|
||||
|
||||
description = self._html_search_meta(
|
||||
'series_description', webpage,
|
||||
'description', default=None) or self._og_search_description(webpage)
|
||||
|
||||
return self.playlist_result(entries, series_id, title, description)
|
||||
|
||||
|
||||
class NRKSkoleIE(InfoExtractor):
|
||||
IE_DESC = 'NRK Skole'
|
||||
_VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)'
|
||||
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
@ -375,6 +376,35 @@ class PBSIE(InfoExtractor):
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
},
|
||||
{
|
||||
'url': 'https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/',
|
||||
'info_dict': {
|
||||
'id': '3007193718',
|
||||
'ext': 'mp4',
|
||||
'title': "Victoria - A Soldier's Daughter / The Green-Eyed Monster",
|
||||
'description': 'md5:37efbac85e0c09b009586523ec143652',
|
||||
'duration': 6292,
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
},
|
||||
{
|
||||
'url': 'https://player.pbs.org/partnerplayer/tOz9tM5ljOXQqIIWke53UA==/',
|
||||
'info_dict': {
|
||||
'id': '3011407934',
|
||||
'ext': 'mp4',
|
||||
'title': 'Stories from the Stage - Road Trip',
|
||||
'duration': 1619,
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
},
|
||||
{
|
||||
'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
|
||||
'only_matching': True,
|
||||
@ -438,6 +468,7 @@ class PBSIE(InfoExtractor):
|
||||
r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>', # jwplayer
|
||||
r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',",
|
||||
r'<div[^>]+\bdata-cove-id=["\'](\d+)"', # http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/
|
||||
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//video\.pbs\.org/widget/partnerplayer/(\d+)', # https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/
|
||||
]
|
||||
|
||||
media_id = self._search_regex(
|
||||
@ -472,7 +503,8 @@ class PBSIE(InfoExtractor):
|
||||
if not url:
|
||||
url = self._og_search_url(webpage)
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
mobj = re.match(
|
||||
self._VALID_URL, self._proto_relative_url(url.strip()))
|
||||
|
||||
player_id = mobj.group('player_id')
|
||||
if not display_id:
|
||||
@ -482,13 +514,27 @@ class PBSIE(InfoExtractor):
|
||||
url, display_id, note='Downloading player page',
|
||||
errnote='Could not download player page')
|
||||
video_id = self._search_regex(
|
||||
r'<div\s+id="video_([0-9]+)"', player_page, 'video ID')
|
||||
r'<div\s+id=["\']video_(\d+)', player_page, 'video ID',
|
||||
default=None)
|
||||
if not video_id:
|
||||
video_info = self._extract_video_data(
|
||||
player_page, 'video data', display_id)
|
||||
video_id = compat_str(
|
||||
video_info.get('id') or video_info['contentID'])
|
||||
else:
|
||||
video_id = mobj.group('id')
|
||||
display_id = video_id
|
||||
|
||||
return video_id, display_id, None, description
|
||||
|
||||
def _extract_video_data(self, string, name, video_id, fatal=True):
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
[r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
|
||||
r'window\.videoBridge\s*=\s*({.+?});'],
|
||||
string, name, default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=fatal)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, display_id, upload_date, description = self._extract_webpage(url)
|
||||
|
||||
@ -519,11 +565,8 @@ class PBSIE(InfoExtractor):
|
||||
'http://player.pbs.org/%s/%s' % (page, video_id),
|
||||
display_id, 'Downloading %s page' % page, fatal=False)
|
||||
if player:
|
||||
video_info = self._parse_json(
|
||||
self._search_regex(
|
||||
[r'(?s)PBS\.videoData\s*=\s*({.+?});\n', r'window\.videoBridge\s*=\s*({.+?});'],
|
||||
player, '%s video data' % page, default='{}'),
|
||||
display_id, transform_source=js_to_json, fatal=False)
|
||||
video_info = self._extract_video_data(
|
||||
player, '%s video data' % page, display_id, fatal=False)
|
||||
if video_info:
|
||||
extract_redirect_urls(video_info)
|
||||
if not info:
|
||||
|
@ -10,6 +10,7 @@ from ..utils import (
|
||||
parse_resolution,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
@ -116,12 +117,14 @@ class PeerTubeIE(InfoExtractor):
|
||||
videos\.tcit\.fr|
|
||||
peertube\.cpy\.re
|
||||
)'''
|
||||
_UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
%s
|
||||
/(?:videos/(?:watch|embed)|api/v\d/videos)/
|
||||
(?P<id>[^/?\#&]+)
|
||||
''' % _INSTANCES_RE
|
||||
(?:
|
||||
peertube:(?P<host>[^:]+):|
|
||||
https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos)/
|
||||
)
|
||||
(?P<id>%s)
|
||||
''' % (_INSTANCES_RE, _UUID_RE)
|
||||
_TESTS = [{
|
||||
'url': 'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
|
||||
'md5': '80f24ff364cc9d333529506a263e7feb',
|
||||
@ -157,21 +160,40 @@ class PeerTubeIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'peertube:video.blender.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [
|
||||
mobj.group('url')
|
||||
for mobj in re.finditer(
|
||||
r'''(?x)<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//%s/videos/embed/[^/?\#&]+)\1'''
|
||||
% PeerTubeIE._INSTANCES_RE, webpage)]
|
||||
def _extract_peertube_url(webpage, source_url):
|
||||
mobj = re.match(
|
||||
r'https?://(?P<host>[^/]+)/videos/watch/(?P<id>%s)'
|
||||
% PeerTubeIE._UUID_RE, source_url)
|
||||
if mobj and any(p in webpage for p in (
|
||||
'<title>PeerTube<',
|
||||
'There will be other non JS-based clients to access PeerTube',
|
||||
'>We are sorry but it seems that PeerTube is not compatible with your web browser.<')):
|
||||
return 'peertube:%s:%s' % mobj.group('host', 'id')
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage, source_url):
|
||||
entries = re.findall(
|
||||
r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)'''
|
||||
% (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE), webpage)
|
||||
if not entries:
|
||||
peertube_url = PeerTubeIE._extract_peertube_url(webpage, source_url)
|
||||
if peertube_url:
|
||||
entries = [peertube_url]
|
||||
return entries
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host') or mobj.group('host_2')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video = self._download_json(
|
||||
urljoin(url, '/api/v1/videos/%s' % video_id), video_id)
|
||||
'https://%s/api/v1/videos/%s' % (host, video_id), video_id)
|
||||
|
||||
title = video['name']
|
||||
|
||||
@ -179,8 +201,8 @@ class PeerTubeIE(InfoExtractor):
|
||||
for file_ in video['files']:
|
||||
if not isinstance(file_, dict):
|
||||
continue
|
||||
file_url = file_.get('fileUrl')
|
||||
if not file_url or not isinstance(file_url, compat_str):
|
||||
file_url = url_or_none(file_.get('fileUrl'))
|
||||
if not file_url:
|
||||
continue
|
||||
file_size = int_or_none(file_.get('size'))
|
||||
format_id = try_get(
|
||||
|
@ -27,6 +27,60 @@ from ..utils import (
|
||||
class PluralsightBaseIE(InfoExtractor):
|
||||
_API_BASE = 'https://app.pluralsight.com'
|
||||
|
||||
_GRAPHQL_EP = '%s/player/api/graphql' % _API_BASE
|
||||
_GRAPHQL_HEADERS = {
|
||||
'Content-Type': 'application/json;charset=UTF-8',
|
||||
}
|
||||
_GRAPHQL_COURSE_TMPL = '''
|
||||
query BootstrapPlayer {
|
||||
rpc {
|
||||
bootstrapPlayer {
|
||||
profile {
|
||||
firstName
|
||||
lastName
|
||||
email
|
||||
username
|
||||
userHandle
|
||||
authed
|
||||
isAuthed
|
||||
plan
|
||||
}
|
||||
course(courseId: "%s") {
|
||||
name
|
||||
title
|
||||
courseHasCaptions
|
||||
translationLanguages {
|
||||
code
|
||||
name
|
||||
}
|
||||
supportsWideScreenVideoFormats
|
||||
timestamp
|
||||
modules {
|
||||
name
|
||||
title
|
||||
duration
|
||||
formattedDuration
|
||||
author
|
||||
authorized
|
||||
clips {
|
||||
authorized
|
||||
clipId
|
||||
duration
|
||||
formattedDuration
|
||||
id
|
||||
index
|
||||
moduleIndex
|
||||
moduleTitle
|
||||
name
|
||||
title
|
||||
watched
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}'''
|
||||
|
||||
def _download_course(self, course_id, url, display_id):
|
||||
try:
|
||||
return self._download_course_rpc(course_id, url, display_id)
|
||||
@ -39,20 +93,14 @@ class PluralsightBaseIE(InfoExtractor):
|
||||
|
||||
def _download_course_rpc(self, course_id, url, display_id):
|
||||
response = self._download_json(
|
||||
'%s/player/functions/rpc' % self._API_BASE, display_id,
|
||||
'Downloading course JSON',
|
||||
data=json.dumps({
|
||||
'fn': 'bootstrapPlayer',
|
||||
'payload': {
|
||||
'courseId': course_id,
|
||||
},
|
||||
}).encode('utf-8'),
|
||||
headers={
|
||||
'Content-Type': 'application/json;charset=utf-8',
|
||||
'Referer': url,
|
||||
})
|
||||
self._GRAPHQL_EP, display_id, data=json.dumps({
|
||||
'query': self._GRAPHQL_COURSE_TMPL % course_id,
|
||||
'variables': {}
|
||||
}).encode('utf-8'), headers=self._GRAPHQL_HEADERS)
|
||||
|
||||
course = try_get(response, lambda x: x['payload']['course'], dict)
|
||||
course = try_get(
|
||||
response, lambda x: x['data']['rpc']['bootstrapPlayer']['course'],
|
||||
dict)
|
||||
if course:
|
||||
return course
|
||||
|
||||
@ -90,6 +138,28 @@ class PluralsightIE(PluralsightBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
GRAPHQL_VIEWCLIP_TMPL = '''
|
||||
query viewClip {
|
||||
viewClip(input: {
|
||||
author: "%(author)s",
|
||||
clipIndex: %(clipIndex)d,
|
||||
courseName: "%(courseName)s",
|
||||
includeCaptions: %(includeCaptions)s,
|
||||
locale: "%(locale)s",
|
||||
mediaType: "%(mediaType)s",
|
||||
moduleName: "%(moduleName)s",
|
||||
quality: "%(quality)s"
|
||||
}) {
|
||||
urls {
|
||||
url
|
||||
cdn
|
||||
rank
|
||||
source
|
||||
},
|
||||
status
|
||||
}
|
||||
}'''
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
@ -277,7 +347,7 @@ class PluralsightIE(PluralsightBaseIE):
|
||||
f = QUALITIES[quality].copy()
|
||||
clip_post = {
|
||||
'author': author,
|
||||
'includeCaptions': False,
|
||||
'includeCaptions': 'false',
|
||||
'clipIndex': int(clip_idx),
|
||||
'courseName': course_name,
|
||||
'locale': 'en',
|
||||
@ -286,11 +356,23 @@ class PluralsightIE(PluralsightBaseIE):
|
||||
'quality': '%dx%d' % (f['width'], f['height']),
|
||||
}
|
||||
format_id = '%s-%s' % (ext, quality)
|
||||
viewclip = self._download_json(
|
||||
'%s/video/clips/viewclip' % self._API_BASE, display_id,
|
||||
'Downloading %s viewclip JSON' % format_id, fatal=False,
|
||||
data=json.dumps(clip_post).encode('utf-8'),
|
||||
headers={'Content-Type': 'application/json;charset=utf-8'})
|
||||
|
||||
try:
|
||||
viewclip = self._download_json(
|
||||
self._GRAPHQL_EP, display_id,
|
||||
'Downloading %s viewclip graphql' % format_id,
|
||||
data=json.dumps({
|
||||
'query': self.GRAPHQL_VIEWCLIP_TMPL % clip_post,
|
||||
'variables': {}
|
||||
}).encode('utf-8'),
|
||||
headers=self._GRAPHQL_HEADERS)['data']['viewClip']
|
||||
except ExtractorError:
|
||||
# Still works but most likely will go soon
|
||||
viewclip = self._download_json(
|
||||
'%s/video/clips/viewclip' % self._API_BASE, display_id,
|
||||
'Downloading %s viewclip JSON' % format_id, fatal=False,
|
||||
data=json.dumps(clip_post).encode('utf-8'),
|
||||
headers={'Content-Type': 'application/json;charset=utf-8'})
|
||||
|
||||
# Pluralsight tracks multiple sequential calls to ViewClip API and start
|
||||
# to return 429 HTTP errors after some time (see
|
||||
|
@ -43,7 +43,8 @@ class PornComIE(InfoExtractor):
|
||||
|
||||
config = self._parse_json(
|
||||
self._search_regex(
|
||||
r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*=',
|
||||
(r'=\s*({.+?})\s*;\s*v1ar\b',
|
||||
r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*='),
|
||||
webpage, 'config', default='{}'),
|
||||
display_id, transform_source=js_to_json, fatal=False)
|
||||
|
||||
@ -69,7 +70,7 @@ class PornComIE(InfoExtractor):
|
||||
'height': int(height),
|
||||
'filesize_approx': parse_filesize(filesize),
|
||||
} for format_url, height, filesize in re.findall(
|
||||
r'<a[^>]+href="(/download/[^"]+)">MPEG4 (\d+)p<span[^>]*>(\d+\s+[a-zA-Z]+)<',
|
||||
r'<a[^>]+href="(/download/[^"]+)">[^<]*?(\d+)p<span[^>]*>(\d+\s*[a-zA-Z]+)<',
|
||||
webpage)]
|
||||
thumbnail = None
|
||||
duration = None
|
||||
|
@ -4,28 +4,22 @@ from __future__ import unicode_literals
|
||||
import functools
|
||||
import itertools
|
||||
import operator
|
||||
# import os
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
# compat_urllib_parse_unquote,
|
||||
# compat_urllib_parse_unquote_plus,
|
||||
# compat_urllib_parse_urlparse,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
orderedSet,
|
||||
# sanitized_Request,
|
||||
remove_quotes,
|
||||
str_to_int,
|
||||
url_or_none,
|
||||
)
|
||||
# from ..aes import (
|
||||
# aes_decrypt_text
|
||||
# )
|
||||
|
||||
|
||||
class PornHubIE(InfoExtractor):
|
||||
@ -62,7 +56,7 @@ class PornHubIE(InfoExtractor):
|
||||
'id': '1331683002',
|
||||
'ext': 'mp4',
|
||||
'title': '重庆婷婷女王足交',
|
||||
'uploader': 'cj397186295',
|
||||
'uploader': 'Unknown',
|
||||
'duration': 1753,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
@ -75,6 +69,31 @@ class PornHubIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# subtitles
|
||||
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
|
||||
'info_dict': {
|
||||
'id': 'ph5af5fef7c2aa7',
|
||||
'ext': 'mp4',
|
||||
'title': 'BFFS - Cute Teen Girls Share Cock On the Floor',
|
||||
'uploader': 'BFFs',
|
||||
'duration': 622,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'age_limit': 18,
|
||||
'tags': list,
|
||||
'categories': list,
|
||||
'subtitles': {
|
||||
'en': [{
|
||||
"ext": 'srt'
|
||||
}]
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
|
||||
'only_matching': True,
|
||||
@ -121,7 +140,7 @@ class PornHubIE(InfoExtractor):
|
||||
self._set_cookie('pornhub.com', 'platform', platform)
|
||||
return self._download_webpage(
|
||||
'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id,
|
||||
video_id)
|
||||
video_id, 'Downloading %s webpage' % platform)
|
||||
|
||||
webpage = dl_webpage('pc')
|
||||
|
||||
@ -134,57 +153,105 @@ class PornHubIE(InfoExtractor):
|
||||
'PornHub said: %s' % error_msg,
|
||||
expected=True, video_id=video_id)
|
||||
|
||||
tv_webpage = dl_webpage('tv')
|
||||
|
||||
assignments = self._search_regex(
|
||||
r'(var.+?mediastring.+?)</script>', tv_webpage,
|
||||
'encoded url').split(';')
|
||||
|
||||
js_vars = {}
|
||||
|
||||
def parse_js_value(inp):
|
||||
inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
|
||||
if '+' in inp:
|
||||
inps = inp.split('+')
|
||||
return functools.reduce(
|
||||
operator.concat, map(parse_js_value, inps))
|
||||
inp = inp.strip()
|
||||
if inp in js_vars:
|
||||
return js_vars[inp]
|
||||
return remove_quotes(inp)
|
||||
|
||||
for assn in assignments:
|
||||
assn = assn.strip()
|
||||
if not assn:
|
||||
continue
|
||||
assn = re.sub(r'var\s+', '', assn)
|
||||
vname, value = assn.split('=', 1)
|
||||
js_vars[vname] = parse_js_value(value)
|
||||
|
||||
video_url = js_vars['mediastring']
|
||||
|
||||
title = self._search_regex(
|
||||
r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None)
|
||||
|
||||
# video_title from flashvars contains whitespace instead of non-ASCII (see
|
||||
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
|
||||
# on that anymore.
|
||||
title = title or self._html_search_meta(
|
||||
title = self._html_search_meta(
|
||||
'twitter:title', webpage, default=None) or self._search_regex(
|
||||
(r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
|
||||
r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
|
||||
r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'),
|
||||
webpage, 'title', group='title')
|
||||
|
||||
video_urls = []
|
||||
video_urls_set = set()
|
||||
subtitles = {}
|
||||
|
||||
flashvars = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
|
||||
video_id)
|
||||
if flashvars:
|
||||
subtitle_url = url_or_none(flashvars.get('closedCaptionsFile'))
|
||||
if subtitle_url:
|
||||
subtitles.setdefault('en', []).append({
|
||||
'url': subtitle_url,
|
||||
'ext': 'srt',
|
||||
})
|
||||
thumbnail = flashvars.get('image_url')
|
||||
duration = int_or_none(flashvars.get('video_duration'))
|
||||
media_definitions = flashvars.get('mediaDefinitions')
|
||||
if isinstance(media_definitions, list):
|
||||
for definition in media_definitions:
|
||||
if not isinstance(definition, dict):
|
||||
continue
|
||||
video_url = definition.get('videoUrl')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
continue
|
||||
if video_url in video_urls_set:
|
||||
continue
|
||||
video_urls_set.add(video_url)
|
||||
video_urls.append(
|
||||
(video_url, int_or_none(definition.get('quality'))))
|
||||
else:
|
||||
title, thumbnail, duration = [None] * 3
|
||||
thumbnail, duration = [None] * 2
|
||||
|
||||
if not video_urls:
|
||||
tv_webpage = dl_webpage('tv')
|
||||
|
||||
assignments = self._search_regex(
|
||||
r'(var.+?mediastring.+?)</script>', tv_webpage,
|
||||
'encoded url').split(';')
|
||||
|
||||
js_vars = {}
|
||||
|
||||
def parse_js_value(inp):
|
||||
inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
|
||||
if '+' in inp:
|
||||
inps = inp.split('+')
|
||||
return functools.reduce(
|
||||
operator.concat, map(parse_js_value, inps))
|
||||
inp = inp.strip()
|
||||
if inp in js_vars:
|
||||
return js_vars[inp]
|
||||
return remove_quotes(inp)
|
||||
|
||||
for assn in assignments:
|
||||
assn = assn.strip()
|
||||
if not assn:
|
||||
continue
|
||||
assn = re.sub(r'var\s+', '', assn)
|
||||
vname, value = assn.split('=', 1)
|
||||
js_vars[vname] = parse_js_value(value)
|
||||
|
||||
video_url = js_vars['mediastring']
|
||||
if video_url not in video_urls_set:
|
||||
video_urls.append((video_url, None))
|
||||
video_urls_set.add(video_url)
|
||||
|
||||
for mobj in re.finditer(
|
||||
r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage):
|
||||
video_url = mobj.group('url')
|
||||
if video_url not in video_urls_set:
|
||||
video_urls.append((video_url, None))
|
||||
video_urls_set.add(video_url)
|
||||
|
||||
formats = []
|
||||
for video_url, height in video_urls:
|
||||
tbr = None
|
||||
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
|
||||
if mobj:
|
||||
if not height:
|
||||
height = int(mobj.group('height'))
|
||||
tbr = int(mobj.group('tbr'))
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': '%dp' % height if height else None,
|
||||
'height': height,
|
||||
'tbr': tbr,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_uploader = self._html_search_regex(
|
||||
r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:user|channel)s/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
|
||||
@ -210,7 +277,6 @@ class PornHubIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': video_uploader,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
@ -219,10 +285,11 @@ class PornHubIE(InfoExtractor):
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'comment_count': comment_count,
|
||||
# 'formats': formats,
|
||||
'formats': formats,
|
||||
'age_limit': 18,
|
||||
'tags': tags,
|
||||
'categories': categories,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
|
247
youtube_dl/extractor/puhutv.py
Normal file
247
youtube_dl/extractor/puhutv.py
Normal file
@ -0,0 +1,247 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
parse_resolution,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class PuhuTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-izle'
|
||||
IE_NAME = 'puhutv'
|
||||
_TESTS = [{
|
||||
# film
|
||||
'url': 'https://puhutv.com/sut-kardesler-izle',
|
||||
'md5': 'fbd8f2d8e7681f8bcd51b592475a6ae7',
|
||||
'info_dict': {
|
||||
'id': '5085',
|
||||
'display_id': 'sut-kardesler',
|
||||
'ext': 'mp4',
|
||||
'title': 'Süt Kardeşler',
|
||||
'description': 'md5:405fd024df916ca16731114eb18e511a',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 4832.44,
|
||||
'creator': 'Arzu Film',
|
||||
'timestamp': 1469778212,
|
||||
'upload_date': '20160729',
|
||||
'release_year': 1976,
|
||||
'view_count': int,
|
||||
'tags': ['Aile', 'Komedi', 'Klasikler'],
|
||||
},
|
||||
}, {
|
||||
# episode, geo restricted, bypassable with --geo-verification-proxy
|
||||
'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# 4k, with subtitles
|
||||
'url': 'https://puhutv.com/dip-1-bolum-izle',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_SUBTITLE_LANGS = {
|
||||
'English': 'en',
|
||||
'Deutsch': 'de',
|
||||
'عربى': 'ar'
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
info = self._download_json(
|
||||
urljoin(url, '/api/slug/%s-izle' % display_id),
|
||||
display_id)['data']
|
||||
|
||||
video_id = compat_str(info['id'])
|
||||
title = info.get('name') or info['title']['name']
|
||||
if info.get('display_name'):
|
||||
title = '%s %s' % (title, info.get('display_name'))
|
||||
|
||||
try:
|
||||
videos = self._download_json(
|
||||
'https://puhutv.com/api/assets/%s/videos' % video_id,
|
||||
display_id, 'Downloading video JSON',
|
||||
headers=self.geo_verification_headers())
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
self.raise_geo_restricted()
|
||||
raise
|
||||
|
||||
formats = []
|
||||
for video in videos['data']['videos']:
|
||||
media_url = url_or_none(video.get('url'))
|
||||
if not media_url:
|
||||
continue
|
||||
playlist = video.get('is_playlist')
|
||||
if video.get('stream_type') == 'hls' and playlist is True:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
quality = int_or_none(video.get('quality'))
|
||||
f = {
|
||||
'url': media_url,
|
||||
'ext': 'mp4',
|
||||
'height': quality
|
||||
}
|
||||
video_format = video.get('video_format')
|
||||
if video_format == 'hls' and playlist is False:
|
||||
format_id = 'hls'
|
||||
f['protocol'] = 'm3u8_native'
|
||||
elif video_format == 'mp4':
|
||||
format_id = 'http'
|
||||
|
||||
else:
|
||||
continue
|
||||
if quality:
|
||||
format_id += '-%sp' % quality
|
||||
f['format_id'] = format_id
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = try_get(
|
||||
info, lambda x: x['title']['description'],
|
||||
compat_str) or info.get('description')
|
||||
timestamp = unified_timestamp(info.get('created_at'))
|
||||
creator = try_get(
|
||||
info, lambda x: x['title']['producer']['name'], compat_str)
|
||||
|
||||
duration = float_or_none(
|
||||
try_get(info, lambda x: x['content']['duration_in_ms'], int),
|
||||
scale=1000)
|
||||
view_count = try_get(info, lambda x: x['content']['watch_count'], int)
|
||||
|
||||
images = try_get(
|
||||
info, lambda x: x['content']['images']['wide'], dict) or {}
|
||||
thumbnails = []
|
||||
for image_id, image_url in images.items():
|
||||
if not isinstance(image_url, compat_str):
|
||||
continue
|
||||
if not image_url.startswith(('http', '//')):
|
||||
image_url = 'https://%s' % image_url
|
||||
t = parse_resolution(image_id)
|
||||
t.update({
|
||||
'id': image_id,
|
||||
'url': image_url
|
||||
})
|
||||
thumbnails.append(t)
|
||||
|
||||
release_year = try_get(info, lambda x: x['title']['released_at'], int)
|
||||
|
||||
season_number = int_or_none(info.get('season_number'))
|
||||
season_id = str_or_none(info.get('season_id'))
|
||||
episode_number = int_or_none(info.get('episode_number'))
|
||||
|
||||
tags = []
|
||||
for genre in try_get(info, lambda x: x['title']['genres'], list) or []:
|
||||
if not isinstance(genre, dict):
|
||||
continue
|
||||
genre_name = genre.get('name')
|
||||
if genre_name and isinstance(genre_name, compat_str):
|
||||
tags.append(genre_name)
|
||||
|
||||
subtitles = {}
|
||||
for subtitle in try_get(
|
||||
info, lambda x: x['content']['subtitles'], list) or []:
|
||||
if not isinstance(subtitle, dict):
|
||||
continue
|
||||
lang = subtitle.get('language')
|
||||
sub_url = url_or_none(subtitle.get('url'))
|
||||
if not lang or not isinstance(lang, compat_str) or not sub_url:
|
||||
continue
|
||||
subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
|
||||
'url': sub_url
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'season_id': season_id,
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'release_year': release_year,
|
||||
'timestamp': timestamp,
|
||||
'creator': creator,
|
||||
'view_count': view_count,
|
||||
'duration': duration,
|
||||
'tags': tags,
|
||||
'subtitles': subtitles,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats
|
||||
}
|
||||
|
||||
|
||||
class PuhuTVSerieIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-detay'
|
||||
IE_NAME = 'puhutv:serie'
|
||||
_TESTS = [{
|
||||
'url': 'https://puhutv.com/deniz-yildizi-detay',
|
||||
'info_dict': {
|
||||
'title': 'Deniz Yıldızı',
|
||||
'id': 'deniz-yildizi',
|
||||
},
|
||||
'playlist_mincount': 205,
|
||||
}, {
|
||||
# a film detail page which is using same url with serie page
|
||||
'url': 'https://puhutv.com/kaybedenler-kulubu-detay',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_entries(self, seasons):
|
||||
for season in seasons:
|
||||
season_id = season.get('id')
|
||||
if not season_id:
|
||||
continue
|
||||
page = 1
|
||||
has_more = True
|
||||
while has_more is True:
|
||||
season = self._download_json(
|
||||
'https://galadriel.puhutv.com/seasons/%s' % season_id,
|
||||
season_id, 'Downloading page %s' % page, query={
|
||||
'page': page,
|
||||
'per': 40,
|
||||
})
|
||||
episodes = season.get('episodes')
|
||||
if isinstance(episodes, list):
|
||||
for ep in episodes:
|
||||
slug_path = str_or_none(ep.get('slugPath'))
|
||||
if not slug_path:
|
||||
continue
|
||||
video_id = str_or_none(int_or_none(ep.get('id')))
|
||||
yield self.url_result(
|
||||
'https://puhutv.com/%s' % slug_path,
|
||||
ie=PuhuTVIE.ie_key(), video_id=video_id,
|
||||
video_title=ep.get('name') or ep.get('eventLabel'))
|
||||
page += 1
|
||||
has_more = season.get('hasMore')
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
info = self._download_json(
|
||||
urljoin(url, '/api/slug/%s-detay' % playlist_id),
|
||||
playlist_id)['data']
|
||||
|
||||
seasons = info.get('seasons')
|
||||
if seasons:
|
||||
return self.playlist_result(
|
||||
self._extract_entries(seasons), playlist_id, info.get('name'))
|
||||
|
||||
# For films, these are using same url with series
|
||||
video_id = info.get('slug') or info['assets'][0]['slug']
|
||||
return self.url_result(
|
||||
'https://puhutv.com/%s-izle' % video_id,
|
||||
PuhuTVIE.ie_key(), video_id)
|
@ -32,6 +32,9 @@ class RaiBaseIE(InfoExtractor):
|
||||
_GEO_BYPASS = False
|
||||
|
||||
def _extract_relinker_info(self, relinker_url, video_id):
|
||||
if not re.match(r'https?://', relinker_url):
|
||||
return {'formats': [{'url': relinker_url}]}
|
||||
|
||||
formats = []
|
||||
geoprotection = None
|
||||
is_live = None
|
||||
@ -369,6 +372,10 @@ class RaiIE(RaiBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# Direct MMS URL
|
||||
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_from_content_id(self, content_id, url):
|
||||
|
@ -3,12 +3,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -71,8 +71,8 @@ class RedTubeIE(InfoExtractor):
|
||||
video_id, fatal=False)
|
||||
if medias and isinstance(medias, list):
|
||||
for media in medias:
|
||||
format_url = media.get('videoUrl')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(media.get('videoUrl'))
|
||||
if not format_url:
|
||||
continue
|
||||
format_id = media.get('quality')
|
||||
formats.append({
|
||||
|
@ -6,6 +6,7 @@ from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -37,8 +38,8 @@ class RENTVIE(InfoExtractor):
|
||||
title = config['title']
|
||||
formats = []
|
||||
for video in config['src']:
|
||||
src = video.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
src = url_or_none(video.get('src'))
|
||||
if not src:
|
||||
continue
|
||||
ext = determine_ext(src)
|
||||
if ext == 'm3u8':
|
||||
|
@ -1,10 +1,14 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -14,20 +18,19 @@ class RTBFIE(InfoExtractor):
|
||||
(?:
|
||||
video/[^?]+\?.*\bid=|
|
||||
ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=|
|
||||
auvio/[^/]+\?.*id=
|
||||
auvio/[^/]+\?.*\b(?P<live>l)?id=
|
||||
)(?P<id>\d+)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
|
||||
'md5': '799f334ddf2c0a582ba80c44655be570',
|
||||
'md5': '8c876a1cceeb6cf31b476461ade72384',
|
||||
'info_dict': {
|
||||
'id': '1921274',
|
||||
'ext': 'mp4',
|
||||
'title': 'Les Diables au coeur (épisode 2)',
|
||||
'description': 'Football - Diables Rouges',
|
||||
'duration': 3099,
|
||||
'description': '(du 25/04/2014)',
|
||||
'duration': 3099.54,
|
||||
'upload_date': '20140425',
|
||||
'timestamp': 1398456336,
|
||||
'uploader': 'rtbfsport',
|
||||
'timestamp': 1398456300,
|
||||
}
|
||||
}, {
|
||||
# geo restricted
|
||||
@ -39,6 +42,18 @@ class RTBFIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Live
|
||||
'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Audio
|
||||
'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# With Subtitle
|
||||
'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_IMAGE_HOST = 'http://ds1.ds.static.rtbf.be'
|
||||
_PROVIDERS = {
|
||||
@ -53,46 +68,94 @@ class RTBFIE(InfoExtractor):
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._download_json(
|
||||
'http://www.rtbf.be/api/media/video?method=getVideoDetail&args[]=%s' % video_id, video_id)
|
||||
live, media_id = re.match(self._VALID_URL, url).groups()
|
||||
embed_page = self._download_webpage(
|
||||
'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'),
|
||||
media_id, query={'id': media_id})
|
||||
data = self._parse_json(self._html_search_regex(
|
||||
r'data-media="([^"]+)"', embed_page, 'media data'), media_id)
|
||||
|
||||
error = data.get('error')
|
||||
if error:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
|
||||
data = data['data']
|
||||
|
||||
provider = data.get('provider')
|
||||
if provider in self._PROVIDERS:
|
||||
return self.url_result(data['url'], self._PROVIDERS[provider])
|
||||
|
||||
title = data['title']
|
||||
is_live = data.get('isLive')
|
||||
if is_live:
|
||||
title = self._live_title(title)
|
||||
height_re = r'-(\d+)p\.'
|
||||
formats = []
|
||||
for key, format_id in self._QUALITIES:
|
||||
format_url = data.get(key + 'Url')
|
||||
if format_url:
|
||||
|
||||
m3u8_url = data.get('urlHlsAes128') or data.get('urlHls')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
|
||||
fix_url = lambda x: x.replace('//rtbf-vod.', '//rtbf.') if '/geo/drm/' in x else x
|
||||
http_url = data.get('url')
|
||||
if formats and http_url and re.search(height_re, http_url):
|
||||
http_url = fix_url(http_url)
|
||||
for m3u8_f in formats[:]:
|
||||
height = m3u8_f.get('height')
|
||||
if not height:
|
||||
continue
|
||||
f = m3u8_f.copy()
|
||||
del f['protocol']
|
||||
f.update({
|
||||
'format_id': m3u8_f['format_id'].replace('hls-', 'http-'),
|
||||
'url': re.sub(height_re, '-%dp.' % height, http_url),
|
||||
})
|
||||
formats.append(f)
|
||||
else:
|
||||
sources = data.get('sources') or {}
|
||||
for key, format_id in self._QUALITIES:
|
||||
format_url = sources.get(key)
|
||||
if not format_url:
|
||||
continue
|
||||
height = int_or_none(self._search_regex(
|
||||
height_re, format_url, 'height', default=None))
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': format_url,
|
||||
'url': fix_url(format_url),
|
||||
'height': height,
|
||||
})
|
||||
|
||||
thumbnails = []
|
||||
for thumbnail_id, thumbnail_url in data.get('thumbnail', {}).items():
|
||||
if thumbnail_id != 'default':
|
||||
thumbnails.append({
|
||||
'url': self._IMAGE_HOST + thumbnail_url,
|
||||
'id': thumbnail_id,
|
||||
})
|
||||
mpd_url = data.get('urlDash')
|
||||
if not data.get('drm') and mpd_url:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
mpd_url, media_id, mpd_id='dash', fatal=False))
|
||||
|
||||
audio_url = data.get('urlAudio')
|
||||
if audio_url:
|
||||
formats.append({
|
||||
'format_id': 'audio',
|
||||
'url': audio_url,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for track in (data.get('tracks') or {}).values():
|
||||
sub_url = track.get('url')
|
||||
if not sub_url:
|
||||
continue
|
||||
subtitles.setdefault(track.get('lang') or 'fr', []).append({
|
||||
'url': sub_url,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'id': media_id,
|
||||
'formats': formats,
|
||||
'title': data['title'],
|
||||
'description': data.get('description') or data.get('subtitle'),
|
||||
'thumbnails': thumbnails,
|
||||
'duration': data.get('duration') or data.get('realDuration'),
|
||||
'timestamp': int_or_none(data.get('created')),
|
||||
'view_count': int_or_none(data.get('viewCount')),
|
||||
'uploader': data.get('channel'),
|
||||
'tags': data.get('tags'),
|
||||
'title': title,
|
||||
'description': strip_or_none(data.get('description')),
|
||||
'thumbnail': data.get('thumbnail'),
|
||||
'duration': float_or_none(data.get('realDuration')),
|
||||
'timestamp': int_or_none(data.get('liveFrom')),
|
||||
'series': data.get('programLabel'),
|
||||
'subtitles': subtitles,
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
@ -16,6 +16,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -176,8 +177,8 @@ class RutubePlaylistBaseIE(RutubeBaseIE):
|
||||
break
|
||||
|
||||
for result in results:
|
||||
video_url = result.get('video_url')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
video_url = url_or_none(result.get('video_url'))
|
||||
if not video_url:
|
||||
continue
|
||||
entry = self._extract_video(result, require_title=False)
|
||||
entry.update({
|
||||
|
@ -19,29 +19,33 @@ from ..utils import (
|
||||
|
||||
class SixPlayIE(InfoExtractor):
|
||||
IE_NAME = '6play'
|
||||
_VALID_URL = r'(?:6play:|https?://(?:www\.)?6play\.fr/.+?-c_)(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.6play.fr/le-meilleur-patissier-p_1807/le-meilleur-patissier-special-fetes-mercredi-a-21-00-sur-m6-c_11638450',
|
||||
'md5': '42310bffe4ba3982db112b9cd3467328',
|
||||
_VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay.be)/.+?-c_)(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051',
|
||||
'md5': '31fcd112637baa0c2ab92c4fcd8baf27',
|
||||
'info_dict': {
|
||||
'id': '11638450',
|
||||
'id': '12041051',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le Meilleur Pâtissier, spécial fêtes mercredi à 21:00 sur M6',
|
||||
'description': 'md5:308853f6a5f9e2d55a30fc0654de415f',
|
||||
'duration': 39,
|
||||
'series': 'Le meilleur pâtissier',
|
||||
'title': 'Le but qui a marqué l\'histoire du football français !',
|
||||
'description': 'md5:b59e7e841d646ef1eb42a7868eb6a851',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.rtlplay.be/rtl-info-13h-p_8551/les-titres-du-rtlinfo-13h-c_12045869',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
domain, video_id = re.search(self._VALID_URL, url).groups()
|
||||
service, consumer_name = {
|
||||
'6play.fr': ('6play', 'm6web'),
|
||||
'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'),
|
||||
}.get(domain, ('6play', 'm6web'))
|
||||
|
||||
data = self._download_json(
|
||||
'https://pc.middleware.6play.fr/6play/v2/platforms/m6group_web/services/6play/videos/clip_%s' % video_id,
|
||||
video_id, query={
|
||||
'https://pc.middleware.6play.fr/6play/v2/platforms/m6group_web/services/%s/videos/clip_%s' % (service, video_id),
|
||||
video_id, headers={
|
||||
'x-customer-name': consumer_name
|
||||
}, query={
|
||||
'csa': 5,
|
||||
'with': 'clips',
|
||||
})
|
||||
@ -65,7 +69,14 @@ class SixPlayIE(InfoExtractor):
|
||||
subtitles.setdefault('fr', []).append({'url': asset_url})
|
||||
continue
|
||||
if container == 'm3u8' or ext == 'm3u8':
|
||||
if protocol == 'usp' and not compat_parse_qs(compat_urllib_parse_urlparse(asset_url).query).get('token', [None])[0]:
|
||||
if protocol == 'usp':
|
||||
if compat_parse_qs(compat_urllib_parse_urlparse(asset_url).query).get('token', [None])[0]:
|
||||
urlh = self._request_webpage(
|
||||
asset_url, video_id, fatal=False,
|
||||
headers=self.geo_verification_headers())
|
||||
if not urlh:
|
||||
continue
|
||||
asset_url = urlh.geturl()
|
||||
asset_url = re.sub(r'/([^/]+)\.ism/[^/]*\.m3u8', r'/\1.ism/\1.m3u8', asset_url)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
asset_url, video_id, 'mp4', 'm3u8_native',
|
||||
|
@ -1,12 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class SlutloadIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?slutload\.com/(?:video/[^/]+|embed_player|watch)/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
|
||||
'md5': '868309628ba00fd488cf516a113fd717',
|
||||
@ -16,33 +14,52 @@ class SlutloadIE(InfoExtractor):
|
||||
'title': 'virginie baisee en cam',
|
||||
'age_limit': 18,
|
||||
'thumbnail': r're:https?://.*?\.jpg'
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# mobile site
|
||||
'url': 'http://mobile.slutload.com/video/masturbation-solo/fviFLmc6kzJ/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.slutload.com/embed_player/TD73btpBqSxc/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.slutload.com/watch/TD73btpBqSxc/Virginie-Baisee-En-Cam.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
desktop_url = re.sub(r'^(https?://)mobile\.', r'\1', url)
|
||||
webpage = self._download_webpage(desktop_url, video_id)
|
||||
embed_page = self._download_webpage(
|
||||
'http://www.slutload.com/embed_player/%s' % video_id, video_id,
|
||||
'Downloading embed page', fatal=False)
|
||||
|
||||
video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>',
|
||||
webpage, 'title').strip()
|
||||
if embed_page:
|
||||
def extract(what):
|
||||
return self._html_search_regex(
|
||||
r'data-video-%s=(["\'])(?P<url>(?:(?!\1).)+)\1' % what,
|
||||
embed_page, 'video %s' % what, default=None, group='url')
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'(?s)<div id="vidPlayer"\s+data-url="([^"]+)"',
|
||||
webpage, 'video URL')
|
||||
thumbnail = self._html_search_regex(
|
||||
r'(?s)<div id="vidPlayer"\s+.*?previewer-file="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
video_url = extract('url')
|
||||
if video_url:
|
||||
title = self._html_search_regex(
|
||||
r'<title>([^<]+)', embed_page, 'title', default=video_id)
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': extract('preview'),
|
||||
'age_limit': 18
|
||||
}
|
||||
|
||||
return {
|
||||
webpage = self._download_webpage(
|
||||
'http://www.slutload.com/video/_/%s/' % video_id, video_id)
|
||||
title = self._html_search_regex(
|
||||
r'<h1><strong>([^<]+)</strong>', webpage, 'title').strip()
|
||||
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': video_title,
|
||||
'thumbnail': thumbnail,
|
||||
'age_limit': 18
|
||||
}
|
||||
'title': title,
|
||||
'age_limit': 18,
|
||||
})
|
||||
return info
|
||||
|
@ -72,4 +72,7 @@ class StreamcloudIE(InfoExtractor):
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'thumbnail': thumbnail,
|
||||
'http_headers': {
|
||||
'Referer': url,
|
||||
},
|
||||
}
|
||||
|
@ -12,6 +12,8 @@ from ..utils import (
|
||||
determine_ext,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
urljoin,
|
||||
compat_str,
|
||||
@ -137,7 +139,12 @@ class SVTPlayBaseIE(SVTBaseIE):
|
||||
|
||||
class SVTPlayIE(SVTPlayBaseIE):
|
||||
IE_DESC = 'SVT Play and Öppet arkiv'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
svt:(?P<svt_id>[^/?#&]+)|
|
||||
https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
|
||||
'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
|
||||
@ -164,10 +171,40 @@ class SVTPlayIE(SVTPlayBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.svtplay.se/kanaler/svt1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'svt:1376446-003A',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'svt:14278044',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _adjust_title(self, info):
|
||||
if info['is_live']:
|
||||
info['title'] = self._live_title(info['title'])
|
||||
|
||||
def _extract_by_video_id(self, video_id, webpage=None):
|
||||
data = self._download_json(
|
||||
'https://api.svt.se/videoplayer-api/video/%s' % video_id,
|
||||
video_id, headers=self.geo_verification_headers())
|
||||
info_dict = self._extract_video(data, video_id)
|
||||
if not info_dict.get('title'):
|
||||
title = dict_get(info_dict, ('episode', 'series'))
|
||||
if not title and webpage:
|
||||
title = re.sub(
|
||||
r'\s*\|\s*.+?$', '', self._og_search_title(webpage))
|
||||
if not title:
|
||||
title = video_id
|
||||
info_dict['title'] = title
|
||||
self._adjust_title(info_dict)
|
||||
return info_dict
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id, svt_id = mobj.group('id', 'svt_id')
|
||||
|
||||
if svt_id:
|
||||
return self._extract_by_video_id(svt_id)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
@ -179,10 +216,6 @@ class SVTPlayIE(SVTPlayBaseIE):
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
def adjust_title(info):
|
||||
if info['is_live']:
|
||||
info['title'] = self._live_title(info['title'])
|
||||
|
||||
if data:
|
||||
video_info = try_get(
|
||||
data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
|
||||
@ -193,24 +226,14 @@ class SVTPlayIE(SVTPlayBaseIE):
|
||||
'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
|
||||
'thumbnail': thumbnail,
|
||||
})
|
||||
adjust_title(info_dict)
|
||||
self._adjust_title(info_dict)
|
||||
return info_dict
|
||||
|
||||
video_id = self._search_regex(
|
||||
svt_id = self._search_regex(
|
||||
r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
|
||||
webpage, 'video id', default=None)
|
||||
webpage, 'video id')
|
||||
|
||||
if video_id:
|
||||
data = self._download_json(
|
||||
'https://api.svt.se/videoplayer-api/video/%s' % video_id,
|
||||
video_id, headers=self.geo_verification_headers())
|
||||
info_dict = self._extract_video(data, video_id)
|
||||
if not info_dict.get('title'):
|
||||
info_dict['title'] = re.sub(
|
||||
r'\s*\|\s*.+?$', '',
|
||||
info_dict.get('episode') or self._og_search_title(webpage))
|
||||
adjust_title(info_dict)
|
||||
return info_dict
|
||||
return self._extract_by_video_id(svt_id, webpage)
|
||||
|
||||
|
||||
class SVTSeriesIE(SVTPlayBaseIE):
|
||||
@ -292,3 +315,57 @@ class SVTSeriesIE(SVTPlayBaseIE):
|
||||
|
||||
return self.playlist_result(
|
||||
entries, series_id, title, metadata.get('description'))
|
||||
|
||||
|
||||
class SVTPageIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?svt\.se/(?:[^/]+/)*(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.svt.se/sport/oseedat/guide-sommartraningen-du-kan-gora-var-och-nar-du-vill',
|
||||
'info_dict': {
|
||||
'id': 'guide-sommartraningen-du-kan-gora-var-och-nar-du-vill',
|
||||
'title': 'GUIDE: Sommarträning du kan göra var och när du vill',
|
||||
},
|
||||
'playlist_count': 7,
|
||||
}, {
|
||||
'url': 'https://www.svt.se/nyheter/inrikes/ebba-busch-thor-kd-har-delvis-ratt-om-no-go-zoner',
|
||||
'info_dict': {
|
||||
'id': 'ebba-busch-thor-kd-har-delvis-ratt-om-no-go-zoner',
|
||||
'title': 'Ebba Busch Thor har bara delvis rätt om ”no-go-zoner”',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
}, {
|
||||
# only programTitle
|
||||
'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
|
||||
'info_dict': {
|
||||
'id': '2900353',
|
||||
'ext': 'mp4',
|
||||
'title': 'Stjärnorna skojar till det - under SVT-intervjun',
|
||||
'duration': 27,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.svt.se/nyheter/lokalt/vast/svt-testar-tar-nagon-upp-skrapet-1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.svt.se/vader/manadskronikor/maj2018',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if SVTIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
'svt:%s' % video_id, ie=SVTPlayIE.ie_key(), video_id=video_id)
|
||||
for video_id in orderedSet(re.findall(
|
||||
r'data-video-id=["\'](\d+)', webpage))]
|
||||
|
||||
title = strip_or_none(self._og_search_title(webpage, default=None))
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title)
|
||||
|
@ -7,8 +7,10 @@ from .common import InfoExtractor
|
||||
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -30,7 +32,7 @@ class TEDIE(InfoExtractor):
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
|
||||
'md5': '0de43ac406aa3e4ea74b66c9c7789b13',
|
||||
'md5': 'b0ce2b05ca215042124fbc9e3886493a',
|
||||
'info_dict': {
|
||||
'id': '102',
|
||||
'ext': 'mp4',
|
||||
@ -42,24 +44,30 @@ class TEDIE(InfoExtractor):
|
||||
'uploader': 'Dan Dennett',
|
||||
'width': 853,
|
||||
'duration': 1308,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
|
||||
'md5': 'b899ac15e345fb39534d913f7606082b',
|
||||
'info_dict': {
|
||||
'id': 'tSVI8ta_P4w',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vishal Sikka: The beauty and power of algorithms',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'description': 'md5:6261fdfe3e02f4f579cbbfc00aff73f4',
|
||||
'upload_date': '20140122',
|
||||
'uploader_id': 'TEDInstitute',
|
||||
'uploader': 'TED Institute',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'tags': list,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# missing HTTP bitrates
|
||||
'url': 'https://www.ted.com/talks/vishal_sikka_the_beauty_and_power_of_algorithms',
|
||||
'info_dict': {
|
||||
'id': '6069',
|
||||
'ext': 'mp4',
|
||||
'title': 'The beauty and power of algorithms',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'description': 'md5:734e352710fb00d840ab87ae31aaf688',
|
||||
'uploader': 'Vishal Sikka',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
|
||||
'md5': '71b3ab2f4233012dce09d515c9c39ce2',
|
||||
'md5': 'e6b9617c01a7970ceac8bb2c92c346c0',
|
||||
'info_dict': {
|
||||
'id': '1972',
|
||||
'ext': 'mp4',
|
||||
@ -68,6 +76,9 @@ class TEDIE(InfoExtractor):
|
||||
'description': 'md5:5174aed4d0f16021b704120360f72b92',
|
||||
'duration': 1128,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ted.com/playlists/who_are_the_hackers',
|
||||
'info_dict': {
|
||||
@ -92,17 +103,17 @@ class TEDIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# YouTube video
|
||||
'url': 'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond',
|
||||
'add_ie': ['Youtube'],
|
||||
# no nativeDownloads
|
||||
'url': 'https://www.ted.com/talks/tom_thum_the_orchestra_in_my_mouth',
|
||||
'info_dict': {
|
||||
'id': 'aFBIPO-P7LM',
|
||||
'id': '1792',
|
||||
'ext': 'mp4',
|
||||
'title': 'The hidden power of siblings: Jeff Kluger at TEDxAsheville',
|
||||
'description': 'md5:3d7a4f50d95ca5dd67104e2a20f43fe1',
|
||||
'uploader': 'TEDx Talks',
|
||||
'uploader_id': 'TEDxTalks',
|
||||
'upload_date': '20111216',
|
||||
'title': 'The orchestra in my mouth',
|
||||
'description': 'md5:5d1d78650e2f8dfcbb8ebee2951ac29a',
|
||||
'uploader': 'Tom Thum',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'tags': list,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -161,27 +172,16 @@ class TEDIE(InfoExtractor):
|
||||
|
||||
info = self._extract_info(webpage)
|
||||
|
||||
talk_info = try_get(
|
||||
info, lambda x: x['__INITIAL_DATA__']['talks'][0],
|
||||
dict) or info['talks'][0]
|
||||
data = try_get(info, lambda x: x['__INITIAL_DATA__'], dict) or info
|
||||
talk_info = data['talks'][0]
|
||||
|
||||
title = talk_info['title'].strip()
|
||||
|
||||
external = talk_info.get('external')
|
||||
if external:
|
||||
service = external['service']
|
||||
self.to_screen('Found video from %s' % service)
|
||||
ext_url = None
|
||||
if service.lower() == 'youtube':
|
||||
ext_url = external.get('code')
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': ext_url or external['uri'],
|
||||
}
|
||||
|
||||
native_downloads = try_get(
|
||||
talk_info, lambda x: x['downloads']['nativeDownloads'],
|
||||
dict) or talk_info['nativeDownloads']
|
||||
talk_info,
|
||||
(lambda x: x['downloads']['nativeDownloads'],
|
||||
lambda x: x['nativeDownloads']),
|
||||
dict) or {}
|
||||
|
||||
formats = [{
|
||||
'url': format_url,
|
||||
@ -196,10 +196,24 @@ class TEDIE(InfoExtractor):
|
||||
|
||||
player_talk = talk_info['player_talks'][0]
|
||||
|
||||
external = player_talk.get('external')
|
||||
if isinstance(external, dict):
|
||||
service = external.get('service')
|
||||
if isinstance(service, compat_str):
|
||||
ext_url = None
|
||||
if service.lower() == 'youtube':
|
||||
ext_url = external.get('code')
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': ext_url or external['uri'],
|
||||
}
|
||||
|
||||
resources_ = player_talk.get('resources') or talk_info.get('resources')
|
||||
|
||||
http_url = None
|
||||
for format_id, resources in resources_.items():
|
||||
if not isinstance(resources, dict):
|
||||
continue
|
||||
if format_id == 'h264':
|
||||
for resource in resources:
|
||||
h264_url = resource.get('file')
|
||||
@ -228,8 +242,12 @@ class TEDIE(InfoExtractor):
|
||||
'tbr': int_or_none(resource.get('bitrate')),
|
||||
})
|
||||
elif format_id == 'hls':
|
||||
stream_url = url_or_none(resources.get('stream'))
|
||||
if not stream_url:
|
||||
continue
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
resources.get('stream'), video_name, 'mp4', m3u8_id=format_id, fatal=False))
|
||||
stream_url, video_name, 'mp4', m3u8_id=format_id,
|
||||
fatal=False))
|
||||
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
|
||||
@ -239,9 +257,13 @@ class TEDIE(InfoExtractor):
|
||||
bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
|
||||
if not bitrate:
|
||||
continue
|
||||
bitrate_url = re.sub(r'\d+k', bitrate, http_url)
|
||||
if not self._is_valid_url(
|
||||
bitrate_url, video_name, '%s bitrate' % bitrate):
|
||||
continue
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
'url': re.sub(r'\d+k', bitrate, http_url),
|
||||
'url': bitrate_url,
|
||||
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
@ -267,7 +289,11 @@ class TEDIE(InfoExtractor):
|
||||
'description': self._og_search_description(webpage),
|
||||
'subtitles': self._get_subtitles(video_id, talk_info),
|
||||
'formats': formats,
|
||||
'duration': talk_info.get('duration'),
|
||||
'duration': float_or_none(talk_info.get('duration')),
|
||||
'view_count': int_or_none(data.get('viewed_count')),
|
||||
'comment_count': int_or_none(
|
||||
try_get(data, lambda x: x['comments']['count'])),
|
||||
'tags': try_get(talk_info, lambda x: x['tags'], list),
|
||||
}
|
||||
|
||||
def _get_subtitles(self, video_id, talk_info):
|
||||
|
@ -1,26 +1,43 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .mitele import MiTeleBaseIE
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class TelecincoIE(MiTeleBaseIE):
|
||||
class TelecincoIE(InfoExtractor):
|
||||
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
|
||||
'md5': '8d7b2d5f699ee2709d992a63d5cd1712',
|
||||
'info_dict': {
|
||||
'id': 'JEA5ijCnF6p5W08A1rNKn7',
|
||||
'ext': 'mp4',
|
||||
'id': '1876350223',
|
||||
'title': 'Bacalao con kokotxas al pil-pil',
|
||||
'description': 'md5:1382dacd32dd4592d478cbdca458e5bb',
|
||||
'duration': 662,
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'adb28c37238b675dad0f042292f209a7',
|
||||
'info_dict': {
|
||||
'id': 'JEA5ijCnF6p5W08A1rNKn7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Con Martín Berasategui, hacer un bacalao al pil-pil es fácil y divertido',
|
||||
'duration': 662,
|
||||
},
|
||||
}]
|
||||
}, {
|
||||
'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
|
||||
'md5': '284393e5387b3b947b77c613ef04749a',
|
||||
'md5': '9468140ebc300fbb8b9d65dc6e5c4b43',
|
||||
'info_dict': {
|
||||
'id': 'jn24Od1zGLG4XUZcnUnZB6',
|
||||
'ext': 'mp4',
|
||||
@ -30,7 +47,7 @@ class TelecincoIE(MiTeleBaseIE):
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
|
||||
'md5': '749afab6ea5a136a8806855166ae46a2',
|
||||
'md5': 'ae2dc6b7b50b2392076a51c0f70e01f6',
|
||||
'info_dict': {
|
||||
'id': 'aywerkD2Sv1vGNqq9b85Q2',
|
||||
'ext': 'mp4',
|
||||
@ -50,17 +67,90 @@ class TelecincoIE(MiTeleBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _parse_content(self, content, url):
|
||||
video_id = content['dataMediaId']
|
||||
if content.get('dataCmsId') == 'ooyala':
|
||||
return self.url_result(
|
||||
'ooyala:%s' % video_id, OoyalaIE.ie_key(), video_id)
|
||||
config_url = urljoin(url, content['dataConfig'])
|
||||
config = self._download_json(
|
||||
config_url, video_id, 'Downloading config JSON')
|
||||
title = config['info']['title']
|
||||
|
||||
def mmc_url(mmc_type):
|
||||
return re.sub(
|
||||
r'/(?:flash|html5)\.json', '/%s.json' % mmc_type,
|
||||
config['services']['mmc'])
|
||||
|
||||
duration = None
|
||||
formats = []
|
||||
for mmc_type in ('flash', 'html5'):
|
||||
mmc = self._download_json(
|
||||
mmc_url(mmc_type), video_id,
|
||||
'Downloading %s mmc JSON' % mmc_type, fatal=False)
|
||||
if not mmc:
|
||||
continue
|
||||
if not duration:
|
||||
duration = int_or_none(mmc.get('duration'))
|
||||
for location in mmc['locations']:
|
||||
gat = self._proto_relative_url(location.get('gat'), 'http:')
|
||||
gcp = location.get('gcp')
|
||||
ogn = location.get('ogn')
|
||||
if None in (gat, gcp, ogn):
|
||||
continue
|
||||
token_data = {
|
||||
'gcp': gcp,
|
||||
'ogn': ogn,
|
||||
'sta': 0,
|
||||
}
|
||||
media = self._download_json(
|
||||
gat, video_id, data=json.dumps(token_data).encode('utf-8'),
|
||||
headers={
|
||||
'Content-Type': 'application/json;charset=utf-8',
|
||||
'Referer': url,
|
||||
}, fatal=False) or {}
|
||||
stream = media.get('stream') or media.get('file')
|
||||
if not stream:
|
||||
continue
|
||||
ext = determine_ext(stream)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': content.get('dataPoster') or config.get('poster', {}).get('imageUrl'),
|
||||
'duration': duration,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
title = self._html_search_meta(
|
||||
['og:title', 'twitter:title'], webpage, 'title')
|
||||
info = self._get_player_info(url, webpage)
|
||||
article = self._parse_json(self._search_regex(
|
||||
r'window\.\$REACTBASE_STATE\.article\s*=\s*({.+})',
|
||||
webpage, 'article'), display_id)['article']
|
||||
title = article.get('title')
|
||||
description = clean_html(article.get('leadParagraph'))
|
||||
if article.get('editorialType') != 'VID':
|
||||
entries = []
|
||||
for p in article.get('body', []):
|
||||
content = p.get('content')
|
||||
if p.get('type') != 'video' or not content:
|
||||
continue
|
||||
entries.append(self._parse_content(content, url))
|
||||
return self.playlist_result(
|
||||
entries, str_or_none(article.get('id')), title, description)
|
||||
content = article['opening']['content']
|
||||
info = self._parse_content(content, url)
|
||||
info.update({
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': self._html_search_meta(
|
||||
['og:description', 'twitter:description'],
|
||||
webpage, 'title', fatal=False),
|
||||
'description': description,
|
||||
})
|
||||
return info
|
||||
|
@ -19,6 +19,7 @@ class TF1IE(InfoExtractor):
|
||||
# Sometimes wat serves the whole file with the --test option
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 404'],
|
||||
}, {
|
||||
'url': 'http://www.tfou.fr/chuggington/videos/le-grand-mysterioso-chuggington-7085291-739.html',
|
||||
'info_dict': {
|
||||
|
@ -32,13 +32,15 @@ _x = lambda p: xpath_with_ns(p, {'smil': default_ns})
|
||||
|
||||
|
||||
class ThePlatformBaseIE(OnceIE):
|
||||
_TP_TLD = 'com'
|
||||
|
||||
def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
|
||||
meta = self._download_xml(
|
||||
smil_url, video_id, note=note, query={'format': 'SMIL'},
|
||||
headers=self.geo_verification_headers())
|
||||
error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
|
||||
if error_element is not None and error_element.attrib['src'].startswith(
|
||||
'http://link.theplatform.com/s/errorFiles/Unavailable.'):
|
||||
'http://link.theplatform.%s/s/errorFiles/Unavailable.' % self._TP_TLD):
|
||||
raise ExtractorError(error_element.attrib['abstract'], expected=True)
|
||||
|
||||
smil_formats = self._parse_smil_formats(
|
||||
@ -66,7 +68,7 @@ class ThePlatformBaseIE(OnceIE):
|
||||
return formats, subtitles
|
||||
|
||||
def _download_theplatform_metadata(self, path, video_id):
|
||||
info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
|
||||
info_url = 'http://link.theplatform.%s/s/%s?format=preview' % (self._TP_TLD, path)
|
||||
return self._download_json(info_url, video_id)
|
||||
|
||||
def _parse_theplatform_metadata(self, info):
|
||||
@ -308,7 +310,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
|
||||
|
||||
class ThePlatformFeedIE(ThePlatformBaseIE):
|
||||
_URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&%s'
|
||||
_VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[\w-]+))'
|
||||
_VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[^&]+))'
|
||||
_TESTS = [{
|
||||
# From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
|
||||
'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
|
||||
@ -325,6 +327,9 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
||||
'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'],
|
||||
'uploader': 'NBCU-NEWS',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byGuid=nn_netcast_180306.Copy.01',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
|
||||
|
@ -15,6 +15,7 @@ from ..utils import (
|
||||
update_url_query,
|
||||
ExtractorError,
|
||||
strip_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -154,8 +155,8 @@ class TurnerBaseIE(AdobePassIE):
|
||||
subtitles = {}
|
||||
for source in video_data.findall('closedCaptions/source'):
|
||||
for track in source.findall('track'):
|
||||
track_url = track.get('url')
|
||||
if not isinstance(track_url, compat_str) or track_url.endswith('/big'):
|
||||
track_url = url_or_none(track.get('url'))
|
||||
if not track_url or track_url.endswith('/big'):
|
||||
continue
|
||||
lang = track.get('lang') or track.get('label') or 'en'
|
||||
subtitles.setdefault(lang, []).append({
|
||||
|
@ -4,10 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -106,9 +106,8 @@ class TVNetIE(InfoExtractor):
|
||||
for stream in self._download_json(data_file, video_id):
|
||||
if not isinstance(stream, dict):
|
||||
continue
|
||||
stream_url = stream.get('url')
|
||||
if (stream_url in stream_urls or not stream_url or
|
||||
not isinstance(stream_url, compat_str)):
|
||||
stream_url = url_or_none(stream.get('url'))
|
||||
if stream_url in stream_urls or not stream_url:
|
||||
continue
|
||||
stream_urls.add(stream_url)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
|
@ -19,8 +19,8 @@ class TVNowBaseIE(InfoExtractor):
|
||||
_VIDEO_FIELDS = (
|
||||
'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
|
||||
'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode',
|
||||
'manifest.dashclear', 'format.title', 'format.defaultImage169Format',
|
||||
'format.defaultImage169Logo')
|
||||
'manifest.dashclear', 'manifest.hlsclear', 'manifest.smoothclear',
|
||||
'format.title', 'format.defaultImage169Format', 'format.defaultImage169Logo')
|
||||
|
||||
def _call_api(self, path, video_id, query):
|
||||
return self._download_json(
|
||||
@ -31,27 +31,42 @@ class TVNowBaseIE(InfoExtractor):
|
||||
video_id = compat_str(info['id'])
|
||||
title = info['title']
|
||||
|
||||
mpd_url = info['manifest']['dashclear']
|
||||
if not mpd_url:
|
||||
paths = []
|
||||
for manifest_url in (info.get('manifest') or {}).values():
|
||||
if not manifest_url:
|
||||
continue
|
||||
manifest_url = update_url_query(manifest_url, {'filter': ''})
|
||||
path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
|
||||
if path in paths:
|
||||
continue
|
||||
paths.append(path)
|
||||
|
||||
def url_repl(proto, suffix):
|
||||
return re.sub(
|
||||
r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
|
||||
r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
|
||||
'.ism/' + suffix, manifest_url))
|
||||
|
||||
formats = self._extract_mpd_formats(
|
||||
url_repl('dash', '.mpd'), video_id,
|
||||
mpd_id='dash', fatal=False)
|
||||
formats.extend(self._extract_ism_formats(
|
||||
url_repl('hss', 'Manifest'),
|
||||
video_id, ism_id='mss', fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
url_repl('hls', '.m3u8'), video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
if formats:
|
||||
break
|
||||
else:
|
||||
if info.get('isDrm'):
|
||||
raise ExtractorError(
|
||||
'Video %s is DRM protected' % video_id, expected=True)
|
||||
if info.get('geoblocked'):
|
||||
raise ExtractorError(
|
||||
'Video %s is not available from your location due to geo restriction' % video_id,
|
||||
expected=True)
|
||||
raise self.raise_geo_restricted()
|
||||
if not info.get('free', True):
|
||||
raise ExtractorError(
|
||||
'Video %s is not available for free' % video_id, expected=True)
|
||||
|
||||
mpd_url = update_url_query(mpd_url, {'filter': ''})
|
||||
formats = self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash', fatal=False)
|
||||
formats.extend(self._extract_ism_formats(
|
||||
mpd_url.replace('dash.', 'hss.').replace('/.mpd', '/Manifest'),
|
||||
video_id, ism_id='mss', fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
mpd_url.replace('dash.', 'hls.').replace('/.mpd', '/.m3u8'),
|
||||
video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = info.get('articleLong') or info.get('articleShort')
|
||||
@ -88,7 +103,7 @@ class TVNowBaseIE(InfoExtractor):
|
||||
class TVNowIE(TVNowBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
|
||||
(?:www\.)?tvnow\.(?:de|at|ch)/(?P<station>[^/]+)/
|
||||
(?P<show_id>[^/]+)/
|
||||
(?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
|
||||
'''
|
||||
@ -140,11 +155,13 @@ class TVNowIE(TVNowBaseIE):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = '%s/%s' % re.match(self._VALID_URL, url).groups()
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = '%s/%s' % mobj.group(2, 3)
|
||||
|
||||
info = self._call_api(
|
||||
'movies/' + display_id, display_id, query={
|
||||
'fields': ','.join(self._VIDEO_FIELDS),
|
||||
'station': mobj.group(1),
|
||||
})
|
||||
|
||||
return self._extract_video(info, display_id)
|
||||
|
@ -19,6 +19,7 @@ from ..utils import (
|
||||
try_get,
|
||||
unsmuggle_url,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -255,7 +256,8 @@ class TVPlayIE(InfoExtractor):
|
||||
quality = qualities(['hls', 'medium', 'high'])
|
||||
formats = []
|
||||
for format_id, video_url in streams.get('streams', {}).items():
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
video_url = url_or_none(video_url)
|
||||
if not video_url:
|
||||
continue
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'f4m':
|
||||
|
@ -27,6 +27,7 @@ from ..utils import (
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
@ -663,8 +664,8 @@ class TwitchClipsIE(TwitchBaseIE):
|
||||
for option in status['quality_options']:
|
||||
if not isinstance(option, dict):
|
||||
continue
|
||||
source = option.get('source')
|
||||
if not source or not isinstance(source, compat_str):
|
||||
source = url_or_none(option.get('source'))
|
||||
if not source:
|
||||
continue
|
||||
formats.append({
|
||||
'url': source,
|
||||
|
@ -20,6 +20,7 @@ from ..utils import (
|
||||
sanitized_Request,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@ -265,8 +266,8 @@ class UdemyIE(InfoExtractor):
|
||||
if not isinstance(source_list, list):
|
||||
return
|
||||
for source in source_list:
|
||||
video_url = source.get('file') or source.get('src')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
video_url = url_or_none(source.get('file') or source.get('src'))
|
||||
if not video_url:
|
||||
continue
|
||||
if source.get('type') == 'application/x-mpegURL' or determine_ext(video_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
@ -293,8 +294,8 @@ class UdemyIE(InfoExtractor):
|
||||
continue
|
||||
if track.get('kind') != 'captions':
|
||||
continue
|
||||
src = track.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
src = url_or_none(track.get('src'))
|
||||
if not src:
|
||||
continue
|
||||
lang = track.get('language') or track.get(
|
||||
'srclang') or track.get('label')
|
||||
@ -314,8 +315,8 @@ class UdemyIE(InfoExtractor):
|
||||
for cc in captions:
|
||||
if not isinstance(cc, dict):
|
||||
continue
|
||||
cc_url = cc.get('url')
|
||||
if not cc_url or not isinstance(cc_url, compat_str):
|
||||
cc_url = url_or_none(cc.get('url'))
|
||||
if not cc_url:
|
||||
continue
|
||||
lang = try_get(cc, lambda x: x['locale']['locale'], compat_str)
|
||||
sub_dict = (automatic_captions if cc.get('source') == 'auto'
|
||||
|
@ -24,6 +24,7 @@ class VGTVIE(XstreamIE):
|
||||
'aftenposten.no/webtv': 'aptv',
|
||||
'ap.vgtv.no/webtv': 'aptv',
|
||||
'tv.aftonbladet.se/abtv': 'abtv',
|
||||
'www.aftonbladet.se/tv': 'abtv',
|
||||
}
|
||||
|
||||
_APP_NAME_TO_VENDOR = {
|
||||
@ -44,7 +45,7 @@ class VGTVIE(XstreamIE):
|
||||
(?:
|
||||
(?:\#!/)?(?:video|live)/|
|
||||
embed?.*id=|
|
||||
articles/
|
||||
a(?:rticles)?/
|
||||
)|
|
||||
(?P<appname>
|
||||
%s
|
||||
@ -143,6 +144,10 @@ class VGTVIE(XstreamIE):
|
||||
'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.aftonbladet.se/tv/a/36015',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'abtv:140026',
|
||||
'only_matching': True,
|
||||
@ -178,13 +183,15 @@ class VGTVIE(XstreamIE):
|
||||
|
||||
streams = data['streamUrls']
|
||||
stream_type = data.get('streamType')
|
||||
|
||||
is_live = stream_type == 'live'
|
||||
formats = []
|
||||
|
||||
hls_url = streams.get('hls')
|
||||
if hls_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
hls_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8' if is_live else 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
hds_url = streams.get('hds')
|
||||
if hds_url:
|
||||
@ -229,13 +236,13 @@ class VGTVIE(XstreamIE):
|
||||
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': self._live_title(data['title']) if stream_type == 'live' else data['title'],
|
||||
'title': self._live_title(data['title']) if is_live else data['title'],
|
||||
'description': data['description'],
|
||||
'thumbnail': data['images']['main'] + '?t[]=900x506q80',
|
||||
'timestamp': data['published'],
|
||||
'duration': float_or_none(data['duration'], 1000),
|
||||
'view_count': data['displays'],
|
||||
'is_live': True if stream_type == 'live' else False,
|
||||
'is_live': is_live,
|
||||
})
|
||||
return info
|
||||
|
||||
|
@ -3,15 +3,13 @@ from __future__ import unicode_literals
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -166,8 +164,8 @@ class VidmeIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for f in video.get('formats', []):
|
||||
format_url = f.get('uri')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(f.get('uri'))
|
||||
if not format_url:
|
||||
continue
|
||||
format_type = f.get('type')
|
||||
if format_type == 'dash':
|
||||
|
@ -54,7 +54,8 @@ class VidziIE(InfoExtractor):
|
||||
self._search_regex(
|
||||
r'setup\(([^)]+)\)', code, 'jwplayer data',
|
||||
default=NO_DEFAULT if num == len(codes) else '{}'),
|
||||
video_id, transform_source=js_to_json)
|
||||
video_id, transform_source=lambda s: js_to_json(
|
||||
re.sub(r'\s*\+\s*window\[.+?\]', '', s)))
|
||||
if jwplayer_data:
|
||||
break
|
||||
|
||||
|
@ -539,9 +539,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
# We try to find out to which variable is assigned the config dic
|
||||
m_variable_name = re.search(r'(\w)\.video\.id', webpage)
|
||||
if m_variable_name is not None:
|
||||
config_re = r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))
|
||||
config_re = [r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))]
|
||||
else:
|
||||
config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
|
||||
config_re.append(r'\bvar\s+r\s*=\s*({.+?})\s*;')
|
||||
config = self._search_regex(config_re, webpage, 'info section',
|
||||
flags=re.DOTALL)
|
||||
config = json.loads(config)
|
||||
|
99
youtube_dl/extractor/viqeo.py
Normal file
99
youtube_dl/extractor/viqeo.py
Normal file
@ -0,0 +1,99 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class ViqeoIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
viqeo:|
|
||||
https?://cdn\.viqeo\.tv/embed/*\?.*?\bvid=|
|
||||
https?://api\.viqeo\.tv/v\d+/data/startup?.*?\bvideo(?:%5B%5D|\[\])=
|
||||
)
|
||||
(?P<id>[\da-f]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://cdn.viqeo.tv/embed/?vid=cde96f09d25f39bee837',
|
||||
'md5': 'a169dd1a6426b350dca4296226f21e76',
|
||||
'info_dict': {
|
||||
'id': 'cde96f09d25f39bee837',
|
||||
'ext': 'mp4',
|
||||
'title': 'cde96f09d25f39bee837',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 76,
|
||||
},
|
||||
}, {
|
||||
'url': 'viqeo:cde96f09d25f39bee837',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://api.viqeo.tv/v1/data/startup?video%5B%5D=71bbec412ade45c3216c&profile=112',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [
|
||||
mobj.group('url')
|
||||
for mobj in re.finditer(
|
||||
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cdn\.viqeo\.tv/embed/*\?.*?\bvid=[\da-f]+.*?)\1',
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'https://cdn.viqeo.tv/embed/?vid=%s' % video_id, video_id)
|
||||
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'SLOT_DATA\s*=\s*({.+?})\s*;', webpage, 'slot data'),
|
||||
video_id)
|
||||
|
||||
formats = []
|
||||
thumbnails = []
|
||||
for media_file in data['mediaFiles']:
|
||||
if not isinstance(media_file, dict):
|
||||
continue
|
||||
media_url = url_or_none(media_file.get('url'))
|
||||
if not media_url or not media_url.startswith(('http', '//')):
|
||||
continue
|
||||
media_type = str_or_none(media_file.get('type'))
|
||||
if not media_type:
|
||||
continue
|
||||
media_kind = media_type.split('/')[0].lower()
|
||||
f = {
|
||||
'url': media_url,
|
||||
'width': int_or_none(media_file.get('width')),
|
||||
'height': int_or_none(media_file.get('height')),
|
||||
}
|
||||
format_id = str_or_none(media_file.get('quality'))
|
||||
if media_kind == 'image':
|
||||
f['id'] = format_id
|
||||
thumbnails.append(f)
|
||||
elif media_kind in ('video', 'audio'):
|
||||
is_audio = media_kind == 'audio'
|
||||
f.update({
|
||||
'format_id': 'audio' if is_audio else format_id,
|
||||
'fps': int_or_none(media_file.get('fps')),
|
||||
'vcodec': 'none' if is_audio else None,
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = int_or_none(data.get('duration'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_id,
|
||||
'duration': duration,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
@ -195,16 +195,29 @@ class ViuOTTIE(InfoExtractor):
|
||||
'skip': 'Geo-restricted to Hong Kong',
|
||||
}]
|
||||
|
||||
_AREA_ID = {
|
||||
'HK': 1,
|
||||
'SG': 2,
|
||||
'TH': 4,
|
||||
'PH': 5,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
country_code, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
query = {
|
||||
'r': 'vod/ajax-detail',
|
||||
'platform_flag_label': 'web',
|
||||
'product_id': video_id,
|
||||
}
|
||||
|
||||
area_id = self._AREA_ID.get(country_code.upper())
|
||||
if area_id:
|
||||
query['area_id'] = area_id
|
||||
|
||||
product_data = self._download_json(
|
||||
'http://www.viu.com/ott/%s/index.php' % country_code, video_id,
|
||||
'Downloading video info', query={
|
||||
'r': 'vod/ajax-detail',
|
||||
'platform_flag_label': 'web',
|
||||
'product_id': video_id,
|
||||
})['data']
|
||||
'Downloading video info', query=query)['data']
|
||||
|
||||
video_data = product_data.get('current_product')
|
||||
if not video_data:
|
||||
@ -214,6 +227,9 @@ class ViuOTTIE(InfoExtractor):
|
||||
'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
|
||||
video_id, 'Downloading stream info', query={
|
||||
'ccs_product_id': video_data['ccs_product_id'],
|
||||
}, headers={
|
||||
'Referer': url,
|
||||
'Origin': re.search(r'https?://[^/]+', url).group(0),
|
||||
})['data']['stream']
|
||||
|
||||
stream_sizes = stream_data.get('size', {})
|
||||
|
@ -17,9 +17,11 @@ from ..utils import (
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
remove_start,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from .dailymotion import DailymotionIE
|
||||
@ -105,10 +107,10 @@ class VKIE(VKBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'ProtivoGunz - Хуёвая песня',
|
||||
'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
|
||||
'uploader_id': '-77521',
|
||||
'duration': 195,
|
||||
'timestamp': 1329060660,
|
||||
'timestamp': 1329049880,
|
||||
'upload_date': '20120212',
|
||||
'view_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -117,12 +119,12 @@ class VKIE(VKBaseIE):
|
||||
'info_dict': {
|
||||
'id': '165548505',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Tom Cruise',
|
||||
'title': 'No name',
|
||||
'uploader': 'Tom Cruise',
|
||||
'uploader_id': '205387401',
|
||||
'duration': 9,
|
||||
'timestamp': 1374374880,
|
||||
'upload_date': '20130721',
|
||||
'view_count': int,
|
||||
'timestamp': 1374364108,
|
||||
'upload_date': '20130720',
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -206,10 +208,10 @@ class VKIE(VKBaseIE):
|
||||
'id': 'V3K4mi0SYkc',
|
||||
'ext': 'webm',
|
||||
'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
|
||||
'description': 'md5:d9903938abdc74c738af77f527ca0596',
|
||||
'duration': 178,
|
||||
'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
|
||||
'duration': 179,
|
||||
'upload_date': '20130116',
|
||||
'uploader': "Children's Joy Foundation",
|
||||
'uploader': "Children's Joy Foundation Inc.",
|
||||
'uploader_id': 'thecjf',
|
||||
'view_count': int,
|
||||
},
|
||||
@ -221,6 +223,7 @@ class VKIE(VKBaseIE):
|
||||
'id': 'k3lz2cmXyRuJQSjGHUv',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
|
||||
# TODO: fix test by fixing dailymotion description extraction
|
||||
'description': 'md5:c651358f03c56f1150b555c26d90a0fd',
|
||||
'uploader': 'AniLibria.Tv',
|
||||
'upload_date': '20160914',
|
||||
@ -240,9 +243,12 @@ class VKIE(VKBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'S-Dance, репетиции к The way show',
|
||||
'uploader': 'THE WAY SHOW | 17 апреля',
|
||||
'timestamp': 1454870100,
|
||||
'uploader_id': '-110305615',
|
||||
'timestamp': 1454859345,
|
||||
'upload_date': '20160207',
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -295,7 +301,7 @@ class VKIE(VKBaseIE):
|
||||
video_id = mobj.group('videoid')
|
||||
|
||||
if video_id:
|
||||
info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
|
||||
info_url = 'https://vk.com/al_video.php?act=show_inline&al=1&video=' + video_id
|
||||
# Some videos (removed?) can only be downloaded with list id specified
|
||||
list_id = mobj.group('list_id')
|
||||
if list_id:
|
||||
@ -345,6 +351,9 @@ class VKIE(VKBaseIE):
|
||||
|
||||
r'<!>This video is no longer available, because its author has been blocked.':
|
||||
'Video %s is no longer available, because its author has been blocked.',
|
||||
|
||||
r'<!>This video is no longer available, because it has been deleted.':
|
||||
'Video %s is no longer available, because it has been deleted.',
|
||||
}
|
||||
|
||||
for error_re, error_msg in ERRORS.items():
|
||||
@ -393,7 +402,8 @@ class VKIE(VKBaseIE):
|
||||
if not data:
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'<!json>\s*({.+?})\s*<!>', info_page, 'json', default='{}'),
|
||||
[r'<!json>\s*({.+?})\s*<!>', r'<!json>\s*({.+})'],
|
||||
info_page, 'json', default='{}'),
|
||||
video_id)
|
||||
if data:
|
||||
data = data['player']['params'][0]
|
||||
@ -415,7 +425,7 @@ class VKIE(VKBaseIE):
|
||||
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
|
||||
'upload date', fatal=False))
|
||||
'upload date', default=None)) or int_or_none(data.get('date'))
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
|
||||
@ -423,7 +433,8 @@ class VKIE(VKBaseIE):
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in data.items():
|
||||
if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//', 'rtmp')):
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
|
||||
continue
|
||||
if (format_id.startswith(('url', 'cache')) or
|
||||
format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
|
||||
@ -452,9 +463,12 @@ class VKIE(VKBaseIE):
|
||||
'title': title,
|
||||
'thumbnail': data.get('jpg'),
|
||||
'uploader': data.get('md_author'),
|
||||
'uploader_id': str_or_none(data.get('author_id')),
|
||||
'duration': data.get('duration'),
|
||||
'timestamp': timestamp,
|
||||
'view_count': view_count,
|
||||
'like_count': int_or_none(data.get('liked')),
|
||||
'dislike_count': int_or_none(data.get('nolikes')),
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
|
@ -57,7 +57,7 @@ class VLiveIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://www.vlive.tv/video/%s' % video_id, video_id)
|
||||
'https://www.vlive.tv/video/%s' % video_id, video_id)
|
||||
|
||||
VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)'
|
||||
VIDEO_PARAMS_FIELD = 'video params'
|
||||
@ -108,11 +108,11 @@ class VLiveIE(InfoExtractor):
|
||||
|
||||
def _live(self, video_id, webpage):
|
||||
init_page = self._download_webpage(
|
||||
'http://www.vlive.tv/video/init/view',
|
||||
'https://www.vlive.tv/video/init/view',
|
||||
video_id, note='Downloading live webpage',
|
||||
data=urlencode_postdata({'videoSeq': video_id}),
|
||||
headers={
|
||||
'Referer': 'http://www.vlive.tv/video/%s' % video_id,
|
||||
'Referer': 'https://www.vlive.tv/video/%s' % video_id,
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
})
|
||||
|
||||
|
@ -19,7 +19,6 @@ class WatIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
|
||||
'md5': '83d882d9de5c9d97f0bb2c6273cde56a',
|
||||
'info_dict': {
|
||||
'id': '11713067',
|
||||
'ext': 'mp4',
|
||||
@ -28,10 +27,15 @@ class WatIE(InfoExtractor):
|
||||
'upload_date': '20140819',
|
||||
'duration': 120,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 404'],
|
||||
},
|
||||
{
|
||||
'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html',
|
||||
'md5': '34bdfa5ca9fd3c7eb88601b635b0424c',
|
||||
'md5': 'b16574df2c3cd1a36ca0098f2a791925',
|
||||
'info_dict': {
|
||||
'id': '11713075',
|
||||
'ext': 'mp4',
|
||||
@ -98,38 +102,25 @@ class WatIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
try:
|
||||
alt_urls = lambda manifest_url: [re.sub(r'(?:wdv|ssm)?\.ism/', repl + '.ism/', manifest_url) for repl in ('', 'ssm')]
|
||||
manifest_urls = self._download_json(
|
||||
'http://www.wat.tv/get/webhtml/' + video_id, video_id)
|
||||
m3u8_url = manifest_urls.get('hls')
|
||||
if m3u8_url:
|
||||
m3u8_url = remove_bitrate_limit(m3u8_url)
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
if m3u8_formats:
|
||||
formats.extend(m3u8_formats)
|
||||
for m3u8_alt_url in alt_urls(m3u8_url):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_alt_url, video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
m3u8_url.replace('ios', 'web').replace('.m3u8', '.f4m'),
|
||||
m3u8_alt_url.replace('ios', 'web').replace('.m3u8', '.f4m'),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
http_url = extract_url('android5/%s.mp4', 'http')
|
||||
if http_url:
|
||||
for m3u8_format in m3u8_formats:
|
||||
vbr, abr = m3u8_format.get('vbr'), m3u8_format.get('abr')
|
||||
if not vbr or not abr:
|
||||
continue
|
||||
format_id = m3u8_format['format_id'].replace('hls', 'http')
|
||||
fmt_url = re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url)
|
||||
if self._is_valid_url(fmt_url, video_id, format_id):
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
'url': fmt_url,
|
||||
'format_id': format_id,
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(f)
|
||||
mpd_url = manifest_urls.get('mpd')
|
||||
if mpd_url:
|
||||
formats.extend(self._extract_mpd_formats(remove_bitrate_limit(
|
||||
mpd_url), video_id, mpd_id='dash', fatal=False))
|
||||
mpd_url = remove_bitrate_limit(mpd_url)
|
||||
for mpd_alt_url in alt_urls(mpd_url):
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
mpd_alt_url, video_id, mpd_id='dash', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
except ExtractorError:
|
||||
abr = 64
|
||||
|
@ -67,11 +67,12 @@ class WatchBoxIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
source = self._parse_json(
|
||||
source = (self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)source["\']?\s*:\s*({.+?})\s*[,}]', webpage, 'source',
|
||||
r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config',
|
||||
default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False) or {}
|
||||
video_id, transform_source=js_to_json,
|
||||
fatal=False) or {}).get('source') or {}
|
||||
|
||||
video_id = compat_str(source.get('videoId') or video_id)
|
||||
|
||||
|
@ -13,6 +13,7 @@ from ..utils import (
|
||||
parse_duration,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -137,7 +138,8 @@ class XHamsterIE(InfoExtractor):
|
||||
else:
|
||||
format_url = format_item
|
||||
filesize = None
|
||||
if not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': '%s-%s' % (format_id, quality),
|
||||
@ -198,7 +200,8 @@ class XHamsterIE(InfoExtractor):
|
||||
default='{}'),
|
||||
video_id, fatal=False)
|
||||
for format_id, format_url in sources.items():
|
||||
if not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url:
|
||||
continue
|
||||
if format_url in format_urls:
|
||||
continue
|
||||
|
@ -4,12 +4,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -80,9 +80,9 @@ class YapFilesIE(InfoExtractor):
|
||||
formats = []
|
||||
for format_id in QUALITIES:
|
||||
is_hd = format_id == 'hd'
|
||||
format_url = playlist.get(
|
||||
'file%s' % ('_hd' if is_hd else ''))
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(playlist.get(
|
||||
'file%s' % ('_hd' if is_hd else '')))
|
||||
if not format_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user