Mirror of https://github.com/l1ving/youtube-dl, synced 2025-03-11 07:17:21 +08:00

commit 9cc3f3ced3
Merge remote-tracking branch 'upstream/master' into myversion

.github/ISSUE_TEMPLATE.md (vendored, 6 changes)
@@ -6,8 +6,8 @@
 
 ---
 
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.06.25*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.06.25**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.07.21*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.07.21**
 
 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2018.06.25
+[debug] youtube-dl version 2018.07.21
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
AUTHORS (7 changes)
@@ -239,3 +239,10 @@ Martin Weinelt
 Surya Oktafendri
 TingPing
 Alexandre Macabies
+Bastian de Groot
+Niklas Haas
+András Veres-Szentkirályi
+Enes Solak
+Nathan Rossi
+Thomas van der Berg
+Luca Cherubin
ChangeLog (61 changes)
@@ -1,3 +1,64 @@
+version 2018.07.21
+
+Core
++ [utils] Introduce url_or_none
+* [utils] Allow JSONP without function name (#17028)
++ [extractor/common] Extract DASH and MSS formats from SMIL manifests
+
+Extractors
++ [bbc] Add support for BBC Radio Play pages (#17022)
+* [iwara] Fix download URLs (#17026)
+* [vrtnu] Relax title extraction and extract JSON-LD (#17018)
++ [viu] Pass Referer and Origin headers and area id (#16992)
++ [vimeo] Add another config regular expression (#17013)
++ [facebook] Extract view count (#16942)
+* [dailymotion] Improve description extraction (#16984)
+* [slutload] Fix and improve extraction (#17001)
+* [mediaset] Fix extraction (#16977)
++ [theplatform] Add support for theplatform TLD customization (#16977)
+* [imgur] Relax URL regular expression (#16987)
+* [pornhub] Improve extraction and extract all formats (#12166, #15891, #16262,
+  #16959)
+
+
+version 2018.07.10
+
+Core
+* [utils] Share JSON-LD regular expression
+* [downloader/dash] Improve error handling (#16927)
+
+Extractors
++ [nrktv] Add support for new season and serie URL schema
++ [nrktv] Add support for new episode URL schema (#16909)
++ [frontendmasters] Add support for frontendmasters.com (#3661, #16328)
+* [funk] Fix extraction (#16918)
+* [watchbox] Fix extraction (#16904)
+* [dplayit] Sort formats
+* [dplayit] Fix extraction (#16901)
+* [youtube] Improve login error handling (#13822)
+
+
+version 2018.07.04
+
+Core
+* [extractor/common] Properly escape % in MPD templates (#16867)
+* [extractor/common] Use source URL as Referer for HTML5 entries (16849)
+* Prefer ffmpeg over avconv by default (#8622)
+
+Extractors
+* [pluralsight] Switch to graphql (#16889, #16895, #16896, #16899)
+* [lynda] Simplify login and improve error capturing (#16891)
++ [go90] Add support for embed URLs (#16873)
+* [go90] Detect geo restriction error and pass geo verification headers
+  (#16874)
+* [vlive] Fix live streams extraction (#16871)
+* [npo] Fix typo (#16872)
++ [mediaset] Add support for new videos and extract all formats (#16568)
+* [dctptv] Restore extraction based on REST API (#16850)
+* [svt] Improve extraction and add support for pages (#16802)
+* [porncom] Fix extraction (#16808)
+
+
 version 2018.06.25
 
 Extractors
README.md (10 changes)
@@ -17,7 +17,7 @@ youtube-dl - download videos from youtube.com or other video platforms
 
 # INSTALLATION
 
-To install it right away for all UNIX users (Linux, OS X, etc.), type:
+To install it right away for all UNIX users (Linux, macOS, etc.), type:
 
     sudo curl -L https://yt-dl.org/downloads/latest/youtube-dl -o /usr/local/bin/youtube-dl
     sudo chmod a+rx /usr/local/bin/youtube-dl
@@ -35,7 +35,7 @@ You can also use pip:
 
 This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
 
-OS X users can install youtube-dl with [Homebrew](https://brew.sh/):
+macOS users can install youtube-dl with [Homebrew](https://brew.sh/):
 
     brew install youtube-dl
 
@@ -427,9 +427,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
                                      default; fix file if we can, warn
                                      otherwise)
     --prefer-avconv                  Prefer avconv over ffmpeg for running the
-                                     postprocessors (default)
-    --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the
                                      postprocessors
+    --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the
+                                     postprocessors (default)
     --ffmpeg-location PATH           Location of the ffmpeg/avconv binary;
                                      either the path to the binary or its
                                      containing directory.
@@ -442,7 +442,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
 
 # CONFIGURATION
 
-You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default configuration file may not exist so you may need to create it yourself.
+You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and macOS, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default configuration file may not exist so you may need to create it yourself.
 
 For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
 ```
docs/supportedsites.md
@@ -302,6 +302,9 @@
 - **Freesound**
 - **freespeech.org**
 - **FreshLive**
+- **FrontendMasters**
+- **FrontendMastersCourse**
+- **FrontendMastersLesson**
 - **Funimation**
 - **FunkChannel**
 - **FunkMix**
@@ -589,7 +592,9 @@
 - **NRKSkole**: NRK Skole
 - **NRKTV**: NRK TV and NRK Radio
 - **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte
+- **NRKTVEpisode**
 - **NRKTVEpisodes**
+- **NRKTVSeason**
 - **NRKTVSeries**
 - **ntv.ru**
 - **Nuvid**
@@ -813,6 +818,7 @@
 - **StretchInternet**
 - **SunPorno**
 - **SVT**
+- **SVTPage**
 - **SVTPlay**: SVT Play and Öppet arkiv
 - **SVTSeries**
 - **SWRMediathek**
test/test_utils.py
@@ -78,6 +78,7 @@ from youtube_dl.utils import (
     uppercase_escape,
     lowercase_escape,
     url_basename,
+    url_or_none,
     base_url,
     urljoin,
     urlencode_postdata,
@@ -507,6 +508,16 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
         self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
 
+    def test_url_or_none(self):
+        self.assertEqual(url_or_none(None), None)
+        self.assertEqual(url_or_none(''), None)
+        self.assertEqual(url_or_none('foo'), None)
+        self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
+        self.assertEqual(url_or_none('https://foo.de'), 'https://foo.de')
+        self.assertEqual(url_or_none('http$://foo.de'), None)
+        self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
+        self.assertEqual(url_or_none('//foo.de'), '//foo.de')
+
     def test_parse_age_limit(self):
         self.assertEqual(parse_age_limit(None), None)
         self.assertEqual(parse_age_limit(False), None)
@@ -717,6 +728,10 @@ class TestUtil(unittest.TestCase):
         d = json.loads(stripped)
         self.assertEqual(d, {'status': 'success'})
 
+        stripped = strip_jsonp('({"status": "success"});')
+        d = json.loads(stripped)
+        self.assertEqual(d, {'status': 'success'})
+
     def test_uppercase_escape(self):
         self.assertEqual(uppercase_escape('aä'), 'aä')
         self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
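
Note: the new `url_or_none` helper itself is not part of this diff, but the assertions above pin down its contract: falsy or non-URL input yields None, while http(s) and protocol-relative URLs pass through unchanged. A minimal sketch consistent with those tests (the real implementation in youtube_dl/utils.py may differ in detail):

    import re

    def url_or_none(url):
        # Accept only strings that look like scheme:// or protocol-relative
        # (//host/...) URLs; anything else, including '', None and bogus
        # schemes such as 'http$', maps to None.
        if not url or not isinstance(url, str):
            return None
        url = url.strip()
        return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None

The new `strip_jsonp` assertion likewise documents the other utils change from the ChangeLog: a JSONP wrapper without a function name, `({"status": "success"});`, is now stripped as well.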
youtube_dl/downloader/dash.py
@@ -2,7 +2,10 @@ from __future__ import unicode_literals
 
 from .fragment import FragmentFD
 from ..compat import compat_urllib_error
-from ..utils import urljoin
+from ..utils import (
+    DownloadError,
+    urljoin,
+)
 
 
 class DashSegmentsFD(FragmentFD):
@@ -57,6 +60,14 @@ class DashSegmentsFD(FragmentFD):
                     count += 1
                     if count <= fragment_retries:
                         self.report_retry_fragment(err, frag_index, count, fragment_retries)
+                except DownloadError:
+                    # Don't retry fragment if error occurred during HTTP downloading
+                    # itself since it has own retry settings
+                    if not fatal:
+                        self.report_skip_fragment(frag_index)
+                        break
+                    raise
+
                 if count > fragment_retries:
                     if not fatal:
                         self.report_skip_fragment(frag_index)
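
Note: the comment in the new `except DownloadError` branch is the heart of this fix: the HTTP layer underneath already applies its own retry policy (youtube-dl's `--retries`), so a DownloadError bubbling up from it should not be retried a second time by the fragment loop. A self-contained toy model of that control flow (all names here are illustrative stand-ins, not youtube-dl's real API):

    class HTTPError(Exception):
        pass

    class DownloadError(Exception):
        pass

    def download_fragment(fetch, fragment_retries=3, fatal=True):
        count = 0
        while count <= fragment_retries:
            try:
                return fetch()
            except HTTPError:
                # Transient per-fragment failure: retry in this loop.
                count += 1
            except DownloadError:
                # The HTTP downloader already retried internally; retrying
                # again here would multiply the two retry counts.
                if not fatal:
                    return None  # skip this fragment
                raise
        raise DownloadError('fragment gave up after %d retries' % fragment_retries)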
youtube_dl/extractor/adultswim.py
@@ -7,6 +7,7 @@ from .turner import TurnerBaseIE
 from ..utils import (
     int_or_none,
     strip_or_none,
+    url_or_none,
 )
 
 
@@ -98,7 +99,7 @@ class AdultSwimIE(TurnerBaseIE):
         if not video_id:
             entries = []
             for episode in video_data.get('archiveEpisodes', []):
-                episode_url = episode.get('url')
+                episode_url = url_or_none(episode.get('url'))
                 if not episode_url:
                     continue
                 entries.append(self.url_result(
youtube_dl/extractor/afreecatv.py
@@ -9,6 +9,7 @@ from ..utils import (
     determine_ext,
     ExtractorError,
     int_or_none,
+    url_or_none,
     urlencode_postdata,
     xpath_text,
 )
@@ -304,7 +305,7 @@ class AfreecaTVIE(InfoExtractor):
         file_elements = video_element.findall(compat_xpath('./file'))
         one = len(file_elements) == 1
         for file_num, file_element in enumerate(file_elements, start=1):
-            file_url = file_element.text
+            file_url = url_or_none(file_element.text)
             if not file_url:
                 continue
             key = file_element.get('key', '')
youtube_dl/extractor/amp.py
@@ -3,11 +3,12 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
-    int_or_none,
-    parse_iso8601,
-    mimetype2ext,
     determine_ext,
     ExtractorError,
+    int_or_none,
+    mimetype2ext,
+    parse_iso8601,
+    url_or_none,
 )
 
 
@@ -35,7 +36,7 @@ class AMPIE(InfoExtractor):
             media_thumbnail = [media_thumbnail]
         for thumbnail_data in media_thumbnail:
             thumbnail = thumbnail_data.get('@attributes', {})
-            thumbnail_url = thumbnail.get('url')
+            thumbnail_url = url_or_none(thumbnail.get('url'))
             if not thumbnail_url:
                 continue
             thumbnails.append({
@@ -51,7 +52,7 @@ class AMPIE(InfoExtractor):
             media_subtitle = [media_subtitle]
         for subtitle_data in media_subtitle:
             subtitle = subtitle_data.get('@attributes', {})
-            subtitle_href = subtitle.get('href')
+            subtitle_href = url_or_none(subtitle.get('href'))
             if not subtitle_href:
                 continue
             subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
@@ -65,7 +66,7 @@ class AMPIE(InfoExtractor):
             media_content = [media_content]
         for media_data in media_content:
             media = media_data.get('@attributes', {})
-            media_url = media.get('url')
+            media_url = url_or_none(media.get('url'))
             if not media_url:
                 continue
             ext = mimetype2ext(media.get('type')) or determine_ext(media_url)
@@ -79,7 +80,7 @@ class AMPIE(InfoExtractor):
             else:
                 formats.append({
                     'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
-                    'url': media['url'],
+                    'url': media_url,
                     'tbr': int_or_none(media.get('bitrate')),
                     'filesize': int_or_none(media.get('fileSize')),
                     'ext': ext,
youtube_dl/extractor/animeondemand.py
@@ -8,6 +8,7 @@ from ..utils import (
     determine_ext,
     extract_attributes,
     ExtractorError,
+    url_or_none,
     urlencode_postdata,
     urljoin,
 )
@@ -165,7 +166,7 @@ class AnimeOnDemandIE(InfoExtractor):
                 }, fatal=False)
                 if not playlist:
                     continue
-                stream_url = playlist.get('streamurl')
+                stream_url = url_or_none(playlist.get('streamurl'))
                 if stream_url:
                     rtmp = re.search(
                         r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
youtube_dl/extractor/aol.py
@@ -7,6 +7,7 @@ from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
     int_or_none,
+    url_or_none,
 )
 
 
@@ -77,7 +78,7 @@ class AolIE(InfoExtractor):
                 formats.extend(self._extract_m3u8_formats(
                     m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
             for rendition in video_data.get('renditions', []):
-                video_url = rendition.get('url')
+                video_url = url_or_none(rendition.get('url'))
                 if not video_url:
                     continue
                 ext = rendition.get('format')
youtube_dl/extractor/apa.py
@@ -4,10 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     determine_ext,
     js_to_json,
+    url_or_none,
 )
 
 
@@ -68,8 +68,8 @@ class APAIE(InfoExtractor):
         for source in sources:
             if not isinstance(source, dict):
                 continue
-            source_url = source.get('file')
-            if not source_url or not isinstance(source_url, compat_str):
+            source_url = url_or_none(source.get('file'))
+            if not source_url:
                 continue
             ext = determine_ext(source_url)
             if ext == 'm3u8':
youtube_dl/extractor/aparat.py
@@ -5,6 +5,7 @@ from .common import InfoExtractor
 from ..utils import (
     int_or_none,
     mimetype2ext,
+    url_or_none,
 )
 
 
@@ -43,7 +44,7 @@ class AparatIE(InfoExtractor):
 
         formats = []
         for item in file_list[0]:
-            file_url = item.get('file')
+            file_url = url_or_none(item.get('file'))
             if not file_url:
                 continue
             ext = mimetype2ext(item.get('type'))
youtube_dl/extractor/ard.py
@@ -5,7 +5,6 @@ import re
 
 from .common import InfoExtractor
 from .generic import GenericIE
-from ..compat import compat_str
 from ..utils import (
     determine_ext,
     ExtractorError,
@@ -15,6 +14,7 @@ from ..utils import (
     unified_strdate,
     xpath_text,
     update_url_query,
+    url_or_none,
 )
 from ..compat import compat_etree_fromstring
 
@@ -100,7 +100,7 @@ class ARDMediathekIE(InfoExtractor):
                 quality = stream.get('_quality')
                 server = stream.get('_server')
                 for stream_url in stream_urls:
-                    if not isinstance(stream_url, compat_str) or '//' not in stream_url:
+                    if not url_or_none(stream_url):
                         continue
                     ext = determine_ext(stream_url)
                     if quality != 'auto' and ext in ('f4m', 'm3u8'):
youtube_dl/extractor/bandcamp.py
@@ -19,6 +19,7 @@ from ..utils import (
     unescapeHTML,
     update_url_query,
     unified_strdate,
+    url_or_none,
 )
 
 
@@ -131,8 +132,8 @@ class BandcampIE(InfoExtractor):
                     fatal=False)
                 if not stat:
                     continue
-                retry_url = stat.get('retry_url')
-                if not isinstance(retry_url, compat_str):
+                retry_url = url_or_none(stat.get('retry_url'))
+                if not retry_url:
                     continue
                 formats.append({
                     'url': self._proto_relative_url(retry_url, 'http:'),
@@ -306,7 +307,7 @@ class BandcampWeeklyIE(InfoExtractor):
 
         formats = []
         for format_id, format_url in show['audio_stream'].items():
-            if not isinstance(format_url, compat_str):
+            if not url_or_none(format_url):
                 continue
             for known_ext in KNOWN_EXTENSIONS:
                 if known_ext in format_id:
youtube_dl/extractor/bbc.py
@@ -778,6 +778,17 @@ class BBCIE(BBCCoUkIE):
         'params': {
             'skip_download': True,
         }
+    }, {
+        # window.__PRELOADED_STATE__
+        'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
+        'info_dict': {
+            'id': 'b0b9z4vz',
+            'ext': 'mp4',
+            'title': 'Prom 6: An American in Paris and Turangalila',
+            'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
+            'uploader': 'Radio 3',
+            'uploader_id': 'bbc_radio_three',
+        },
     }]
 
     @classmethod
@@ -1000,6 +1011,36 @@ class BBCIE(BBCCoUkIE):
                 'subtitles': subtitles,
             }
 
+        preload_state = self._parse_json(self._search_regex(
+            r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
+            'preload state', default='{}'), playlist_id, fatal=False)
+        if preload_state:
+            current_programme = preload_state.get('programmes', {}).get('current') or {}
+            programme_id = current_programme.get('id')
+            if current_programme and programme_id and current_programme.get('type') == 'playable_item':
+                title = current_programme.get('titles', {}).get('tertiary') or playlist_title
+                formats, subtitles = self._download_media_selector(programme_id)
+                self._sort_formats(formats)
+                synopses = current_programme.get('synopses') or {}
+                network = current_programme.get('network') or {}
+                duration = int_or_none(
+                    current_programme.get('duration', {}).get('value'))
+                thumbnail = None
+                image_url = current_programme.get('image_url')
+                if image_url:
+                    thumbnail = image_url.replace('{recipe}', '1920x1920')
+                return {
+                    'id': programme_id,
+                    'title': title,
+                    'description': dict_get(synopses, ('long', 'medium', 'short')),
+                    'thumbnail': thumbnail,
+                    'duration': duration,
+                    'uploader': network.get('short_title'),
+                    'uploader_id': network.get('id'),
+                    'formats': formats,
+                    'subtitles': subtitles,
+                }
+
         bbc3_config = self._parse_json(
             self._search_regex(
                 r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
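
Note: from the field accesses in the new `preload_state` block and the values in the new test above, the `window.__PRELOADED_STATE__` payload is assumed to look roughly like this (illustrative sketch only, not a captured response):

    preload_state = {
        'programmes': {
            'current': {
                'id': 'b0b9z4vz',
                'type': 'playable_item',
                'titles': {'tertiary': 'Prom 6: An American in Paris and Turangalila'},
                'synopses': {'short': '...', 'medium': '...', 'long': '...'},
                'network': {'id': 'bbc_radio_three', 'short_title': 'Radio 3'},
                'duration': {'value': 6300},  # illustrative value, in seconds
                # '{recipe}' is replaced with a concrete size such as 1920x1920:
                'image_url': 'https://example.invalid/images/{recipe}/p012345.jpg',
            },
        },
    }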
youtube_dl/extractor/breakcom.py
@@ -4,8 +4,10 @@ import re
 
 from .common import InfoExtractor
 from .youtube import YoutubeIE
-from ..compat import compat_str
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    url_or_none,
+)
 
 
 class BreakIE(InfoExtractor):
@@ -55,8 +57,8 @@ class BreakIE(InfoExtractor):
 
         formats = []
         for video in content:
-            video_url = video.get('url')
-            if not video_url or not isinstance(video_url, compat_str):
+            video_url = url_or_none(video.get('url'))
+            if not video_url:
                 continue
             bitrate = int_or_none(self._search_regex(
                 r'(\d+)_kbps', video_url, 'tbr', default=None))
youtube_dl/extractor/cammodels.py
@@ -2,10 +2,10 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     int_or_none,
+    url_or_none,
 )
 
 
@@ -56,8 +56,8 @@ class CamModelsIE(InfoExtractor):
         for media in encodings:
             if not isinstance(media, dict):
                 continue
-            media_url = media.get('location')
-            if not media_url or not isinstance(media_url, compat_str):
+            media_url = url_or_none(media.get('location'))
+            if not media_url:
                 continue
 
             format_id_list = [format_id]
youtube_dl/extractor/canvas.py
@@ -11,6 +11,7 @@ from ..utils import (
     strip_or_none,
     float_or_none,
     int_or_none,
+    merge_dicts,
     parse_iso8601,
 )
 
@@ -248,9 +249,13 @@ class VrtNUIE(GigyaBaseIE):
 
         webpage, urlh = self._download_webpage_handle(url, display_id)
 
-        title = self._html_search_regex(
+        info = self._search_json_ld(webpage, display_id, default={})
+
+        # title is optional here since it may be extracted by extractor
+        # that is delegated from here
+        title = strip_or_none(self._html_search_regex(
             r'(?ms)<h1 class="content__heading">(.+?)</h1>',
-            webpage, 'title').strip()
+            webpage, 'title', default=None))
 
         description = self._html_search_regex(
             r'(?ms)<div class="content__description">(.+?)</div>',
@@ -295,7 +300,7 @@ class VrtNUIE(GigyaBaseIE):
         # the first one
         video_id = list(video.values())[0].get('videoid')
 
-        return {
+        return merge_dicts(info, {
             '_type': 'url_transparent',
             'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
             'ie_key': CanvasIE.ie_key(),
@@ -307,4 +312,4 @@ class VrtNUIE(GigyaBaseIE):
             'season_number': season_number,
             'episode_number': episode_number,
             'release_date': release_date,
-        }
+        })
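
Note: `merge_dicts` comes from youtube_dl/utils.py and its body is not shown in this diff. The call above relies on earlier arguments taking precedence, so fields extracted from JSON-LD win and the explicit dict only fills the gaps. A sketch of the assumed semantics:

    def merge_dicts(*dicts):
        # Assumed behaviour: the first non-None value for a key wins, except
        # that a later non-empty string may replace an earlier empty one.
        merged = {}
        for d in dicts:
            for k, v in d.items():
                if v is None:
                    continue
                if (k not in merged or (isinstance(v, str) and v and
                                        isinstance(merged[k], str) and not merged[k])):
                    merged[k] = v
        return merged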
youtube_dl/extractor/ccma.py
@@ -4,13 +4,13 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     clean_html,
     int_or_none,
     parse_duration,
     parse_iso8601,
     parse_resolution,
+    url_or_none,
 )
 
 
@@ -53,8 +53,8 @@ class CCMAIE(InfoExtractor):
         media_url = media['media']['url']
         if isinstance(media_url, list):
             for format_ in media_url:
-                format_url = format_.get('file')
-                if not format_url or not isinstance(format_url, compat_str):
+                format_url = url_or_none(format_.get('file'))
+                if not format_url:
                     continue
                 label = format_.get('label')
                 f = parse_resolution(label)
youtube_dl/extractor/common.py
@@ -52,6 +52,7 @@ from ..utils import (
     GeoUtils,
     int_or_none,
     js_to_json,
+    JSON_LD_RE,
     mimetype2ext,
     orderedSet,
     parse_codecs,
@@ -1149,8 +1150,7 @@ class InfoExtractor(object):
 
     def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
         json_ld = self._search_regex(
-            r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
-            html, 'JSON-LD', group='json_ld', **kwargs)
+            JSON_LD_RE, html, 'JSON-LD', group='json_ld', **kwargs)
         default = kwargs.get('default', NO_DEFAULT)
         if not json_ld:
             return default if default is not NO_DEFAULT else {}
@@ -1859,9 +1859,7 @@ class InfoExtractor(object):
                         'height': height,
                     })
                 formats.extend(m3u8_formats)
-                continue
-
-            if src_ext == 'f4m':
+            elif src_ext == 'f4m':
                 f4m_url = src_url
                 if not f4m_params:
                     f4m_params = {
@@ -1871,9 +1869,13 @@ class InfoExtractor(object):
                 f4m_url += '&' if '?' in f4m_url else '?'
                 f4m_url += compat_urllib_parse_urlencode(f4m_params)
                 formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
-                continue
-
-            if src_url.startswith('http') and self._is_valid_url(src, video_id):
+            elif src_ext == 'mpd':
+                formats.extend(self._extract_mpd_formats(
+                    src_url, video_id, mpd_id='dash', fatal=False))
+            elif re.search(r'\.ism/[Mm]anifest', src_url):
+                formats.extend(self._extract_ism_formats(
+                    src_url, video_id, ism_id='mss', fatal=False))
+            elif src_url.startswith('http') and self._is_valid_url(src, video_id):
                 http_count += 1
                 formats.append({
                     'url': src_url,
@@ -1884,7 +1886,6 @@ class InfoExtractor(object):
                     'width': width,
                     'height': height,
                 })
-                continue
 
         return formats
 
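
Note: `JSON_LD_RE` now lives in youtube_dl/utils.py (see the 2018.07.10 ChangeLog entry "Share JSON-LD regular expression") and is not shown in this diff. Presumably it carries essentially the inline pattern removed above:

    # Assumed value, taken from the regex this hunk removes; the shared
    # constant may have been generalized slightly in utils.py.
    JSON_LD_RE = r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'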
youtube_dl/extractor/crackle.py
@@ -4,16 +4,14 @@ from __future__ import unicode_literals, division
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_str,
-    compat_HTTPError,
-)
+from ..compat import compat_HTTPError
 from ..utils import (
     determine_ext,
     float_or_none,
     int_or_none,
     parse_age_limit,
     parse_duration,
+    url_or_none,
     ExtractorError
 )
 
@@ -86,8 +84,8 @@ class CrackleIE(InfoExtractor):
         for e in media['MediaURLs']:
             if e.get('UseDRM') is True:
                 continue
-            format_url = e.get('Path')
-            if not format_url or not isinstance(format_url, compat_str):
+            format_url = url_or_none(e.get('Path'))
+            if not format_url:
                 continue
             ext = determine_ext(format_url)
             if ext == 'm3u8':
@@ -124,8 +122,8 @@ class CrackleIE(InfoExtractor):
         for cc_file in cc_files:
             if not isinstance(cc_file, dict):
                 continue
-            cc_url = cc_file.get('Path')
-            if not cc_url or not isinstance(cc_url, compat_str):
+            cc_url = url_or_none(cc_file.get('Path'))
+            if not cc_url:
                 continue
             lang = cc_file.get('Locale') or 'en'
             subtitles.setdefault(lang, []).append({'url': cc_url})
youtube_dl/extractor/dailymotion.py
@@ -144,7 +144,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
 
         age_limit = self._rta_search(webpage)
 
-        description = self._og_search_description(webpage) or self._html_search_meta(
+        description = self._og_search_description(
+            webpage, default=None) or self._html_search_meta(
             'description', webpage, 'description')
 
         view_count_str = self._search_regex(
youtube_dl/extractor/dctp.py
@@ -7,6 +7,7 @@ from ..utils import (
     float_or_none,
     int_or_none,
     unified_timestamp,
+    url_or_none,
 )
 
 
@@ -69,7 +70,7 @@ class DctpTvIE(InfoExtractor):
             endpoint = next(
                 server['endpoint']
                 for server in servers
-                if isinstance(server.get('endpoint'), compat_str) and
+                if url_or_none(server.get('endpoint')) and
                    'cloudfront' in server['endpoint'])
         else:
             endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
@@ -92,8 +93,8 @@ class DctpTvIE(InfoExtractor):
         for image in images:
             if not isinstance(image, dict):
                 continue
-            image_url = image.get('url')
-            if not image_url or not isinstance(image_url, compat_str):
+            image_url = url_or_none(image.get('url'))
+            if not image_url:
                 continue
             thumbnails.append({
                 'url': image_url,
youtube_dl/extractor/discoverygo.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     determine_ext,
     extract_attributes,
@@ -12,6 +11,7 @@ from ..utils import (
     parse_age_limit,
     remove_end,
     unescapeHTML,
+    url_or_none,
 )
 
 
@@ -69,9 +69,8 @@ class DiscoveryGoBaseIE(InfoExtractor):
         captions = stream.get('captions')
         if isinstance(captions, list):
             for caption in captions:
-                subtitle_url = caption.get('fileUrl')
-                if (not subtitle_url or not isinstance(subtitle_url, compat_str) or
-                        not subtitle_url.startswith('http')):
+                subtitle_url = url_or_none(caption.get('fileUrl'))
+                if not subtitle_url or not subtitle_url.startswith('http'):
                     continue
                 lang = caption.get('fileLang', 'en')
                 ext = determine_ext(subtitle_url)
youtube_dl/extractor/dplay.py
@@ -21,6 +21,7 @@ from ..utils import (
     unified_strdate,
     unified_timestamp,
     update_url_query,
+    urljoin,
     USER_AGENTS,
 )
 
@@ -310,9 +311,11 @@ class DPlayItIE(InfoExtractor):
 
         if not info:
             info_url = self._search_regex(
-                r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
-                webpage, 'info url')
+                (r'playback_json_url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
+                 r'url\s*[:=]\s*["\'](?P<url>(?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)'),
+                webpage, 'info url', group='url')
 
+            info_url = urljoin(url, info_url)
             video_id = info_url.rpartition('/')[-1]
 
             try:
@@ -322,6 +325,8 @@ class DPlayItIE(InfoExtractor):
                         'dplayit_token').value,
                     'Referer': url,
                 })
+                if isinstance(info, compat_str):
+                    info = self._parse_json(info, display_id)
             except ExtractorError as e:
                 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403):
                     info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
@@ -337,6 +342,7 @@ class DPlayItIE(InfoExtractor):
             formats = self._extract_m3u8_formats(
                 hls_url, display_id, ext='mp4', entry_protocol='m3u8_native',
                 m3u8_id='hls')
+            self._sort_formats(formats)
 
             series = self._html_search_regex(
                 r'(?s)<h1[^>]+class=["\'].*?\bshow_title\b.*?["\'][^>]*>(.+?)</h1>',
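
Note: the new `playback_json_url` pattern can match a relative URL, which is why the `urljoin(url, info_url)` call was added. youtube_dl's `urljoin` (the same helper exercised in the test_utils hunk earlier) resolves relative references against a base and returns None for non-URL input; illustratively:

    from youtube_dl.utils import urljoin

    # Hostname is illustrative.
    assert urljoin('https://it.dplay.com/show/ep1', '/playback/videoPlaybackInfo/123') == \
        'https://it.dplay.com/playback/videoPlaybackInfo/123'
    assert urljoin('https://it.dplay.com/', None) is None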
youtube_dl/extractor/dramafever.py
@@ -7,7 +7,6 @@ import json
 from .common import InfoExtractor
 from ..compat import (
     compat_HTTPError,
-    compat_str,
     compat_urlparse,
 )
 from ..utils import (
@@ -17,6 +16,7 @@ from ..utils import (
     parse_age_limit,
     parse_duration,
     unified_timestamp,
+    url_or_none,
 )
 
 
@@ -139,8 +139,8 @@ class DramaFeverIE(DramaFeverBaseIE):
             for sub in subs:
                 if not isinstance(sub, dict):
                     continue
-                sub_url = sub.get('url')
-                if not sub_url or not isinstance(sub_url, compat_str):
+                sub_url = url_or_none(sub.get('url'))
+                if not sub_url:
                     continue
                 subtitles.setdefault(
                     sub.get('code') or sub.get('language') or 'en', []).append({
@@ -163,8 +163,8 @@ class DramaFeverIE(DramaFeverBaseIE):
         for format_id, format_dict in download_assets.items():
             if not isinstance(format_dict, dict):
                 continue
-            format_url = format_dict.get('url')
-            if not format_url or not isinstance(format_url, compat_str):
+            format_url = url_or_none(format_dict.get('url'))
+            if not format_url:
                 continue
             formats.append({
                 'url': format_url,
youtube_dl/extractor/eagleplatform.py
@@ -4,14 +4,12 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_HTTPError,
-    compat_str,
-)
+from ..compat import compat_HTTPError
 from ..utils import (
     ExtractorError,
     int_or_none,
     unsmuggle_url,
+    url_or_none,
 )
 
 
@@ -177,7 +175,7 @@ class EaglePlatformIE(InfoExtractor):
             video_id, 'Downloading mp4 JSON', fatal=False)
         if mp4_data:
             for format_id, format_url in mp4_data.get('data', {}).items():
-                if not isinstance(format_url, compat_str):
+                if not url_or_none(format_url):
                     continue
                 height = int_or_none(format_id)
                 if height is not None and m3u8_formats_dict.get(height):
youtube_dl/extractor/egghead.py
@@ -8,6 +8,7 @@ from ..utils import (
     int_or_none,
     try_get,
     unified_timestamp,
+    url_or_none,
 )
 
 
@@ -34,8 +35,8 @@ class EggheadCourseIE(InfoExtractor):
 
         entries = []
         for lesson in lessons:
-            lesson_url = lesson.get('http_url')
-            if not lesson_url or not isinstance(lesson_url, compat_str):
+            lesson_url = url_or_none(lesson.get('http_url'))
+            if not lesson_url:
                 continue
             lesson_id = lesson.get('id')
             if lesson_id:
@@ -95,7 +96,8 @@ class EggheadLessonIE(InfoExtractor):
 
         formats = []
         for _, format_url in lesson['media_urls'].items():
-            if not format_url or not isinstance(format_url, compat_str):
+            format_url = url_or_none(format_url)
+            if not format_url:
                 continue
             ext = determine_ext(format_url)
             if ext == 'm3u8':
youtube_dl/extractor/eporner.py
@@ -11,6 +11,7 @@ from ..utils import (
     int_or_none,
     parse_duration,
     str_to_int,
+    url_or_none,
 )
 
 
@@ -82,8 +83,8 @@ class EpornerIE(InfoExtractor):
         for format_id, format_dict in formats_dict.items():
             if not isinstance(format_dict, dict):
                 continue
-            src = format_dict.get('src')
-            if not isinstance(src, compat_str) or not src.startswith('http'):
+            src = url_or_none(format_dict.get('src'))
+            if not src or not src.startswith('http'):
                 continue
             if kind == 'hls':
                 formats.extend(self._extract_m3u8_formats(
youtube_dl/extractor/extractors.py
@@ -390,6 +390,11 @@ from .francetv import (
 from .freesound import FreesoundIE
 from .freespeech import FreespeechIE
 from .freshlive import FreshLiveIE
+from .frontendmasters import (
+    FrontendMastersIE,
+    FrontendMastersLessonIE,
+    FrontendMastersCourseIE
+)
 from .funimation import FunimationIE
 from .funk import (
     FunkMixIE,
@@ -763,7 +768,9 @@ from .nrk import (
     NRKSkoleIE,
     NRKTVIE,
     NRKTVDirekteIE,
+    NRKTVEpisodeIE,
     NRKTVEpisodesIE,
+    NRKTVSeasonIE,
     NRKTVSeriesIE,
 )
 from .ntvde import NTVDeIE
@@ -853,6 +860,10 @@ from .pornhub import (
 from .pornotube import PornotubeIE
 from .pornovoisines import PornoVoisinesIE
 from .pornoxo import PornoXOIE
+from .puhutv import (
+    PuhuTVIE,
+    PuhuTVSerieIE,
+)
 from .presstv import PressTVIE
 from .primesharetv import PrimeShareTVIE
 from .promptfile import PromptFileIE
youtube_dl/extractor/facebook.py
@@ -20,6 +20,7 @@ from ..utils import (
     int_or_none,
     js_to_json,
     limit_length,
+    parse_count,
     sanitized_Request,
     try_get,
     urlencode_postdata,
@@ -75,7 +76,7 @@ class FacebookIE(InfoExtractor):
         'info_dict': {
             'id': '274175099429670',
             'ext': 'mp4',
-            'title': 'Asif Nawab Butt posted a video to his Timeline.',
+            'title': 're:^Asif Nawab Butt posted a video',
             'uploader': 'Asif Nawab Butt',
             'upload_date': '20140506',
             'timestamp': 1399398998,
@@ -133,7 +134,7 @@ class FacebookIE(InfoExtractor):
     }, {
         # have 1080P, but only up to 720p in swf params
         'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
-        'md5': '0d9813160b146b3bc8744e006027fcc6',
+        'md5': '9571fae53d4165bbbadb17a94651dcdc',
         'info_dict': {
             'id': '10155529876156509',
             'ext': 'mp4',
@@ -142,6 +143,7 @@ class FacebookIE(InfoExtractor):
             'upload_date': '20161030',
             'uploader': 'CNN',
             'thumbnail': r're:^https?://.*',
+            'view_count': int,
         },
     }, {
         # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
@@ -149,7 +151,7 @@ class FacebookIE(InfoExtractor):
         'info_dict': {
             'id': '1417995061575415',
             'ext': 'mp4',
-            'title': 'md5:a7b86ca673f51800cd54687b7f4012fe',
+            'title': 'md5:1db063d6a8c13faa8da727817339c857',
             'timestamp': 1486648217,
             'upload_date': '20170209',
             'uploader': 'Yaroslav Korpan',
@@ -176,7 +178,7 @@ class FacebookIE(InfoExtractor):
         'info_dict': {
             'id': '1396382447100162',
             'ext': 'mp4',
-            'title': 'md5:e2d2700afdf84e121f5d0f999bad13a3',
+            'title': 'md5:19a428bbde91364e3de815383b54a235',
             'timestamp': 1486035494,
             'upload_date': '20170202',
             'uploader': 'Elisabeth Ahtn',
@@ -426,6 +428,10 @@ class FacebookIE(InfoExtractor):
             'timestamp', default=None))
         thumbnail = self._og_search_thumbnail(webpage)
 
+        view_count = parse_count(self._search_regex(
+            r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
+            default=None))
+
         info_dict = {
             'id': video_id,
             'title': video_title,
@@ -433,6 +439,7 @@ class FacebookIE(InfoExtractor):
             'uploader': uploader,
             'timestamp': timestamp,
             'thumbnail': thumbnail,
+            'view_count': view_count,
         }
 
         return webpage, info_dict
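
Note: `parse_count` (from youtube_dl/utils.py) converts human-formatted counters into integers, which is why the new view-count regex deliberately captures digits together with ',' and '.' separators. Illustrative behaviour:

    from youtube_dl.utils import parse_count

    assert parse_count('1,234') == 1234
    assert parse_count(None) is None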
youtube_dl/extractor/firsttv.py
@@ -10,6 +10,7 @@ from ..utils import (
     int_or_none,
     qualities,
     unified_strdate,
+    url_or_none,
 )
 
 
@@ -88,8 +89,8 @@ class FirstTVIE(InfoExtractor):
             formats = []
             path = None
             for f in item.get('mbr', []):
-                src = f.get('src')
-                if not src or not isinstance(src, compat_str):
+                src = url_or_none(f.get('src'))
+                if not src:
                     continue
                 tbr = int_or_none(self._search_regex(
                     r'_(\d{3,})\.mp4', src, 'tbr', default=None))
youtube_dl/extractor/francetv.py
@@ -16,6 +16,7 @@ from ..utils import (
     int_or_none,
     parse_duration,
     try_get,
+    url_or_none,
 )
 from .dailymotion import DailymotionIE
 
@@ -115,14 +116,13 @@ class FranceTVIE(InfoExtractor):
 
         def sign(manifest_url, manifest_id):
             for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'):
-                signed_url = self._download_webpage(
+                signed_url = url_or_none(self._download_webpage(
                     'https://%s/esi/TA' % host, video_id,
                     'Downloading signed %s manifest URL' % manifest_id,
                     fatal=False, query={
                         'url': manifest_url,
-                    })
-                if (signed_url and isinstance(signed_url, compat_str) and
-                        re.search(r'^(?:https?:)?//', signed_url)):
+                    }))
+                if signed_url:
                     return signed_url
             return manifest_url
 
youtube_dl/extractor/frontendmasters.py (new file, 263 lines)
@@ -0,0 +1,263 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import (
    compat_str,
    compat_urlparse,
)
from ..utils import (
    ExtractorError,
    parse_duration,
    url_or_none,
    urlencode_postdata,
)


class FrontendMastersBaseIE(InfoExtractor):
    _API_BASE = 'https://api.frontendmasters.com/v1/kabuki'
    _LOGIN_URL = 'https://frontendmasters.com/login/'

    _NETRC_MACHINE = 'frontendmasters'

    _QUALITIES = {
        'low': {'width': 480, 'height': 360},
        'mid': {'width': 1280, 'height': 720},
        'high': {'width': 1920, 'height': 1080}
    }

    def _real_initialize(self):
        self._login()

    def _login(self):
        (username, password) = self._get_login_info()
        if username is None:
            return

        login_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page')

        login_form = self._hidden_inputs(login_page)

        login_form.update({
            'username': username,
            'password': password
        })

        post_url = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
            'post_url', default=self._LOGIN_URL, group='url')

        if not post_url.startswith('http'):
            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)

        response = self._download_webpage(
            post_url, None, 'Logging in', data=urlencode_postdata(login_form),
            headers={'Content-Type': 'application/x-www-form-urlencoded'})

        # Successful login
        if any(p in response for p in (
                'wp-login.php?action=logout', '>Logout')):
            return

        error = self._html_search_regex(
            r'class=(["\'])(?:(?!\1).)*\bMessageAlert\b(?:(?!\1).)*\1[^>]*>(?P<error>[^<]+)<',
            response, 'error message', default=None, group='error')
        if error:
            raise ExtractorError('Unable to login: %s' % error, expected=True)
        raise ExtractorError('Unable to log in')


class FrontendMastersPageBaseIE(FrontendMastersBaseIE):
    def _download_course(self, course_name, url):
        return self._download_json(
            '%s/courses/%s' % (self._API_BASE, course_name), course_name,
            'Downloading course JSON', headers={'Referer': url})

    @staticmethod
    def _extract_chapters(course):
        chapters = []
        lesson_elements = course.get('lessonElements')
        if isinstance(lesson_elements, list):
            chapters = [url_or_none(e) for e in lesson_elements if url_or_none(e)]
        return chapters

    @staticmethod
    def _extract_lesson(chapters, lesson_id, lesson):
        title = lesson.get('title') or lesson_id
        display_id = lesson.get('slug')
        description = lesson.get('description')
        thumbnail = lesson.get('thumbnail')

        chapter_number = None
        index = lesson.get('index')
        element_index = lesson.get('elementIndex')
        if (isinstance(index, int) and isinstance(element_index, int) and
                index < element_index):
            chapter_number = element_index - index
        chapter = (chapters[chapter_number - 1]
                   if chapter_number - 1 < len(chapters) else None)

        duration = None
        timestamp = lesson.get('timestamp')
        if isinstance(timestamp, compat_str):
            mobj = re.search(
                r'(?P<start>\d{1,2}:\d{1,2}:\d{1,2})\s*-(?P<end>\s*\d{1,2}:\d{1,2}:\d{1,2})',
                timestamp)
            if mobj:
                duration = parse_duration(mobj.group('end')) - parse_duration(
                    mobj.group('start'))

        return {
            '_type': 'url_transparent',
            'url': 'frontendmasters:%s' % lesson_id,
            'ie_key': FrontendMastersIE.ie_key(),
            'id': lesson_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'chapter': chapter,
            'chapter_number': chapter_number,
        }


class FrontendMastersIE(FrontendMastersBaseIE):
    _VALID_URL = r'(?:frontendmasters:|https?://api\.frontendmasters\.com/v\d+/kabuki/video/)(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'https://api.frontendmasters.com/v1/kabuki/video/a2qogef6ba',
        'md5': '7f161159710d6b7016a4f4af6fcb05e2',
        'info_dict': {
            'id': 'a2qogef6ba',
            'ext': 'mp4',
            'title': 'a2qogef6ba',
        },
        'skip': 'Requires FrontendMasters account credentials',
    }, {
        'url': 'frontendmasters:a2qogef6ba',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        lesson_id = self._match_id(url)

        source_url = '%s/video/%s/source' % (self._API_BASE, lesson_id)

        formats = []
        for ext in ('webm', 'mp4'):
            for quality in ('low', 'mid', 'high'):
                resolution = self._QUALITIES[quality].copy()
                format_id = '%s-%s' % (ext, quality)
                format_url = self._download_json(
                    source_url, lesson_id,
                    'Downloading %s source JSON' % format_id, query={
                        'f': ext,
                        'r': resolution['height'],
                    }, headers={
                        'Referer': url,
                    }, fatal=False)['url']

                if not format_url:
                    continue

                f = resolution.copy()
                f.update({
                    'url': format_url,
                    'ext': ext,
                    'format_id': format_id,
                })
                formats.append(f)
        self._sort_formats(formats)

        subtitles = {
            'en': [{
                'url': '%s/transcripts/%s.vtt' % (self._API_BASE, lesson_id),
            }]
        }

        return {
            'id': lesson_id,
            'title': lesson_id,
            'formats': formats,
            'subtitles': subtitles
        }


class FrontendMastersLessonIE(FrontendMastersPageBaseIE):
    _VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/(?P<course_name>[^/]+)/(?P<lesson_name>[^/]+)'
    _TEST = {
        'url': 'https://frontendmasters.com/courses/web-development/tools',
        'info_dict': {
            'id': 'a2qogef6ba',
            'display_id': 'tools',
            'ext': 'mp4',
            'title': 'Tools',
            'description': 'md5:82c1ea6472e88ed5acd1829fe992e4f7',
            'thumbnail': r're:^https?://.*\.jpg$',
            'chapter': 'Introduction',
            'chapter_number': 1,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Requires FrontendMasters account credentials',
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        course_name, lesson_name = mobj.group('course_name', 'lesson_name')

        course = self._download_course(course_name, url)

        lesson_id, lesson = next(
            (video_id, data)
            for video_id, data in course['lessonData'].items()
            if data.get('slug') == lesson_name)

        chapters = self._extract_chapters(course)
        return self._extract_lesson(chapters, lesson_id, lesson)


class FrontendMastersCourseIE(FrontendMastersPageBaseIE):
    _VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/(?P<id>[^/]+)'
    _TEST = {
        'url': 'https://frontendmasters.com/courses/web-development/',
        'info_dict': {
            'id': 'web-development',
            'title': 'Introduction to Web Development',
            'description': 'md5:9317e6e842098bf725d62360e52d49a6',
        },
        'playlist_count': 81,
        'skip': 'Requires FrontendMasters account credentials',
    }

    @classmethod
    def suitable(cls, url):
        return False if FrontendMastersLessonIE.suitable(url) else super(
            FrontendMastersBaseIE, cls).suitable(url)

    def _real_extract(self, url):
        course_name = self._match_id(url)

        course = self._download_course(course_name, url)

        chapters = self._extract_chapters(course)

        lessons = sorted(
            course['lessonData'].values(), key=lambda data: data['index'])

        entries = []
        for lesson in lessons:
            lesson_name = lesson.get('slug')
            if not lesson_name:
                continue
            lesson_id = lesson.get('hash') or lesson.get('statsId')
            entries.append(self._extract_lesson(chapters, lesson_id, lesson))

        title = course.get('title')
        description = course.get('description')

        return self.playlist_result(entries, course_name, title, description)
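One subtlety in the new file worth illustrating: `FrontendMastersCourseIE.suitable` defers to the lesson extractor, so a lesson URL is never swallowed by the course playlist extractor. A quick check against the file added above (hypothetical REPL session; the URLs are the ones used in its tests):

    from youtube_dl.extractor.frontendmasters import (
        FrontendMastersCourseIE,
        FrontendMastersLessonIE,
    )

    course_url = 'https://frontendmasters.com/courses/web-development/'
    lesson_url = 'https://frontendmasters.com/courses/web-development/tools'

    assert FrontendMastersLessonIE.suitable(lesson_url)
    assert not FrontendMastersCourseIE.suitable(lesson_url)  # lesson IE takes precedence
    assert FrontendMastersCourseIE.suitable(course_url)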
@@ -5,6 +5,7 @@ import re

 from .common import InfoExtractor
 from .nexx import NexxIE
+from ..compat import compat_str
 from ..utils import (
     int_or_none,
     try_get,
@@ -12,6 +13,19 @@ from ..utils import (


 class FunkBaseIE(InfoExtractor):
+    _HEADERS = {
+        'Accept': '*/*',
+        'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8',
+        'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4',
+    }
+    _AUTH = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4'
+
+    @staticmethod
+    def _make_headers(referer):
+        headers = FunkBaseIE._HEADERS.copy()
+        headers['Referer'] = referer
+        return headers
+
     def _make_url_result(self, video):
         return {
             '_type': 'url_transparent',
@@ -48,19 +62,19 @@ class FunkMixIE(FunkBaseIE):

         lists = self._download_json(
             'https://www.funk.net/api/v3.1/curation/curatedLists/',
-            mix_id, headers={
-                'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbC12Mi4wIiwic2NvcGUiOiJzdGF0aWMtY29udGVudC1hcGksY3VyYXRpb24tc2VydmljZSxzZWFyY2gtYXBpIn0.SGCC1IXHLtZYoo8PvRKlU2gXH1su8YSu47sB3S4iXBI',
-                'Referer': url,
-            }, query={
+            mix_id, headers=self._make_headers(url), query={
                 'size': 100,
-            })['result']['lists']
+            })['_embedded']['curatedListList']

         metas = next(
             l for l in lists
             if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas']
         video = next(
             meta['videoDataDelegate']
-            for meta in metas if meta.get('alias') == alias)
+            for meta in metas
+            if try_get(
+                meta, lambda x: x['videoDataDelegate']['alias'],
+                compat_str) == alias)

         return self._make_url_result(video)

@@ -104,25 +118,39 @@ class FunkChannelIE(FunkBaseIE):
         channel_id = mobj.group('id')
         alias = mobj.group('alias')

-        headers = {
-            'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbCIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxzZWFyY2gtYXBpIn0.q4Y2xZG8PFHai24-4Pjx2gym9RmJejtmK6lMXP5wAgc',
-            'Referer': url,
-        }
+        headers = self._make_headers(url)

         video = None

-        by_id_list = self._download_json(
-            'https://www.funk.net/api/v3.0/content/videos/byIdList', channel_id,
-            headers=headers, query={
-                'ids': alias,
+        # Id-based channels are currently broken on their side: webplayer
+        # tries to process them via byChannelAlias endpoint and fails
+        # predictably.
+        by_channel_alias = self._download_json(
+            'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
+            % channel_id,
+            'Downloading byChannelAlias JSON', headers=headers, query={
+                'size': 100,
             }, fatal=False)
-        if by_id_list:
-            video = try_get(by_id_list, lambda x: x['result'][0], dict)
+        if by_channel_alias:
+            video_list = try_get(
+                by_channel_alias, lambda x: x['_embedded']['videoList'], list)
+            if video_list:
+                video = next(r for r in video_list if r.get('alias') == alias)
+
+        if not video:
+            by_id_list = self._download_json(
+                'https://www.funk.net/api/v3.0/content/videos/byIdList',
+                channel_id, 'Downloading byIdList JSON', headers=headers,
+                query={
+                    'ids': alias,
+                }, fatal=False)
+            if by_id_list:
+                video = try_get(by_id_list, lambda x: x['result'][0], dict)

         if not video:
             results = self._download_json(
-                'https://www.funk.net/api/v3.0/content/videos/filter', channel_id,
-                headers=headers, query={
+                'https://www.funk.net/api/v3.0/content/videos/filter',
+                channel_id, 'Downloading filter JSON', headers=headers, query={
                     'channelId': channel_id,
                     'size': 100,
                 })['result']
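Both Funk hunks route their requests through the new `_make_headers` helper. A small sanity sketch of what it yields (the referer argument is a placeholder page URL):

    from youtube_dl.extractor.funk import FunkBaseIE

    headers = FunkBaseIE._make_headers('https://www.funk.net/')
    assert headers['Accept'] == '*/*'
    assert headers['Referer'] == 'https://www.funk.net/'
    assert 'authorization' in headers
    assert 'Referer' not in FunkBaseIE._HEADERS  # the class template is copied, not mutated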
@@ -32,6 +32,7 @@ from ..utils import (
     unified_strdate,
     unsmuggle_url,
     UnsupportedError,
+    url_or_none,
     xpath_text,
 )
 from .commonprotocols import RtmpIE
@@ -3130,8 +3131,8 @@ class GenericIE(InfoExtractor):
                 sources = [sources]
             formats = []
             for source in sources:
-                src = source.get('src')
-                if not src or not isinstance(src, compat_str):
+                src = url_or_none(source.get('src'))
+                if not src:
                     continue
                 src = compat_urlparse.urljoin(url, src)
                 src_type = source.get('type')
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..compat import compat_HTTPError
 from ..utils import (
     determine_ext,
     ExtractorError,
@@ -14,8 +15,8 @@ from ..utils import (


 class Go90IE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?go90\.com/videos/(?P<id>[0-9a-zA-Z]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?go90\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z]+)'
+    _TESTS = [{
         'url': 'https://www.go90.com/videos/84BUqjLpf9D',
         'md5': 'efa7670dbbbf21a7b07b360652b24a32',
         'info_dict': {
@@ -27,15 +28,31 @@ class Go90IE(InfoExtractor):
             'upload_date': '20170411',
             'age_limit': 14,
         }
-    }
+    }, {
+        'url': 'https://www.go90.com/embed/261MflWkD3N',
+        'only_matching': True,
+    }]
+    _GEO_BYPASS = False

     def _real_extract(self, url):
         video_id = self._match_id(url)
-        video_data = self._download_json(
-            'https://www.go90.com/api/view/items/' + video_id,
-            video_id, headers={
+
+        try:
+            headers = self.geo_verification_headers()
+            headers.update({
                 'Content-Type': 'application/json; charset=utf-8',
-            }, data=b'{"client":"web","device_type":"pc"}')
+            })
+            video_data = self._download_json(
+                'https://www.go90.com/api/view/items/' + video_id, video_id,
+                headers=headers, data=b'{"client":"web","device_type":"pc"}')
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+                message = self._parse_json(e.cause.read().decode(), None)['error']['message']
+                if 'region unavailable' in message:
+                    self.raise_geo_restricted(countries=['US'])
+                raise ExtractorError(message, expected=True)
+            raise
+
         if video_data.get('requires_drm'):
             raise ExtractorError('This video is DRM protected.', expected=True)
         main_video_asset = video_data['main_video_asset']
@@ -8,6 +8,7 @@ from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     int_or_none,
+    url_or_none,
     urlencode_postdata,
 )

@@ -80,8 +81,8 @@ class HiDiveIE(InfoExtractor):
             bitrates = rendition.get('bitrates')
             if not isinstance(bitrates, dict):
                 continue
-            m3u8_url = bitrates.get('hls')
-            if not isinstance(m3u8_url, compat_str):
+            m3u8_url = url_or_none(bitrates.get('hls'))
+            if not m3u8_url:
                 continue
             formats.extend(self._extract_m3u8_formats(
                 m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
@@ -93,9 +94,8 @@ class HiDiveIE(InfoExtractor):
                 if not isinstance(cc_file, list) or len(cc_file) < 3:
                     continue
                 cc_lang = cc_file[0]
-                cc_url = cc_file[2]
-                if not isinstance(cc_lang, compat_str) or not isinstance(
-                        cc_url, compat_str):
+                cc_url = url_or_none(cc_file[2])
+                if not isinstance(cc_lang, compat_str) or not cc_url:
                     continue
                 subtitles.setdefault(cc_lang, []).append({
                     'url': cc_url,
@@ -3,12 +3,12 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     determine_ext,
     mimetype2ext,
     parse_duration,
     qualities,
+    url_or_none,
 )


@@ -61,8 +61,8 @@ class ImdbIE(InfoExtractor):
         for encoding in video_metadata.get('encodings', []):
             if not encoding or not isinstance(encoding, dict):
                 continue
-            video_url = encoding.get('videoUrl')
-            if not video_url or not isinstance(video_url, compat_str):
+            video_url = url_or_none(encoding.get('videoUrl'))
+            if not video_url:
                 continue
             ext = determine_ext(video_url, mimetype2ext(encoding.get('mimeType')))
             if ext == 'm3u8':
@@ -12,7 +12,7 @@ from ..utils import (


 class ImgurIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$'
+    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z0-9]+)?$'

     _TESTS = [{
         'url': 'https://i.imgur.com/A61SaA1.gifv',
@@ -43,6 +43,9 @@ class ImgurIE(InfoExtractor):
     }, {
         'url': 'http://imgur.com/r/aww/VQcQPhM',
         'only_matching': True,
+    }, {
+        'url': 'https://i.imgur.com/crGpqCV.mp4',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
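The one-character widening of the Imgur pattern (`[a-z]+` to `[a-z0-9]+` in the trailing extension group) is what lets the new `.mp4` test URL match, since `mp4` contains a digit. A quick check:

    import re

    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z0-9]+)?$'

    # the old '[a-z]+' extension group rejected '.mp4'
    assert re.match(_VALID_URL, 'https://i.imgur.com/crGpqCV.mp4')
    assert re.match(_VALID_URL, 'https://i.imgur.com/A61SaA1.gifv')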
@@ -17,6 +17,7 @@ from ..utils import (
     lowercase_escape,
     std_headers,
     try_get,
+    url_or_none,
 )


@@ -170,7 +171,7 @@ class InstagramIE(InfoExtractor):
                     node = try_get(edge, lambda x: x['node'], dict)
                     if not node:
                         continue
-                    node_video_url = try_get(node, lambda x: x['video_url'], compat_str)
+                    node_video_url = url_or_none(node.get('video_url'))
                     if not node_video_url:
                         continue
                     entries.append({
@@ -20,6 +20,7 @@ from ..utils import (
     merge_dicts,
     parse_duration,
     smuggle_url,
+    url_or_none,
     xpath_with_ns,
     xpath_element,
     xpath_text,
@@ -250,8 +251,8 @@ class ITVIE(InfoExtractor):
                 for sub in subs:
                     if not isinstance(sub, dict):
                         continue
-                    href = sub.get('Href')
-                    if isinstance(href, compat_str):
+                    href = url_or_none(sub.get('Href'))
+                    if href:
                         extract_subtitle(href)
             if not info.get('duration'):
                 info['duration'] = parse_duration(video_data.get('Duration'))
@@ -7,6 +7,7 @@ from ..utils import (
     int_or_none,
     mimetype2ext,
     remove_end,
+    url_or_none,
 )


@@ -73,11 +74,14 @@ class IwaraIE(InfoExtractor):

         formats = []
         for a_format in video_data:
+            format_uri = url_or_none(a_format.get('uri'))
+            if not format_uri:
+                continue
             format_id = a_format.get('resolution')
             height = int_or_none(self._search_regex(
                 r'(\d+)p', format_id, 'height', default=None))
             formats.append({
-                'url': a_format['uri'],
+                'url': self._proto_relative_url(format_uri, 'https:'),
                 'format_id': format_id,
                 'ext': mimetype2ext(a_format.get('mime')) or 'mp4',
                 'height': height,
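The Iwara fix matters because the API now returns protocol-relative download URLs; the existing `InfoExtractor._proto_relative_url` helper upgrades them. Its effect, roughly (the real method falls back to the session's preferred HTTP scheme when no scheme is passed; the hostname below is illustrative):

    def proto_relative_url(url, scheme='https:'):
        # Prepend the scheme only to '//host/path'-style URLs.
        if url.startswith('//'):
            return scheme + url
        return url

    assert proto_relative_url('//galaxy.iwara.tv/file.mp4') == 'https://galaxy.iwara.tv/file.mp4'
    assert proto_relative_url('https://example.com/file.mp4') == 'https://example.com/file.mp4'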
@@ -4,16 +4,14 @@ import re

 from .common import InfoExtractor
 from ..aes import aes_decrypt_text
-from ..compat import (
-    compat_str,
-    compat_urllib_parse_unquote,
-)
+from ..compat import compat_urllib_parse_unquote
 from ..utils import (
     determine_ext,
     ExtractorError,
     int_or_none,
     str_to_int,
     strip_or_none,
+    url_or_none,
 )


@@ -55,7 +53,8 @@ class KeezMoviesIE(InfoExtractor):
         encrypted = False

         def extract_format(format_url, height=None):
-            if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//')):
+            format_url = url_or_none(format_url)
+            if not format_url or not format_url.startswith(('http', '//')):
                 return
             if format_url in format_urls:
                 return
@@ -2,11 +2,11 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     determine_ext,
     float_or_none,
     int_or_none,
+    url_or_none,
 )


@@ -109,7 +109,8 @@ class KonserthusetPlayIE(InfoExtractor):
         captions = source.get('captionsAvailableLanguages')
         if isinstance(captions, dict):
             for lang, subtitle_url in captions.items():
-                if lang != 'none' and isinstance(subtitle_url, compat_str):
+                subtitle_url = url_or_none(subtitle_url)
+                if lang != 'none' and subtitle_url:
                     subtitles.setdefault(lang, []).append({'url': subtitle_url})

         return {
@@ -4,7 +4,6 @@ import re

 from .common import InfoExtractor
 from ..compat import (
-    compat_HTTPError,
     compat_str,
     compat_urlparse,
 )
@@ -44,21 +43,15 @@ class LyndaBaseIE(InfoExtractor):
         form_data = self._hidden_inputs(form_html)
         form_data.update(extra_form_data)

-        try:
-            response = self._download_json(
-                action_url, None, note,
-                data=urlencode_postdata(form_data),
-                headers={
-                    'Referer': referrer_url,
-                    'X-Requested-With': 'XMLHttpRequest',
-                })
-        except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
-                response = self._parse_json(e.cause.read().decode('utf-8'), None)
-                self._check_error(response, ('email', 'password'))
-            raise
+        response = self._download_json(
+            action_url, None, note,
+            data=urlencode_postdata(form_data),
+            headers={
+                'Referer': referrer_url,
+                'X-Requested-With': 'XMLHttpRequest',
+            }, expected_status=(418, 500, ))

-        self._check_error(response, 'ErrorMessage')
+        self._check_error(response, ('email', 'password', 'ErrorMessage'))

         return response, action_url

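The Lynda simplification leans on the `expected_status` parameter that the download helpers gained in this release: listed HTTP error statuses are no longer raised, their bodies are returned for parsing, so the 500-handling try/except becomes dead weight. A sketch of the dispatch logic, assumed to mirror `_request_webpage` in extractor/common.py:

    def status_is_expected(status, expected_status):
        # expected_status may be None, a single int, an iterable of ints,
        # or a predicate taking the status code.
        if expected_status is None:
            return False
        if callable(expected_status):
            return expected_status(status)
        if isinstance(expected_status, int):
            return status == expected_status
        return status in expected_status

    assert status_is_expected(500, (418, 500, ))
    assert not status_is_expected(404, (418, 500, ))

On an expected 500 carrying a JSON error body, `_check_error` can now inspect the parsed response directly.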
@@ -3,75 +3,75 @@ from __future__ import unicode_literals

 import re

-from .common import InfoExtractor
-from ..compat import compat_str
+from .theplatform import ThePlatformBaseIE
 from ..utils import (
-    determine_ext,
-    parse_duration,
-    try_get,
-    unified_strdate,
+    ExtractorError,
+    int_or_none,
+    update_url_query,
 )


-class MediasetIE(InfoExtractor):
+class MediasetIE(ThePlatformBaseIE):
+    _TP_TLD = 'eu'
     _VALID_URL = r'''(?x)
                     (?:
                         mediaset:|
                         https?://
-                            (?:www\.)?video\.mediaset\.it/
+                            (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
                             (?:
                                 (?:video|on-demand)/(?:[^/]+/)+[^/]+_|
-                                player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=
+                                player/index\.html\?.*?\bprogramGuid=
                             )
-                    )(?P<id>[0-9]+)
+                    )(?P<id>[0-9A-Z]{16})
                     '''
     _TESTS = [{
         # full episode
-        'url': 'http://www.video.mediaset.it/video/hello_goodbye/full/quarta-puntata_661824.html',
+        'url': 'https://www.mediasetplay.mediaset.it/video/hellogoodbye/quarta-puntata_FAFU000000661824',
         'md5': '9b75534d42c44ecef7bf1ffeacb7f85d',
         'info_dict': {
-            'id': '661824',
+            'id': 'FAFU000000661824',
             'ext': 'mp4',
             'title': 'Quarta puntata',
-            'description': 'md5:7183696d6df570e3412a5ef74b27c5e2',
+            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
             'thumbnail': r're:^https?://.*\.jpg$',
-            'duration': 1414,
-            'creator': 'mediaset',
+            'duration': 1414.26,
             'upload_date': '20161107',
             'series': 'Hello Goodbye',
-            'categories': ['reality'],
+            'timestamp': 1478532900,
+            'uploader': 'Rete 4',
+            'uploader_id': 'R4',
         },
-        'expected_warnings': ['is not a supported codec'],
     }, {
-        'url': 'http://www.video.mediaset.it/video/matrix/full_chiambretti/puntata-del-25-maggio_846685.html',
-        'md5': '1276f966ac423d16ba255ce867de073e',
+        'url': 'https://www.mediasetplay.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501',
+        'md5': '288532f0ad18307705b01e581304cd7b',
         'info_dict': {
-            'id': '846685',
+            'id': 'F309013801000501',
             'ext': 'mp4',
             'title': 'Puntata del 25 maggio',
-            'description': 'md5:ee2e456e3eb1dba5e814596655bb5296',
+            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
             'thumbnail': r're:^https?://.*\.jpg$',
-            'duration': 6565,
-            'creator': 'mediaset',
-            'upload_date': '20180525',
+            'duration': 6565.007,
+            'upload_date': '20180526',
             'series': 'Matrix',
-            'categories': ['infotainment'],
+            'timestamp': 1527326245,
+            'uploader': 'Canale 5',
+            'uploader_id': 'C5',
         },
         'expected_warnings': ['HTTP Error 403: Forbidden'],
     }, {
         # clip
-        'url': 'http://www.video.mediaset.it/video/gogglebox/clip/un-grande-classico-della-commedia-sexy_661680.html',
+        'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680',
         'only_matching': True,
     }, {
         # iframe simple
-        'url': 'http://www.video.mediaset.it/player/playerIFrame.shtml?id=665924&autoplay=true',
+        'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665924&id=665924',
         'only_matching': True,
     }, {
         # iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/)
-        'url': 'https://www.video.mediaset.it/player/playerIFrameTwitter.shtml?id=665104&playrelated=false&autoplay=false&related=true&hidesocial=true',
+        'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665104&id=665104',
         'only_matching': True,
     }, {
-        'url': 'mediaset:661824',
+        'url': 'mediaset:FAFU000000665924',
         'only_matching': True,
     }]
@@ -84,61 +84,54 @@ class MediasetIE(InfoExtractor):
                 webpage)]

     def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        video = self._download_json(
-            'https://www.video.mediaset.it/html/metainfo.sjson',
-            video_id, 'Downloading media info', query={
-                'id': video_id
-            })['video']
-
-        title = video['title']
-        media_id = video.get('guid') or video_id
-
-        video_list = self._download_json(
-            'http://cdnsel01.mediaset.net/GetCdn2018.aspx',
-            video_id, 'Downloading video CDN JSON', query={
-                'streamid': media_id,
-                'format': 'json',
-            })['videoList']
+        guid = self._match_id(url)
+        tp_path = 'PR1GhC/media/guid/2702976343/' + guid
+        info = self._extract_theplatform_metadata(tp_path, guid)

         formats = []
-        for format_url in video_list:
-            ext = determine_ext(format_url)
-            if ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
-                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id='hls', fatal=False))
-            elif ext == 'mpd':
-                formats.extend(self._extract_mpd_formats(
-                    format_url, video_id, mpd_id='dash', fatal=False))
-            elif ext == 'ism' or '.ism' in format_url:
-                formats.extend(self._extract_ism_formats(
-                    format_url, video_id, ism_id='mss', fatal=False))
-            else:
-                formats.append({
-                    'url': format_url,
-                    'format_id': determine_ext(format_url),
-                })
+        subtitles = {}
+        first_e = None
+        for asset_type in ('SD', 'HD'):
+            for f in ('MPEG4', 'MPEG-DASH', 'M3U', 'ISM'):
+                try:
+                    tp_formats, tp_subtitles = self._extract_theplatform_smil(
+                        update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), {
+                            'mbr': 'true',
+                            'formats': f,
+                            'assetTypes': asset_type,
+                        }), guid, 'Downloading %s %s SMIL data' % (f, asset_type))
+                except ExtractorError as e:
+                    if not first_e:
+                        first_e = e
+                    break
+                for tp_f in tp_formats:
+                    tp_f['quality'] = 1 if asset_type == 'HD' else 0
+                formats.extend(tp_formats)
+                subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+        if first_e and not formats:
+            raise first_e
         self._sort_formats(formats)

-        creator = try_get(
-            video, lambda x: x['brand-info']['publisher'], compat_str)
-        category = try_get(
-            video, lambda x: x['brand-info']['category'], compat_str)
-        categories = [category] if category else None
+        fields = []
+        for templ, repls in (('tvSeason%sNumber', ('', 'Episode')), ('mediasetprogram$%s', ('brandTitle', 'numberOfViews', 'publishInfo'))):
+            fields.extend(templ % repl for repl in repls)
+        feed_data = self._download_json(
+            'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs/guid/-/' + guid,
+            guid, fatal=False, query={'fields': ','.join(fields)})
+        if feed_data:
+            publish_info = feed_data.get('mediasetprogram$publishInfo') or {}
+            info.update({
+                'episode_number': int_or_none(feed_data.get('tvSeasonEpisodeNumber')),
+                'season_number': int_or_none(feed_data.get('tvSeasonNumber')),
+                'series': feed_data.get('mediasetprogram$brandTitle'),
+                'uploader': publish_info.get('description'),
+                'uploader_id': publish_info.get('channel'),
+                'view_count': int_or_none(feed_data.get('mediasetprogram$numberOfViews')),
+            })

-        return {
-            'id': video_id,
-            'title': title,
-            'description': video.get('short-description'),
-            'thumbnail': video.get('thumbnail'),
-            'duration': parse_duration(video.get('duration')),
-            'creator': creator,
-            'upload_date': unified_strdate(video.get('production-date')),
-            'webpage_url': video.get('url'),
-            'series': video.get('brand-value'),
-            'season': video.get('season'),
-            'categories': categories,
+        info.update({
+            'id': guid,
             'formats': formats,
-        }
+            'subtitles': subtitles,
+        })
+        return info
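To see what the rewritten Mediaset extractor actually requests, here is one iteration of the SD/HD-by-format loop, using the guid from the first test above (query parameter order may vary; treat the output comment as approximate):

    from youtube_dl.utils import update_url_query

    tp_path = 'PR1GhC/media/guid/2702976343/FAFU000000661824'
    smil_url = update_url_query(
        'http://link.theplatform.eu/s/' + tp_path, {
            'mbr': 'true',
            'formats': 'MPEG4',
            'assetTypes': 'SD',
        })
    # -> http://link.theplatform.eu/s/PR1GhC/media/guid/2702976343/FAFU000000661824?mbr=true&formats=MPEG4&assetTypes=SD

Each such SMIL manifest is handed to `_extract_theplatform_smil`; HD variants are preferred via the `quality` flag set on their formats.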
@@ -15,6 +15,7 @@ from ..utils import (
     mimetype2ext,
     unescapeHTML,
     unsmuggle_url,
+    url_or_none,
     urljoin,
 )

@@ -156,8 +157,8 @@ class MediasiteIE(InfoExtractor):

             stream_formats = []
             for unum, VideoUrl in enumerate(video_urls):
-                video_url = VideoUrl.get('Location')
-                if not video_url or not isinstance(video_url, compat_str):
+                video_url = url_or_none(VideoUrl.get('Location'))
+                if not video_url:
                     continue
                 # XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS

@@ -4,12 +4,18 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
+from ..compat import (
+    compat_str,
+    compat_urllib_parse_unquote,
+)
 from ..utils import (
     ExtractorError,
     int_or_none,
+    JSON_LD_RE,
+    NO_DEFAULT,
     parse_age_limit,
     parse_duration,
+    try_get,
 )
@@ -359,6 +365,182 @@ class NRKTVIE(NRKBaseIE):
     }]


+class NRKTVEpisodeIE(InfoExtractor):
+    _VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/\d+/episode/\d+)'
+    _TEST = {
+        'url': 'https://tv.nrk.no/serie/backstage/sesong/1/episode/8',
+        'info_dict': {
+            'id': 'MSUI14000816AA',
+            'ext': 'mp4',
+            'title': 'Backstage 8:30',
+            'description': 'md5:de6ca5d5a2d56849e4021f2bf2850df4',
+            'duration': 1320,
+            'series': 'Backstage',
+            'season_number': 1,
+            'episode_number': 8,
+            'episode': '8:30',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        nrk_id = self._parse_json(
+            self._search_regex(JSON_LD_RE, webpage, 'JSON-LD', group='json_ld'),
+            display_id)['@id']
+
+        assert re.match(NRKTVIE._EPISODE_RE, nrk_id)
+        return self.url_result(
+            'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id)
+
+
+class NRKTVSerieBaseIE(InfoExtractor):
+    def _extract_series(self, webpage, display_id, fatal=True):
+        config = self._parse_json(
+            self._search_regex(
+                r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>', webpage, 'config',
+                default='{}' if not fatal else NO_DEFAULT),
+            display_id, fatal=False)
+        if not config:
+            return
+        return try_get(config, lambda x: x['series'], dict)
+
+    def _extract_episodes(self, season):
+        entries = []
+        if not isinstance(season, dict):
+            return entries
+        episodes = season.get('episodes')
+        if not isinstance(episodes, list):
+            return entries
+        for episode in episodes:
+            nrk_id = episode.get('prfId')
+            if not nrk_id or not isinstance(nrk_id, compat_str):
+                continue
+            entries.append(self.url_result(
+                'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id))
+        return entries
+
+
+class NRKTVSeasonIE(NRKTVSerieBaseIE):
+    _VALID_URL = r'https?://tv\.nrk\.no/serie/[^/]+/sesong/(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://tv.nrk.no/serie/backstage/sesong/1',
+        'info_dict': {
+            'id': '1',
+            'title': 'Sesong 1',
+        },
+        'playlist_mincount': 30,
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        return (False if NRKTVIE.suitable(url) or NRKTVEpisodeIE.suitable(url)
+                else super(NRKTVSeasonIE, cls).suitable(url))
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        series = self._extract_series(webpage, display_id)
+
+        season = next(
+            s for s in series['seasons']
+            if int(display_id) == s.get('seasonNumber'))
+
+        title = try_get(season, lambda x: x['titles']['title'], compat_str)
+        return self.playlist_result(
+            self._extract_episodes(season), display_id, title)
+
+
+class NRKTVSeriesIE(NRKTVSerieBaseIE):
+    _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
+    _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
+    _TESTS = [{
+        # new layout
+        'url': 'https://tv.nrk.no/serie/backstage',
+        'info_dict': {
+            'id': 'backstage',
+            'title': 'Backstage',
+            'description': 'md5:c3ec3a35736fca0f9e1207b5511143d3',
+        },
+        'playlist_mincount': 60,
+    }, {
+        # old layout
+        'url': 'https://tv.nrk.no/serie/groenn-glede',
+        'info_dict': {
+            'id': 'groenn-glede',
+            'title': 'Grønn glede',
+            'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
+        },
+        'playlist_mincount': 9,
+    }, {
+        'url': 'http://tv.nrksuper.no/serie/labyrint',
+        'info_dict': {
+            'id': 'labyrint',
+            'title': 'Labyrint',
+            'description': 'md5:58afd450974c89e27d5a19212eee7115',
+        },
+        'playlist_mincount': 3,
+    }, {
+        'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene',
+        'only_matching': True,
+    }, {
+        'url': 'https://tv.nrk.no/serie/saving-the-human-race',
+        'only_matching': True,
+    }, {
+        'url': 'https://tv.nrk.no/serie/postmann-pat',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return (
+            False if any(ie.suitable(url)
+                         for ie in (NRKTVIE, NRKTVEpisodeIE, NRKTVSeasonIE))
+            else super(NRKTVSeriesIE, cls).suitable(url))
+
+    def _real_extract(self, url):
+        series_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, series_id)
+
+        # New layout (e.g. https://tv.nrk.no/serie/backstage)
+        series = self._extract_series(webpage, series_id, fatal=False)
+        if series:
+            title = try_get(series, lambda x: x['titles']['title'], compat_str)
+            description = try_get(
+                series, lambda x: x['titles']['subtitle'], compat_str)
+            entries = []
+            for season in series['seasons']:
+                entries.extend(self._extract_episodes(season))
+            return self.playlist_result(entries, series_id, title, description)
+
+        # Old layout (e.g. https://tv.nrk.no/serie/groenn-glede)
+        entries = [
+            self.url_result(
+                'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
+                    series=series_id, season=season_id))
+            for season_id in re.findall(self._ITEM_RE, webpage)
+        ]
+
+        title = self._html_search_meta(
+            'seriestitle', webpage,
+            'title', default=None) or self._og_search_title(
+            webpage, fatal=False)
+
+        description = self._html_search_meta(
+            'series_description', webpage,
+            'description', default=None) or self._og_search_description(webpage)
+
+        return self.playlist_result(entries, series_id, title, description)
+
+
 class NRKTVDirekteIE(NRKTVIE):
     IE_DESC = 'NRK TV Direkte and NRK Radio Direkte'
     _VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P<id>[^/?#&]+)'
@@ -438,64 +620,6 @@ class NRKTVEpisodesIE(NRKPlaylistBaseIE):
         r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)


-class NRKTVSeriesIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
-    _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
-    _TESTS = [{
-        'url': 'https://tv.nrk.no/serie/groenn-glede',
-        'info_dict': {
-            'id': 'groenn-glede',
-            'title': 'Grønn glede',
-            'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
-        },
-        'playlist_mincount': 9,
-    }, {
-        'url': 'http://tv.nrksuper.no/serie/labyrint',
-        'info_dict': {
-            'id': 'labyrint',
-            'title': 'Labyrint',
-            'description': 'md5:58afd450974c89e27d5a19212eee7115',
-        },
-        'playlist_mincount': 3,
-    }, {
-        'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene',
-        'only_matching': True,
-    }, {
-        'url': 'https://tv.nrk.no/serie/saving-the-human-race',
-        'only_matching': True,
-    }, {
-        'url': 'https://tv.nrk.no/serie/postmann-pat',
-        'only_matching': True,
-    }]
-
-    @classmethod
-    def suitable(cls, url):
-        return False if NRKTVIE.suitable(url) else super(NRKTVSeriesIE, cls).suitable(url)
-
-    def _real_extract(self, url):
-        series_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, series_id)
-
-        entries = [
-            self.url_result(
-                'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
-                    series=series_id, season=season_id))
-            for season_id in re.findall(self._ITEM_RE, webpage)
-        ]
-
-        title = self._html_search_meta(
-            'seriestitle', webpage,
-            'title', default=None) or self._og_search_title(
-            webpage, fatal=False)
-
-        description = self._html_search_meta(
-            'series_description', webpage,
-            'description', default=None) or self._og_search_description(webpage)
-
-        return self.playlist_result(entries, series_id, title, description)
-
-
 class NRKSkoleIE(InfoExtractor):
     IE_DESC = 'NRK Skole'
     _VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)'
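The new NRK classes all overlap on `/serie/` URLs; their `suitable` overrides establish a most-specific-first precedence, sketched here:

    # Routing after this change:
    #   https://tv.nrk.no/serie/backstage/sesong/1/episode/8  -> NRKTVEpisodeIE
    #   https://tv.nrk.no/serie/backstage/sesong/1            -> NRKTVSeasonIE
    #   https://tv.nrk.no/serie/backstage                     -> NRKTVSeriesIE
    # NRKTVSeasonIE.suitable() defers to NRKTVIE and NRKTVEpisodeIE, and
    # NRKTVSeriesIE.suitable() defers to all three, so the longer URLs are
    # never claimed by the broader series extractor.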
@@ -10,6 +10,7 @@ from ..utils import (
     parse_resolution,
     try_get,
     unified_timestamp,
+    url_or_none,
     urljoin,
 )

@@ -200,8 +201,8 @@ class PeerTubeIE(InfoExtractor):
         for file_ in video['files']:
             if not isinstance(file_, dict):
                 continue
-            file_url = file_.get('fileUrl')
-            if not file_url or not isinstance(file_url, compat_str):
+            file_url = url_or_none(file_.get('fileUrl'))
+            if not file_url:
                 continue
             file_size = int_or_none(file_.get('size'))
             format_id = try_get(
@@ -27,6 +27,60 @@ from ..utils import (
 class PluralsightBaseIE(InfoExtractor):
     _API_BASE = 'https://app.pluralsight.com'

+    _GRAPHQL_EP = '%s/player/api/graphql' % _API_BASE
+    _GRAPHQL_HEADERS = {
+        'Content-Type': 'application/json;charset=UTF-8',
+    }
+    _GRAPHQL_COURSE_TMPL = '''
+query BootstrapPlayer {
+  rpc {
+    bootstrapPlayer {
+      profile {
+        firstName
+        lastName
+        email
+        username
+        userHandle
+        authed
+        isAuthed
+        plan
+      }
+      course(courseId: "%s") {
+        name
+        title
+        courseHasCaptions
+        translationLanguages {
+          code
+          name
+        }
+        supportsWideScreenVideoFormats
+        timestamp
+        modules {
+          name
+          title
+          duration
+          formattedDuration
+          author
+          authorized
+          clips {
+            authorized
+            clipId
+            duration
+            formattedDuration
+            id
+            index
+            moduleIndex
+            moduleTitle
+            name
+            title
+            watched
+          }
+        }
+      }
+    }
+  }
+}'''
+
     def _download_course(self, course_id, url, display_id):
         try:
             return self._download_course_rpc(course_id, url, display_id)
@@ -39,20 +93,14 @@ class PluralsightBaseIE(InfoExtractor):

     def _download_course_rpc(self, course_id, url, display_id):
         response = self._download_json(
-            '%s/player/functions/rpc' % self._API_BASE, display_id,
-            'Downloading course JSON',
-            data=json.dumps({
-                'fn': 'bootstrapPlayer',
-                'payload': {
-                    'courseId': course_id,
-                },
-            }).encode('utf-8'),
-            headers={
-                'Content-Type': 'application/json;charset=utf-8',
-                'Referer': url,
-            })
+            self._GRAPHQL_EP, display_id, data=json.dumps({
+                'query': self._GRAPHQL_COURSE_TMPL % course_id,
+                'variables': {}
+            }).encode('utf-8'), headers=self._GRAPHQL_HEADERS)

-        course = try_get(response, lambda x: x['payload']['course'], dict)
+        course = try_get(
+            response, lambda x: x['data']['rpc']['bootstrapPlayer']['course'],
+            dict)
         if course:
             return course

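For reference, this is the JSON envelope `_download_course_rpc` now POSTs to the GraphQL endpoint, and where the course object lands in the reply (the course id below is a placeholder):

    import json

    from youtube_dl.extractor.pluralsight import PluralsightBaseIE

    body = json.dumps({
        'query': PluralsightBaseIE._GRAPHQL_COURSE_TMPL % 'hypothetical-course',
        'variables': {},
    }).encode('utf-8')
    # The reply is unwrapped via:
    #   response['data']['rpc']['bootstrapPlayer']['course']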
@@ -90,6 +138,28 @@ class PluralsightIE(PluralsightBaseIE):
         'only_matching': True,
     }]

+    GRAPHQL_VIEWCLIP_TMPL = '''
+query viewClip {
+  viewClip(input: {
+    author: "%(author)s",
+    clipIndex: %(clipIndex)d,
+    courseName: "%(courseName)s",
+    includeCaptions: %(includeCaptions)s,
+    locale: "%(locale)s",
+    mediaType: "%(mediaType)s",
+    moduleName: "%(moduleName)s",
+    quality: "%(quality)s"
+  }) {
+    urls {
+      url
+      cdn
+      rank
+      source
+    },
+    status
+  }
+}'''
+
     def _real_initialize(self):
         self._login()

@@ -277,7 +347,7 @@ class PluralsightIE(PluralsightBaseIE):
                 f = QUALITIES[quality].copy()
                 clip_post = {
                     'author': author,
-                    'includeCaptions': False,
+                    'includeCaptions': 'false',
                     'clipIndex': int(clip_idx),
                     'courseName': course_name,
                     'locale': 'en',
@@ -286,11 +356,23 @@ class PluralsightIE(PluralsightBaseIE):
                     'quality': '%dx%d' % (f['width'], f['height']),
                 }
                 format_id = '%s-%s' % (ext, quality)
-                viewclip = self._download_json(
-                    '%s/video/clips/viewclip' % self._API_BASE, display_id,
-                    'Downloading %s viewclip JSON' % format_id, fatal=False,
-                    data=json.dumps(clip_post).encode('utf-8'),
-                    headers={'Content-Type': 'application/json;charset=utf-8'})
+
+                try:
+                    viewclip = self._download_json(
+                        self._GRAPHQL_EP, display_id,
+                        'Downloading %s viewclip graphql' % format_id,
+                        data=json.dumps({
+                            'query': self.GRAPHQL_VIEWCLIP_TMPL % clip_post,
+                            'variables': {}
+                        }).encode('utf-8'),
+                        headers=self._GRAPHQL_HEADERS)['data']['viewClip']
+                except ExtractorError:
+                    # Still works but most likely will go soon
+                    viewclip = self._download_json(
+                        '%s/video/clips/viewclip' % self._API_BASE, display_id,
+                        'Downloading %s viewclip JSON' % format_id, fatal=False,
+                        data=json.dumps(clip_post).encode('utf-8'),
+                        headers={'Content-Type': 'application/json;charset=utf-8'})

                 # Pluralsight tracks multiple sequential calls to ViewClip API and start
                 # to return 429 HTTP errors after some time (see
|
@ -4,28 +4,21 @@ from __future__ import unicode_literals
|
|||||||
import functools
|
import functools
|
||||||
import itertools
|
import itertools
|
||||||
import operator
|
import operator
|
||||||
# import os
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
# compat_urllib_parse_unquote,
|
compat_str,
|
||||||
# compat_urllib_parse_unquote_plus,
|
|
||||||
# compat_urllib_parse_urlparse,
|
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
# sanitized_Request,
|
|
||||||
remove_quotes,
|
remove_quotes,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
)
|
)
|
||||||
# from ..aes import (
|
|
||||||
# aes_decrypt_text
|
|
||||||
# )
|
|
||||||
|
|
||||||
|
|
||||||
class PornHubIE(InfoExtractor):
|
class PornHubIE(InfoExtractor):
|
||||||
@ -62,7 +55,7 @@ class PornHubIE(InfoExtractor):
|
|||||||
'id': '1331683002',
|
'id': '1331683002',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '重庆婷婷女王足交',
|
'title': '重庆婷婷女王足交',
|
||||||
'uploader': 'cj397186295',
|
'uploader': 'Unknown',
|
||||||
'duration': 1753,
|
'duration': 1753,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
@@ -121,7 +114,7 @@ class PornHubIE(InfoExtractor):
             self._set_cookie('pornhub.com', 'platform', platform)
             return self._download_webpage(
                 'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id,
-                video_id)
+                video_id, 'Downloading %s webpage' % platform)

         webpage = dl_webpage('pc')

@@ -134,48 +127,19 @@ class PornHubIE(InfoExtractor):
                 'PornHub said: %s' % error_msg,
                 expected=True, video_id=video_id)

-        tv_webpage = dl_webpage('tv')
-
-        assignments = self._search_regex(
-            r'(var.+?mediastring.+?)</script>', tv_webpage,
-            'encoded url').split(';')
-
-        js_vars = {}
-
-        def parse_js_value(inp):
-            inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
-            if '+' in inp:
-                inps = inp.split('+')
-                return functools.reduce(
-                    operator.concat, map(parse_js_value, inps))
-            inp = inp.strip()
-            if inp in js_vars:
-                return js_vars[inp]
-            return remove_quotes(inp)
-
-        for assn in assignments:
-            assn = assn.strip()
-            if not assn:
-                continue
-            assn = re.sub(r'var\s+', '', assn)
-            vname, value = assn.split('=', 1)
-            js_vars[vname] = parse_js_value(value)
-
-        video_url = js_vars['mediastring']
-
-        title = self._search_regex(
-            r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None)
-
         # video_title from flashvars contains whitespace instead of non-ASCII (see
         # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
         # on that anymore.
-        title = title or self._html_search_meta(
+        title = self._html_search_meta(
             'twitter:title', webpage, default=None) or self._search_regex(
             (r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
              r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
              r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'),
             webpage, 'title', group='title')

+        video_urls = []
+        video_urls_set = set()

         flashvars = self._parse_json(
             self._search_regex(
                 r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
@@ -183,8 +147,78 @@ class PornHubIE(InfoExtractor):
         if flashvars:
             thumbnail = flashvars.get('image_url')
             duration = int_or_none(flashvars.get('video_duration'))
+            media_definitions = flashvars.get('mediaDefinitions')
+            if isinstance(media_definitions, list):
+                for definition in media_definitions:
+                    if not isinstance(definition, dict):
+                        continue
+                    video_url = definition.get('videoUrl')
+                    if not video_url or not isinstance(video_url, compat_str):
+                        continue
+                    if video_url in video_urls_set:
+                        continue
+                    video_urls_set.add(video_url)
+                    video_urls.append(
+                        (video_url, int_or_none(definition.get('quality'))))
         else:
-            title, thumbnail, duration = [None] * 3
+            thumbnail, duration = [None] * 2

+        if not video_urls:
+            tv_webpage = dl_webpage('tv')
+
+            assignments = self._search_regex(
+                r'(var.+?mediastring.+?)</script>', tv_webpage,
+                'encoded url').split(';')
+
+            js_vars = {}
+
+            def parse_js_value(inp):
+                inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
+                if '+' in inp:
+                    inps = inp.split('+')
+                    return functools.reduce(
+                        operator.concat, map(parse_js_value, inps))
+                inp = inp.strip()
+                if inp in js_vars:
+                    return js_vars[inp]
+                return remove_quotes(inp)
+
+            for assn in assignments:
+                assn = assn.strip()
+                if not assn:
+                    continue
+                assn = re.sub(r'var\s+', '', assn)
+                vname, value = assn.split('=', 1)
+                js_vars[vname] = parse_js_value(value)
+
+            video_url = js_vars['mediastring']
+            if video_url not in video_urls_set:
+                video_urls.append((video_url, None))
+                video_urls_set.add(video_url)
+
+        for mobj in re.finditer(
+                r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
+                webpage):
+            video_url = mobj.group('url')
+            if video_url not in video_urls_set:
+                video_urls.append((video_url, None))
+                video_urls_set.add(video_url)
+
+        formats = []
+        for video_url, height in video_urls:
+            tbr = None
+            mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
+            if mobj:
+                if not height:
+                    height = int(mobj.group('height'))
+                tbr = int(mobj.group('tbr'))
+            formats.append({
+                'url': video_url,
+                'format_id': '%dp' % height if height else None,
+                'height': height,
+                'tbr': tbr,
+            })
+        self._sort_formats(formats)

         video_uploader = self._html_search_regex(
             r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:user|channel)s/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
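For context, the height/bitrate heuristic above leans on the CDN's file-naming convention. A quick standalone check of the regex against a made-up URL of that shape:

    # Made-up URL; only the _<height>P_<tbr>K naming matters for the
    # heuristic in the hunk above.
    import re

    video_url = 'https://cdn.example.com/videos/201807/clip_720P_1500K_123.mp4'
    mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
    assert mobj and int(mobj.group('height')) == 720   # pixel height
    assert int(mobj.group('tbr')) == 1500              # total bitrate, KBit/s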
@@ -210,7 +244,6 @@ class PornHubIE(InfoExtractor):

         return {
             'id': video_id,
-            'url': video_url,
             'uploader': video_uploader,
             'title': title,
             'thumbnail': thumbnail,
@@ -219,7 +252,7 @@ class PornHubIE(InfoExtractor):
             'like_count': like_count,
             'dislike_count': dislike_count,
             'comment_count': comment_count,
-            # 'formats': formats,
+            'formats': formats,
             'age_limit': 18,
             'tags': tags,
             'categories': categories,
247 youtube_dl/extractor/puhutv.py Normal file
@@ -0,0 +1,247 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_HTTPError,
+    compat_str,
+)
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    float_or_none,
+    parse_resolution,
+    str_or_none,
+    try_get,
+    unified_timestamp,
+    url_or_none,
+    urljoin,
+)
+
+
+class PuhuTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-izle'
+    IE_NAME = 'puhutv'
+    _TESTS = [{
+        # film
+        'url': 'https://puhutv.com/sut-kardesler-izle',
+        'md5': 'fbd8f2d8e7681f8bcd51b592475a6ae7',
+        'info_dict': {
+            'id': '5085',
+            'display_id': 'sut-kardesler',
+            'ext': 'mp4',
+            'title': 'Süt Kardeşler',
+            'description': 'md5:405fd024df916ca16731114eb18e511a',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 4832.44,
+            'creator': 'Arzu Film',
+            'timestamp': 1469778212,
+            'upload_date': '20160729',
+            'release_year': 1976,
+            'view_count': int,
+            'tags': ['Aile', 'Komedi', 'Klasikler'],
+        },
+    }, {
+        # episode, geo restricted, bypassable with --geo-verification-proxy
+        'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle',
+        'only_matching': True,
+    }, {
+        # 4k, with subtitles
+        'url': 'https://puhutv.com/dip-1-bolum-izle',
+        'only_matching': True,
+    }]
+    _SUBTITLE_LANGS = {
+        'English': 'en',
+        'Deutsch': 'de',
+        'عربى': 'ar'
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        info = self._download_json(
+            urljoin(url, '/api/slug/%s-izle' % display_id),
+            display_id)['data']
+
+        video_id = compat_str(info['id'])
+        title = info.get('name') or info['title']['name']
+        if info.get('display_name'):
+            title = '%s %s' % (title, info.get('display_name'))
+
+        try:
+            videos = self._download_json(
+                'https://puhutv.com/api/assets/%s/videos' % video_id,
+                display_id, 'Downloading video JSON',
+                headers=self.geo_verification_headers())
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                self.raise_geo_restricted()
+            raise
+
+        formats = []
+        for video in videos['data']['videos']:
+            media_url = url_or_none(video.get('url'))
+            if not media_url:
+                continue
+            playlist = video.get('is_playlist')
+            if video.get('stream_type') == 'hls' and playlist is True:
+                formats.extend(self._extract_m3u8_formats(
+                    media_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+                continue
+            quality = int_or_none(video.get('quality'))
+            f = {
+                'url': media_url,
+                'ext': 'mp4',
+                'height': quality
+            }
+            video_format = video.get('video_format')
+            if video_format == 'hls' and playlist is False:
+                format_id = 'hls'
+                f['protocol'] = 'm3u8_native'
+            elif video_format == 'mp4':
+                format_id = 'http'
+            else:
+                continue
+            if quality:
+                format_id += '-%sp' % quality
+            f['format_id'] = format_id
+            formats.append(f)
+        self._sort_formats(formats)
+
+        description = try_get(
+            info, lambda x: x['title']['description'],
+            compat_str) or info.get('description')
+        timestamp = unified_timestamp(info.get('created_at'))
+        creator = try_get(
+            info, lambda x: x['title']['producer']['name'], compat_str)
+
+        duration = float_or_none(
+            try_get(info, lambda x: x['content']['duration_in_ms'], int),
+            scale=1000)
+        view_count = try_get(info, lambda x: x['content']['watch_count'], int)
+
+        images = try_get(
+            info, lambda x: x['content']['images']['wide'], dict) or {}
+        thumbnails = []
+        for image_id, image_url in images.items():
+            if not isinstance(image_url, compat_str):
+                continue
+            if not image_url.startswith(('http', '//')):
+                image_url = 'https://%s' % image_url
+            t = parse_resolution(image_id)
+            t.update({
+                'id': image_id,
+                'url': image_url
+            })
+            thumbnails.append(t)
+
+        release_year = try_get(info, lambda x: x['title']['released_at'], int)
+
+        season_number = int_or_none(info.get('season_number'))
+        season_id = str_or_none(info.get('season_id'))
+        episode_number = int_or_none(info.get('episode_number'))
+
+        tags = []
+        for genre in try_get(info, lambda x: x['title']['genres'], list) or []:
+            if not isinstance(genre, dict):
+                continue
+            genre_name = genre.get('name')
+            if genre_name and isinstance(genre_name, compat_str):
+                tags.append(genre_name)
+
+        subtitles = {}
+        for subtitle in try_get(
+                info, lambda x: x['content']['subtitles'], list) or []:
+            if not isinstance(subtitle, dict):
+                continue
+            lang = subtitle.get('language')
+            sub_url = url_or_none(subtitle.get('url'))
+            if not lang or not isinstance(lang, compat_str) or not sub_url:
+                continue
+            subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
+                'url': sub_url
+            }]
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'season_id': season_id,
+            'season_number': season_number,
+            'episode_number': episode_number,
+            'release_year': release_year,
+            'timestamp': timestamp,
+            'creator': creator,
+            'view_count': view_count,
+            'duration': duration,
+            'tags': tags,
+            'subtitles': subtitles,
+            'thumbnails': thumbnails,
+            'formats': formats
+        }
+
+
+class PuhuTVSerieIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-detay'
+    IE_NAME = 'puhutv:serie'
+    _TESTS = [{
+        'url': 'https://puhutv.com/deniz-yildizi-detay',
+        'info_dict': {
+            'title': 'Deniz Yıldızı',
+            'id': 'deniz-yildizi',
+        },
+        'playlist_mincount': 205,
+    }, {
+        # a film detail page which is using same url with serie page
+        'url': 'https://puhutv.com/kaybedenler-kulubu-detay',
+        'only_matching': True,
+    }]
+
+    def _extract_entries(self, seasons):
+        for season in seasons:
+            season_id = season.get('id')
+            if not season_id:
+                continue
+            page = 1
+            has_more = True
+            while has_more is True:
+                season = self._download_json(
+                    'https://galadriel.puhutv.com/seasons/%s' % season_id,
+                    season_id, 'Downloading page %s' % page, query={
+                        'page': page,
+                        'per': 40,
+                    })
+                episodes = season.get('episodes')
+                if isinstance(episodes, list):
+                    for ep in episodes:
+                        slug_path = str_or_none(ep.get('slugPath'))
+                        if not slug_path:
+                            continue
+                        video_id = str_or_none(int_or_none(ep.get('id')))
+                        yield self.url_result(
+                            'https://puhutv.com/%s' % slug_path,
+                            ie=PuhuTVIE.ie_key(), video_id=video_id,
+                            video_title=ep.get('name') or ep.get('eventLabel'))
+                page += 1
+                has_more = season.get('hasMore')
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        info = self._download_json(
+            urljoin(url, '/api/slug/%s-detay' % playlist_id),
+            playlist_id)['data']
+
+        seasons = info.get('seasons')
+        if seasons:
+            return self.playlist_result(
+                self._extract_entries(seasons), playlist_id, info.get('name'))
+
+        # For films, these are using same url with series
+        video_id = info.get('slug') or info['assets'][0]['slug']
+        return self.url_result(
+            'https://puhutv.com/%s-izle' % video_id,
+            PuhuTVIE.ie_key(), video_id)
youtube_dl/extractor/redtube.py
@@ -3,12 +3,12 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     int_or_none,
     str_to_int,
     unified_strdate,
+    url_or_none,
 )


@@ -71,8 +71,8 @@ class RedTubeIE(InfoExtractor):
             video_id, fatal=False)
         if medias and isinstance(medias, list):
             for media in medias:
-                format_url = media.get('videoUrl')
-                if not format_url or not isinstance(format_url, compat_str):
+                format_url = url_or_none(media.get('videoUrl'))
+                if not format_url:
                     continue
                 format_id = media.get('quality')
                 formats.append({
youtube_dl/extractor/rentv.py
@@ -6,6 +6,7 @@ from ..compat import compat_str
 from ..utils import (
     determine_ext,
     int_or_none,
+    url_or_none,
 )


@@ -37,8 +38,8 @@ class RENTVIE(InfoExtractor):
         title = config['title']
         formats = []
         for video in config['src']:
-            src = video.get('src')
-            if not src or not isinstance(src, compat_str):
+            src = url_or_none(video.get('src'))
+            if not src:
                 continue
             ext = determine_ext(src)
             if ext == 'm3u8':
youtube_dl/extractor/rutube.py
@@ -16,6 +16,7 @@ from ..utils import (
     int_or_none,
     try_get,
     unified_timestamp,
+    url_or_none,
 )


@@ -176,8 +177,8 @@ class RutubePlaylistBaseIE(RutubeBaseIE):
                 break

             for result in results:
-                video_url = result.get('video_url')
-                if not video_url or not isinstance(video_url, compat_str):
+                video_url = url_or_none(result.get('video_url'))
+                if not video_url:
                     continue
                 entry = self._extract_video(result, require_title=False)
                 entry.update({
youtube_dl/extractor/slutload.py
@@ -1,12 +1,10 @@
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor


 class SlutloadIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
+    _VALID_URL = r'https?://(?:\w+\.)?slutload\.com/(?:video/[^/]+|embed_player|watch)/(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
         'md5': '868309628ba00fd488cf516a113fd717',
@@ -16,33 +14,52 @@ class SlutloadIE(InfoExtractor):
             'title': 'virginie baisee en cam',
             'age_limit': 18,
             'thumbnail': r're:https?://.*?\.jpg'
-        }
+        },
     }, {
         # mobile site
         'url': 'http://mobile.slutload.com/video/masturbation-solo/fviFLmc6kzJ/',
         'only_matching': True,
+    }, {
+        'url': 'http://www.slutload.com/embed_player/TD73btpBqSxc/',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.slutload.com/watch/TD73btpBqSxc/Virginie-Baisee-En-Cam.html',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
         video_id = self._match_id(url)

-        desktop_url = re.sub(r'^(https?://)mobile\.', r'\1', url)
-        webpage = self._download_webpage(desktop_url, video_id)
+        embed_page = self._download_webpage(
+            'http://www.slutload.com/embed_player/%s' % video_id, video_id,
+            'Downloading embed page', fatal=False)

-        video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>',
-                                              webpage, 'title').strip()
+        if embed_page:
+            def extract(what):
+                return self._html_search_regex(
+                    r'data-video-%s=(["\'])(?P<url>(?:(?!\1).)+)\1' % what,
+                    embed_page, 'video %s' % what, default=None, group='url')

-        video_url = self._html_search_regex(
-            r'(?s)<div id="vidPlayer"\s+data-url="([^"]+)"',
-            webpage, 'video URL')
-        thumbnail = self._html_search_regex(
-            r'(?s)<div id="vidPlayer"\s+.*?previewer-file="([^"]+)"',
-            webpage, 'thumbnail', fatal=False)
+            video_url = extract('url')
+            if video_url:
+                title = self._html_search_regex(
+                    r'<title>([^<]+)', embed_page, 'title', default=video_id)
+                return {
+                    'id': video_id,
+                    'url': video_url,
+                    'title': title,
+                    'thumbnail': extract('preview'),
+                    'age_limit': 18
+                }

-        return {
+        webpage = self._download_webpage(
+            'http://www.slutload.com/video/_/%s/' % video_id, video_id)
+        title = self._html_search_regex(
+            r'<h1><strong>([^<]+)</strong>', webpage, 'title').strip()
+        info = self._parse_html5_media_entries(url, webpage, video_id)[0]
+        info.update({
             'id': video_id,
-            'url': video_url,
-            'title': video_title,
-            'thumbnail': thumbnail,
-            'age_limit': 18
-        }
+            'title': title,
+            'age_limit': 18,
+        })
+        return info
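The embed-player path reads data-video-* attributes off the embed page; the markup below is a hypothetical reconstruction (the real attribute set on the site is not shown in this diff), just to illustrate what the extract() helper matches:

    # Hypothetical embed_player markup, for illustrating the regex only.
    import re

    embed_page = ('<div class="player" '
                  'data-video-url="https://cdn.example.com/TD73btpBqSxc.mp4" '
                  'data-video-preview="https://cdn.example.com/TD73btpBqSxc.jpg">')

    def extract(what):
        m = re.search(
            r'data-video-%s=(["\'])(?P<url>(?:(?!\1).)+)\1' % what, embed_page)
        return m.group('url') if m else None

    assert extract('url').endswith('.mp4')
    assert extract('preview').endswith('.jpg')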
youtube_dl/extractor/theplatform.py
@@ -32,13 +32,15 @@ _x = lambda p: xpath_with_ns(p, {'smil': default_ns})


 class ThePlatformBaseIE(OnceIE):
+    _TP_TLD = 'com'
+
     def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
         meta = self._download_xml(
             smil_url, video_id, note=note, query={'format': 'SMIL'},
             headers=self.geo_verification_headers())
         error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
         if error_element is not None and error_element.attrib['src'].startswith(
-                'http://link.theplatform.com/s/errorFiles/Unavailable.'):
+                'http://link.theplatform.%s/s/errorFiles/Unavailable.' % self._TP_TLD):
             raise ExtractorError(error_element.attrib['abstract'], expected=True)

         smil_formats = self._parse_smil_formats(
@@ -66,7 +68,7 @@ class ThePlatformBaseIE(OnceIE):
         return formats, subtitles

     def _download_theplatform_metadata(self, path, video_id):
-        info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
+        info_url = 'http://link.theplatform.%s/s/%s?format=preview' % (self._TP_TLD, path)
         return self._download_json(info_url, video_id)

     def _parse_theplatform_metadata(self, info):
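_TP_TLD makes the theplatform domain a class-level knob, so a subclass serving a European feed only needs to override one attribute. A minimal sketch (the subclass name is hypothetical):

    # Hypothetical subclass: overriding _TP_TLD redirects both the error
    # check and _download_theplatform_metadata to link.theplatform.eu.
    class SomeEuFeedIE(ThePlatformBaseIE):
        _TP_TLD = 'eu'

    # Metadata would then be fetched from:
    #   http://link.theplatform.eu/s/<path>?format=preview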
youtube_dl/extractor/turner.py
@@ -15,6 +15,7 @@ from ..utils import (
     update_url_query,
     ExtractorError,
     strip_or_none,
+    url_or_none,
 )


@@ -154,8 +155,8 @@ class TurnerBaseIE(AdobePassIE):
         subtitles = {}
         for source in video_data.findall('closedCaptions/source'):
             for track in source.findall('track'):
-                track_url = track.get('url')
-                if not isinstance(track_url, compat_str) or track_url.endswith('/big'):
+                track_url = url_or_none(track.get('url'))
+                if not track_url or track_url.endswith('/big'):
                     continue
                 lang = track.get('lang') or track.get('label') or 'en'
                 subtitles.setdefault(lang, []).append({
youtube_dl/extractor/tvnet.py
@@ -4,10 +4,10 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     int_or_none,
     unescapeHTML,
+    url_or_none,
 )


@@ -106,9 +106,8 @@ class TVNetIE(InfoExtractor):
         for stream in self._download_json(data_file, video_id):
             if not isinstance(stream, dict):
                 continue
-            stream_url = stream.get('url')
-            if (stream_url in stream_urls or not stream_url or
-                    not isinstance(stream_url, compat_str)):
+            stream_url = url_or_none(stream.get('url'))
+            if stream_url in stream_urls or not stream_url:
                 continue
             stream_urls.add(stream_url)
             formats.extend(self._extract_m3u8_formats(
youtube_dl/extractor/tvplay.py
@@ -19,6 +19,7 @@ from ..utils import (
     try_get,
     unsmuggle_url,
     update_url_query,
+    url_or_none,
 )


@@ -255,7 +256,8 @@ class TVPlayIE(InfoExtractor):
         quality = qualities(['hls', 'medium', 'high'])
         formats = []
         for format_id, video_url in streams.get('streams', {}).items():
-            if not video_url or not isinstance(video_url, compat_str):
+            video_url = url_or_none(video_url)
+            if not video_url:
                 continue
             ext = determine_ext(video_url)
             if ext == 'f4m':
youtube_dl/extractor/twitch.py
@@ -27,6 +27,7 @@ from ..utils import (
     unified_timestamp,
     update_url_query,
     urlencode_postdata,
+    url_or_none,
     urljoin,
 )

@@ -663,8 +664,8 @@ class TwitchClipsIE(TwitchBaseIE):
         for option in status['quality_options']:
             if not isinstance(option, dict):
                 continue
-            source = option.get('source')
-            if not source or not isinstance(source, compat_str):
+            source = url_or_none(option.get('source'))
+            if not source:
                 continue
             formats.append({
                 'url': source,
youtube_dl/extractor/udemy.py
@@ -20,6 +20,7 @@ from ..utils import (
     sanitized_Request,
     try_get,
     unescapeHTML,
+    url_or_none,
     urlencode_postdata,
 )

@@ -265,8 +266,8 @@ class UdemyIE(InfoExtractor):
             if not isinstance(source_list, list):
                 return
             for source in source_list:
-                video_url = source.get('file') or source.get('src')
-                if not video_url or not isinstance(video_url, compat_str):
+                video_url = url_or_none(source.get('file') or source.get('src'))
+                if not video_url:
                     continue
                 if source.get('type') == 'application/x-mpegURL' or determine_ext(video_url) == 'm3u8':
                     formats.extend(self._extract_m3u8_formats(
@@ -293,8 +294,8 @@ class UdemyIE(InfoExtractor):
                     continue
                 if track.get('kind') != 'captions':
                     continue
-                src = track.get('src')
-                if not src or not isinstance(src, compat_str):
+                src = url_or_none(track.get('src'))
+                if not src:
                     continue
                 lang = track.get('language') or track.get(
                     'srclang') or track.get('label')
@@ -314,8 +315,8 @@ class UdemyIE(InfoExtractor):
                 for cc in captions:
                     if not isinstance(cc, dict):
                         continue
-                    cc_url = cc.get('url')
-                    if not cc_url or not isinstance(cc_url, compat_str):
+                    cc_url = url_or_none(cc.get('url'))
+                    if not cc_url:
                         continue
                     lang = try_get(cc, lambda x: x['locale']['locale'], compat_str)
                     sub_dict = (automatic_captions if cc.get('source') == 'auto'
youtube_dl/extractor/vidme.py
@@ -3,15 +3,13 @@ from __future__ import unicode_literals
 import itertools

 from .common import InfoExtractor
-from ..compat import (
-    compat_HTTPError,
-    compat_str,
-)
+from ..compat import compat_HTTPError
 from ..utils import (
     ExtractorError,
     int_or_none,
     float_or_none,
     parse_iso8601,
+    url_or_none,
 )


@@ -166,8 +164,8 @@ class VidmeIE(InfoExtractor):

         formats = []
         for f in video.get('formats', []):
-            format_url = f.get('uri')
-            if not format_url or not isinstance(format_url, compat_str):
+            format_url = url_or_none(f.get('uri'))
+            if not format_url:
                 continue
             format_type = f.get('type')
             if format_type == 'dash':
youtube_dl/extractor/vimeo.py
@@ -539,9 +539,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
         # We try to find out to which variable is assigned the config dic
         m_variable_name = re.search(r'(\w)\.video\.id', webpage)
         if m_variable_name is not None:
-            config_re = r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))
+            config_re = [r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))]
         else:
             config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
+        config_re.append(r'\bvar\s+r\s*=\s*({.+?})\s*;')
         config = self._search_regex(config_re, webpage, 'info section',
             flags=re.DOTALL)
         config = json.loads(config)
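The appended fallback pattern targets pages that assign the player config to a bare var r = {...};. Checking the new regex in isolation:

    # Standalone check of the added fallback pattern against a made-up
    # page snippet.
    import re

    webpage = 'window.player = {}; var r = {"video": {"id": 123}} ;'
    m = re.search(r'\bvar\s+r\s*=\s*({.+?})\s*;', webpage)
    assert m.group(1) == '{"video": {"id": 123}}'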
youtube_dl/extractor/viu.py
@@ -195,16 +195,29 @@ class ViuOTTIE(InfoExtractor):
         'skip': 'Geo-restricted to Hong Kong',
     }]

+    _AREA_ID = {
+        'HK': 1,
+        'SG': 2,
+        'TH': 4,
+        'PH': 5,
+    }
+
     def _real_extract(self, url):
         country_code, video_id = re.match(self._VALID_URL, url).groups()

+        query = {
+            'r': 'vod/ajax-detail',
+            'platform_flag_label': 'web',
+            'product_id': video_id,
+        }
+
+        area_id = self._AREA_ID.get(country_code.upper())
+        if area_id:
+            query['area_id'] = area_id
+
         product_data = self._download_json(
             'http://www.viu.com/ott/%s/index.php' % country_code, video_id,
-            'Downloading video info', query={
-                'r': 'vod/ajax-detail',
-                'platform_flag_label': 'web',
-                'product_id': video_id,
-            })['data']
+            'Downloading video info', query=query)['data']

         video_data = product_data.get('current_product')
         if not video_data:
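With _AREA_ID in place, recognized country codes add an area hint to the ajax-detail call. For a hypothetical Singapore product, the assembled query would be:

    # Illustrative only; the product_id value is made up.
    query = {
        'r': 'vod/ajax-detail',
        'platform_flag_label': 'web',
        'product_id': '12345',
        'area_id': 2,  # _AREA_ID['SG']; omitted for unmapped country codes
    }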
@@ -214,6 +227,9 @@ class ViuOTTIE(InfoExtractor):
             'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
             video_id, 'Downloading stream info', query={
                 'ccs_product_id': video_data['ccs_product_id'],
+            }, headers={
+                'Referer': url,
+                'Origin': re.search(r'https?://[^/]+', url).group(0),
             })['data']['stream']

         stream_sizes = stream_data.get('size', {})
youtube_dl/extractor/vk.py
@@ -20,6 +20,7 @@ from ..utils import (
     str_to_int,
     unescapeHTML,
     unified_timestamp,
+    url_or_none,
     urlencode_postdata,
 )
 from .dailymotion import DailymotionIE
@@ -423,7 +424,8 @@ class VKIE(VKBaseIE):

         formats = []
         for format_id, format_url in data.items():
-            if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//', 'rtmp')):
+            format_url = url_or_none(format_url)
+            if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
                 continue
             if (format_id.startswith(('url', 'cache')) or
                     format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
youtube_dl/extractor/watchbox.py
@@ -67,11 +67,12 @@ class WatchBoxIE(InfoExtractor):

         webpage = self._download_webpage(url, video_id)

-        source = self._parse_json(
+        source = (self._parse_json(
             self._search_regex(
-                r'(?s)source["\']?\s*:\s*({.+?})\s*[,}]', webpage, 'source',
+                r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config',
                 default='{}'),
-            video_id, transform_source=js_to_json, fatal=False) or {}
+            video_id, transform_source=js_to_json,
+            fatal=False) or {}).get('source') or {}

         video_id = compat_str(source.get('videoId') or video_id)

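The source object is now nested inside a playerConf assignment, hence the extra .get('source') unwrap. The page markup itself is not shown in this diff, so the snippet below is a hypothetical reconstruction of the shape the new regex expects:

    # Hypothetical watchbox page JS; only the playerConf = {...}; shape
    # matters for the regex above.
    import re

    webpage = "playerConf = {source: {videoId: 341368}} ;"
    raw = re.search(r'playerConf\s*=\s*({.+?})\s*;', webpage).group(1)
    # js_to_json plus _parse_json turn raw into a dict; the trailing
    # (... or {}).get('source') or {} then yields {'videoId': 341368}.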
youtube_dl/extractor/xhamster.py
@@ -13,6 +13,7 @@ from ..utils import (
     parse_duration,
     try_get,
     unified_strdate,
+    url_or_none,
 )


@@ -137,7 +138,8 @@ class XHamsterIE(InfoExtractor):
                 else:
                     format_url = format_item
                     filesize = None
-                if not isinstance(format_url, compat_str):
+                format_url = url_or_none(format_url)
+                if not format_url:
                     continue
                 formats.append({
                     'format_id': '%s-%s' % (format_id, quality),
@@ -198,7 +200,8 @@ class XHamsterIE(InfoExtractor):
                 default='{}'),
             video_id, fatal=False)
         for format_id, format_url in sources.items():
-            if not isinstance(format_url, compat_str):
+            format_url = url_or_none(format_url)
+            if not format_url:
                 continue
             if format_url in format_urls:
                 continue
youtube_dl/extractor/yapfiles.py
@@ -4,12 +4,12 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     int_or_none,
     qualities,
     unescapeHTML,
+    url_or_none,
 )


@@ -80,9 +80,9 @@ class YapFilesIE(InfoExtractor):
         formats = []
         for format_id in QUALITIES:
             is_hd = format_id == 'hd'
-            format_url = playlist.get(
-                'file%s' % ('_hd' if is_hd else ''))
-            if not format_url or not isinstance(format_url, compat_str):
+            format_url = url_or_none(playlist.get(
+                'file%s' % ('_hd' if is_hd else '')))
+            if not format_url:
                 continue
             formats.append({
                 'url': format_url,
youtube_dl/extractor/youjizz.py
@@ -3,11 +3,11 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     determine_ext,
     int_or_none,
     parse_duration,
+    url_or_none,
 )


@@ -50,8 +50,8 @@ class YouJizzIE(InfoExtractor):
         for encoding in encodings:
             if not isinstance(encoding, dict):
                 continue
-            format_url = encoding.get('filename')
-            if not isinstance(format_url, compat_str):
+            format_url = url_or_none(encoding.get('filename'))
+            if not format_url:
                 continue
             if determine_ext(format_url) == 'm3u8':
                 formats.extend(self._extract_m3u8_formats(
youtube_dl/extractor/youporn.py
@@ -3,13 +3,13 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     int_or_none,
     sanitized_Request,
     str_to_int,
     unescapeHTML,
     unified_strdate,
+    url_or_none,
 )
 from ..aes import aes_decrypt_text

@@ -88,8 +88,8 @@ class YouPornIE(InfoExtractor):
         for definition in definitions:
             if not isinstance(definition, dict):
                 continue
-            video_url = definition.get('videoUrl')
-            if isinstance(video_url, compat_str) and video_url:
+            video_url = url_or_none(definition.get('videoUrl'))
+            if video_url:
                 links.append(video_url)

         # Fallback #1, this also contains extra low quality 180p format
youtube_dl/extractor/youtube.py
@@ -178,13 +178,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             warn('Unable to extract result entry')
             return False

-        tfa = try_get(res, lambda x: x[0][0], list)
-        if tfa:
-            tfa_str = try_get(tfa, lambda x: x[2], compat_str)
-            if tfa_str == 'TWO_STEP_VERIFICATION':
+        login_challenge = try_get(res, lambda x: x[0][0], list)
+        if login_challenge:
+            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
+            if challenge_str == 'TWO_STEP_VERIFICATION':
                 # SEND_SUCCESS - TFA code has been successfully sent to phone
                 # QUOTA_EXCEEDED - reached the limit of TFA codes
-                status = try_get(tfa, lambda x: x[5], compat_str)
+                status = try_get(login_challenge, lambda x: x[5], compat_str)
                 if status == 'QUOTA_EXCEEDED':
                     warn('Exceeded the limit of TFA codes, try later')
                     return False
@@ -228,6 +228,17 @@ class YoutubeBaseInfoExtractor(InfoExtractor):

                 check_cookie_url = try_get(
                     tfa_results, lambda x: x[0][-1][2], compat_str)
+            else:
+                CHALLENGES = {
+                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
+                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
+                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
+                }
+                challenge = CHALLENGES.get(
+                    challenge_str,
+                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
+                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
+                return False
         else:
             check_cookie_url = try_get(res, lambda x: x[2], compat_str)

youtube_dl/extractor/zattoo.py
@@ -13,6 +13,7 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     try_get,
+    url_or_none,
     urlencode_postdata,
 )

@@ -150,8 +151,8 @@ class ZattooBaseIE(InfoExtractor):
             for watch in watch_urls:
                 if not isinstance(watch, dict):
                     continue
-                watch_url = watch.get('url')
-                if not watch_url or not isinstance(watch_url, compat_str):
+                watch_url = url_or_none(watch.get('url'))
+                if not watch_url:
                     continue
                 format_id_list = [stream_type]
                 maxrate = watch.get('maxrate')
youtube_dl/extractor/zdf.py
@@ -15,6 +15,7 @@ from ..utils import (
     try_get,
     unified_timestamp,
     update_url_query,
+    url_or_none,
     urljoin,
 )

@@ -67,8 +68,8 @@ class ZDFIE(ZDFBaseIE):
     def _extract_subtitles(src):
         subtitles = {}
         for caption in try_get(src, lambda x: x['captions'], list) or []:
-            subtitle_url = caption.get('uri')
-            if subtitle_url and isinstance(subtitle_url, compat_str):
+            subtitle_url = url_or_none(caption.get('uri'))
+            if subtitle_url:
                 lang = caption.get('language', 'deu')
                 subtitles.setdefault(lang, []).append({
                     'url': subtitle_url,
@@ -76,8 +77,8 @@ class ZDFIE(ZDFBaseIE):
         return subtitles

     def _extract_format(self, video_id, formats, format_urls, meta):
-        format_url = meta.get('url')
-        if not format_url or not isinstance(format_url, compat_str):
+        format_url = url_or_none(meta.get('url'))
+        if not format_url:
             return
         if format_url in format_urls:
             return
@@ -152,7 +153,8 @@ class ZDFIE(ZDFBaseIE):
             content, lambda x: x['teaserImageRef']['layouts'], dict)
         if layouts:
             for layout_key, layout_url in layouts.items():
-                if not isinstance(layout_url, compat_str):
+                layout_url = url_or_none(layout_url)
+                if not layout_url:
                     continue
                 thumbnail = {
                     'url': layout_url,
youtube_dl/utils.py
@@ -184,6 +184,7 @@ DATE_FORMATS_MONTH_FIRST.extend([
 ])

 PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
+JSON_LD_RE = r'(?is)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'


 def preferredencoding():
|
|||||||
return None if v is None else v.strip()
|
return None if v is None else v.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def url_or_none(url):
|
||||||
|
if not url or not isinstance(url, compat_str):
|
||||||
|
return None
|
||||||
|
url = url.strip()
|
||||||
|
return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None
|
||||||
|
|
||||||
|
|
||||||
def parse_duration(s):
|
def parse_duration(s):
|
||||||
if not isinstance(s, compat_basestring):
|
if not isinstance(s, compat_basestring):
|
||||||
return None
|
return None
|
||||||
@ -2316,7 +2324,7 @@ def parse_age_limit(s):
|
|||||||
def strip_jsonp(code):
|
def strip_jsonp(code):
|
||||||
return re.sub(
|
return re.sub(
|
||||||
r'''(?sx)^
|
r'''(?sx)^
|
||||||
(?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]+)
|
(?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
|
||||||
(?:\s*&&\s*(?P=func_name))?
|
(?:\s*&&\s*(?P=func_name))?
|
||||||
\s*\(\s*(?P<callback_data>.*)\);?
|
\s*\(\s*(?P<callback_data>.*)\);?
|
||||||
\s*?(?://[^\n]*)*$''',
|
\s*?(?://[^\n]*)*$''',
|
||||||
|
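Loosening func_name from + to * is what implements the ChangeLog entry "[utils] Allow JSONP without function name (#17028)": the substitution keeps only the callback_data group, so an anonymous wrapper now unwraps too.

    from youtube_dl.utils import strip_jsonp

    assert strip_jsonp('callback({"status": "ok"});') == '{"status": "ok"}'
    # JSONP without a function name (new with this change):
    assert strip_jsonp('({"status": "ok"});') == '{"status": "ok"}'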
youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2018.06.25'
+__version__ = '2018.07.21'