mirror of https://github.com/l1ving/youtube-dl (synced 2025-02-04 02:22:52 +08:00)
commit 8f3d795ecd
.github/ISSUE_TEMPLATE.md (vendored, 16 changes)

@@ -1,16 +1,16 @@
 ## Please follow the guide below

 - You will be asked some questions and requested to provide some information, please read them **carefully** and answer honestly
-- Put an `x` into all the boxes [ ] relevant to your *issue* (like that [x])
+- Put an `x` into all the boxes [ ] relevant to your *issue* (like this: `[x]`)
-- Use *Preview* tab to see how your issue will actually look like
+- Use the *Preview* tab to see what your issue will actually look like

 ---

-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.23*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.23**
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.01**

 ### Before submitting an *issue* make sure you have:
-- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
+- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
 - [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones

 ### What is the purpose of your *issue*?

@@ -28,14 +28,14 @@
 ### If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows:

-Add `-v` flag to **your command line** you run youtube-dl with, copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```):
+Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl -v <your command line>`), copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```):

 ```
 $ youtube-dl -v <your command line>
 [debug] System config: []
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2017.07.23
+[debug] youtube-dl version 2017.10.01
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
.gitignore (vendored, 1 change)

@@ -22,6 +22,7 @@ cover/
 updates_key.pem
 *.egg-info
 *.srt
+*.ttml
 *.sbv
 *.vtt
 *.flv
AUTHORS (8 changes)

@@ -223,3 +223,11 @@ Jan Kundrát
 Giuseppe Fabiano
 Örn Guðjónsson
 Parmjit Virk
+Genki Sky
+Ľuboš Katrinec
+Corey Nicholson
+Ashutosh Chaudhary
+John Dong
+Tatsuyuki Ishi
+Daniel Weber
+Kay Bouché
@@ -3,7 +3,7 @@
 $ youtube-dl -v <your command line>
 [debug] System config: []
 [debug] User config: []
-[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
+[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
 [debug] youtube-dl version 2015.12.06
 [debug] Git HEAD: 135392e

@@ -34,7 +34,7 @@ For bug reports, this means that your report should contain the *complete* outpu

 If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).

-**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `http://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `http://www.youtube.com/`) is *not* an example URL.
+**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.

 ### Are you using the latest version?

@@ -70,7 +70,7 @@ It may sound strange, but some bug reports we receive are completely unrelated t

 # DEVELOPER INSTRUCTIONS

-Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
+Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution.

 To run youtube-dl as a developer, you don't need to build anything either. Simply execute

@@ -82,6 +82,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file
     python test/test_download.py
     nosetests

+See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
+
 If you want to create a build of youtube-dl yourself, you'll need

 * python

@@ -118,7 +120,7 @@ After you have ensured this site is distributing its content legally, you can fo
     class YourExtractorIE(InfoExtractor):
         _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
         _TEST = {
-            'url': 'http://yourextractor.com/watch/42',
+            'url': 'https://yourextractor.com/watch/42',
             'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
             'info_dict': {
                 'id': '42',

@@ -149,10 +151,10 @@ After you have ensured this site is distributing its content legally, you can fo
         }
     ```
 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
-6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
+6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
-8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
+8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
-9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
+9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:

         $ git add youtube_dl/extractor/extractors.py
         $ git add youtube_dl/extractor/yourextractor.py
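Step 6 above notes that `_TESTS` entries carrying an `only_matching` key are not counted as numbered test cases. A sketch of what such a list can look like; the URLs and fields are illustrative, not a real extractor's tests:

```python
# Hypothetical _TESTS for the tutorial's YourExtractorIE: the first entry
# is a full download test; the second only asserts that _VALID_URL matches.
_TESTS = [{
    'url': 'https://yourextractor.com/watch/42',
    'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
    'info_dict': {
        'id': '42',
        'ext': 'mp4',
        'title': 'Video title goes here',
    },
}, {
    # Checked against _VALID_URL only; no download is attempted, and it is
    # not counted among test_YourExtractor, test_YourExtractor_1, ...
    'url': 'https://yourextractor.com/embed/42',
    'only_matching': True,
}]
```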
ChangeLog (259 changes)

@@ -1,8 +1,263 @@
-version <unreleased>
+version 2017.10.01

 Core
+* [YoutubeDL] Document youtube_include_dash_manifest
+
+Extractors
++ [tvp] Add support for new URL schema (#14368)
++ [generic] Add support for single format Video.js embeds (#14371)
+* [yahoo] Bypass geo restriction for brightcove (#14210)
+* [yahoo] Use extracted brightcove account id (#14210)
+* [rtve:alacarta] Fix extraction (#14290)
++ [yahoo] Add support for custom brigthcove embeds (#14210)
++ [generic] Add support for Video.js embeds
++ [gfycat] Add support for /gifs/detail URLs (#14322)
+* [generic] Fix infinite recursion for twitter:player URLs (#14339)
+* [xhamsterembed] Fix extraction (#14308)
+
+
+version 2017.09.24
+
+Core
++ [options] Accept lrc as a subtitle conversion target format (#14292)
+* [utils] Fix handling raw TTML subtitles (#14191)
+
+Extractors
+* [24video] Fix timestamp extraction and make non fatal (#14295)
++ [24video] Add support for 24video.adult (#14295)
++ [kakao] Add support for tv.kakao.com (#12298, #14007)
++ [twitter] Add support for URLs without user id (#14270)
++ [americastestkitchen] Add support for americastestkitchen.com (#10764,
+  #13996)
+* [generic] Fix support for multiple HTML5 videos on one page (#14080)
+* [mixcloud] Fix extraction (#14088, #14132)
++ [lynda] Add support for educourse.ga (#14286)
+* [beeg] Fix extraction (#14275)
+* [nbcsports:vplayer] Correct theplatform URL (#13873)
+* [twitter] Fix duration extraction (#14141)
+* [tvplay] Bypass geo restriction
++ [heise] Add support for YouTube embeds (#14109)
++ [popcorntv] Add support for popcorntv.it (#5914, #14211)
+* [viki] Update app data (#14181)
+* [morningstar] Relax URL regular expression (#14222)
+* [openload] Fix extraction (#14225, #14257)
+* [noovo] Fix extraction (#14214)
+* [dailymotion:playlist] Relax URL regular expression (#14219)
++ [twitch] Add support for go.twitch.tv URLs (#14215)
+* [vgtv] Relax URL regular expression (#14223)
+
+
+version 2017.09.15
+
+Core
+* [downloader/fragment] Restart inconsistent incomplete fragment downloads
+  (#13731)
+* [YoutubeDL] Download raw subtitles files (#12909, #14191)
+
+Extractors
+* [condenast] Fix extraction (#14196, #14207)
++ [orf] Add support for f4m stories
+* [tv4] Relax URL regular expression (#14206)
+* [animeondemand] Bypass geo restriction
++ [animeondemand] Add support for flash videos (#9944)
+
+
+version 2017.09.11
+
+Extractors
+* [rutube:playlist] Fix suitable (#14166)
+
+
+version 2017.09.10
+
+Core
++ [utils] Introduce bool_or_none
+* [YoutubeDL] Ensure dir existence for each requested format (#14116)
+
+Extractors
+* [fox] Fix extraction (#14147)
+* [rutube] Use bool_or_none
+* [rutube] Rework and generalize playlist extractors (#13565)
++ [rutube:playlist] Add support for playlists (#13534, #13565)
++ [radiocanada] Add fallback for title extraction (#14145)
+* [vk] Use dedicated YouTube embeds extraction routine
+* [vice] Use dedicated YouTube embeds extraction routine
+* [cracked] Use dedicated YouTube embeds extraction routine
+* [chilloutzone] Use dedicated YouTube embeds extraction routine
+* [abcnews] Use dedicated YouTube embeds extraction routine
+* [youtube] Separate methods for embeds extraction
+* [redtube] Fix formats extraction (#14122)
+* [arte] Relax unavailability check (#14112)
++ [manyvids] Add support for preview videos from manyvids.com (#14053, #14059)
+* [vidme:user] Relax URL regular expression (#14054)
+* [bpb] Fix extraction (#14043, #14086)
+* [soundcloud] Fix download URL with private tracks (#14093)
+* [aliexpress:live] Add support for live.aliexpress.com (#13698, #13707)
+* [viidea] Capture and output lecture error message (#14099)
+* [radiocanada] Skip unsupported platforms (#14100)
+
+
+version 2017.09.02
+
+Extractors
+* [youtube] Force old layout for each webpage (#14068, #14072, #14074, #14076,
+  #14077, #14079, #14082, #14083, #14094, #14095, #14096)
+* [youtube] Fix upload date extraction (#14065)
++ [charlierose] Add support for episodes (#14062)
++ [bbccouk] Add support for w-prefixed ids (#14056)
+* [googledrive] Extend URL regular expression (#9785)
++ [googledrive] Add support for source format (#14046)
+* [pornhd] Fix extraction (#14005)
+
+
+version 2017.08.27.1
+
+Extractors
+
+* [youtube] Fix extraction with --youtube-skip-dash-manifest enabled (#14037)
+
+
+version 2017.08.27
+
+Core
++ [extractor/common] Extract height and format id for HTML5 videos (#14034)
+* [downloader/http] Rework HTTP downloader (#506, #809, #2849, #4240, #6023,
+  #8625, #9483)
+    * Simplify code and split into separate routines to facilitate maintaining
+    * Make retry mechanism work on errors during actual download not only
+      during connection establishment phase
+    * Retry on ECONNRESET and ETIMEDOUT during reading data from network
+    * Retry on content too short
+    * Show error description on retry
+
+Extractors
+* [generic] Lower preference for extraction from LD-JSON
+* [rai] Fix audio formats extraction (#14024)
+* [youtube] Fix controversy videos extraction (#14027, #14029)
+* [mixcloud] Fix extraction (#14015, #14020)
+
+
+version 2017.08.23
+
+Core
++ [extractor/common] Introduce _parse_xml
+* [extractor/common] Make HLS and DASH extraction in _parse_html5_media_entries
+  non fatal (#13970)
+* [utils] Fix unescapeHTML for misformed string like "&a"" (#13935)
+
+Extractors
+* [cbc:watch] Bypass geo restriction (#13993)
+* [toutv] Relax DRM check (#13994)
++ [googledrive] Add support for subtitles (#13619, #13638)
+* [pornhub] Relax uploader regular expression (#13906, #13975)
+* [bandcamp:album] Extract track titles (#13962)
++ [bbccouk] Add support for events URLs (#13893)
++ [liveleak] Support multi-video pages (#6542)
++ [liveleak] Support another liveleak embedding pattern (#13336)
+* [cda] Fix extraction (#13935)
++ [laola1tv] Add support for tv.ittf.com (#13965)
+* [mixcloud] Fix extraction (#13958, #13974, #13980, #14003)
+
+
+version 2017.08.18
+
+Core
+* [YoutubeDL] Sanitize byte string format URLs (#13951)
++ [extractor/common] Add support for float durations in _parse_mpd_formats
+  (#13919)
+
+Extractors
+* [arte] Detect unavailable videos (#13945)
+* [generic] Convert redirect URLs to unicode strings (#13951)
+* [udemy] Fix paid course detection (#13943)
+* [pluralsight] Use RPC API for course extraction (#13937)
++ [clippit] Add support for clippituser.tv
++ [qqmusic] Support new URL schemes (#13805)
+* [periscope] Renew HLS extraction (#13917)
+* [mixcloud] Extract decrypt key
+
+
+version 2017.08.13
+
+Core
+* [YoutubeDL] Make sure format id is not empty
+* [extractor/common] Make _family_friendly_search optional
+* [extractor/common] Respect source's type attribute for HTML5 media (#13892)
+
+Extractors
+* [pornhub:playlistbase] Skip videos from drop-down menu (#12819, #13902)
++ [fourtube] Add support pornerbros.com (#6022)
++ [fourtube] Add support porntube.com (#7859, #13901)
++ [fourtube] Add support fux.com
+* [limelight] Improve embeds detection (#13895)
++ [reddit] Add support for v.redd.it and reddit.com (#13847)
+* [aparat] Extract all formats (#13887)
+* [mixcloud] Fix play info decryption (#13885)
++ [generic] Add support for vzaar embeds (#13876)
+
+
+version 2017.08.09
+
+Core
+* [utils] Skip missing params in cli_bool_option (#13865)
+
+Extractors
+* [xxxymovies] Fix title extraction (#13868)
++ [nick] Add support for nick.com.pl (#13860)
+* [mixcloud] Fix play info decryption (#13867)
+* [20min] Fix embeds extraction (#13852)
+* [dplayit] Fix extraction (#13851)
++ [niconico] Support videos with multiple formats (#13522)
++ [niconico] Support HTML5-only videos (#13806)
+
+
+version 2017.08.06
+
+Core
+* Use relative paths for DASH fragments (#12990)
+
+Extractors
+* [pluralsight] Fix format selection
+- [mpora] Remove extractor (#13826)
++ [voot] Add support for voot.com (#10255, #11644, #11814, #12350, #13218)
+* [vlive:channel] Limit number of videos per page to 100 (#13830)
+* [podomatic] Extend URL regular expression (#13827)
+* [cinchcast] Extend URL regular expression
+* [yandexdisk] Relax URL regular expression (#13824)
+* [vidme] Extract DASH and HLS formats
+- [teamfour] Remove extractor (#13782)
+* [pornhd] Fix extraction (#13783)
+* [udemy] Fix subtitles extraction (#13812)
+* [mlb] Extend URL regular expression (#13740, #13773)
++ [pbs] Add support for new URL schema (#13801)
+* [nrktv] Update API host (#13796)
+
+
+version 2017.07.30.1
+
+Core
+* [downloader/hls] Use redirect URL as manifest base (#13755)
 * [options] Correctly hide login info from debug outputs (#13696)
+
+Extractors
++ [watchbox] Add support for watchbox.de (#13739)
+- [clipfish] Remove extractor
++ [youjizz] Fix extraction (#13744)
++ [generic] Add support for another ooyala embed pattern (#13727)
++ [ard] Add support for lives (#13771)
+* [soundcloud] Update client id
++ [soundcloud:trackstation] Add support for track stations (#13733)
+* [svtplay] Use geo verification proxy for API request
+* [svtplay] Update API URL (#13767)
++ [yandexdisk] Add support for yadi.sk (#13755)
++ [megaphone] Add support for megaphone.fm
+* [amcnetworks] Make rating optional (#12453)
+* [cloudy] Fix extraction (#13737)
++ [nickru] Add support for nickelodeon.ru
+* [mtv] Improve thumbnal extraction
+* [nick] Automate geo-restriction bypass (#13711)
+* [niconico] Improve error reporting (#13696)


 version 2017.07.23

@@ -24,7 +279,7 @@ Extractors
 * [youku:show] Fix playlist extraction (#13248)
 + [dispeak] Recognize sevt subdomain (#13276)
 * [adn] Improve error reporting (#13663)
-* [crunchyroll] Relax series and season regex (#13659)
+* [crunchyroll] Relax series and season regular expression (#13659)
 + [spiegel:article] Add support for nexx iframe embeds (#13029)
 + [nexx:embed] Add support for iframe embeds
 * [nexx] Improve JS embed extraction
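The `bool_or_none` helper introduced in 2017.09.10 above is small enough to sketch; this is a plausible reading of what a utility with that name does, not a quote of the source:

```python
def bool_or_none(v, default=None):
    # Treat only genuine booleans as booleans; site JSON often carries
    # strings like 'true' or 0/1 integers that should not pass silently.
    return v if isinstance(v, bool) else default

# bool_or_none(True) -> True; bool_or_none('true') -> None
```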
Makefile (11 changes)

@@ -46,8 +46,15 @@ tar: youtube-dl.tar.gz
 pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish

 youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
-	zip --quiet youtube-dl youtube_dl/*.py youtube_dl/*/*.py
-	zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py
+	mkdir -p zip
+	for d in youtube_dl youtube_dl/downloader youtube_dl/extractor youtube_dl/postprocessor ; do \
+	  mkdir -p zip/$$d ;\
+	  cp -pPR $$d/*.py zip/$$d/ ;\
+	done
+	touch -t 200001010101 zip/youtube_dl/*.py zip/youtube_dl/*/*.py
+	mv zip/youtube_dl/__main__.py zip/
+	cd zip ; zip -q ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py
+	rm -rf zip
 	echo '#!$(PYTHON)' > youtube-dl
 	cat youtube-dl.zip >> youtube-dl
 	rm youtube-dl.zip
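The new recipe stages the package under a scratch `zip/` directory, pins every file's timestamp with `touch -t 200001010101` so repeated builds produce byte-identical archives, and then prepends a `#!$(PYTHON)` shebang to the zip. Python 3.5+ ships the same executable-zip idea as the stdlib `zipapp` module; a minimal sketch, with an illustrative source layout:

```python
# Build a shebang-prefixed zip the way the Makefile does by hand.
# Assumes build/ contains __main__.py at its root plus the youtube_dl/
# package; note zipapp does not normalize timestamps like touch -t does.
import zipapp

zipapp.create_archive(
    source='build',                     # staged tree with __main__.py at its root
    target='youtube-dl',                # output: executable zip archive
    interpreter='/usr/bin/env python',  # becomes the #! line
)
```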
README.md (62 changes)

@@ -25,7 +25,7 @@ If you do not have curl, you can alternatively use a recent wget:
     sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
     sudo chmod a+rx /usr/local/bin/youtube-dl

-Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
+Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](https://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).

 You can also use pip:

@@ -33,7 +33,7 @@ You can also use pip:

 This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.

-OS X users can install youtube-dl with [Homebrew](http://brew.sh/):
+OS X users can install youtube-dl with [Homebrew](https://brew.sh/):

     brew install youtube-dl

@@ -426,7 +426,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
                                      syntax. Example: --exec 'adb push {}
                                      /sdcard/Music/ && rm {}'
     --convert-subs FORMAT            Convert the subtitles to other format
-                                     (currently supported: srt|ass|vtt)
+                                     (currently supported: srt|ass|vtt|lrc)

 # CONFIGURATION

@@ -457,7 +457,7 @@ You can also use `--config-location` if you want to use custom configuration fil

 ### Authentication with `.netrc` file

-You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
+You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
 ```
 touch $HOME/.netrc
 chmod a-rwx,u+rw $HOME/.netrc
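youtube-dl parses this file itself, but the same format is readable with the stdlib `netrc` module, which makes for a quick well-formedness check; `youtube` below is just an example machine name:

```python
# Verify that ~/.netrc parses and holds credentials for an extractor.
from netrc import netrc

creds = netrc().authenticators('youtube')  # None if no matching machine entry
if creds:
    login, _account, password = creds
    print('found credentials for youtube:', login)
```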
@@ -484,7 +484,7 @@ The `-o` option allows users to indicate a template for the output file names.

 **tl;dr:** [navigate me to examples](#output-template-examples).

-The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. Allowed names along with sequence type are:
+The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. Allowed names along with sequence type are:

 - `id` (string): Video identifier
 - `title` (string): Video title
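The template is ordinary Python %-formatting applied to the video's metadata dict, which is essentially what youtube-dl does internally; a toy demonstration with made-up values:

```python
# '%(NAME)s' pulls NAME out of the metadata mapping; '%(NAME)05d'
# zero-pads numeric fields such as playlist_index.
info = {'title': 'Some video', 'id': 'BaW_jenozKc', 'ext': 'mp4', 'playlist_index': 3}
print('%(playlist_index)05d - %(title)s-%(id)s.%(ext)s' % info)
# -> 00003 - Some video-BaW_jenozKc.mp4
```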
@@ -599,7 +599,7 @@ If you are using an output template inside a Windows batch file then you must es

 #### Output template examples

-Note on Windows you may need to use double quotes instead of single.
+Note that on Windows you may need to use double quotes instead of single.

 ```bash
 $ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc

@@ -618,7 +618,7 @@ $ youtube-dl -o '%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)
 $ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/

 # Download entire series season keeping each series and each season in separate directory under C:/MyVideos
-$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" http://videomore.ru/kino_v_detalayah/5_sezon/367617
+$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" https://videomore.ru/kino_v_detalayah/5_sezon/367617

 # Stream the video being downloaded to stdout
 $ youtube-dl -o - BaW_jenozKc

@@ -686,7 +686,7 @@ If you want to preserve the old format selection behavior (prior to youtube-dl 2

 #### Format selection examples

-Note on Windows you may need to use double quotes instead of single.
+Note that on Windows you may need to use double quotes instead of single.

 ```bash
 # Download best mp4 format available or any other best if no mp4 available
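The same format selectors work through the embedding API via the `format` option; a sketch in the spirit of the README's examples (the format string below is one plausible match for the comment above, preferring mp4 and falling back to the best available overall):

```python
import youtube_dl

ydl_opts = {
    # bestvideo+bestaudio restricted to mp4/m4a, else best mp4, else best overall
    'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```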
@@ -731,17 +731,17 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231

 ### How do I update youtube-dl?

-If you've followed [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
+If you've followed [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).

 If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update.

-If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to http://yt-dl.org/ to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum.
+If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to https://yt-dl.org to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum.

 As a last resort, you can also uninstall the version installed by your package manager and follow our manual installation instructions. For that, remove the distribution's package, with a line like

     sudo apt-get remove -y youtube-dl

-Afterwards, simply follow [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html):
+Afterwards, simply follow [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html):

 ```
 sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl

@@ -781,11 +781,11 @@ Apparently YouTube requires you to pass a CAPTCHA test if you download too much.

 youtube-dl works fine on its own on most sites. However, if you want to convert video/audio, you'll need [avconv](https://libav.org/) or [ffmpeg](https://www.ffmpeg.org/). On some sites - most notably YouTube - videos can be retrieved in a higher quality format without sound. youtube-dl will detect whether avconv/ffmpeg is present and automatically pick the best option.

-Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](http://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed.
+Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](https://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed.

 ### I have downloaded a video but how can I play it?

-Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org/) or [mplayer](http://www.mplayerhq.hu/).
+Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](https://www.videolan.org/) or [mplayer](https://www.mplayerhq.hu/).

 ### I extracted a video URL with `-g`, but it does not play on another machine / in my web browser.

@@ -860,10 +860,10 @@ Use the `-o` to specify an [output template](#output-template), for example `-o

 ### How do I download a video starting with a `-`?

-Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the options with `--`:
+Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the options with `--`:

     youtube-dl -- -wNyEUrxzFU
-    youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU"
+    youtube-dl "https://www.youtube.com/watch?v=-wNyEUrxzFU"

 ### How do I pass cookies to youtube-dl?

@@ -877,9 +877,9 @@ Passing cookies to youtube-dl is a good way to workaround login when a particula

 ### How do I stream directly to media player?

-You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](http://www.videolan.org/) can be achieved with:
+You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](https://www.videolan.org/) can be achieved with:

-    youtube-dl -o - "http://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
+    youtube-dl -o - "https://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -

 ### How do I download only new videos from a playlist?

@@ -899,7 +899,7 @@ When youtube-dl detects an HLS video, it can download it either with the built-i

 When youtube-dl knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, youtube-dl will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of youtube-dl, with improvements of the built-in downloader and/or ffmpeg.

-In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](http://rg3.github.io/youtube-dl/supportedsites.html) cannot mandate one specific downloader.
+In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](https://rg3.github.io/youtube-dl/supportedsites.html) cannot mandate one specific downloader.

 If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case.
@@ -925,7 +925,7 @@ Feel free to bump the issue from time to time by writing a small comment ("Issue

 ### How can I detect whether a given URL is supported by youtube-dl?

-For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
+For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from https://example.com/video/1234567 to https://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.

 It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
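Since the generic extractor matches every URL, "supported" can only be approximated by asking whether some dedicated extractor claims the URL; a sketch using the public extractor list:

```python
# Rough check: does any non-generic extractor match this URL?
import youtube_dl.extractor

def probably_supported(url):
    return any(
        ie.suitable(url)
        for ie in youtube_dl.extractor.gen_extractors()
        if ie.IE_NAME != 'generic'
    )

print(probably_supported('https://www.youtube.com/watch?v=BaW_jenozKc'))  # True
```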
@@ -939,7 +939,7 @@ youtube-dl is an open-source project manned by too few volunteers, so we'd rathe

 # DEVELOPER INSTRUCTIONS

-Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
+Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution.

 To run youtube-dl as a developer, you don't need to build anything either. Simply execute

@@ -951,6 +951,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file
     python test/test_download.py
     nosetests

+See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
+
 If you want to create a build of youtube-dl yourself, you'll need

 * python

@@ -987,7 +989,7 @@ After you have ensured this site is distributing its content legally, you can fo
     class YourExtractorIE(InfoExtractor):
         _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
         _TEST = {
-            'url': 'http://yourextractor.com/watch/42',
+            'url': 'https://yourextractor.com/watch/42',
             'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
             'info_dict': {
                 'id': '42',

@@ -1018,10 +1020,10 @@ After you have ensured this site is distributing its content legally, you can fo
         }
     ```
 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
-6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
+6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
-8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
+8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
-9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
+9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:

         $ git add youtube_dl/extractor/extractors.py
         $ git add youtube_dl/extractor/yourextractor.py
@@ -1177,10 +1179,10 @@ import youtube_dl

 ydl_opts = {}
 with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
+    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
 ```

-Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
+Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312). For a start, if you want to intercept youtube-dl's output, set a `logger` object.

 Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
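Before that example's tail (shown in the next hunk), a sketch of the two hooks it relies on: a `logger` object needs `debug`, `warning` and `error` methods, and each function in `progress_hooks` receives a status dict per update. The class and function names here are illustrative:

```python
import youtube_dl

class QuietLogger(object):
    def debug(self, msg):
        pass            # swallow debug chatter
    def warning(self, msg):
        pass            # swallow warnings too
    def error(self, msg):
        print(msg)      # surface only errors

def on_progress(d):
    # d['status'] cycles through 'downloading' and ends at 'finished'
    if d['status'] == 'finished':
        print('Done downloading, now post-processing ...')

ydl_opts = {
    'logger': QuietLogger(),
    'progress_hooks': [on_progress],
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```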
@@ -1216,19 +1218,19 @@ ydl_opts = {
     'progress_hooks': [my_hook],
 }
 with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
+    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
 ```

 # BUGS

-Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](http://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
+Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).

 **Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
 ```
 $ youtube-dl -v <your command line>
 [debug] System config: []
 [debug] User config: []
-[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
+[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
 [debug] youtube-dl version 2015.12.06
 [debug] Git HEAD: 135392e

@@ -1259,7 +1261,7 @@ For bug reports, this means that your report should contain the *complete* outpu

 If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).

-**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `http://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `http://www.youtube.com/`) is *not* an example URL.
+**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.

 ### Are you using the latest version?
@@ -14,7 +14,7 @@ import os
 import sys
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-from test.helper import get_testcases
+from test.helper import gettestcases
 from youtube_dl.utils import compat_urllib_parse_urlparse
 from youtube_dl.utils import compat_urllib_request

@@ -24,7 +24,7 @@ if len(sys.argv) > 1:
 else:
     METHOD = 'EURISTIC'

-for test in get_testcases():
+for test in gettestcases():
     if METHOD == 'EURISTIC':
         try:
             webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
@@ -38,10 +38,12 @@
 - **afreecatv**: afreecatv.com
 - **afreecatv:global**: afreecatv.com
 - **AirMozilla**
+- **AliExpressLive**
 - **AlJazeera**
 - **Allocine**
 - **AlphaPorno**
 - **AMCNetworks**
+- **AmericasTestKitchen**
 - **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
 - **AnimeOnDemand**
 - **anitube.se**
@@ -155,8 +157,8 @@
 - **chirbit:profile**
 - **Cinchcast**
 - **CJSW**
-- **Clipfish**
 - **cliphunter**
+- **Clippit**
 - **ClipRs**
 - **Clipsyndicate**
 - **CloserToTruth**
@@ -295,6 +297,7 @@
 - **Funimation**
 - **FunnyOrDie**
 - **Fusion**
+- **Fux**
 - **FXNetworks**
 - **GameInformer**
 - **GameOne**
@@ -362,6 +365,7 @@
 - **IPrima**
 - **iqiyi**: 爱奇艺
 - **Ir90Tv**
+- **ITTF**
 - **ITV**
 - **ivi**: ivi.ru
 - **ivi:compilation**: ivi.ru compilations
@@ -375,6 +379,7 @@
 - **Jove**
 - **jpopsuki.tv**
 - **JWPlatform**
+- **Kakao**
 - **Kaltura**
 - **Kamcord**
 - **KanalPlay**: Kanal 5/9/11 Play
@@ -418,6 +423,7 @@
 - **limelight:channel_list**
 - **LiTV**
 - **LiveLeak**
+- **LiveLeakEmbed**
 - **livestream**
 - **livestream:original**
 - **LnkGo**
@@ -434,12 +440,14 @@
 - **MakerTV**
 - **mangomolo:live**
 - **mangomolo:video**
+- **ManyVids**
 - **MatchTV**
 - **MDR**: MDR.DE and KiKA
 - **media.ccc.de**
 - **Medialaan**
 - **Mediaset**
 - **Medici**
+- **megaphone.fm**: megaphone.fm embedded players
 - **Meipai**: 美拍
 - **MelonVOD**
 - **META**
@@ -472,7 +480,6 @@
 - **MovieFap**
 - **Moviezine**
 - **MovingImage**
-- **MPORA**
 - **MSN**
 - **mtg**: MTG services
 - **mtv**
@@ -588,6 +595,7 @@
 - **Openload**
 - **OraTV**
 - **orf:fm4**: radio FM4
+- **orf:fm4:story**: fm4.orf.at stories
 - **orf:iptv**: iptv.ORF.at
 - **orf:oe1**: Radio Österreich 1
 - **orf:tvthek**: ORF TVthek
@@ -621,7 +629,9 @@
 - **Pokemon**
 - **PolskieRadio**
 - **PolskieRadioCategory**
+- **PopcornTV**
 - **PornCom**
+- **PornerBros**
 - **PornFlip**
 - **PornHd**
 - **PornHub**: PornHub and Thumbzilla
@@ -630,6 +640,7 @@
 - **Pornotube**
 - **PornoVoisines**
 - **PornoXO**
+- **PornTube**
 - **PressTV**
 - **PrimeShareTV**
 - **PromptFile**
@@ -655,6 +666,8 @@
 - **RBMARadio**
 - **RDS**: RDS.ca
 - **RedBullTV**
+- **Reddit**
+- **RedditR**
 - **RedTube**
 - **RegioTV**
 - **RENTV**
@@ -694,6 +707,7 @@
 - **rutube:embed**: Rutube embedded videos
 - **rutube:movie**: Rutube movies
 - **rutube:person**: Rutube person videos
+- **rutube:playlist**: Rutube playlists
 - **RUTV**: RUTV.RU
 - **Ruutu**
 - **Ruv**
@@ -735,6 +749,7 @@
 - **soundcloud:playlist**
 - **soundcloud:search**: Soundcloud search
 - **soundcloud:set**
+- **soundcloud:trackstation**
 - **soundcloud:user**
 - **soundgasm**
 - **soundgasm:profile**
@@ -782,7 +797,6 @@
 - **teachertube:user:collection**: teachertube.com user and collection videos
 - **TeachingChannel**
 - **Teamcoco**
-- **TeamFourStar**
 - **TechTalks**
 - **techtv.mit.edu**
 - **ted**
@@ -952,6 +966,7 @@
 - **VODPl**
 - **VODPlatform**
 - **VoiceRepublic**
+- **Voot**
 - **VoxMedia**
 - **Vporn**
 - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
@@ -969,6 +984,7 @@
 - **washingtonpost**
 - **washingtonpost:article**
 - **wat.tv**
+- **WatchBox**
 - **WatchIndianPorn**: Watch Indian Porn
 - **WDR**
 - **wdr:mobile**
@@ -1004,6 +1020,7 @@
 - **XVideos**
 - **XXXYMovies**
 - **Yahoo**: Yahoo screen and movies
+- **YandexDisk**
 - **yandexmusic:album**: Яндекс.Музыка - Альбом
 - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
 - **yandexmusic:track**: Яндекс.Музыка - Трек
@@ -10,6 +10,7 @@ import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

 from test.helper import FakeYDL, expect_dict, expect_value
+from youtube_dl.compat import compat_etree_fromstring
 from youtube_dl.extractor.common import InfoExtractor
 from youtube_dl.extractor import YoutubeIE, get_info_extractor
 from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
@@ -488,6 +489,91 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
         self.ie._sort_formats(formats)
         expect_value(self, formats, expected_formats, None)

+    def test_parse_mpd_formats(self):
+        _TEST_CASES = [
+            (
+                # https://github.com/rg3/youtube-dl/issues/13919
+                'float_duration',
+                'http://unknown/manifest.mpd',
+                [{
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '318597',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.42001f',
+                    'tbr': 318.597,
+                    'width': 340,
+                    'height': 192,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '638590',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.42001f',
+                    'tbr': 638.59,
+                    'width': 512,
+                    'height': 288,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '1022565',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.4d001f',
+                    'tbr': 1022.565,
+                    'width': 688,
+                    'height': 384,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '2046506',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.4d001f',
+                    'tbr': 2046.506,
+                    'width': 1024,
+                    'height': 576,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '3998017',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.640029',
+                    'tbr': 3998.017,
+                    'width': 1280,
+                    'height': 720,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '5997485',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.640032',
+                    'tbr': 5997.485,
+                    'width': 1920,
+                    'height': 1080,
+                }]
+            ),
+        ]
+
+        for mpd_file, mpd_url, expected_formats in _TEST_CASES:
+            with io.open('./test/testdata/mpd/%s.mpd' % mpd_file,
+                         mode='r', encoding='utf-8') as f:
+                formats = self.ie._parse_mpd_formats(
+                    compat_etree_fromstring(f.read().encode('utf-8')),
+                    mpd_url=mpd_url)
+                self.ie._sort_formats(formats)
+                expect_value(self, formats, expected_formats, None)
+

 if __name__ == '__main__':
     unittest.main()
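The expected `tbr` values in this test are simply the MPD `bandwidth` attributes converted from bit/s to kbit/s, which is why a misparsed float `duration` broke the whole manifest (issue 13919). A quick sanity check of that convention:

```
# tbr (kbit/s) as derived from the MPD bandwidth attribute (bit/s)
bandwidths = [318597, 638590, 1022565, 2046506, 3998017, 5997485]
print([b / 1000.0 for b in bandwidths])
# [318.597, 638.59, 1022.565, 2046.506, 3998.017, 5997.485]
```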
@@ -371,6 +371,19 @@ class TestFormatSelection(unittest.TestCase):
         ydl = YDL({'format': 'best[height>360]'})
         self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())

+    def test_format_selection_issue_10083(self):
+        # See https://github.com/rg3/youtube-dl/issues/10083
+        formats = [
+            {'format_id': 'regular', 'height': 360, 'url': TEST_URL},
+            {'format_id': 'video', 'height': 720, 'acodec': 'none', 'url': TEST_URL},
+            {'format_id': 'audio', 'vcodec': 'none', 'url': TEST_URL},
+        ]
+        info_dict = _make_result(formats)
+
+        ydl = YDL({'format': 'best[height>360]/bestvideo[height>360]+bestaudio'})
+        ydl.process_ie_result(info_dict.copy())
+        self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'video+audio')
+
     def test_invalid_format_specs(self):
         def assert_syntax_error(format_spec):
             ydl = YDL({'format': format_spec})
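The selector in this test is evaluated left to right: `best[height>360]` asks for a ready-made format above 360p, and only if none exists does the fallback merge the best tall video-only stream with the best audio. A minimal sketch of using the same selector through the public API (no download is started here):

```
import youtube_dl

ydl = youtube_dl.YoutubeDL({
    # try a complete >360p format first, otherwise merge video+audio
    'format': 'best[height>360]/bestvideo[height>360]+bestaudio',
})
```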
@@ -279,6 +279,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unescapeHTML('&#47;'), '/')
         self.assertEqual(unescapeHTML('&eacute;'), 'é')
         self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
+        self.assertEqual(unescapeHTML('&a&quot;'), '&a"')
         # HTML5 entities
         self.assertEqual(unescapeHTML('&period;&apos;'), '.\'')
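The new assertion pins down mixed input: a bare `&a` that is not a valid entity must pass through untouched while the adjacent `&quot;` is still unescaped. Quick check:

```
from youtube_dl.utils import unescapeHTML

print(unescapeHTML('&a&quot;'))  # -> &a"
print(unescapeHTML('&eacute;'))  # -> é
```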
@@ -1063,7 +1064,7 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
                 <p begin="3" dur="-1">Ignored, three</p>
             </div>
         </body>
-        </tt>'''
+        </tt>'''.encode('utf-8')
         srt_data = '''1
 00:00:00,000 --> 00:00:01,000
 The following line contains Chinese characters and special symbols
@@ -1088,7 +1089,7 @@ Line
                 <p begin="0" end="1">The first line</p>
             </div>
         </body>
-        </tt>'''
+        </tt>'''.encode('utf-8')
         srt_data = '''1
 00:00:00,000 --> 00:00:01,000
 The first line
@@ -1114,7 +1115,7 @@ The first line
             <p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p>
             </div>
         </body>
-        </tt>'''
+        </tt>'''.encode('utf-8')
         srt_data = '''1
 00:00:02,080 --> 00:00:05,839
 <font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font>
@@ -1137,6 +1138,26 @@ part 3</font></u>
 '''
         self.assertEqual(dfxp2srt(dfxp_data_with_style), srt_data)

+        dfxp_data_non_utf8 = '''<?xml version="1.0" encoding="UTF-16"?>
+            <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
+            <body>
+                <div xml:lang="en">
+                    <p begin="0" end="1">Line 1</p>
+                    <p begin="1" end="2">第二行</p>
+                </div>
+            </body>
+        </tt>'''.encode('utf-16')
+        srt_data = '''1
+00:00:00,000 --> 00:00:01,000
+Line 1
+
+2
+00:00:01,000 --> 00:00:02,000
+第二行
+
+'''
+        self.assertEqual(dfxp2srt(dfxp_data_non_utf8), srt_data)
+
     def test_cli_option(self):
         self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
         self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
@@ -1182,6 +1203,10 @@ part 3</font></u>
             cli_bool_option(
                 {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
             ['--check-certificate=true'])
+        self.assertEqual(
+            cli_bool_option(
+                {}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
+            [])

     def test_ohdave_rsa_encrypt(self):
         N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
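The new assertion covers the case where the option is absent from the params dict entirely: no flag should be emitted at all. Note the inversion, `nocheckcertificate=True` maps to `--check-certificate=false`:

```
from youtube_dl.utils import cli_bool_option

# Param present: emit the flag with the matching (inverted) value.
print(cli_bool_option(
    {'nocheckcertificate': True},
    '--check-certificate', 'nocheckcertificate', 'false', 'true', '='))
# -> ['--check-certificate=false']

# Param absent (the newly tested case): emit nothing.
print(cli_bool_option(
    {}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='))
# -> []
```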
|
18
test/testdata/mpd/float_duration.mpd
vendored
Normal file
18
test/testdata/mpd/float_duration.mpd
vendored
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<MPD xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="urn:mpeg:dash:schema:mpd:2011" type="static" minBufferTime="PT2S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" mediaPresentationDuration="PT6014S">
|
||||||
|
<Period bitstreamSwitching="true">
|
||||||
|
<AdaptationSet mimeType="audio/mp4" codecs="mp4a.40.2" startWithSAP="1" segmentAlignment="true">
|
||||||
|
<SegmentTemplate timescale="1000000" presentationTimeOffset="0" initialization="ai_$RepresentationID$.mp4d" media="a_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate>
|
||||||
|
<Representation id="318597" bandwidth="61587"></Representation>
|
||||||
|
</AdaptationSet>
|
||||||
|
<AdaptationSet mimeType="video/mp4" startWithSAP="1" segmentAlignment="true">
|
||||||
|
<SegmentTemplate timescale="1000000" presentationTimeOffset="0" initialization="vi_$RepresentationID$.mp4d" media="v_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate>
|
||||||
|
<Representation id="318597" codecs="avc1.42001f" width="340" height="192" bandwidth="318597"></Representation>
|
||||||
|
<Representation id="638590" codecs="avc1.42001f" width="512" height="288" bandwidth="638590"></Representation>
|
||||||
|
<Representation id="1022565" codecs="avc1.4d001f" width="688" height="384" bandwidth="1022565"></Representation>
|
||||||
|
<Representation id="2046506" codecs="avc1.4d001f" width="1024" height="576" bandwidth="2046506"></Representation>
|
||||||
|
<Representation id="3998017" codecs="avc1.640029" width="1280" height="720" bandwidth="3998017"></Representation>
|
||||||
|
<Representation id="5997485" codecs="avc1.640032" width="1920" height="1080" bandwidth="5997485"></Representation>
|
||||||
|
</AdaptationSet>
|
||||||
|
</Period>
|
||||||
|
</MPD>
|
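The regression trigger in this fixture is the float `duration="2000000.0"`. With `timescale="1000000"` each segment lasts 2 s, so for `mediaPresentationDuration="PT6014S"` the extractor has to derive roughly the following segment count, which only works once the duration is parsed as a float:

```
import math

timescale = 1000000
segment_duration = 2000000.0   # float in this manifest
presentation_duration = 6014   # seconds, from PT6014S

seconds_per_segment = segment_duration / timescale            # 2.0
print(int(math.ceil(presentation_duration / seconds_per_segment)))  # 3007
```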
@@ -92,6 +92,7 @@ from .utils import (
 )
 from .cache import Cache
 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
+from .extractor.openload import PhantomJSwrapper
 from .downloader import get_suitable_downloader
 from .downloader.rtmp import rtmpdump_version
 from .postprocessor import (
@@ -303,6 +304,12 @@ class YoutubeDL(object):
                        otherwise prefer avconv.
     postprocessor_args: A list of additional command-line arguments for the
                         postprocessor.
+
+    The following options are used by the Youtube extractor:
+    youtube_include_dash_manifest: If True (default), DASH manifests and related
+                        data will be downloaded and processed by extractor.
+                        You can reduce network I/O by disabling it if you don't
+                        care about DASH.
     """

     _NUMERIC_FIELDS = set((
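A minimal sketch of the newly documented option, disabling DASH manifest processing on YouTube to save network round trips:

```
import youtube_dl

ydl = youtube_dl.YoutubeDL({'youtube_include_dash_manifest': False})
```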
@@ -1515,12 +1522,14 @@ class YoutubeDL(object):

         def is_wellformed(f):
             url = f.get('url')
-            valid_url = url and isinstance(url, compat_str)
-            if not valid_url:
+            if not url:
                 self.report_warning(
                     '"url" field is missing or empty - skipping format, '
                     'there is an error in extractor')
-            return valid_url
+                return False
+            if isinstance(url, bytes):
+                sanitize_string_field(f, 'url')
+            return True

         # Filter out malformed formats for better extraction robustness
         formats = list(filter(is_wellformed, formats))
@@ -1532,7 +1541,7 @@ class YoutubeDL(object):
             sanitize_string_field(format, 'format_id')
             sanitize_numeric_fields(format)
             format['url'] = sanitize_url(format['url'])
-            if format.get('format_id') is None:
+            if not format.get('format_id'):
                 format['format_id'] = compat_str(i)
             else:
                 # Sanitize format_id from characters used in format selector expression
@@ -1740,12 +1749,17 @@ class YoutubeDL(object):
         if filename is None:
             return

+        def ensure_dir_exists(path):
             try:
-                dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
+                dn = os.path.dirname(path)
                 if dn and not os.path.exists(dn):
                     os.makedirs(dn)
+                return True
             except (OSError, IOError) as err:
                 self.report_error('unable to create directory ' + error_to_compat_str(err))
+                return False
+
+        if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
             return

         if self.params.get('writedescription', False):
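The point of the refactor is reuse: the one-shot try/except becomes a helper that the multi-format download path (see the `requested_formats` hunk further down) can call once per output file. A standalone sketch of the same pattern, with plain printing in place of `report_error`:

```
import os

def ensure_dir_exists(path):
    # Create the parent directory of `path` if needed; False on failure.
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        print('unable to create directory %s' % err)
        return False

ensure_dir_exists('/tmp/ytdl-demo/subdir/video.mp4')  # illustrative path
```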
@@ -1788,29 +1802,30 @@ class YoutubeDL(object):
             ie = self.get_info_extractor(info_dict['extractor_key'])
             for sub_lang, sub_info in subtitles.items():
                 sub_format = sub_info['ext']
-                if sub_info.get('data') is not None:
-                    sub_data = sub_info['data']
-                else:
-                    try:
-                        sub_data = ie._download_webpage(
-                            sub_info['url'], info_dict['id'], note=False)
-                    except ExtractorError as err:
-                        self.report_warning('Unable to download subtitle for "%s": %s' %
-                                            (sub_lang, error_to_compat_str(err.cause)))
-                        continue
-                try:
-                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
-                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
-                        self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
-                    else:
-                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
+                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
+                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
+                    self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
+                else:
+                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
+                    if sub_info.get('data') is not None:
+                        try:
                             # Use newline='' to prevent conversion of newline characters
                             # See https://github.com/rg3/youtube-dl/issues/10268
                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
-                                subfile.write(sub_data)
+                                subfile.write(sub_info['data'])
                         except (OSError, IOError):
                             self.report_error('Cannot write subtitles file ' + sub_filename)
                             return
+                    else:
+                        try:
+                            sub_data = ie._request_webpage(
+                                sub_info['url'], info_dict['id'], note=False).read()
+                            with io.open(encodeFilename(sub_filename), 'wb') as subfile:
+                                subfile.write(sub_data)
+                        except (ExtractorError, IOError, OSError, ValueError) as err:
+                            self.report_warning('Unable to download subtitle for "%s": %s' %
+                                                (sub_lang, error_to_compat_str(err)))
+                            continue

         if self.params.get('writeinfojson', False):
             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
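The core distinction this hunk introduces: subtitle data that the extractor already decoded is written as text (with `newline=''` so `\r\n` survives, per issue 10268), while data fetched over HTTP is written as raw bytes without any decoding step. A self-contained sketch of that split, not the actual implementation:

```
import io

def write_subtitle(path, data):
    if isinstance(data, bytes):
        # fetched over the wire: keep the raw bytes intact
        with io.open(path, 'wb') as f:
            f.write(data)
    else:
        # already-decoded text: newline='' prevents newline conversion
        with io.open(path, 'w', encoding='utf-8', newline='') as f:
            f.write(data)
```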
@@ -1883,8 +1898,11 @@ class YoutubeDL(object):
             for f in requested_formats:
                 new_info = dict(info_dict)
                 new_info.update(f)
-                fname = self.prepare_filename(new_info)
-                fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
+                fname = prepend_extension(
+                    self.prepare_filename(new_info),
+                    'f%s' % f['format_id'], new_info['ext'])
+                if not ensure_dir_exists(fname):
+                    return
                 downloaded.append(fname)
                 partial_success = dl(fname, new_info)
                 success = success and partial_success
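`prepend_extension` is what builds the per-format temporary name here, inserting the format id before the real extension:

```
from youtube_dl.utils import prepend_extension

print(prepend_extension('My Video.mp4', 'f137', 'mp4'))
# -> My Video.f137.mp4
```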
@@ -2238,6 +2256,7 @@ class YoutubeDL(object):

         exe_versions = FFmpegPostProcessor.get_versions(self)
         exe_versions['rtmpdump'] = rtmpdump_version()
+        exe_versions['phantomjs'] = PhantomJSwrapper._version()
         exe_str = ', '.join(
             '%s %s' % (exe, v)
             for exe, v in sorted(exe_versions.items())
@@ -206,7 +206,7 @@ def _real_main(argv=None):
         if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
             parser.error('invalid video recode format specified')
     if opts.convertsubtitles is not None:
-        if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
+        if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']:
             parser.error('invalid subtitle format specified')

     if opts.date is not None:
@@ -6,6 +6,7 @@ import collections
 import email
 import getpass
 import io
+import itertools
 import optparse
 import os
 import re
@@ -15,7 +16,6 @@ import socket
 import struct
 import subprocess
 import sys
-import itertools
 import xml.etree.ElementTree

@@ -2898,6 +2898,13 @@ else:
     compat_struct_pack = struct.pack
     compat_struct_unpack = struct.unpack

+try:
+    from future_builtins import zip as compat_zip
+except ImportError:  # not 2.6+ or is 3.x
+    try:
+        from itertools import izip as compat_zip  # < 2.5 or 3.x
+    except ImportError:
+        compat_zip = zip
+

 __all__ = [
     'compat_HTMLParseError',
@@ -2948,5 +2955,6 @@ __all__ = [
     'compat_urlretrieve',
     'compat_xml_parse_error',
     'compat_xpath',
+    'compat_zip',
     'workaround_optparse_bug9161',
 ]
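After this change, `compat_zip` resolves to a lazy iterator on every interpreter: the built-in `zip` on Python 3, `future_builtins.zip` (an `itertools.izip` alias) on 2.6+, and `itertools.izip` or the eager built-in as last resorts. Usage is identical everywhere:

```
from youtube_dl.compat import compat_zip

pairs = compat_zip([1, 2, 3], ['a', 'b', 'c'])  # lazy on all interpreters
print(list(pairs))  # [(1, 'a'), (2, 'b'), (3, 'c')]
```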
|
@ -304,11 +304,11 @@ class FileDownloader(object):
|
|||||||
"""Report attempt to resume at given byte."""
|
"""Report attempt to resume at given byte."""
|
||||||
self.to_screen('[download] Resuming download at byte %s' % resume_len)
|
self.to_screen('[download] Resuming download at byte %s' % resume_len)
|
||||||
|
|
||||||
def report_retry(self, count, retries):
|
def report_retry(self, err, count, retries):
|
||||||
"""Report retry in case of HTTP error 5xx"""
|
"""Report retry in case of HTTP error 5xx"""
|
||||||
self.to_screen(
|
self.to_screen(
|
||||||
'[download] Got server HTTP error. Retrying (attempt %d of %s)...'
|
'[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
|
||||||
% (count, self.format_retries(retries)))
|
% (error_to_compat_str(err), count, self.format_retries(retries)))
|
||||||
|
|
||||||
def report_file_already_downloaded(self, file_name):
|
def report_file_already_downloaded(self, file_name):
|
||||||
"""Report file has already been fully downloaded."""
|
"""Report file has already been fully downloaded."""
|
||||||
|
@ -2,6 +2,7 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from .fragment import FragmentFD
|
from .fragment import FragmentFD
|
||||||
from ..compat import compat_urllib_error
|
from ..compat import compat_urllib_error
|
||||||
|
from ..utils import urljoin
|
||||||
|
|
||||||
|
|
||||||
class DashSegmentsFD(FragmentFD):
|
class DashSegmentsFD(FragmentFD):
|
||||||
@ -12,12 +13,13 @@ class DashSegmentsFD(FragmentFD):
|
|||||||
FD_NAME = 'dashsegments'
|
FD_NAME = 'dashsegments'
|
||||||
|
|
||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
segments = info_dict['fragments'][:1] if self.params.get(
|
fragment_base_url = info_dict.get('fragment_base_url')
|
||||||
|
fragments = info_dict['fragments'][:1] if self.params.get(
|
||||||
'test', False) else info_dict['fragments']
|
'test', False) else info_dict['fragments']
|
||||||
|
|
||||||
ctx = {
|
ctx = {
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
'total_frags': len(segments),
|
'total_frags': len(fragments),
|
||||||
}
|
}
|
||||||
|
|
||||||
self._prepare_and_start_frag_download(ctx)
|
self._prepare_and_start_frag_download(ctx)
|
||||||
@ -26,7 +28,7 @@ class DashSegmentsFD(FragmentFD):
|
|||||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||||
|
|
||||||
frag_index = 0
|
frag_index = 0
|
||||||
for i, segment in enumerate(segments):
|
for i, fragment in enumerate(fragments):
|
||||||
frag_index += 1
|
frag_index += 1
|
||||||
if frag_index <= ctx['fragment_index']:
|
if frag_index <= ctx['fragment_index']:
|
||||||
continue
|
continue
|
||||||
@ -36,7 +38,11 @@ class DashSegmentsFD(FragmentFD):
|
|||||||
count = 0
|
count = 0
|
||||||
while count <= fragment_retries:
|
while count <= fragment_retries:
|
||||||
try:
|
try:
|
||||||
success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
|
fragment_url = fragment.get('url')
|
||||||
|
if not fragment_url:
|
||||||
|
assert fragment_base_url
|
||||||
|
fragment_url = urljoin(fragment_base_url, fragment['path'])
|
||||||
|
success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
|
||||||
if not success:
|
if not success:
|
||||||
return False
|
return False
|
||||||
self._append_fragment(ctx, frag_content)
|
self._append_fragment(ctx, frag_content)
|
||||||
|
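Fragments may now carry only a relative `path`, which gets resolved against `fragment_base_url` with the `urljoin` helper (the URL below is illustrative):

```
from youtube_dl.utils import urljoin

print(urljoin('https://cdn.example.com/video/', 'seg-0001.m4s'))
# -> https://cdn.example.com/video/seg-0001.m4s
```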
@@ -151,10 +151,15 @@ class FragmentFD(FileDownloader):
         if self.__do_ytdl_file(ctx):
             if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
                 self._read_ytdl_file(ctx)
+                if ctx['fragment_index'] > 0 and resume_len == 0:
+                    self.report_error(
+                        'Inconsistent state of incomplete fragment download. '
+                        'Restarting from the beginning...')
+                    ctx['fragment_index'] = resume_len = 0
+                    self._write_ytdl_file(ctx)
             else:
                 self._write_ytdl_file(ctx)
-                if ctx['fragment_index'] > 0:
-                    assert resume_len > 0
+                assert ctx['fragment_index'] == 0

         dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)

@@ -59,9 +59,9 @@ class HlsFD(FragmentFD):
         man_url = info_dict['url']
         self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)

-        manifest = self.ydl.urlopen(self._prepare_url(info_dict, man_url)).read()
-
-        s = manifest.decode('utf-8', 'ignore')
+        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
+        man_url = urlh.geturl()
+        s = urlh.read().decode('utf-8', 'ignore')

         if not self.can_download(s, info_dict):
             if info_dict.get('extra_param_to_segment_url'):
|
|||||||
class HttpFD(FileDownloader):
|
class HttpFD(FileDownloader):
|
||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
url = info_dict['url']
|
url = info_dict['url']
|
||||||
tmpfilename = self.temp_name(filename)
|
|
||||||
stream = None
|
class DownloadContext(dict):
|
||||||
|
__getattr__ = dict.get
|
||||||
|
__setattr__ = dict.__setitem__
|
||||||
|
__delattr__ = dict.__delitem__
|
||||||
|
|
||||||
|
ctx = DownloadContext()
|
||||||
|
ctx.filename = filename
|
||||||
|
ctx.tmpfilename = self.temp_name(filename)
|
||||||
|
ctx.stream = None
|
||||||
|
|
||||||
# Do not include the Accept-Encoding header
|
# Do not include the Accept-Encoding header
|
||||||
headers = {'Youtubedl-no-compression': 'True'}
|
headers = {'Youtubedl-no-compression': 'True'}
|
||||||
@ -38,46 +46,51 @@ class HttpFD(FileDownloader):
|
|||||||
if is_test:
|
if is_test:
|
||||||
request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))
|
request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))
|
||||||
|
|
||||||
# Establish possible resume length
|
ctx.open_mode = 'wb'
|
||||||
if os.path.isfile(encodeFilename(tmpfilename)):
|
ctx.resume_len = 0
|
||||||
resume_len = os.path.getsize(encodeFilename(tmpfilename))
|
|
||||||
else:
|
|
||||||
resume_len = 0
|
|
||||||
|
|
||||||
open_mode = 'wb'
|
|
||||||
if resume_len != 0:
|
|
||||||
if self.params.get('continuedl', True):
|
if self.params.get('continuedl', True):
|
||||||
self.report_resuming_byte(resume_len)
|
# Establish possible resume length
|
||||||
request.add_header('Range', 'bytes=%d-' % resume_len)
|
if os.path.isfile(encodeFilename(ctx.tmpfilename)):
|
||||||
open_mode = 'ab'
|
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
|
||||||
else:
|
|
||||||
resume_len = 0
|
|
||||||
|
|
||||||
count = 0
|
count = 0
|
||||||
retries = self.params.get('retries', 0)
|
retries = self.params.get('retries', 0)
|
||||||
while count <= retries:
|
|
||||||
|
class SucceedDownload(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class RetryDownload(Exception):
|
||||||
|
def __init__(self, source_error):
|
||||||
|
self.source_error = source_error
|
||||||
|
|
||||||
|
def establish_connection():
|
||||||
|
if ctx.resume_len != 0:
|
||||||
|
self.report_resuming_byte(ctx.resume_len)
|
||||||
|
request.add_header('Range', 'bytes=%d-' % ctx.resume_len)
|
||||||
|
ctx.open_mode = 'ab'
|
||||||
# Establish connection
|
# Establish connection
|
||||||
try:
|
try:
|
||||||
data = self.ydl.urlopen(request)
|
ctx.data = self.ydl.urlopen(request)
|
||||||
# When trying to resume, Content-Range HTTP header of response has to be checked
|
# When trying to resume, Content-Range HTTP header of response has to be checked
|
||||||
# to match the value of requested Range HTTP header. This is due to a webservers
|
# to match the value of requested Range HTTP header. This is due to a webservers
|
||||||
# that don't support resuming and serve a whole file with no Content-Range
|
# that don't support resuming and serve a whole file with no Content-Range
|
||||||
# set in response despite of requested Range (see
|
# set in response despite of requested Range (see
|
||||||
# https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
|
# https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
|
||||||
if resume_len > 0:
|
if ctx.resume_len > 0:
|
||||||
content_range = data.headers.get('Content-Range')
|
content_range = ctx.data.headers.get('Content-Range')
|
||||||
if content_range:
|
if content_range:
|
||||||
content_range_m = re.search(r'bytes (\d+)-', content_range)
|
content_range_m = re.search(r'bytes (\d+)-', content_range)
|
||||||
# Content-Range is present and matches requested Range, resume is possible
|
# Content-Range is present and matches requested Range, resume is possible
|
||||||
if content_range_m and resume_len == int(content_range_m.group(1)):
|
if content_range_m and ctx.resume_len == int(content_range_m.group(1)):
|
||||||
break
|
return
|
||||||
# Content-Range is either not present or invalid. Assuming remote webserver is
|
# Content-Range is either not present or invalid. Assuming remote webserver is
|
||||||
# trying to send the whole file, resume is not possible, so wiping the local file
|
# trying to send the whole file, resume is not possible, so wiping the local file
|
||||||
# and performing entire redownload
|
# and performing entire redownload
|
||||||
self.report_unable_to_resume()
|
self.report_unable_to_resume()
|
||||||
resume_len = 0
|
ctx.resume_len = 0
|
||||||
open_mode = 'wb'
|
ctx.open_mode = 'wb'
|
||||||
break
|
return
|
||||||
except (compat_urllib_error.HTTPError, ) as err:
|
except (compat_urllib_error.HTTPError, ) as err:
|
||||||
if (err.code < 500 or err.code >= 600) and err.code != 416:
|
if (err.code < 500 or err.code >= 600) and err.code != 416:
|
||||||
# Unexpected HTTP error
|
# Unexpected HTTP error
|
||||||
@ -86,15 +99,15 @@ class HttpFD(FileDownloader):
|
|||||||
# Unable to resume (requested range not satisfiable)
|
# Unable to resume (requested range not satisfiable)
|
||||||
try:
|
try:
|
||||||
# Open the connection again without the range header
|
# Open the connection again without the range header
|
||||||
data = self.ydl.urlopen(basic_request)
|
ctx.data = self.ydl.urlopen(basic_request)
|
||||||
content_length = data.info()['Content-Length']
|
content_length = ctx.data.info()['Content-Length']
|
||||||
except (compat_urllib_error.HTTPError, ) as err:
|
except (compat_urllib_error.HTTPError, ) as err:
|
||||||
if err.code < 500 or err.code >= 600:
|
if err.code < 500 or err.code >= 600:
|
||||||
raise
|
raise
|
||||||
else:
|
else:
|
||||||
# Examine the reported length
|
# Examine the reported length
|
||||||
if (content_length is not None and
|
if (content_length is not None and
|
||||||
(resume_len - 100 < int(content_length) < resume_len + 100)):
|
(ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
|
||||||
# The file had already been fully downloaded.
|
# The file had already been fully downloaded.
|
||||||
# Explanation to the above condition: in issue #175 it was revealed that
|
# Explanation to the above condition: in issue #175 it was revealed that
|
||||||
# YouTube sometimes adds or removes a few bytes from the end of the file,
|
# YouTube sometimes adds or removes a few bytes from the end of the file,
|
||||||
@ -102,36 +115,30 @@ class HttpFD(FileDownloader):
|
|||||||
# I decided to implement a suggested change and consider the file
|
# I decided to implement a suggested change and consider the file
|
||||||
# completely downloaded if the file size differs less than 100 bytes from
|
# completely downloaded if the file size differs less than 100 bytes from
|
||||||
# the one in the hard drive.
|
# the one in the hard drive.
|
||||||
self.report_file_already_downloaded(filename)
|
self.report_file_already_downloaded(ctx.filename)
|
||||||
self.try_rename(tmpfilename, filename)
|
self.try_rename(ctx.tmpfilename, ctx.filename)
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
'filename': filename,
|
'filename': ctx.filename,
|
||||||
'status': 'finished',
|
'status': 'finished',
|
||||||
'downloaded_bytes': resume_len,
|
'downloaded_bytes': ctx.resume_len,
|
||||||
'total_bytes': resume_len,
|
'total_bytes': ctx.resume_len,
|
||||||
})
|
})
|
||||||
return True
|
raise SucceedDownload()
|
||||||
else:
|
else:
|
||||||
# The length does not match, we start the download over
|
# The length does not match, we start the download over
|
||||||
self.report_unable_to_resume()
|
self.report_unable_to_resume()
|
||||||
resume_len = 0
|
ctx.resume_len = 0
|
||||||
open_mode = 'wb'
|
ctx.open_mode = 'wb'
|
||||||
break
|
return
|
||||||
except socket.error as e:
|
raise RetryDownload(err)
|
||||||
if e.errno != errno.ECONNRESET:
|
except socket.error as err:
|
||||||
|
if err.errno != errno.ECONNRESET:
|
||||||
# Connection reset is no problem, just retry
|
# Connection reset is no problem, just retry
|
||||||
raise
|
raise
|
||||||
|
raise RetryDownload(err)
|
||||||
|
|
||||||
# Retry
|
def download():
|
||||||
count += 1
|
data_len = ctx.data.info().get('Content-length', None)
|
||||||
if count <= retries:
|
|
||||||
self.report_retry(count, retries)
|
|
||||||
|
|
||||||
if count > retries:
|
|
||||||
self.report_error('giving up after %s retries' % retries)
|
|
||||||
return False
|
|
||||||
|
|
||||||
data_len = data.info().get('Content-length', None)
|
|
||||||
|
|
||||||
# Range HTTP header may be ignored/unsupported by a webserver
|
# Range HTTP header may be ignored/unsupported by a webserver
|
||||||
# (e.g. extractor/scivee.py, extractor/bambuser.py).
|
# (e.g. extractor/scivee.py, extractor/bambuser.py).
|
||||||
@ -142,7 +149,7 @@ class HttpFD(FileDownloader):
|
|||||||
data_len = self._TEST_FILE_SIZE
|
data_len = self._TEST_FILE_SIZE
|
||||||
|
|
||||||
if data_len is not None:
|
if data_len is not None:
|
||||||
data_len = int(data_len) + resume_len
|
data_len = int(data_len) + ctx.resume_len
|
||||||
min_data_len = self.params.get('min_filesize')
|
min_data_len = self.params.get('min_filesize')
|
||||||
max_data_len = self.params.get('max_filesize')
|
max_data_len = self.params.get('max_filesize')
|
||||||
if min_data_len is not None and data_len < min_data_len:
|
if min_data_len is not None and data_len < min_data_len:
|
||||||
@ -152,17 +159,34 @@ class HttpFD(FileDownloader):
|
|||||||
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
byte_counter = 0 + resume_len
|
byte_counter = 0 + ctx.resume_len
|
||||||
block_size = self.params.get('buffersize', 1024)
|
block_size = self.params.get('buffersize', 1024)
|
||||||
start = time.time()
|
start = time.time()
|
||||||
|
|
||||||
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
|
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
|
||||||
now = None # needed for slow_down() in the first loop run
|
now = None # needed for slow_down() in the first loop run
|
||||||
before = start # start measuring
|
before = start # start measuring
|
||||||
while True:
|
|
||||||
|
|
||||||
|
def retry(e):
|
||||||
|
if ctx.tmpfilename != '-':
|
||||||
|
ctx.stream.close()
|
||||||
|
ctx.stream = None
|
||||||
|
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
|
||||||
|
raise RetryDownload(e)
|
||||||
|
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
# Download and write
|
# Download and write
|
||||||
data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
|
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
|
||||||
|
# socket.timeout is a subclass of socket.error but may not have
|
||||||
|
# errno set
|
||||||
|
except socket.timeout as e:
|
||||||
|
retry(e)
|
||||||
|
except socket.error as e:
|
||||||
|
if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
|
||||||
|
raise
|
||||||
|
retry(e)
|
||||||
|
|
||||||
byte_counter += len(data_block)
|
byte_counter += len(data_block)
|
||||||
|
|
||||||
# exit loop when download is finished
|
# exit loop when download is finished
|
||||||
@ -170,31 +194,32 @@ class HttpFD(FileDownloader):
|
|||||||
break
|
break
|
||||||
|
|
||||||
# Open destination file just in time
|
# Open destination file just in time
|
||||||
if stream is None:
|
if ctx.stream is None:
|
||||||
try:
|
try:
|
||||||
(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
|
ctx.stream, ctx.tmpfilename = sanitize_open(
|
||||||
assert stream is not None
|
ctx.tmpfilename, ctx.open_mode)
|
||||||
filename = self.undo_temp_name(tmpfilename)
|
assert ctx.stream is not None
|
||||||
self.report_destination(filename)
|
ctx.filename = self.undo_temp_name(ctx.tmpfilename)
|
||||||
|
self.report_destination(ctx.filename)
|
||||||
except (OSError, IOError) as err:
|
except (OSError, IOError) as err:
|
||||||
self.report_error('unable to open for writing: %s' % str(err))
|
self.report_error('unable to open for writing: %s' % str(err))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if self.params.get('xattr_set_filesize', False) and data_len is not None:
|
if self.params.get('xattr_set_filesize', False) and data_len is not None:
|
||||||
try:
|
try:
|
||||||
write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
|
write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
|
||||||
except (XAttrUnavailableError, XAttrMetadataError) as err:
|
except (XAttrUnavailableError, XAttrMetadataError) as err:
|
||||||
self.report_error('unable to set filesize xattr: %s' % str(err))
|
self.report_error('unable to set filesize xattr: %s' % str(err))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
stream.write(data_block)
|
ctx.stream.write(data_block)
|
||||||
except (IOError, OSError) as err:
|
except (IOError, OSError) as err:
|
||||||
self.to_stderr('\n')
|
self.to_stderr('\n')
|
||||||
self.report_error('unable to write data: %s' % str(err))
|
self.report_error('unable to write data: %s' % str(err))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Apply rate limit
|
# Apply rate limit
|
||||||
self.slow_down(start, now, byte_counter - resume_len)
|
self.slow_down(start, now, byte_counter - ctx.resume_len)
|
||||||
|
|
||||||
# end measuring of one loop run
|
# end measuring of one loop run
|
||||||
now = time.time()
|
now = time.time()
|
||||||
@ -207,18 +232,18 @@ class HttpFD(FileDownloader):
|
|||||||
before = after
|
before = after
|
||||||
|
|
||||||
# Progress message
|
# Progress message
|
||||||
speed = self.calc_speed(start, now, byte_counter - resume_len)
|
speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
|
||||||
if data_len is None:
|
if data_len is None:
|
||||||
eta = None
|
eta = None
|
||||||
else:
|
else:
|
||||||
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
eta = self.calc_eta(start, time.time(), data_len - ctx.resume_len, byte_counter - ctx.resume_len)
|
||||||
|
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
'status': 'downloading',
|
'status': 'downloading',
|
||||||
'downloaded_bytes': byte_counter,
|
'downloaded_bytes': byte_counter,
|
||||||
'total_bytes': data_len,
|
'total_bytes': data_len,
|
||||||
'tmpfilename': tmpfilename,
|
'tmpfilename': ctx.tmpfilename,
|
||||||
'filename': filename,
|
'filename': ctx.filename,
|
||||||
'eta': eta,
|
'eta': eta,
|
||||||
'speed': speed,
|
'speed': speed,
|
||||||
'elapsed': now - start,
|
'elapsed': now - start,
|
||||||
@ -227,27 +252,47 @@ class HttpFD(FileDownloader):
|
|||||||
if is_test and byte_counter == data_len:
|
if is_test and byte_counter == data_len:
|
||||||
break
|
break
|
||||||
|
|
||||||
if stream is None:
|
if ctx.stream is None:
|
||||||
self.to_stderr('\n')
|
self.to_stderr('\n')
|
||||||
self.report_error('Did not get any data blocks')
|
self.report_error('Did not get any data blocks')
|
||||||
return False
|
return False
|
||||||
if tmpfilename != '-':
|
if ctx.tmpfilename != '-':
|
||||||
stream.close()
|
ctx.stream.close()
|
||||||
|
|
||||||
if data_len is not None and byte_counter != data_len:
|
if data_len is not None and byte_counter != data_len:
|
||||||
raise ContentTooShortError(byte_counter, int(data_len))
|
err = ContentTooShortError(byte_counter, int(data_len))
|
||||||
self.try_rename(tmpfilename, filename)
|
if count <= retries:
|
||||||
|
retry(err)
|
||||||
|
raise err
|
||||||
|
|
||||||
|
self.try_rename(ctx.tmpfilename, ctx.filename)
|
||||||
|
|
||||||
# Update file modification time
|
# Update file modification time
|
||||||
if self.params.get('updatetime', True):
|
if self.params.get('updatetime', True):
|
||||||
info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
|
info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))
|
||||||
|
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
'downloaded_bytes': byte_counter,
|
'downloaded_bytes': byte_counter,
|
||||||
'total_bytes': byte_counter,
|
'total_bytes': byte_counter,
|
||||||
'filename': filename,
|
'filename': ctx.filename,
|
||||||
'status': 'finished',
|
'status': 'finished',
|
||||||
'elapsed': time.time() - start,
|
'elapsed': time.time() - start,
|
||||||
})
|
})
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
while count <= retries:
|
||||||
|
try:
|
||||||
|
establish_connection()
|
||||||
|
download()
|
||||||
|
return True
|
||||||
|
except RetryDownload as e:
|
||||||
|
count += 1
|
||||||
|
if count <= retries:
|
||||||
|
self.report_retry(e.source_error, count, retries)
|
||||||
|
continue
|
||||||
|
except SucceedDownload:
|
||||||
|
return True
|
||||||
|
|
||||||
|
self.report_error('giving up after %s retries' % retries)
|
||||||
|
return False
|
||||||
|
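The `DownloadContext` trick is worth isolating: a dict whose keys double as attributes, so the nested `establish_connection`, `download` and `retry` helpers can rebind shared state (`ctx.resume_len`, `ctx.open_mode`, ...) without `nonlocal`, which does not exist on Python 2. A self-contained demo:

```
class DownloadContext(dict):
    __getattr__ = dict.get          # missing keys read as None
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

ctx = DownloadContext()
ctx.resume_len = 0

def bump():
    ctx.resume_len += 1024  # mutates the shared dict, no nonlocal needed

bump()
print(ctx.resume_len, ctx['resume_len'])  # 1024 1024
```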
@@ -7,6 +7,7 @@ import time

 from .amp import AMPIE
 from .common import InfoExtractor
+from .youtube import YoutubeIE
 from ..compat import compat_urlparse


@@ -108,9 +109,7 @@ class AbcNewsIE(InfoExtractor):
             r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
         full_video_url = compat_urlparse.urljoin(url, video_url)

-        youtube_url = self._html_search_regex(
-            r'<iframe[^>]+src="(https://www\.youtube\.com/embed/[^"]+)"',
-            webpage, 'YouTube URL', default=None)
+        youtube_url = YoutubeIE._extract_url(webpage)

         timestamp = None
         date_str = self._html_search_regex(
@@ -140,7 +139,7 @@ class AbcNewsIE(InfoExtractor):
         }

         if youtube_url:
-            entries = [entry, self.url_result(youtube_url, 'Youtube')]
+            entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
             return self.playlist_result(entries)

         return entry
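`YoutubeIE._extract_url` scans arbitrary HTML for an embedded YouTube player and returns its URL (or nothing), replacing the hand-rolled iframe regex deleted above. A hedged sketch of how the shared helper is used:

```
from youtube_dl.extractor import YoutubeIE

html = '<iframe src="https://www.youtube.com/embed/BaW_jenozKc"></iframe>'
print(YoutubeIE._extract_url(html))  # should print the embed URL
```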
youtube_dl/extractor/afreecatv.py
@@ -138,6 +138,23 @@ class AfreecaTVIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+    }, {
+        # adult video
+        'url': 'http://vod.afreecatv.com/PLAYER/STATION/26542731',
+        'info_dict': {
+            'id': '20171001_F1AE1711_196617479_1',
+            'ext': 'mp4',
+            'title': '[생]서아 초심 찾기 방송 (part 1)',
+            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
+            'uploader': 'BJ서아',
+            'uploader_id': 'bjdyrksu',
+            'upload_date': '20171001',
+            'duration': 3600,
+            'age_limit': 18,
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
         'only_matching': True,
@@ -160,7 +177,15 @@ class AfreecaTVIE(InfoExtractor):
 
         video_xml = self._download_xml(
             'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
-            video_id, query={'nTitleNo': video_id})
+            video_id, query={
+                'nTitleNo': video_id,
+                'partialView': 'SKIP_ADULT',
+            })
+
+        flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
+        if flag and flag != 'SUCCEED':
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, flag), expected=True)
 
         video_element = video_xml.findall(compat_xpath('./track/video'))[1]
         if video_element is None or video_element.text is None:
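The new `partialView=SKIP_ADULT` query parameter asks the API to serve adult entries without the interactive age gate, and the `<flag>` guard surfaces any non-SUCCEED response as an expected error. The same check against a hand-built response; the flag value here is made up:

```
import xml.etree.ElementTree as etree

# Stand-in for the _download_xml() result.
video_xml = etree.fromstring(
    '<result><track><flag>ADULT</flag></track></result>')

flag = video_xml.findtext('./track/flag')
if flag and flag != 'SUCCEED':
    raise Exception('afreecatv said: %s' % flag)
```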
 53  youtube_dl/extractor/aliexpress.py  Normal file
@@ -0,0 +1,53 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    float_or_none,
+    try_get,
+)
+
+
+class AliExpressLiveIE(InfoExtractor):
+    _VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://live.aliexpress.com/live/2800002704436634',
+        'md5': 'e729e25d47c5e557f2630eaf99b740a5',
+        'info_dict': {
+            'id': '2800002704436634',
+            'ext': 'mp4',
+            'title': 'CASIMA7.22',
+            'thumbnail': r're:http://.*\.jpg',
+            'uploader': 'CASIMA Official Store',
+            'timestamp': 1500717600,
+            'upload_date': '20170722',
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        data = self._parse_json(
+            self._search_regex(
+                r'(?s)runParams\s*=\s*({.+?})\s*;?\s*var',
+                webpage, 'runParams'),
+            video_id)
+
+        title = data['title']
+
+        formats = self._extract_m3u8_formats(
+            data['replyStreamUrl'], video_id, 'mp4',
+            entry_protocol='m3u8_native', m3u8_id='hls')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': data.get('coverUrl'),
+            'uploader': try_get(
+                data, lambda x: x['followBar']['name'], compat_str),
+            'timestamp': float_or_none(data.get('startTimeLong'), scale=1000),
+            'formats': formats,
+        }
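For a quick smoke test of the new extractor, the embedding API can resolve the `_TEST` URL above without downloading anything; a minimal sketch:

```
from youtube_dl import YoutubeDL

with YoutubeDL() as ydl:
    info = ydl.extract_info(
        'https://live.aliexpress.com/live/2800002704436634', download=False)
    print(info.get('title'), info.get('uploader'))
```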
 85  youtube_dl/extractor/americastestkitchen.py  Executable file
@@ -0,0 +1,85 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    int_or_none,
+    try_get,
+    unified_strdate,
+)
+
+
+class AmericasTestKitchenIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.americastestkitchen.com/episode/548-summer-dinner-party',
+        'md5': 'b861c3e365ac38ad319cfd509c30577f',
+        'info_dict': {
+            'id': '1_5g5zua6e',
+            'title': 'Summer Dinner Party',
+            'ext': 'mp4',
+            'description': 'md5:858d986e73a4826979b6a5d9f8f6a1ec',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'timestamp': 1497285541,
+            'upload_date': '20170612',
+            'uploader_id': 'roger.metcalf@americastestkitchen.com',
+            'release_date': '20170617',
+            'series': "America's Test Kitchen",
+            'season_number': 17,
+            'episode': 'Summer Dinner Party',
+            'episode_number': 24,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        partner_id = self._search_regex(
+            r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
+            webpage, 'kaltura partner id')
+
+        video_data = self._parse_json(
+            self._search_regex(
+                r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
+                webpage, 'initial context'),
+            video_id)
+
+        ep_data = try_get(
+            video_data,
+            (lambda x: x['episodeDetail']['content']['data'],
+             lambda x: x['videoDetail']['content']['data']), dict)
+        ep_meta = ep_data.get('full_video', {})
+        external_id = ep_data.get('external_id') or ep_meta['external_id']
+
+        title = ep_data.get('title') or ep_meta.get('title')
+        description = clean_html(ep_meta.get('episode_description') or ep_data.get(
+            'description') or ep_meta.get('description'))
+        thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url'])
+        release_date = unified_strdate(ep_data.get('aired_at'))
+
+        season_number = int_or_none(ep_meta.get('season_number'))
+        episode = ep_meta.get('title')
+        episode_number = int_or_none(ep_meta.get('episode_number'))
+
+        return {
+            '_type': 'url_transparent',
+            'url': 'kaltura:%s:%s' % (partner_id, external_id),
+            'ie_key': 'Kaltura',
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'release_date': release_date,
+            'series': "America's Test Kitchen",
+            'season_number': season_number,
+            'episode': episode,
+            'episode_number': episode_number,
+        }
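The return value above is a `url_transparent` result: the actual media extraction is delegated to the Kaltura extractor through the `kaltura:<partner_id>:<entry_id>` pseudo-URL, while the fields collected here (series, episode numbers, release date) take precedence over the delegate's metadata. The shape of the pattern, with placeholder ids:

```
def delegate_to_kaltura(partner_id, external_id, title):
    # Fields set here override the Kaltura extractor's own metadata;
    # the ids are placeholders for illustration.
    return {
        '_type': 'url_transparent',
        'url': 'kaltura:%s:%s' % (partner_id, external_id),
        'ie_key': 'Kaltura',
        'title': title,
    }
```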
youtube_dl/extractor/animeondemand.py
@@ -3,16 +3,13 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_urlparse,
-    compat_str,
-)
+from ..compat import compat_str
 from ..utils import (
     determine_ext,
     extract_attributes,
     ExtractorError,
-    sanitized_Request,
     urlencode_postdata,
+    urljoin,
 )
 
 
@@ -21,6 +18,8 @@ class AnimeOnDemandIE(InfoExtractor):
     _LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
     _APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
     _NETRC_MACHINE = 'animeondemand'
+    # German-speaking countries of Europe
+    _GEO_COUNTRIES = ['AT', 'CH', 'DE', 'LI', 'LU']
     _TESTS = [{
         # jap, OmU
        'url': 'https://www.anime-on-demand.de/anime/161',
@@ -46,6 +45,10 @@ class AnimeOnDemandIE(InfoExtractor):
         # Full length film, non-series, ger/jap, Dub/OmU, account required
         'url': 'https://www.anime-on-demand.de/anime/185',
         'only_matching': True,
+    }, {
+        # Flash videos
+        'url': 'https://www.anime-on-demand.de/anime/12',
+        'only_matching': True,
     }]
 
     def _login(self):
@@ -72,14 +75,13 @@ class AnimeOnDemandIE(InfoExtractor):
             'post url', default=self._LOGIN_URL, group='url')
 
         if not post_url.startswith('http'):
-            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
-
-        request = sanitized_Request(
-            post_url, urlencode_postdata(login_form))
-        request.add_header('Referer', self._LOGIN_URL)
+            post_url = urljoin(self._LOGIN_URL, post_url)
 
         response = self._download_webpage(
-            request, None, 'Logging in as %s' % username)
+            post_url, None, 'Logging in as %s' % username,
+            data=urlencode_postdata(login_form), headers={
+                'Referer': self._LOGIN_URL,
+            })
 
         if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
             error = self._search_regex(
@@ -120,10 +122,11 @@ class AnimeOnDemandIE(InfoExtractor):
         formats = []
 
         for input_ in re.findall(
-                r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', html):
+                r'<input[^>]+class=["\'].*?streamstarter[^>]+>', html):
             attributes = extract_attributes(input_)
+            title = attributes.get('data-dialog-header')
             playlist_urls = []
-            for playlist_key in ('data-playlist', 'data-otherplaylist'):
+            for playlist_key in ('data-playlist', 'data-otherplaylist', 'data-stream'):
                 playlist_url = attributes.get(playlist_key)
                 if isinstance(playlist_url, compat_str) and re.match(
                         r'/?[\da-zA-Z]+', playlist_url):
@@ -147,19 +150,38 @@ class AnimeOnDemandIE(InfoExtractor):
                     format_id_list.append(compat_str(num))
                 format_id = '-'.join(format_id_list)
                 format_note = ', '.join(filter(None, (kind, lang_note)))
-                request = sanitized_Request(
-                    compat_urlparse.urljoin(url, playlist_url),
+                item_id_list = []
+                if format_id:
+                    item_id_list.append(format_id)
+                item_id_list.append('videomaterial')
+                playlist = self._download_json(
+                    urljoin(url, playlist_url), video_id,
+                    'Downloading %s JSON' % ' '.join(item_id_list),
                     headers={
                         'X-Requested-With': 'XMLHttpRequest',
                         'X-CSRF-Token': csrf_token,
                         'Referer': url,
                         'Accept': 'application/json, text/javascript, */*; q=0.01',
-                    })
-                playlist = self._download_json(
-                    request, video_id, 'Downloading %s playlist JSON' % format_id,
-                    fatal=False)
+                    }, fatal=False)
                 if not playlist:
                     continue
+                stream_url = playlist.get('streamurl')
+                if stream_url:
+                    rtmp = re.search(
+                        r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
+                        stream_url)
+                    if rtmp:
+                        formats.append({
+                            'url': rtmp.group('url'),
+                            'app': rtmp.group('app'),
+                            'play_path': rtmp.group('playpath'),
+                            'page_url': url,
+                            'player_url': 'https://www.anime-on-demand.de/assets/jwplayer.flash-55abfb34080700304d49125ce9ffb4a6.swf',
+                            'rtmp_real_time': True,
+                            'format_id': 'rtmp',
+                            'ext': 'flv',
+                        })
+                        continue
                 start_video = playlist.get('startvideo', 0)
                 playlist = playlist.get('playlist')
                 if not playlist or not isinstance(playlist, list):
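The new RTMP branch splits a stream URL into the pieces rtmpdump needs (`url`, `app`, `play_path`). A worked example of the regex against an invented stream URL:

```
import re

m = re.search(
    r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
    'rtmpe://fms.example.com/vod/anime/mp4:episode-01.mp4')
print(m.group('url'))       # rtmpe://fms.example.com/vod/anime/
print(m.group('app'))       # vod/anime/
print(m.group('playpath'))  # mp4:episode-01.mp4
```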
@@ -222,7 +244,7 @@ class AnimeOnDemandIE(InfoExtractor):
                     f.update({
                         'id': '%s-%s' % (f['id'], m.group('kind').lower()),
                         'title': m.group('title'),
-                        'url': compat_urlparse.urljoin(url, m.group('href')),
+                        'url': urljoin(url, m.group('href')),
                     })
                     entries.append(f)
 
youtube_dl/extractor/aparat.py
@@ -3,13 +3,13 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
-    HEADRequest,
+    int_or_none,
+    mimetype2ext,
 )
 
 
 class AparatIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
 
     _TEST = {
         'url': 'http://www.aparat.com/v/wP8On',
@@ -29,30 +29,41 @@ class AparatIE(InfoExtractor):
         # Note: There is an easier-to-parse configuration at
         # http://www.aparat.com/video/video/config/videohash/%video_id
         # but the URL in there does not work
-        embed_url = 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id
-        webpage = self._download_webpage(embed_url, video_id)
-
-        file_list = self._parse_json(self._search_regex(
-            r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, 'file list'), video_id)
-        for i, item in enumerate(file_list[0]):
-            video_url = item['file']
-            req = HEADRequest(video_url)
-            res = self._request_webpage(
-                req, video_id, note='Testing video URL %d' % i, errnote=False)
-            if res:
-                break
-        else:
-            raise ExtractorError('No working video URLs found')
+        webpage = self._download_webpage(
+            'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
+            video_id)
 
         title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
 
+        file_list = self._parse_json(
+            self._search_regex(
+                r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage,
+                'file list'),
+            video_id)
+
+        formats = []
+        for item in file_list[0]:
+            file_url = item.get('file')
+            if not file_url:
+                continue
+            ext = mimetype2ext(item.get('type'))
+            label = item.get('label')
+            formats.append({
+                'url': file_url,
+                'ext': ext,
+                'format_id': label or ext,
+                'height': int_or_none(self._search_regex(
+                    r'(\d+)[pP]', label or '', 'height', default=None)),
+            })
+        self._sort_formats(formats)
+
         thumbnail = self._search_regex(
             r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
 
         return {
             'id': video_id,
             'title': title,
-            'url': video_url,
-            'ext': 'mp4',
             'thumbnail': thumbnail,
             'age_limit': self._family_friendly_search(webpage),
+            'formats': formats,
         }
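Instead of HEAD-probing each candidate URL, the rewrite keeps every fileList entry as a format and derives `ext` and `height` from the item's own metadata. How the two helpers behave on a representative item (the values are illustrative):

```
import re

from youtube_dl.utils import int_or_none, mimetype2ext

item = {'file': 'https://example.com/v.mp4', 'type': 'video/mp4', 'label': '720p'}

ext = mimetype2ext(item.get('type'))
label = item.get('label')
m = re.search(r'(\d+)[pP]', label or '')
height = int_or_none(m.group(1)) if m else None
print(ext, height)  # mp4 720
```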
youtube_dl/extractor/ard.py
@@ -93,6 +93,7 @@ class ARDMediathekIE(InfoExtractor):
 
         duration = int_or_none(media_info.get('_duration'))
         thumbnail = media_info.get('_previewImage')
+        is_live = media_info.get('_isLive') is True
 
         subtitles = {}
         subtitle_url = media_info.get('_subtitleUrl')
@@ -106,6 +107,7 @@ class ARDMediathekIE(InfoExtractor):
             'id': video_id,
             'duration': duration,
             'thumbnail': thumbnail,
+            'is_live': is_live,
             'formats': formats,
             'subtitles': subtitles,
         }
@@ -166,9 +168,11 @@ class ARDMediathekIE(InfoExtractor):
         # determine video id from url
         m = re.match(self._VALID_URL, url)
 
+        document_id = None
+
         numid = re.search(r'documentId=([0-9]+)', url)
         if numid:
-            video_id = numid.group(1)
+            document_id = video_id = numid.group(1)
         else:
             video_id = m.group('video_id')
 
@@ -228,12 +232,16 @@ class ARDMediathekIE(InfoExtractor):
                 'formats': formats,
             }
         else:  # request JSON file
+            if not document_id:
+                video_id = self._search_regex(
+                    r'/play/(?:config|media)/(\d+)', webpage, 'media id')
             info = self._extract_media_info(
-                'http://www.ardmediathek.de/play/media/%s' % video_id, webpage, video_id)
+                'http://www.ardmediathek.de/play/media/%s' % video_id,
+                webpage, video_id)
 
         info.update({
             'id': video_id,
-            'title': title,
+            'title': self._live_title(title) if info.get('is_live') else title,
             'description': description,
             'thumbnail': thumbnail,
         })
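With `_isLive` now propagated as `is_live`, livestream titles get the standard decoration: `InfoExtractor._live_title()` appends the current date and time so repeated captures of the same stream do not collide on filename. A sketch of the convention, assuming `ie` is an InfoExtractor instance:

```
def pick_title(ie, info, title):
    # Livestreams get a timestamped title; VOD titles pass through untouched.
    return ie._live_title(title) if info.get('is_live') else title
```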
@ -9,12 +9,13 @@ from ..compat import (
|
|||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
unified_strdate,
|
|
||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
NO_DEFAULT,
|
NO_DEFAULT,
|
||||||
qualities,
|
qualities,
|
||||||
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
# There are different sources of video in arte.tv, the extraction process
|
# There are different sources of video in arte.tv, the extraction process
|
||||||
@ -79,6 +80,13 @@ class ArteTVBaseIE(InfoExtractor):
|
|||||||
info = self._download_json(json_url, video_id)
|
info = self._download_json(json_url, video_id)
|
||||||
player_info = info['videoJsonPlayer']
|
player_info = info['videoJsonPlayer']
|
||||||
|
|
||||||
|
vsr = player_info['VSR']
|
||||||
|
|
||||||
|
if not vsr:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Video %s is not available' % player_info.get('VID') or video_id,
|
||||||
|
expected=True)
|
||||||
|
|
||||||
upload_date_str = player_info.get('shootingDate')
|
upload_date_str = player_info.get('shootingDate')
|
||||||
if not upload_date_str:
|
if not upload_date_str:
|
||||||
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
|
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
|
||||||
@ -107,7 +115,7 @@ class ArteTVBaseIE(InfoExtractor):
|
|||||||
langcode = LANGS.get(lang, lang)
|
langcode = LANGS.get(lang, lang)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, format_dict in player_info['VSR'].items():
|
for format_id, format_dict in vsr.items():
|
||||||
f = dict(format_dict)
|
f = dict(format_dict)
|
||||||
versionCode = f.get('versionCode')
|
versionCode = f.get('versionCode')
|
||||||
l = re.escape(langcode)
|
l = re.escape(langcode)
|
||||||
|
youtube_dl/extractor/bandcamp.py
@@ -242,7 +242,12 @@ class BandcampAlbumIE(InfoExtractor):
             raise ExtractorError('The page doesn\'t contain any tracks')
         # Only tracks with duration info have songs
         entries = [
-            self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
+            self.url_result(
+                compat_urlparse.urljoin(url, t_path),
+                ie=BandcampIE.ie_key(),
+                video_title=self._search_regex(
+                    r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
+                    elem_content, 'track title', fatal=False))
             for elem_content, t_path in track_elements
             if self._html_search_meta('duration', elem_content, default=None)]
 
youtube_dl/extractor/bbc.py
@@ -29,7 +29,7 @@ from ..compat import (
 class BBCCoUkIE(InfoExtractor):
     IE_NAME = 'bbc.co.uk'
     IE_DESC = 'BBC iPlayer'
-    _ID_REGEX = r'[pb][\da-z]{7}'
+    _ID_REGEX = r'[pbw][\da-z]{7}'
     _VALID_URL = r'''(?x)
                     https?://
                         (?:www\.)?bbc\.co\.uk/
@@ -37,7 +37,8 @@ class BBCCoUkIE(InfoExtractor):
                             programmes/(?!articles/)|
                             iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
                             music/(?:clips|audiovideo/popular)[/#]|
-                            radio/player/
+                            radio/player/|
+                            events/[^/]+/play/[^/]+/
                         )
                         (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
                     ''' % _ID_REGEX
@@ -232,6 +233,9 @@ class BBCCoUkIE(InfoExtractor):
     }, {
         'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
         'only_matching': True,
+    }, {
+        'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
+        'only_matching': True,
     }]
 
     _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
youtube_dl/extractor/beeg.py
@@ -9,6 +9,7 @@ from ..compat import (
 from ..utils import (
     int_or_none,
     parse_iso8601,
+    urljoin,
 )
 
 
@@ -36,9 +37,11 @@ class BeegIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         cpl_url = self._search_regex(
-            r'<script[^>]+src=(["\'])(?P<url>(?:https?:)?//static\.beeg\.com/cpl/\d+\.js.*?)\1',
+            r'<script[^>]+src=(["\'])(?P<url>(?:/static|(?:https?:)?//static\.beeg\.com)/cpl/\d+\.js.*?)\1',
             webpage, 'cpl', default=None, group='url')
 
+        cpl_url = urljoin(url, cpl_url)
+
         beeg_version, beeg_salt = [None] * 2
 
         if cpl_url:
@@ -54,12 +57,16 @@ class BeegIE(InfoExtractor):
                 r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
                 default=None, group='beeg_salt')
 
-        beeg_version = beeg_version or '2000'
+        beeg_version = beeg_version or '2185'
         beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
 
-        video = self._download_json(
-            'https://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
-            video_id)
+        for api_path in ('', 'api.'):
+            video = self._download_json(
+                'https://%sbeeg.com/api/v6/%s/video/%s'
+                % (api_path, beeg_version, video_id), video_id,
+                fatal=api_path == 'api.')
+            if video:
+                break
 
         def split(o, e):
             def cut(s, x):
youtube_dl/extractor/bpb.py
@@ -33,13 +33,18 @@ class BpbIE(InfoExtractor):
         title = self._html_search_regex(
             r'<h2 class="white">(.*?)</h2>', webpage, 'title')
         video_info_dicts = re.findall(
-            r"({\s*src:\s*'http://film\.bpb\.de/[^}]+})", webpage)
+            r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage)
 
         formats = []
         for video_info in video_info_dicts:
-            video_info = self._parse_json(video_info, video_id, transform_source=js_to_json)
-            quality = video_info['quality']
-            video_url = video_info['src']
+            video_info = self._parse_json(
+                video_info, video_id, transform_source=js_to_json, fatal=False)
+            if not video_info:
+                continue
+            video_url = video_info.get('src')
+            if not video_url:
+                continue
+            quality = 'high' if '_high' in video_url else 'low'
             formats.append({
                 'url': video_url,
                 'preference': 10 if quality == 'high' else 0,
youtube_dl/extractor/canvas.py
@@ -3,24 +3,104 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import float_or_none
+from ..utils import (
+    float_or_none,
+    strip_or_none,
+)
 
 
 class CanvasIE(InfoExtractor):
+    _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet)/assets/(?P<id>m[dz]-ast-[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
+        'md5': '90139b746a0a9bd7bb631283f6e2a64e',
+        'info_dict': {
+            'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
+            'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
+            'ext': 'flv',
+            'title': 'Nachtwacht: De Greystook',
+            'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 1468.03,
+        },
+        'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
+    }, {
+        'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        site_id, video_id = mobj.group('site_id'), mobj.group('id')
+
+        data = self._download_json(
+            'https://mediazone.vrt.be/api/v1/%s/assets/%s'
+            % (site_id, video_id), video_id)
+
+        title = data['title']
+        description = data.get('description')
+
+        formats = []
+        for target in data['targetUrls']:
+            format_url, format_type = target.get('url'), target.get('type')
+            if not format_url or not format_type:
+                continue
+            if format_type == 'HLS':
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id=format_type, fatal=False))
+            elif format_type == 'HDS':
+                formats.extend(self._extract_f4m_formats(
+                    format_url, video_id, f4m_id=format_type, fatal=False))
+            elif format_type == 'MPEG_DASH':
+                formats.extend(self._extract_mpd_formats(
+                    format_url, video_id, mpd_id=format_type, fatal=False))
+            elif format_type == 'HSS':
+                formats.extend(self._extract_ism_formats(
+                    format_url, video_id, ism_id='mss', fatal=False))
+            else:
+                formats.append({
+                    'format_id': format_type,
+                    'url': format_url,
+                })
+        self._sort_formats(formats)
+
+        subtitles = {}
+        subtitle_urls = data.get('subtitleUrls')
+        if isinstance(subtitle_urls, list):
+            for subtitle in subtitle_urls:
+                subtitle_url = subtitle.get('url')
+                if subtitle_url and subtitle.get('type') == 'CLOSED':
+                    subtitles.setdefault('nl', []).append({'url': subtitle_url})
+
+        return {
+            'id': video_id,
+            'display_id': video_id,
+            'title': title,
+            'description': description,
+            'formats': formats,
+            'duration': float_or_none(data.get('duration'), 1000),
+            'thumbnail': data.get('posterImageUrl'),
+            'subtitles': subtitles,
+        }
+
+
+class CanvasEenIE(InfoExtractor):
     IE_DESC = 'canvas.be and een.be'
     _VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
-        'md5': 'ea838375a547ac787d4064d8c7860a6c',
+        'md5': 'ed66976748d12350b118455979cca293',
         'info_dict': {
             'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
             'display_id': 'de-afspraak-veilt-voor-de-warmste-week',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': 'De afspraak veilt voor de Warmste Week',
             'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6',
             'thumbnail': r're:^https?://.*\.jpg$',
             'duration': 49.02,
-        }
+        },
+        'expected_warnings': ['is not a supported codec'],
     }, {
         # with subtitles
         'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167',
@@ -40,7 +120,8 @@ class CanvasIE(InfoExtractor):
         },
         'params': {
             'skip_download': True,
-        }
+        },
+        'skip': 'Pagina niet gevonden',
     }, {
         'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles',
         'info_dict': {
@@ -54,7 +135,8 @@ class CanvasIE(InfoExtractor):
         },
         'params': {
             'skip_download': True,
-        }
+        },
+        'skip': 'Episode no longer available',
     }, {
         'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
         'only_matching': True,
@@ -66,55 +148,21 @@ class CanvasIE(InfoExtractor):
 
         webpage = self._download_webpage(url, display_id)
 
-        title = (self._search_regex(
+        title = strip_or_none(self._search_regex(
             r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
             webpage, 'title', default=None) or self._og_search_title(
-            webpage)).strip()
+            webpage, default=None))
 
         video_id = self._html_search_regex(
-            r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id')
-
-        data = self._download_json(
-            'https://mediazone.vrt.be/api/v1/%s/assets/%s'
-            % (site_id, video_id), display_id)
-
-        formats = []
-        for target in data['targetUrls']:
-            format_url, format_type = target.get('url'), target.get('type')
-            if not format_url or not format_type:
-                continue
-            if format_type == 'HLS':
-                formats.extend(self._extract_m3u8_formats(
-                    format_url, display_id, entry_protocol='m3u8_native',
-                    ext='mp4', preference=0, fatal=False, m3u8_id=format_type))
-            elif format_type == 'HDS':
-                formats.extend(self._extract_f4m_formats(
-                    format_url, display_id, f4m_id=format_type, fatal=False))
-            elif format_type == 'MPEG_DASH':
-                formats.extend(self._extract_mpd_formats(
-                    format_url, display_id, mpd_id=format_type, fatal=False))
-            else:
-                formats.append({
-                    'format_id': format_type,
-                    'url': format_url,
-                })
-        self._sort_formats(formats)
-
-        subtitles = {}
-        subtitle_urls = data.get('subtitleUrls')
-        if isinstance(subtitle_urls, list):
-            for subtitle in subtitle_urls:
-                subtitle_url = subtitle.get('url')
-                if subtitle_url and subtitle.get('type') == 'CLOSED':
-                    subtitles.setdefault('nl', []).append({'url': subtitle_url})
+            r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
+            group='id')
 
         return {
+            '_type': 'url_transparent',
+            'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (site_id, video_id),
+            'ie_key': CanvasIE.ie_key(),
             'id': video_id,
             'display_id': display_id,
             'title': title,
             'description': self._og_search_description(webpage),
-            'formats': formats,
-            'duration': float_or_none(data.get('duration'), 1000),
-            'thumbnail': data.get('posterImageUrl'),
-            'subtitles': subtitles,
         }
youtube_dl/extractor/cbc.py
@@ -200,6 +200,7 @@ class CBCWatchBaseIE(InfoExtractor):
         'media': 'http://search.yahoo.com/mrss/',
         'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/',
     }
+    _GEO_COUNTRIES = ['CA']
 
     def _call_api(self, path, video_id):
         url = path if path.startswith('http') else self._API_BASE_URL + path
@@ -287,6 +288,11 @@ class CBCWatchBaseIE(InfoExtractor):
 class CBCWatchVideoIE(CBCWatchBaseIE):
     IE_NAME = 'cbc.ca:watch:video'
     _VALID_URL = r'https?://api-cbc\.cloud\.clearleap\.com/cloffice/client/web/play/?\?.*?\bcontentId=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+    _TEST = {
+        # geo-restricted to Canada, bypassable
+        'url': 'https://api-cbc.cloud.clearleap.com/cloffice/client/web/play/?contentId=3c84472a-1eea-4dee-9267-2655d5055dcf&categoryId=ebc258f5-ee40-4cca-b66b-ba6bd55b7235',
+        'only_matching': True,
+    }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -323,9 +329,10 @@ class CBCWatchIE(CBCWatchBaseIE):
     IE_NAME = 'cbc.ca:watch'
     _VALID_URL = r'https?://watch\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)'
     _TESTS = [{
+        # geo-restricted to Canada, bypassable
         'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4',
         'info_dict': {
-            'id': '38e815a-009e3ab12e4',
+            'id': '9673749a-5e77-484c-8b62-a1092a6b5168',
             'ext': 'mp4',
             'title': 'Customer (Dis)Service',
             'description': 'md5:8bdd6913a0fe03d4b2a17ebe169c7c87',
@@ -337,8 +344,8 @@ class CBCWatchIE(CBCWatchBaseIE):
             'skip_download': True,
             'format': 'bestvideo',
         },
-        'skip': 'Geo-restricted to Canada',
     }, {
+        # geo-restricted to Canada, bypassable
         'url': 'http://watch.cbc.ca/arthur/all/1ed4b385-cd84-49cf-95f0-80f004680057',
         'info_dict': {
             'id': '1ed4b385-cd84-49cf-95f0-80f004680057',
@@ -346,7 +353,6 @@ class CBCWatchIE(CBCWatchBaseIE):
             'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.',
         },
         'playlist_mincount': 30,
-        'skip': 'Geo-restricted to Canada',
     }]
 
     def _real_extract(self, url):
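`_GEO_COUNTRIES = ['CA']` plugs these extractors into youtube-dl's geo-restriction bypass: on a geo error, a random IP from the listed countries is faked via the X-Forwarded-For header, which is why the previously skipped tests become runnable. The equivalent manual override through the embedding API:

```
from youtube_dl import YoutubeDL

# Same effect as the --geo-bypass-country CA command-line flag.
ydl = YoutubeDL({'geo_bypass_country': 'CA'})
```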
youtube_dl/extractor/cda.py
@@ -124,7 +124,7 @@ class CDAIE(InfoExtractor):
         }
 
         def extract_format(page, version):
-            json_str = self._search_regex(
+            json_str = self._html_search_regex(
                 r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
                 '%s player_json' % version, fatal=False, group='player_data')
             if not json_str:
youtube_dl/extractor/charlierose.py
@@ -5,7 +5,7 @@ from ..utils import remove_end
 
 
 class CharlieRoseIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?charlierose\.com/video(?:s|/player)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?charlierose\.com/(?:video|episode)(?:s|/player)/(?P<id>\d+)'
     _TESTS = [{
         'url': 'https://charlierose.com/videos/27996',
         'md5': 'fda41d49e67d4ce7c2411fd2c4702e09',
@@ -24,6 +24,9 @@ class CharlieRoseIE(InfoExtractor):
     }, {
         'url': 'https://charlierose.com/videos/27996',
         'only_matching': True,
+    }, {
+        'url': 'https://charlierose.com/episodes/30887?autoplay=true',
+        'only_matching': True,
     }]
 
     _PLAYER_BASE = 'https://charlierose.com/video/player/%s'
youtube_dl/extractor/chilloutzone.py
@@ -5,6 +5,7 @@ import base64
 import json
 
 from .common import InfoExtractor
+from .youtube import YoutubeIE
 from ..utils import (
     clean_html,
     ExtractorError
@@ -70,11 +71,9 @@ class ChilloutzoneIE(InfoExtractor):
 
         # If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)
         if native_platform is None:
-            youtube_url = self._html_search_regex(
-                r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
-                webpage, 'fallback video URL', default=None)
-            if youtube_url is not None:
-                return self.url_result(youtube_url, ie='Youtube')
+            youtube_url = YoutubeIE._extract_url(webpage)
+            if youtube_url:
+                return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
 
         # Non Fallback: Decide to use native source (e.g. youtube or vimeo) or
         # the own CDN
youtube_dl/extractor/cinchcast.py
@@ -9,12 +9,20 @@ from ..utils import (
 
 
 class CinchcastIE(InfoExtractor):
-    _VALID_URL = r'https?://player\.cinchcast\.com/.*?assetId=(?P<id>[0-9]+)'
-    _TEST = {
+    _VALID_URL = r'https?://player\.cinchcast\.com/.*?(?:assetId|show_id)=(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://player.cinchcast.com/?show_id=5258197&platformId=1&assetType=single',
+        'info_dict': {
+            'id': '5258197',
+            'ext': 'mp3',
+            'title': 'Train Your Brain to Up Your Game with Coach Mandy',
+            'upload_date': '20130816',
+        },
+    }, {
         # Actual test is run in generic, look for undergroundwellness
         'url': 'http://player.cinchcast.com/?platformId=1&assetType=single&assetId=7141703',
         'only_matching': True,
-    }
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
youtube_dl/extractor/clipfish.py
@@ -1,67 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-    unified_strdate,
-)
-
-
-class ClipfishIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'http://www.clipfish.de/special/ugly-americans/video/4343170/s01-e01-ugly-americans-date-in-der-hoelle/',
-        'md5': 'b9a5dc46294154c1193e2d10e0c95693',
-        'info_dict': {
-            'id': '4343170',
-            'ext': 'mp4',
-            'title': 'S01 E01 - Ugly Americans - Date in der Hölle',
-            'description': 'Mark Lilly arbeitet im Sozialdienst der Stadt New York und soll Immigranten bei ihrer Einbürgerung in die USA zur Seite stehen.',
-            'upload_date': '20161005',
-            'duration': 1291,
-            'view_count': int,
-        }
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        video_info = self._download_json(
-            'http://www.clipfish.de/devapi/id/%s?format=json&apikey=hbbtv' % video_id,
-            video_id)['items'][0]
-
-        formats = []
-
-        m3u8_url = video_info.get('media_videourl_hls')
-        if m3u8_url:
-            formats.append({
-                'url': m3u8_url.replace('de.hls.fra.clipfish.de', 'hls.fra.clipfish.de'),
-                'ext': 'mp4',
-                'format_id': 'hls',
-            })
-
-        mp4_url = video_info.get('media_videourl')
-        if mp4_url:
-            formats.append({
-                'url': mp4_url,
-                'format_id': 'mp4',
-                'width': int_or_none(video_info.get('width')),
-                'height': int_or_none(video_info.get('height')),
-                'tbr': int_or_none(video_info.get('bitrate')),
-            })
-
-        descr = video_info.get('descr')
-        if descr:
-            descr = descr.strip()
-
-        return {
-            'id': video_id,
-            'title': video_info['title'],
-            'description': descr,
-            'formats': formats,
-            'thumbnail': video_info.get('media_content_thumbnail_large') or video_info.get('media_thumbnail'),
-            'duration': int_or_none(video_info.get('media_length')),
-            'upload_date': unified_strdate(video_info.get('pubDate')),
-            'view_count': int_or_none(video_info.get('media_views'))
-        }
 74  youtube_dl/extractor/clippit.py  Normal file
@@ -0,0 +1,74 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_iso8601,
+    qualities,
+)
+
+import re
+
+
+class ClippitIE(InfoExtractor):
+
+    _VALID_URL = r'https?://(?:www\.)?clippituser\.tv/c/(?P<id>[a-z]+)'
+    _TEST = {
+        'url': 'https://www.clippituser.tv/c/evmgm',
+        'md5': '963ae7a59a2ec4572ab8bf2f2d2c5f09',
+        'info_dict': {
+            'id': 'evmgm',
+            'ext': 'mp4',
+            'title': 'Bye bye Brutus. #BattleBots - Clippit',
+            'uploader': 'lizllove',
+            'uploader_url': 'https://www.clippituser.tv/p/lizllove',
+            'timestamp': 1472183818,
+            'upload_date': '20160826',
+            'description': 'BattleBots | ABC',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(r'<title.*>(.+?)</title>', webpage, 'title')
+
+        FORMATS = ('sd', 'hd')
+        quality = qualities(FORMATS)
+        formats = []
+        for format_id in FORMATS:
+            url = self._html_search_regex(r'data-%s-file="(.+?)"' % format_id,
+                                          webpage, 'url', fatal=False)
+            if not url:
+                continue
+            match = re.search(r'/(?P<height>\d+)\.mp4', url)
+            formats.append({
+                'url': url,
+                'format_id': format_id,
+                'quality': quality(format_id),
+                'height': int(match.group('height')) if match else None,
+            })
+
+        uploader = self._html_search_regex(r'class="username".*>\s+(.+?)\n',
+                                           webpage, 'uploader', fatal=False)
+        uploader_url = ('https://www.clippituser.tv/p/' + uploader
+                        if uploader else None)
+
+        timestamp = self._html_search_regex(r'datetime="(.+?)"',
+                                            webpage, 'date', fatal=False)
+        thumbnail = self._html_search_regex(r'data-image="(.+?)"',
+                                            webpage, 'thumbnail', fatal=False)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'uploader': uploader,
+            'uploader_url': uploader_url,
+            'timestamp': parse_iso8601(timestamp),
+            'description': self._og_search_description(webpage),
+            'thumbnail': thumbnail,
+        }
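`qualities()` (from youtube_dl.utils) builds a ranking function over an ordered tuple of format ids, so `'hd'` outranks `'sd'` when the formats are sorted. Its behavior in isolation:

```
from youtube_dl.utils import qualities

quality = qualities(('sd', 'hd'))
print(quality('sd'), quality('hd'))  # 0 1, higher means better
print(quality('uhd'))                # -1 for ids not in the list
```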
youtube_dl/extractor/comedycentral.py
@@ -120,13 +120,16 @@ class ComedyCentralTVIE(MTVServicesInfoExtractor):
 
 
 class ComedyCentralShortnameIE(InfoExtractor):
-    _VALID_URL = r'^:(?P<id>tds|thedailyshow)$'
+    _VALID_URL = r'^:(?P<id>tds|thedailyshow|theopposition)$'
     _TESTS = [{
         'url': ':tds',
         'only_matching': True,
     }, {
         'url': ':thedailyshow',
         'only_matching': True,
+    }, {
+        'url': ':theopposition',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -134,5 +137,6 @@ class ComedyCentralShortnameIE(InfoExtractor):
         shortcut_map = {
             'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
             'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
+            'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes',
         }
         return self.url_result(shortcut_map[video_id])
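The shortnames are ordinary "URLs" as far as youtube-dl is concerned, so the new `:theopposition` shortcut works anywhere a URL is accepted; a sketch through the embedding API (simulate avoids any actual download):

```
from youtube_dl import YoutubeDL

with YoutubeDL({'simulate': True}) as ydl:
    ydl.download([':theopposition'])
```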
youtube_dl/extractor/common.py
@@ -27,6 +27,7 @@ from ..compat import (
     compat_urllib_parse_urlencode,
     compat_urllib_request,
     compat_urlparse,
+    compat_xml_parse_error,
 )
 from ..downloader.f4m import remove_encrypted_media
 from ..utils import (
@@ -662,15 +663,29 @@ class InfoExtractor(object):
 
     def _download_xml(self, url_or_request, video_id,
                       note='Downloading XML', errnote='Unable to download XML',
-                      transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}):
+                      transform_source=None, fatal=True, encoding=None,
+                      data=None, headers={}, query={}):
         """Return the xml as an xml.etree.ElementTree.Element"""
         xml_string = self._download_webpage(
-            url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query)
+            url_or_request, video_id, note, errnote, fatal=fatal,
+            encoding=encoding, data=data, headers=headers, query=query)
         if xml_string is False:
             return xml_string
+        return self._parse_xml(
+            xml_string, video_id, transform_source=transform_source,
+            fatal=fatal)
+
+    def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
         if transform_source:
             xml_string = transform_source(xml_string)
-        return compat_etree_fromstring(xml_string.encode('utf-8'))
+        try:
+            return compat_etree_fromstring(xml_string.encode('utf-8'))
+        except compat_xml_parse_error as ve:
+            errmsg = '%s: Failed to parse XML ' % video_id
+            if fatal:
+                raise ExtractorError(errmsg, cause=ve)
+            else:
+                self.report_warning(errmsg + str(ve))
 
     def _download_json(self, url_or_request, video_id,
                        note='Downloading JSON metadata',
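Factoring `_parse_xml()` out of `_download_xml()` lets an extractor that already holds XML text (say, embedded in an HTML page) reuse the same parse-error handling. A hedged sketch of calling it directly; the class and element names are hypothetical:

```
from youtube_dl.extractor.common import InfoExtractor


class SomeXMLIE(InfoExtractor):
    def _title_from_inline_xml(self, xml_string, video_id):
        # Returns None instead of raising when the XML is malformed,
        # because fatal=False downgrades the error to a warning.
        doc = self._parse_xml(xml_string, video_id, fatal=False)
        return doc.findtext('./title') if doc is not None else None
```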
@@ -956,7 +971,8 @@ class InfoExtractor(object):
 
     def _family_friendly_search(self, html):
         # See http://schema.org/VideoObject
-        family_friendly = self._html_search_meta('isFamilyFriendly', html)
+        family_friendly = self._html_search_meta(
+            'isFamilyFriendly', html, default=None)
 
         if not family_friendly:
             return None
@@ -1801,7 +1817,7 @@ class InfoExtractor(object):
                 ms_info['timescale'] = int(timescale)
             segment_duration = source.get('duration')
             if segment_duration:
-                ms_info['segment_duration'] = int(segment_duration)
+                ms_info['segment_duration'] = float(segment_duration)
 
         def extract_Initialization(source):
             initialization = source.find(_add_ns('Initialization'))
@@ -1908,19 +1924,23 @@ class InfoExtractor(object):
                         'Bandwidth': bandwidth,
                     }
 
+                def location_key(location):
+                    return 'url' if re.match(r'^https?://', location) else 'path'
+
                 if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
 
                     media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
+                    media_location_key = location_key(media_template)
 
                     # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
                     # can't be used at the same time
                     if '%(Number' in media_template and 's' not in representation_ms_info:
                         segment_duration = None
-                        if 'total_number' not in representation_ms_info and 'segment_duration':
+                        if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
                             segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
                             representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
                         representation_ms_info['fragments'] = [{
-                            'url': media_template % {
+                            media_location_key: media_template % {
                                 'Number': segment_number,
                                 'Bandwidth': bandwidth,
                             },
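Two fixes meet in this hunk. First, `and 'segment_duration':` tested a non-empty string literal, which is always true; it is now a real membership test. Second, `location_key()` stores relative segment locations under 'path' instead of resolving them eagerly, leaving the join against the new `fragment_base_url` to the downloader. The helper in isolation:

```
import re


def location_key(location):
    return 'url' if re.match(r'^https?://', location) else 'path'


print(location_key('https://cdn.example.com/seg-1.m4s'))  # url
print(location_key('seg-1.m4s'))                          # path
```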
@ -1944,7 +1964,7 @@ class InfoExtractor(object):
|
|||||||
'Number': segment_number,
|
'Number': segment_number,
|
||||||
}
|
}
|
||||||
representation_ms_info['fragments'].append({
|
representation_ms_info['fragments'].append({
|
||||||
'url': segment_url,
|
media_location_key: segment_url,
|
||||||
'duration': float_or_none(segment_d, representation_ms_info['timescale']),
|
'duration': float_or_none(segment_d, representation_ms_info['timescale']),
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -1968,8 +1988,9 @@ class InfoExtractor(object):
|
|||||||
for s in representation_ms_info['s']:
|
for s in representation_ms_info['s']:
|
||||||
duration = float_or_none(s['d'], timescale)
|
duration = float_or_none(s['d'], timescale)
|
||||||
for r in range(s.get('r', 0) + 1):
|
for r in range(s.get('r', 0) + 1):
|
||||||
|
segment_uri = representation_ms_info['segment_urls'][segment_index]
|
||||||
fragments.append({
|
fragments.append({
|
||||||
'url': representation_ms_info['segment_urls'][segment_index],
|
location_key(segment_uri): segment_uri,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
})
|
})
|
||||||
segment_index += 1
|
segment_index += 1
|
||||||
@ -1978,6 +1999,7 @@ class InfoExtractor(object):
|
|||||||
# No fragments key is present in this case.
|
# No fragments key is present in this case.
|
||||||
if 'fragments' in representation_ms_info:
|
if 'fragments' in representation_ms_info:
|
||||||
f.update({
|
f.update({
|
||||||
|
'fragment_base_url': base_url,
|
||||||
'fragments': [],
|
'fragments': [],
|
||||||
'protocol': 'http_dash_segments',
|
'protocol': 'http_dash_segments',
|
||||||
})
|
})
|
||||||
@ -1985,10 +2007,8 @@ class InfoExtractor(object):
|
|||||||
initialization_url = representation_ms_info['initialization_url']
|
initialization_url = representation_ms_info['initialization_url']
|
||||||
if not f.get('url'):
|
if not f.get('url'):
|
||||||
f['url'] = initialization_url
|
f['url'] = initialization_url
|
||||||
f['fragments'].append({'url': initialization_url})
|
f['fragments'].append({location_key(initialization_url): initialization_url})
|
||||||
f['fragments'].extend(representation_ms_info['fragments'])
|
f['fragments'].extend(representation_ms_info['fragments'])
|
||||||
for fragment in f['fragments']:
|
|
||||||
fragment['url'] = urljoin(base_url, fragment['url'])
|
|
||||||
try:
|
try:
|
||||||
existing_format = next(
|
existing_format = next(
|
||||||
fo for fo in formats
|
fo for fo in formats
|
||||||
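The new `location_key` helper decides whether a segment reference is an absolute URL or a relative path, so DASH fragments can now carry a `path` plus a shared `fragment_base_url` instead of URLs eagerly joined with `urljoin`. A minimal standalone sketch of the behaviour (the CDN host is illustrative):

```python
import re

def location_key(location):
    # Absolute http(s) references keep the 'url' key; anything else becomes a
    # relative 'path', resolved against fragment_base_url at download time.
    return 'url' if re.match(r'^https?://', location) else 'path'

# Example fragment entries as the parser would now emit them:
fragments = [
    {location_key('https://cdn.example.com/seg-1.m4s'): 'https://cdn.example.com/seg-1.m4s'},
    {location_key('seg-2.m4s'): 'seg-2.m4s'},  # -> {'path': 'seg-2.m4s'}
]
```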
```diff
@@ -2126,19 +2146,19 @@ class InfoExtractor(object):
                 return f
             return {}

-        def _media_formats(src, cur_media_type):
+        def _media_formats(src, cur_media_type, type_info={}):
             full_url = absolute_url(src)
-            ext = determine_ext(full_url)
+            ext = type_info.get('ext') or determine_ext(full_url)
             if ext == 'm3u8':
                 is_plain_url = False
                 formats = self._extract_m3u8_formats(
                     full_url, video_id, ext='mp4',
                     entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
-                    preference=preference)
+                    preference=preference, fatal=False)
             elif ext == 'mpd':
                 is_plain_url = False
                 formats = self._extract_mpd_formats(
-                    full_url, video_id, mpd_id=mpd_id)
+                    full_url, video_id, mpd_id=mpd_id, fatal=False)
             else:
                 is_plain_url = True
                 formats = [{
@@ -2177,9 +2197,15 @@ class InfoExtractor(object):
                 src = source_attributes.get('src')
                 if not src:
                     continue
-                is_plain_url, formats = _media_formats(src, media_type)
-                if is_plain_url:
-                    f = parse_content_type(source_attributes.get('type'))
+                f = parse_content_type(source_attributes.get('type'))
+                is_plain_url, formats = _media_formats(src, media_type, f)
+                if is_plain_url:
+                    # res attribute is not standard but seen several times
+                    # in the wild
+                    f.update({
+                        'height': int_or_none(source_attributes.get('res')),
+                        'format_id': source_attributes.get('label'),
+                    })
                     f.update(formats[0])
                     media_info['formats'].append(f)
                 else:
```
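With the extra `type_info` argument, a MIME type parsed from the `<source>` tag's `type` attribute can override extension sniffing, so a manifest served without an `.m3u8` or `.mpd` suffix is still routed to the right branch. A rough standalone illustration of that precedence (a toy stand-in, not the real `determine_ext`):

```python
def guess_ext(src, type_info={}):
    # An explicit hint derived from the markup wins over sniffing the URL suffix.
    if type_info.get('ext'):
        return type_info['ext']
    return src.rpartition('.')[-1] if '.' in src.rpartition('/')[-1] else ''

print(guess_ext('https://example.com/stream'))                   # ''
print(guess_ext('https://example.com/stream', {'ext': 'm3u8'}))  # 'm3u8'
```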
```diff
@@ -2439,10 +2465,12 @@ class InfoExtractor(object):
             self._downloader.report_warning(msg)
         return res

-    def _set_cookie(self, domain, name, value, expire_time=None):
+    def _set_cookie(self, domain, name, value, expire_time=None, port=None,
+                    path='/', secure=False, discard=False, rest={}, **kwargs):
         cookie = compat_cookiejar.Cookie(
-            0, name, value, None, None, domain, None,
-            None, '/', True, False, expire_time, '', None, None, None)
+            0, name, value, port, port is not None, domain, True,
+            domain.startswith('.'), path, True, secure, expire_time,
+            discard, None, None, rest)
         self._downloader.cookiejar.set_cookie(cookie)

     def _get_cookies(self, url):
```
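For reference, the positional arguments map onto the stdlib cookie constructor (version, name, value, port, port_specified, domain, domain_specified, domain_initial_dot, path, path_specified, secure, expires, discard, comment, comment_url, rest), which is what `compat_cookiejar` resolves to. A minimal sketch with the same argument order, using the Python 3 module names:

```python
from http.cookiejar import Cookie, CookieJar  # compat_cookiejar in youtube-dl

def make_cookie(domain, name, value, expire_time=None, port=None,
                path='/', secure=False, discard=False, rest={}):
    return Cookie(
        0, name, value, port, port is not None, domain, True,
        domain.startswith('.'), path, True, secure, expire_time,
        discard, None, None, rest)

jar = CookieJar()
jar.set_cookie(make_cookie('.example.com', 'sid', 'abc123'))
```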
```diff
@@ -116,16 +116,16 @@ class CondeNastIE(InfoExtractor):
         entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
         return self.playlist_result(entries, playlist_title=title)

-    def _extract_video_params(self, webpage):
-        query = {}
-        params = self._search_regex(
-            r'(?s)var params = {(.+?)}[;,]', webpage, 'player params', default=None)
-        if params:
-            query.update({
-                'videoId': self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id'),
-                'playerId': self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id'),
-                'target': self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target'),
-            })
+    def _extract_video_params(self, webpage, display_id):
+        query = self._parse_json(
+            self._search_regex(
+                r'(?s)var\s+params\s*=\s*({.+?})[;,]', webpage, 'player params',
+                default='{}'),
+            display_id, transform_source=js_to_json, fatal=False)
+        if query:
+            query['videoId'] = self._search_regex(
+                r'(?:data-video-id=|currentVideoId\s*=\s*)["\']([\da-f]+)',
+                webpage, 'video id', default=None)
         else:
             params = extract_attributes(self._search_regex(
                 r'(<[^>]+data-js="video-player"[^>]+>)',
@@ -141,17 +141,27 @@ class CondeNastIE(InfoExtractor):
         video_id = params['videoId']

         video_info = None
-        if params.get('playerId'):
-            info_page = self._download_json(
-                'http://player.cnevids.com/player/video.js',
-                video_id, 'Downloading video info', fatal=False, query=params)
-            if info_page:
-                video_info = info_page.get('video')
-            if not video_info:
-                info_page = self._download_webpage(
-                    'http://player.cnevids.com/player/loader.js',
-                    video_id, 'Downloading loader info', query=params)
-        else:
+
+        # New API path
+        query = params.copy()
+        query['embedType'] = 'inline'
+        info_page = self._download_json(
+            'http://player.cnevids.com/embed-api.json', video_id,
+            'Downloading embed info', fatal=False, query=query)
+
+        # Old fallbacks
+        if not info_page:
+            if params.get('playerId'):
+                info_page = self._download_json(
+                    'http://player.cnevids.com/player/video.js', video_id,
+                    'Downloading video info', fatal=False, query=params)
+            if info_page:
+                video_info = info_page.get('video')
+            if not video_info:
+                info_page = self._download_webpage(
+                    'http://player.cnevids.com/player/loader.js',
+                    video_id, 'Downloading loader info', query=params)
+        if not video_info:
             info_page = self._download_webpage(
                 'https://player.cnevids.com/inline/video/%s.js' % video_id,
                 video_id, 'Downloading inline info', query={
@@ -215,7 +225,7 @@ class CondeNastIE(InfoExtractor):
         if url_type == 'series':
             return self._extract_series(url, webpage)
         else:
-            params = self._extract_video_params(webpage)
+            params = self._extract_video_params(webpage, display_id)
            info = self._search_json_ld(
                webpage, display_id, fatal=False)
            info.update(self._extract_video(params))
```
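The rewritten `_extract_video_params` parses the whole `var params = {...}` object through `js_to_json` instead of scraping individual keys with per-field regexes. A standalone sketch of that conversion step, with a made-up page snippet and a deliberately toy stand-in for `youtube_dl.utils.js_to_json` (it only handles the quoting differences this snippet exhibits):

```python
import json
import re

def js_to_json(code):
    # Toy version: single quotes -> double quotes, then quote bare keys.
    code = re.sub(r"'([^']*)'", r'"\1"', code)
    return re.sub(r'(\w+)\s*:', r'"\1":', code)

webpage = "var params = {videoId: '59097cd', playerId: 'cne', target: 'player'};"
raw = re.search(r'var\s+params\s*=\s*({.+?})[;,]', webpage).group(1)
print(json.loads(js_to_json(raw)))
# {'videoId': '59097cd', 'playerId': 'cne', 'target': 'player'}
```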
```diff
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from .youtube import YoutubeIE
 from ..utils import (
     parse_iso8601,
     str_to_int,
@@ -41,11 +42,9 @@ class CrackedIE(InfoExtractor):

         webpage = self._download_webpage(url, video_id)

-        youtube_url = self._search_regex(
-            r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"',
-            webpage, 'youtube url', default=None)
+        youtube_url = YoutubeIE._extract_url(webpage)
         if youtube_url:
-            return self.url_result(youtube_url, 'Youtube')
+            return self.url_result(youtube_url, ie=YoutubeIE.ie_key())

         video_url = self._html_search_regex(
             [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'],
```
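Delegating to `YoutubeIE._extract_url` keeps the embed-matching regexes in one place instead of each extractor carrying its own copy. The calling pattern, sketched as a hypothetical helper so it is self-contained (in the real extractor this runs inside `_real_extract`, where `self` is the extractor instance):

```python
def resolve_youtube_embed(self, webpage):
    # Returns a url_result pinned to the YouTube extractor, or None.
    youtube_url = YoutubeIE._extract_url(webpage)  # None when no embed found
    if youtube_url:
        # Passing ie_key() skips re-probing the URL against every
        # extractor's _VALID_URL.
        return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
    return None
```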
|
@ -325,7 +325,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
|
|
||||||
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||||
IE_NAME = 'dailymotion:playlist'
|
IE_NAME = 'dailymotion:playlist'
|
||||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
|
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>[^/?#&]+)'
|
||||||
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
|
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
|
||||||
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
|
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
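The tightened pattern no longer requires a trailing slash and stops the playlist id at the first URL delimiter. A quick comparison of the two character classes (the playlist id is illustrative):

```python
import re

old = r'/playlist/(?P<id>.+?)/'
new = r'/playlist/(?P<id>[^/?#&]+)'

url = 'https://www.dailymotion.com/playlist/xv5ihb'  # no trailing slash
print(re.search(old, url))              # None: the old pattern misses it
print(re.search(new, url).group('id'))  # 'xv5ihb'
```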
|
@ -7,16 +7,18 @@ import time
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urlparse,
|
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
|
compat_str,
|
||||||
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
USER_AGENTS,
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
unified_strdate,
|
|
||||||
remove_end,
|
remove_end,
|
||||||
|
try_get,
|
||||||
|
unified_strdate,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
|
USER_AGENTS,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -183,11 +185,27 @@ class DPlayItIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
title = remove_end(self._og_search_title(webpage), ' | Dplay')
|
||||||
|
|
||||||
|
video_id = None
|
||||||
|
|
||||||
|
info = self._search_regex(
|
||||||
|
r'playback_json\s*:\s*JSON\.parse\s*\(\s*("(?:\\.|[^"\\])+?")',
|
||||||
|
webpage, 'playback JSON', default=None)
|
||||||
|
if info:
|
||||||
|
for _ in range(2):
|
||||||
|
info = self._parse_json(info, display_id, fatal=False)
|
||||||
|
if not info:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
video_id = try_get(info, lambda x: x['data']['id'])
|
||||||
|
|
||||||
|
if not info:
|
||||||
info_url = self._search_regex(
|
info_url = self._search_regex(
|
||||||
r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
|
r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
|
||||||
webpage, 'video id')
|
webpage, 'info url')
|
||||||
|
|
||||||
title = remove_end(self._og_search_title(webpage), ' | Dplay')
|
video_id = info_url.rpartition('/')[-1]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
info = self._download_json(
|
info = self._download_json(
|
||||||
@ -230,7 +248,7 @@ class DPlayItIE(InfoExtractor):
|
|||||||
season_number = episode_number = upload_date = None
|
season_number = episode_number = upload_date = None
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': info_url.rpartition('/')[-1],
|
'id': compat_str(video_id or display_id),
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
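The page embeds `playback_json` as a JSON string whose content is itself another JSON document, hence the parse-twice loop (a `for _ in range(2)` whose `else` clause only runs if neither pass failed). A standalone illustration of the double decode, with a made-up payload:

```python
import json

# Simulate what the page ships: a JSON string literal that encodes
# another JSON document, i.e. what JSON.parse("...") sees in the browser.
raw = json.dumps(json.dumps({'data': {'id': 12345}}))

info = raw
for _ in range(2):
    info = json.loads(info)  # first pass: string -> string, second: -> dict

print(info['data']['id'])  # 12345
```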
|
@ -39,12 +39,14 @@ from .airmozilla import AirMozillaIE
|
|||||||
from .aljazeera import AlJazeeraIE
|
from .aljazeera import AlJazeeraIE
|
||||||
from .alphaporno import AlphaPornoIE
|
from .alphaporno import AlphaPornoIE
|
||||||
from .amcnetworks import AMCNetworksIE
|
from .amcnetworks import AMCNetworksIE
|
||||||
|
from .americastestkitchen import AmericasTestKitchenIE
|
||||||
from .animeondemand import AnimeOnDemandIE
|
from .animeondemand import AnimeOnDemandIE
|
||||||
from .anitube import AnitubeIE
|
from .anitube import AnitubeIE
|
||||||
from .anvato import AnvatoIE
|
from .anvato import AnvatoIE
|
||||||
from .anysex import AnySexIE
|
from .anysex import AnySexIE
|
||||||
from .aol import AolIE
|
from .aol import AolIE
|
||||||
from .allocine import AllocineIE
|
from .allocine import AllocineIE
|
||||||
|
from .aliexpress import AliExpressLiveIE
|
||||||
from .aparat import AparatIE
|
from .aparat import AparatIE
|
||||||
from .appleconnect import AppleConnectIE
|
from .appleconnect import AppleConnectIE
|
||||||
from .appletrailers import (
|
from .appletrailers import (
|
||||||
@ -148,7 +150,10 @@ from .camdemy import (
|
|||||||
from .camwithher import CamWithHerIE
|
from .camwithher import CamWithHerIE
|
||||||
from .canalplus import CanalplusIE
|
from .canalplus import CanalplusIE
|
||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
from .canvas import CanvasIE
|
from .canvas import (
|
||||||
|
CanvasIE,
|
||||||
|
CanvasEenIE,
|
||||||
|
)
|
||||||
from .carambatv import (
|
from .carambatv import (
|
||||||
CarambaTVIE,
|
CarambaTVIE,
|
||||||
CarambaTVPageIE,
|
CarambaTVPageIE,
|
||||||
@ -186,8 +191,8 @@ from .chirbit import (
|
|||||||
)
|
)
|
||||||
from .cinchcast import CinchcastIE
|
from .cinchcast import CinchcastIE
|
||||||
from .cjsw import CJSWIE
|
from .cjsw import CJSWIE
|
||||||
from .clipfish import ClipfishIE
|
|
||||||
from .cliphunter import CliphunterIE
|
from .cliphunter import CliphunterIE
|
||||||
|
from .clippit import ClippitIE
|
||||||
from .cliprs import ClipRsIE
|
from .cliprs import ClipRsIE
|
||||||
from .clipsyndicate import ClipsyndicateIE
|
from .clipsyndicate import ClipsyndicateIE
|
||||||
from .closertotruth import CloserToTruthIE
|
from .closertotruth import CloserToTruthIE
|
||||||
@ -351,7 +356,12 @@ from .flipagram import FlipagramIE
|
|||||||
from .folketinget import FolketingetIE
|
from .folketinget import FolketingetIE
|
||||||
from .footyroom import FootyRoomIE
|
from .footyroom import FootyRoomIE
|
||||||
from .formula1 import Formula1IE
|
from .formula1 import Formula1IE
|
||||||
from .fourtube import FourTubeIE
|
from .fourtube import (
|
||||||
|
FourTubeIE,
|
||||||
|
PornTubeIE,
|
||||||
|
PornerBrosIE,
|
||||||
|
FuxIE,
|
||||||
|
)
|
||||||
from .fox import FOXIE
|
from .fox import FOXIE
|
||||||
from .fox9 import FOX9IE
|
from .fox9 import FOX9IE
|
||||||
from .foxgay import FoxgayIE
|
from .foxgay import FoxgayIE
|
||||||
@ -476,6 +486,7 @@ from .jove import JoveIE
|
|||||||
from .joj import JojIE
|
from .joj import JojIE
|
||||||
from .jwplatform import JWPlatformIE
|
from .jwplatform import JWPlatformIE
|
||||||
from .jpopsukitv import JpopsukiIE
|
from .jpopsukitv import JpopsukiIE
|
||||||
|
from .kakao import KakaoIE
|
||||||
from .kaltura import KalturaIE
|
from .kaltura import KalturaIE
|
||||||
from .kamcord import KamcordIE
|
from .kamcord import KamcordIE
|
||||||
from .kanalplay import KanalPlayIE
|
from .kanalplay import KanalPlayIE
|
||||||
@ -504,6 +515,7 @@ from .la7 import LA7IE
|
|||||||
from .laola1tv import (
|
from .laola1tv import (
|
||||||
Laola1TvEmbedIE,
|
Laola1TvEmbedIE,
|
||||||
Laola1TvIE,
|
Laola1TvIE,
|
||||||
|
ITTFIE,
|
||||||
)
|
)
|
||||||
from .lci import LCIIE
|
from .lci import LCIIE
|
||||||
from .lcp import (
|
from .lcp import (
|
||||||
@ -531,7 +543,10 @@ from .limelight import (
|
|||||||
LimelightChannelListIE,
|
LimelightChannelListIE,
|
||||||
)
|
)
|
||||||
from .litv import LiTVIE
|
from .litv import LiTVIE
|
||||||
from .liveleak import LiveLeakIE
|
from .liveleak import (
|
||||||
|
LiveLeakIE,
|
||||||
|
LiveLeakEmbedIE,
|
||||||
|
)
|
||||||
from .livestream import (
|
from .livestream import (
|
||||||
LivestreamIE,
|
LivestreamIE,
|
||||||
LivestreamOriginalIE,
|
LivestreamOriginalIE,
|
||||||
@ -554,10 +569,12 @@ from .mangomolo import (
|
|||||||
MangomoloVideoIE,
|
MangomoloVideoIE,
|
||||||
MangomoloLiveIE,
|
MangomoloLiveIE,
|
||||||
)
|
)
|
||||||
|
from .manyvids import ManyVidsIE
|
||||||
from .matchtv import MatchTVIE
|
from .matchtv import MatchTVIE
|
||||||
from .mdr import MDRIE
|
from .mdr import MDRIE
|
||||||
from .mediaset import MediasetIE
|
from .mediaset import MediasetIE
|
||||||
from .medici import MediciIE
|
from .medici import MediciIE
|
||||||
|
from .megaphone import MegaphoneIE
|
||||||
from .meipai import MeipaiIE
|
from .meipai import MeipaiIE
|
||||||
from .melonvod import MelonVODIE
|
from .melonvod import MelonVODIE
|
||||||
from .meta import METAIE
|
from .meta import METAIE
|
||||||
@ -584,7 +601,6 @@ from .mixcloud import (
|
|||||||
)
|
)
|
||||||
from .mlb import MLBIE
|
from .mlb import MLBIE
|
||||||
from .mnet import MnetIE
|
from .mnet import MnetIE
|
||||||
from .mpora import MporaIE
|
|
||||||
from .moevideo import MoeVideoIE
|
from .moevideo import MoeVideoIE
|
||||||
from .mofosex import MofosexIE
|
from .mofosex import MofosexIE
|
||||||
from .mojvideo import MojvideoIE
|
from .mojvideo import MojvideoIE
|
||||||
@ -757,6 +773,7 @@ from .ora import OraTVIE
|
|||||||
from .orf import (
|
from .orf import (
|
||||||
ORFTVthekIE,
|
ORFTVthekIE,
|
||||||
ORFFM4IE,
|
ORFFM4IE,
|
||||||
|
ORFFM4StoryIE,
|
||||||
ORFOE1IE,
|
ORFOE1IE,
|
||||||
ORFIPTVIE,
|
ORFIPTVIE,
|
||||||
)
|
)
|
||||||
@ -796,6 +813,7 @@ from .polskieradio import (
|
|||||||
PolskieRadioIE,
|
PolskieRadioIE,
|
||||||
PolskieRadioCategoryIE,
|
PolskieRadioCategoryIE,
|
||||||
)
|
)
|
||||||
|
from .popcorntv import PopcornTVIE
|
||||||
from .porn91 import Porn91IE
|
from .porn91 import Porn91IE
|
||||||
from .porncom import PornComIE
|
from .porncom import PornComIE
|
||||||
from .pornflip import PornFlipIE
|
from .pornflip import PornFlipIE
|
||||||
@ -841,6 +859,10 @@ from .rai import (
|
|||||||
from .rbmaradio import RBMARadioIE
|
from .rbmaradio import RBMARadioIE
|
||||||
from .rds import RDSIE
|
from .rds import RDSIE
|
||||||
from .redbulltv import RedBullTVIE
|
from .redbulltv import RedBullTVIE
|
||||||
|
from .reddit import (
|
||||||
|
RedditIE,
|
||||||
|
RedditRIE,
|
||||||
|
)
|
||||||
from .redtube import RedTubeIE
|
from .redtube import RedTubeIE
|
||||||
from .regiotv import RegioTVIE
|
from .regiotv import RegioTVIE
|
||||||
from .rentv import (
|
from .rentv import (
|
||||||
@ -884,6 +906,7 @@ from .rutube import (
|
|||||||
RutubeEmbedIE,
|
RutubeEmbedIE,
|
||||||
RutubeMovieIE,
|
RutubeMovieIE,
|
||||||
RutubePersonIE,
|
RutubePersonIE,
|
||||||
|
RutubePlaylistIE,
|
||||||
)
|
)
|
||||||
from .rutv import RUTVIE
|
from .rutv import RUTVIE
|
||||||
from .ruutu import RuutuIE
|
from .ruutu import RuutuIE
|
||||||
@ -934,8 +957,9 @@ from .soundcloud import (
|
|||||||
SoundcloudIE,
|
SoundcloudIE,
|
||||||
SoundcloudSetIE,
|
SoundcloudSetIE,
|
||||||
SoundcloudUserIE,
|
SoundcloudUserIE,
|
||||||
|
SoundcloudTrackStationIE,
|
||||||
SoundcloudPlaylistIE,
|
SoundcloudPlaylistIE,
|
||||||
SoundcloudSearchIE
|
SoundcloudSearchIE,
|
||||||
)
|
)
|
||||||
from .soundgasm import (
|
from .soundgasm import (
|
||||||
SoundgasmIE,
|
SoundgasmIE,
|
||||||
@ -993,7 +1017,6 @@ from .teachertube import (
|
|||||||
)
|
)
|
||||||
from .teachingchannel import TeachingChannelIE
|
from .teachingchannel import TeachingChannelIE
|
||||||
from .teamcoco import TeamcocoIE
|
from .teamcoco import TeamcocoIE
|
||||||
from .teamfourstar import TeamFourStarIE
|
|
||||||
from .techtalks import TechTalksIE
|
from .techtalks import TechTalksIE
|
||||||
from .ted import TEDIE
|
from .ted import TEDIE
|
||||||
from .tele13 import Tele13IE
|
from .tele13 import Tele13IE
|
||||||
@ -1222,6 +1245,7 @@ from .vodlocker import VodlockerIE
|
|||||||
from .vodpl import VODPlIE
|
from .vodpl import VODPlIE
|
||||||
from .vodplatform import VODPlatformIE
|
from .vodplatform import VODPlatformIE
|
||||||
from .voicerepublic import VoiceRepublicIE
|
from .voicerepublic import VoiceRepublicIE
|
||||||
|
from .voot import VootIE
|
||||||
from .voxmedia import VoxMediaIE
|
from .voxmedia import VoxMediaIE
|
||||||
from .vporn import VpornIE
|
from .vporn import VpornIE
|
||||||
from .vrt import VRTIE
|
from .vrt import VRTIE
|
||||||
@ -1243,6 +1267,7 @@ from .washingtonpost import (
|
|||||||
WashingtonPostArticleIE,
|
WashingtonPostArticleIE,
|
||||||
)
|
)
|
||||||
from .wat import WatIE
|
from .wat import WatIE
|
||||||
|
from .watchbox import WatchBoxIE
|
||||||
from .watchindianporn import WatchIndianPornIE
|
from .watchindianporn import WatchIndianPornIE
|
||||||
from .wdr import (
|
from .wdr import (
|
||||||
WDRIE,
|
WDRIE,
|
||||||
@ -1297,6 +1322,7 @@ from .yandexmusic import (
|
|||||||
YandexMusicAlbumIE,
|
YandexMusicAlbumIE,
|
||||||
YandexMusicPlaylistIE,
|
YandexMusicPlaylistIE,
|
||||||
)
|
)
|
||||||
|
from .yandexdisk import YandexDiskIE
|
||||||
from .yesjapan import YesJapanIE
|
from .yesjapan import YesJapanIE
|
||||||
from .yinyuetai import YinYueTaiIE
|
from .yinyuetai import YinYueTaiIE
|
||||||
from .ynet import YnetIE
|
from .ynet import YnetIE
|
||||||
|
```diff
@@ -3,39 +3,22 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..compat import compat_urlparse
 from ..utils import (
     parse_duration,
     parse_iso8601,
-    sanitized_Request,
     str_to_int,
 )


-class FourTubeIE(InfoExtractor):
-    IE_NAME = '4tube'
-    _VALID_URL = r'https?://(?:www\.)?4tube\.com/videos/(?P<id>\d+)'
-
-    _TEST = {
-        'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
-        'md5': '6516c8ac63b03de06bc8eac14362db4f',
-        'info_dict': {
-            'id': '209733',
-            'ext': 'mp4',
-            'title': 'Hot Babe Holly Michaels gets her ass stuffed by black',
-            'uploader': 'WCP Club',
-            'uploader_id': 'wcp-club',
-            'upload_date': '20131031',
-            'timestamp': 1383263892,
-            'duration': 583,
-            'view_count': int,
-            'like_count': int,
-            'categories': list,
-            'age_limit': 18,
-        }
-    }
-
+class FourTubeBaseIE(InfoExtractor):
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        kind, video_id, display_id = mobj.group('kind', 'id', 'display_id')
+
+        if kind == 'm' or not display_id:
+            url = self._URL_TEMPLATE % video_id
+
         webpage = self._download_webpage(url, video_id)

         title = self._html_search_meta('name', webpage)
@@ -43,10 +26,10 @@ class FourTubeIE(InfoExtractor):
             'uploadDate', webpage))
         thumbnail = self._html_search_meta('thumbnailUrl', webpage)
         uploader_id = self._html_search_regex(
-            r'<a class="item-to-subscribe" href="[^"]+/channels/([^/"]+)" title="Go to [^"]+ page">',
+            r'<a class="item-to-subscribe" href="[^"]+/(?:channel|user)s?/([^/"]+)" title="Go to [^"]+ page">',
             webpage, 'uploader id', fatal=False)
         uploader = self._html_search_regex(
-            r'<a class="item-to-subscribe" href="[^"]+/channels/[^/"]+" title="Go to ([^"]+) page">',
+            r'<a class="item-to-subscribe" href="[^"]+/(?:channel|user)s?/[^/"]+" title="Go to ([^"]+) page">',
             webpage, 'uploader', fatal=False)

         categories_html = self._search_regex(
@@ -60,10 +43,10 @@ class FourTubeIE(InfoExtractor):

         view_count = str_to_int(self._search_regex(
             r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([0-9,]+)">',
-            webpage, 'view count', fatal=False))
+            webpage, 'view count', default=None))
         like_count = str_to_int(self._search_regex(
             r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserLikes:([0-9,]+)">',
-            webpage, 'like count', fatal=False))
+            webpage, 'like count', default=None))
         duration = parse_duration(self._html_search_meta('duration', webpage))

         media_id = self._search_regex(
@@ -87,12 +70,12 @@ class FourTubeIE(InfoExtractor):

         token_url = 'https://tkn.kodicdn.com/{0}/desktop/{1}'.format(
             media_id, '+'.join(sources))
-        headers = {
-            b'Content-Type': b'application/x-www-form-urlencoded',
-            b'Origin': b'https://www.4tube.com',
-        }
-        token_req = sanitized_Request(token_url, b'{}', headers)
-        tokens = self._download_json(token_req, video_id)
+
+        parsed_url = compat_urlparse.urlparse(url)
+        tokens = self._download_json(token_url, video_id, data=b'', headers={
+            'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname),
+            'Referer': url,
+        })
         formats = [{
             'url': tokens[format]['token'],
             'format_id': format + 'p',
@@ -115,3 +98,126 @@ class FourTubeIE(InfoExtractor):
             'duration': duration,
             'age_limit': 18,
         }
+
+
+class FourTubeIE(FourTubeBaseIE):
+    IE_NAME = '4tube'
+    _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?4tube\.com/(?:videos|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
+    _URL_TEMPLATE = 'https://www.4tube.com/videos/%s/video'
+    _TESTS = [{
+        'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
+        'md5': '6516c8ac63b03de06bc8eac14362db4f',
+        'info_dict': {
+            'id': '209733',
+            'ext': 'mp4',
+            'title': 'Hot Babe Holly Michaels gets her ass stuffed by black',
+            'uploader': 'WCP Club',
+            'uploader_id': 'wcp-club',
+            'upload_date': '20131031',
+            'timestamp': 1383263892,
+            'duration': 583,
+            'view_count': int,
+            'like_count': int,
+            'categories': list,
+            'age_limit': 18,
+        },
+    }, {
+        'url': 'http://www.4tube.com/embed/209733',
+        'only_matching': True,
+    }, {
+        'url': 'http://m.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
+        'only_matching': True,
+    }]
+
+
+class FuxIE(FourTubeBaseIE):
+    _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?fux\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
+    _URL_TEMPLATE = 'https://www.fux.com/video/%s/video'
+    _TESTS = [{
+        'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow',
+        'info_dict': {
+            'id': '195359',
+            'ext': 'mp4',
+            'title': 'Awesome fucking in the kitchen ends with cum swallow',
+            'uploader': 'alenci2342',
+            'uploader_id': 'alenci2342',
+            'upload_date': '20131230',
+            'timestamp': 1388361660,
+            'duration': 289,
+            'view_count': int,
+            'like_count': int,
+            'categories': list,
+            'age_limit': 18,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.fux.com/embed/195359',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow',
+        'only_matching': True,
+    }]
+
+
+class PornTubeIE(FourTubeBaseIE):
+    _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?porntube\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
+    _URL_TEMPLATE = 'https://www.porntube.com/videos/video_%s'
+    _TESTS = [{
+        'url': 'https://www.porntube.com/videos/teen-couple-doing-anal_7089759',
+        'info_dict': {
+            'id': '7089759',
+            'ext': 'mp4',
+            'title': 'Teen couple doing anal',
+            'uploader': 'Alexy',
+            'uploader_id': 'Alexy',
+            'upload_date': '20150606',
+            'timestamp': 1433595647,
+            'duration': 5052,
+            'view_count': int,
+            'like_count': int,
+            'categories': list,
+            'age_limit': 18,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.porntube.com/embed/7089759',
+        'only_matching': True,
+    }, {
+        'url': 'https://m.porntube.com/videos/teen-couple-doing-anal_7089759',
+        'only_matching': True,
+    }]
+
+
+class PornerBrosIE(FourTubeBaseIE):
+    _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?pornerbros\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
+    _URL_TEMPLATE = 'https://www.pornerbros.com/videos/video_%s'
+    _TESTS = [{
+        'url': 'https://www.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369',
+        'md5': '6516c8ac63b03de06bc8eac14362db4f',
+        'info_dict': {
+            'id': '181369',
+            'ext': 'mp4',
+            'title': 'Skinny brunette takes big cock down her anal hole',
+            'uploader': 'PornerBros HD',
+            'uploader_id': 'pornerbros-hd',
+            'upload_date': '20130130',
+            'timestamp': 1359527401,
+            'duration': 1224,
+            'view_count': int,
+            'categories': list,
+            'age_limit': 18,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.pornerbros.com/embed/181369',
+        'only_matching': True,
+    }, {
+        'url': 'https://m.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369',
+        'only_matching': True,
+    }]
```
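The refactor moves all extraction logic into `FourTubeBaseIE` and leaves each site class with only a `_VALID_URL` and a `_URL_TEMPLATE`; mobile (`m.`) and embed URLs are canonicalized through the template before the page is fetched. The shape of that pattern, reduced to a toy with a hypothetical site:

```python
import re

class BaseSite(object):
    def canonical_url(self, url):
        mobj = re.match(self._VALID_URL, url)
        kind, video_id, display_id = mobj.group('kind', 'id', 'display_id')
        # Mobile and embed URLs serve a stripped page; rewrite them to the
        # full desktop URL before scraping.
        if kind == 'm' or not display_id:
            return self._URL_TEMPLATE % video_id
        return url

class ToySite(BaseSite):
    _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?example\.com/(?:videos|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
    _URL_TEMPLATE = 'https://www.example.com/videos/%s/video'

print(ToySite().canonical_url('http://m.example.com/videos/42/some-title'))
# https://www.example.com/videos/42/video
```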
```diff
@@ -3,56 +3,99 @@ from __future__ import unicode_literals

 from .adobepass import AdobePassIE
 from ..utils import (
-    smuggle_url,
-    update_url_query,
+    int_or_none,
+    parse_age_limit,
+    parse_duration,
+    try_get,
+    unified_timestamp,
 )


 class FOXIE(AdobePassIE):
-    _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'http://www.fox.com/watch/255180355939/7684182528',
+    _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)'
+    _TESTS = [{
+        # clip
+        'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/',
         'md5': 'ebd296fcc41dd4b19f8115d8461a3165',
         'info_dict': {
-            'id': '255180355939',
+            'id': '4b765a60490325103ea69888fb2bd4e8',
             'ext': 'mp4',
-            'title': 'Official Trailer: Gotham',
-            'description': 'Tracing the rise of the great DC Comics Super-Villains and vigilantes, Gotham reveals an entirely new chapter that has never been told.',
-            'duration': 129,
-            'timestamp': 1400020798,
-            'upload_date': '20140513',
-            'uploader': 'NEWA-FNG-FOXCOM',
+            'title': 'Aftermath: Bruce Wayne Develops Into The Dark Knight',
+            'description': 'md5:549cd9c70d413adb32ce2a779b53b486',
+            'duration': 102,
+            'timestamp': 1504291893,
+            'upload_date': '20170901',
+            'creator': 'FOX',
+            'series': 'Gotham',
         },
-        'add_ie': ['ThePlatform'],
-    }
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # episode, geo-restricted
+        'url': 'https://www.fox.com/watch/087036ca7f33c8eb79b08152b4dd75c1/',
+        'only_matching': True,
+    }, {
+        # episode, geo-restricted, tv provided required
+        'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/',
+        'only_matching': True,
+    }]

     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)

-        settings = self._parse_json(self._search_regex(
-            r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
-            webpage, 'drupal settings'), video_id)
-        fox_pdk_player = settings['fox_pdk_player']
-        release_url = fox_pdk_player['release_url']
-        query = {
-            'mbr': 'true',
-            'switch': 'http'
-        }
-        if fox_pdk_player.get('access') == 'locked':
-            ap_p = settings['foxAdobePassProvider']
-            rating = ap_p.get('videoRating')
-            if rating == 'n/a':
-                rating = None
-            resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating)
-            query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource)
-
-        info = self._search_json_ld(webpage, video_id, fatal=False)
-        info.update({
-            '_type': 'url_transparent',
-            'ie_key': 'ThePlatform',
-            'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
-            'id': video_id,
-        })
-
-        return info
+        video = self._download_json(
+            'https://api.fox.com/fbc-content/v1_4/video/%s' % video_id,
+            video_id, headers={
+                'apikey': 'abdcbed02c124d393b39e818a4312055',
+                'Content-Type': 'application/json',
+                'Referer': url,
+            })
+
+        title = video['name']
+
+        m3u8_url = self._download_json(
+            video['videoRelease']['url'], video_id)['playURL']
+
+        formats = self._extract_m3u8_formats(
+            m3u8_url, video_id, 'mp4',
+            entry_protocol='m3u8_native', m3u8_id='hls')
+        self._sort_formats(formats)
+
+        description = video.get('description')
+        duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
+            video.get('duration')) or parse_duration(video.get('duration'))
+        timestamp = unified_timestamp(video.get('datePublished'))
+        age_limit = parse_age_limit(video.get('contentRating'))
+
+        data = try_get(
+            video, lambda x: x['trackingData']['properties'], dict) or {}
+
+        creator = data.get('brand') or data.get('network') or video.get('network')
+
+        series = video.get('seriesName') or data.get(
+            'seriesName') or data.get('show')
+        season_number = int_or_none(video.get('seasonNumber'))
+        episode = video.get('name')
+        episode_number = int_or_none(video.get('episodeNumber'))
+        release_year = int_or_none(video.get('releaseYear'))
+
+        if data.get('authRequired'):
+            # TODO: AP
+            pass
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'timestamp': timestamp,
+            'age_limit': age_limit,
+            'creator': creator,
+            'series': series,
+            'season_number': season_number,
+            'episode': episode,
+            'episode_number': episode_number,
+            'release_year': release_year,
+            'formats': formats,
+        }
```
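The extractor now talks to the fbc-content API in two steps: fetch the video metadata document, then follow its `videoRelease.url` to get the actual HLS `playURL`. A compressed sketch of that flow, with the endpoint and apikey taken from the diff above; error handling and the geo-restricted/auth cases are omitted, so treat it as an outline rather than a working client:

```python
import json
from urllib.request import Request, urlopen

def fetch_fox_playlist_url(video_id, page_url):
    # Step 1: per-video metadata document.
    req = Request(
        'https://api.fox.com/fbc-content/v1_4/video/%s' % video_id,
        headers={'apikey': 'abdcbed02c124d393b39e818a4312055',
                 'Content-Type': 'application/json',
                 'Referer': page_url})
    video = json.load(urlopen(req))
    # Step 2: the release document referenced by the metadata carries the
    # m3u8 master playlist URL under 'playURL'.
    release = json.load(urlopen(Request(video['videoRelease']['url'])))
    return video['name'], release['playURL']
```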
```diff
@@ -22,6 +22,8 @@ from ..utils import (
     HEADRequest,
     is_html,
     js_to_json,
+    KNOWN_EXTENSIONS,
+    mimetype2ext,
     orderedSet,
     sanitized_Request,
     smuggle_url,
@@ -97,6 +99,8 @@ from .washingtonpost import WashingtonPostIE
 from .wistia import WistiaIE
 from .mediaset import MediasetIE
 from .joj import JojIE
+from .megaphone import MegaphoneIE
+from .vzaar import VzaarIE


 class GenericIE(InfoExtractor):
@@ -574,6 +578,19 @@ class GenericIE(InfoExtractor):
             },
             'skip': 'movie expired',
         },
+        # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js
+        {
+            'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/',
+            'info_dict': {
+                'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2',
+                'ext': 'mp4',
+                'title': 'Steampunk Fest Comes to Honesdale',
+                'duration': 43.276,
+            },
+            'params': {
+                'skip_download': True,
+            }
+        },
         # embed.ly video
         {
             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
@@ -1115,6 +1132,35 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             }
         },
+        {
+            # Video.js embed, multiple formats
+            'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
+            'info_dict': {
+                'id': 'yygqldloqIk',
+                'ext': 'mp4',
+                'title': 'SolidWorks. Урок 6 Настройка чертежа',
+                'description': 'md5:baf95267792646afdbf030e4d06b2ab3',
+                'upload_date': '20130314',
+                'uploader': 'PROстое3D',
+                'uploader_id': 'PROstoe3D',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            # Video.js embed, single format
+            'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=',
+            'info_dict': {
+                'id': 'watch',
+                'ext': 'mp4',
+                'title': 'Step 1 - Good Foundation',
+                'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
         # rtl.nl embed
         {
             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
@@ -1504,14 +1550,27 @@ class GenericIE(InfoExtractor):
         # LiveLeak embed
         {
             'url': 'http://www.wykop.pl/link/3088787/',
-            'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
+            'md5': '7619da8c820e835bef21a1efa2a0fc71',
             'info_dict': {
                 'id': '874_1459135191',
                 'ext': 'mp4',
                 'title': 'Man shows poor quality of new apartment building',
                 'description': 'The wall is like a sand pile.',
                 'uploader': 'Lake8737',
-            }
+            },
+            'add_ie': [LiveLeakIE.ie_key()],
+        },
+        # Another LiveLeak embed pattern (#13336)
+        {
+            'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
+            'info_dict': {
+                'id': '2eb_1496309988',
+                'ext': 'mp4',
+                'title': 'Thief robs place where everyone was armed',
+                'description': 'md5:694d73ee79e535953cf2488562288eee',
+                'uploader': 'brazilwtf',
+            },
+            'add_ie': [LiveLeakIE.ie_key()],
         },
         # Duplicated embedded video URLs
         {
@@ -1770,6 +1829,21 @@ class GenericIE(InfoExtractor):
             },
             'playlist_mincount': 5,
         },
+        {
+            # Limelight embed (LimelightPlayerUtil.embed)
+            'url': 'https://tv5.ca/videos?v=xuu8qowr291ri',
+            'info_dict': {
+                'id': '95d035dc5c8a401588e9c0e6bd1e9c92',
+                'ext': 'mp4',
+                'title': '07448641',
+                'timestamp': 1499890639,
+                'upload_date': '20170712',
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'add_ie': ['LimelightMedia'],
+        },
         {
             'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
             'info_dict': {
@@ -1826,6 +1900,25 @@ class GenericIE(InfoExtractor):
                 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
             },
         },
+        {
+            # vzaar embed
+            'url': 'http://help.vzaar.com/article/165-embedding-video',
+            'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
+            'info_dict': {
+                'id': '8707641',
+                'ext': 'mp4',
+                'title': 'Building A Business Online: Principal Chairs Q & A',
+            },
+        },
+        {
+            # multiple HTML5 videos on one page
+            'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
+            'info_dict': {
+                'id': 'keyscenarios',
+                'title': 'Rescue Kit 14 Free Edition - Getting started',
+            },
+            'playlist_count': 4,
+        }
         # {
         #     # TODO: find another test
         #     # http://schema.org/VideoObject
@@ -1975,7 +2068,7 @@ class GenericIE(InfoExtractor):

         if head_response is not False:
             # Check for redirect
-            new_url = head_response.geturl()
+            new_url = compat_str(head_response.geturl())
             if url != new_url:
                 self.report_following_redirect(new_url)
                 if force_videoid:
@@ -2076,7 +2169,7 @@ class GenericIE(InfoExtractor):
             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
                 info_dict['formats'] = self._parse_mpd_formats(
                     doc, video_id,
-                    mpd_base_url=full_response.geturl().rpartition('/')[0],
+                    mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],
                     mpd_url=url)
                 self._sort_formats(info_dict['formats'])
                 return info_dict
@@ -2190,36 +2283,11 @@ class GenericIE(InfoExtractor):
         if vid_me_embed_url is not None:
             return self.url_result(vid_me_embed_url, 'Vidme')

-        # Look for embedded YouTube player
-        matches = re.findall(r'''(?x)
-            (?:
-                <iframe[^>]+?src=|
-                data-video-url=|
-                <embed[^>]+?src=|
-                embedSWF\(?:\s*|
-                <object[^>]+data=|
-                new\s+SWFObject\(
-            )
-            (["\'])
-            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
-            (?:embed|v|p)/.+?)
-            \1''', webpage)
-        if matches:
-            return self.playlist_from_matches(
-                matches, video_id, video_title, lambda m: unescapeHTML(m[1]))
-
-        # Look for lazyYT YouTube embed
-        matches = re.findall(
-            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
-        if matches:
-            return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m))
-
-        # Look for Wordpress "YouTube Video Importer" plugin
-        matches = re.findall(r'''(?x)<div[^>]+
-            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
-            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
-        if matches:
-            return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1])
+        # Look for YouTube embeds
+        youtube_urls = YoutubeIE._extract_urls(webpage)
+        if youtube_urls:
+            return self.playlist_from_matches(
+                youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())

         matches = DailymotionIE._extract_urls(webpage)
         if matches:
```
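Consolidating all YouTube embed patterns into `YoutubeIE._extract_urls` means the generic extractor picks up every embed variant (iframe, lazyYT, the Wordpress importer plugin, and so on) from one helper and returns them as a playlist in a single step. The calling shape, written as a hypothetical helper so it is self-contained:

```python
def extract_youtube_embeds(self, webpage, video_id, video_title):
    # Mirrors the new code path in GenericIE._real_extract.
    youtube_urls = YoutubeIE._extract_urls(webpage)  # list of URLs, possibly empty
    if youtube_urls:
        return self.playlist_from_matches(
            youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
    return None
```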
```diff
@@ -2292,6 +2360,7 @@ class GenericIE(InfoExtractor):
         # Look for Ooyala videos
         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
+                re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
         if mobj is not None:
@@ -2716,9 +2785,9 @@ class GenericIE(InfoExtractor):
                 self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())

         # Look for LiveLeak embeds
-        liveleak_url = LiveLeakIE._extract_url(webpage)
-        if liveleak_url:
-            return self.url_result(liveleak_url, 'LiveLeak')
+        liveleak_urls = LiveLeakIE._extract_urls(webpage)
+        if liveleak_urls:
+            return self.playlist_from_matches(liveleak_urls, video_id, video_title)

         # Look for 3Q SDN embeds
         threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
@@ -2790,6 +2859,18 @@ class GenericIE(InfoExtractor):
             return self.playlist_from_matches(
                 joj_urls, video_id, video_title, ie=JojIE.ie_key())

+        # Look for megaphone.fm embeds
+        mpfn_urls = MegaphoneIE._extract_urls(webpage)
+        if mpfn_urls:
+            return self.playlist_from_matches(
+                mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key())
+
+        # Look for vzaar embeds
+        vzaar_urls = VzaarIE._extract_urls(webpage)
+        if vzaar_urls:
+            return self.playlist_from_matches(
+                vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
+
         def merge_dicts(dict1, dict2):
             merged = {}
             for k, v in dict1.items():
@@ -2805,22 +2886,23 @@ class GenericIE(InfoExtractor):
                     merged[k] = v
             return merged

-        # Looking for http://schema.org/VideoObject
-        json_ld = self._search_json_ld(
-            webpage, video_id, default={}, expected_type='VideoObject')
-        if json_ld.get('url'):
-            return merge_dicts(json_ld, info_dict)
-
         # Look for HTML5 media
         entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
         if entries:
-            for entry in entries:
-                entry.update({
+            if len(entries) == 1:
+                entries[0].update({
                     'id': video_id,
                     'title': video_title,
                 })
+            else:
+                for num, entry in enumerate(entries, start=1):
+                    entry.update({
+                        'id': '%s-%s' % (video_id, num),
+                        'title': '%s (%d)' % (video_title, num),
+                    })
+            for entry in entries:
                 self._sort_formats(entry['formats'])
-            return self.playlist_result(entries)
+            return self.playlist_result(entries, video_id, video_title)

         jwplayer_data = self._find_jwplayer_data(
             webpage, video_id, transform_source=js_to_json)
```
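With more than one HTML5 video on a page, each entry now gets a numbered id and title derived from the page's own, so entries no longer collide (this is what the new paragon-software test with `playlist_count: 4` exercises). The renaming logic in isolation:

```python
def label_entries(entries, video_id, video_title):
    # A single video keeps the page id/title; several get '-1', '-2', ... suffixes.
    if len(entries) == 1:
        entries[0].update({'id': video_id, 'title': video_title})
    else:
        for num, entry in enumerate(entries, start=1):
            entry.update({
                'id': '%s-%s' % (video_id, num),
                'title': '%s (%d)' % (video_title, num),
            })
    return entries

print(label_entries([{}, {}], 'keyscenarios', 'Getting started'))
# [{'id': 'keyscenarios-1', 'title': 'Getting started (1)'},
#  {'id': 'keyscenarios-2', 'title': 'Getting started (2)'}]
```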
```diff
@@ -2829,6 +2911,52 @@ class GenericIE(InfoExtractor):
                 jwplayer_data, video_id, require_title=False, base_url=url)
             return merge_dicts(info, info_dict)

+        # Video.js embed
+        mobj = re.search(
+            r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
+            webpage)
+        if mobj is not None:
+            sources = self._parse_json(
+                mobj.group(1), video_id, transform_source=js_to_json,
+                fatal=False) or []
+            if not isinstance(sources, list):
+                sources = [sources]
+            formats = []
+            for source in sources:
+                src = source.get('src')
+                if not src or not isinstance(src, compat_str):
+                    continue
+                src = compat_urlparse.urljoin(url, src)
+                src_type = source.get('type')
+                if isinstance(src_type, compat_str):
+                    src_type = src_type.lower()
+                ext = determine_ext(src).lower()
+                if src_type == 'video/youtube':
+                    return self.url_result(src, YoutubeIE.ie_key())
+                if src_type == 'application/dash+xml' or ext == 'mpd':
+                    formats.extend(self._extract_mpd_formats(
+                        src, video_id, mpd_id='dash', fatal=False))
+                elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        src, video_id, 'mp4', entry_protocol='m3u8_native',
+                        m3u8_id='hls', fatal=False))
+                else:
+                    formats.append({
+                        'url': src,
+                        'ext': (mimetype2ext(src_type) or
+                                ext if ext in KNOWN_EXTENSIONS else 'mp4'),
+                    })
+            if formats:
+                self._sort_formats(formats)
+                info_dict['formats'] = formats
+                return info_dict
+
+        # Looking for http://schema.org/VideoObject
+        json_ld = self._search_json_ld(
+            webpage, video_id, default={}, expected_type='VideoObject')
+        if json_ld.get('url'):
+            return merge_dicts(json_ld, info_dict)
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True
```
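The new Video.js path scrapes the argument of `player.src(...)`, which may be a single source object or a list of them, and dispatches on the declared MIME type before falling back to the URL extension. A standalone sketch of the capture and dispatch, with a fabricated page and plain `json.loads` standing in for the `js_to_json` round-trip:

```python
import json
import re

webpage = """
videojs('player');
player.src([
    {"src": "https://cdn.example.com/clip.mpd", "type": "application/dash+xml"},
    {"src": "https://cdn.example.com/clip.m3u8", "type": "application/x-mpegurl"}
]);
"""

mobj = re.search(r"(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;", webpage)
sources = json.loads(mobj.group(1))  # the real code runs js_to_json first
if not isinstance(sources, list):
    sources = [sources]
for source in sources:
    kind = (source.get('type') or '').lower()
    print(source['src'], '->', 'dash' if kind == 'application/dash+xml'
          else 'hls' if kind == 'application/x-mpegurl' else 'progressive')
```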
```diff
@@ -2916,7 +3044,7 @@ class GenericIE(InfoExtractor):
         # be supported by youtube-dl thus this is checked the very last (see
         # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
         embed_url = self._html_search_meta('twitter:player', webpage, default=None)
-        if embed_url:
+        if embed_url and embed_url != url:
             return self.url_result(embed_url)

         if not found:
```

```diff
@@ -11,7 +11,7 @@ from ..utils import (


 class GfycatIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/)?(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P<id>[^/?#]+)'
     _TESTS = [{
         'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
         'info_dict': {
@@ -44,6 +44,9 @@ class GfycatIE(InfoExtractor):
             'categories': list,
             'age_limit': 0,
         }
+    }, {
+        'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull',
+        'only_matching': True
     }]

     def _real_extract(self, url):
```
```diff
@@ -4,26 +4,61 @@ import re

 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
     ExtractorError,
     int_or_none,
     lowercase_escape,
+    update_url_query,
 )


 class GoogleDriveIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:docs|drive)\.google\.com/
+                            (?:
+                                (?:uc|open)\?.*?id=|
+                                file/d/
+                            )|
+                            video\.google\.com/get_player\?.*?docid=
+                        )
+                        (?P<id>[a-zA-Z0-9_-]{28,})
+                    '''
     _TESTS = [{
         'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
-        'md5': 'd109872761f7e7ecf353fa108c0dbe1e',
+        'md5': '5c602afbbf2c1db91831f5d82f678554',
         'info_dict': {
             'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
             'ext': 'mp4',
             'title': 'Big Buck Bunny.mp4',
             'duration': 45,
         }
+    }, {
+        # video can't be watched anonymously due to view count limit reached,
+        # but can be downloaded (see https://github.com/rg3/youtube-dl/issues/14046)
+        'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
+        'md5': 'bfbd670d03a470bb1e6d4a257adec12e',
+        'info_dict': {
+            'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ',
+            'ext': 'mp4',
+            'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4',
+        }
     }, {
         # video id is longer than 28 characters
         'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
+        'info_dict': {
+            'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ',
+            'ext': 'mp4',
+            'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4',
+            'duration': 189,
+        },
+        'only_matching': True,
+    }, {
+        'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
+        'only_matching': True,
+    }, {
+        'url': 'https://drive.google.com/uc?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
         'only_matching': True,
     }]
     _FORMATS_EXT = {
@@ -44,6 +79,13 @@ class GoogleDriveIE(InfoExtractor):
         '46': 'webm',
         '59': 'mp4',
     }
+    _BASE_URL_CAPTIONS = 'https://drive.google.com/timedtext'
+    _CAPTIONS_ENTRY_TAG = {
+        'subtitles': 'track',
+        'automatic_captions': 'target',
+    }
+    _caption_formats_ext = []
+    _captions_xml = None

     @staticmethod
     def _extract_url(webpage):
```
@ -53,22 +95,100 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
if mobj:
|
if mobj:
|
||||||
return 'https://drive.google.com/file/d/%s' % mobj.group('id')
|
return 'https://drive.google.com/file/d/%s' % mobj.group('id')
|
||||||
|
|
||||||
|
def _download_subtitles_xml(self, video_id, subtitles_id, hl):
|
||||||
|
if self._captions_xml:
|
||||||
|
return
|
||||||
|
self._captions_xml = self._download_xml(
|
||||||
|
self._BASE_URL_CAPTIONS, video_id, query={
|
||||||
|
'id': video_id,
|
||||||
|
'vid': subtitles_id,
|
||||||
|
'hl': hl,
|
||||||
|
'v': video_id,
|
||||||
|
'type': 'list',
|
||||||
|
'tlangs': '1',
|
||||||
|
'fmts': '1',
|
||||||
|
'vssids': '1',
|
||||||
|
}, note='Downloading subtitles XML',
|
||||||
|
errnote='Unable to download subtitles XML', fatal=False)
|
||||||
|
if self._captions_xml:
|
||||||
|
for f in self._captions_xml.findall('format'):
|
||||||
|
if f.attrib.get('fmt_code') and not f.attrib.get('default'):
|
||||||
|
self._caption_formats_ext.append(f.attrib['fmt_code'])
|
||||||
|
|
||||||
|
def _get_captions_by_type(self, video_id, subtitles_id, caption_type,
|
||||||
|
origin_lang_code=None):
|
||||||
|
if not subtitles_id or not caption_type:
|
||||||
|
return
|
||||||
|
captions = {}
|
||||||
|
for caption_entry in self._captions_xml.findall(
|
||||||
|
self._CAPTIONS_ENTRY_TAG[caption_type]):
|
||||||
|
caption_lang_code = caption_entry.attrib.get('lang_code')
|
||||||
|
if not caption_lang_code:
|
||||||
|
continue
|
||||||
|
caption_format_data = []
|
||||||
|
for caption_format in self._caption_formats_ext:
|
||||||
|
query = {
|
||||||
|
'vid': subtitles_id,
|
||||||
|
'v': video_id,
|
||||||
|
'fmt': caption_format,
|
||||||
|
'lang': (caption_lang_code if origin_lang_code is None
|
||||||
|
else origin_lang_code),
|
||||||
|
'type': 'track',
|
||||||
|
'name': '',
|
||||||
|
'kind': '',
|
||||||
|
}
|
||||||
|
if origin_lang_code is not None:
|
||||||
|
query.update({'tlang': caption_lang_code})
|
||||||
|
caption_format_data.append({
|
||||||
|
'url': update_url_query(self._BASE_URL_CAPTIONS, query),
|
||||||
|
'ext': caption_format,
|
||||||
|
})
|
||||||
|
captions[caption_lang_code] = caption_format_data
|
||||||
|
return captions
|
||||||
|
|
||||||
|
def _get_subtitles(self, video_id, subtitles_id, hl):
|
||||||
|
if not subtitles_id or not hl:
|
||||||
|
return
|
||||||
|
self._download_subtitles_xml(video_id, subtitles_id, hl)
|
||||||
|
if not self._captions_xml:
|
||||||
|
return
|
||||||
|
return self._get_captions_by_type(video_id, subtitles_id, 'subtitles')
|
||||||
|
|
||||||
|
def _get_automatic_captions(self, video_id, subtitles_id, hl):
|
||||||
|
if not subtitles_id or not hl:
|
||||||
|
return
|
||||||
|
self._download_subtitles_xml(video_id, subtitles_id, hl)
|
||||||
|
if not self._captions_xml:
|
||||||
|
return
|
||||||
|
track = self._captions_xml.find('track')
|
||||||
|
if track is None:
|
||||||
|
return
|
||||||
|
origin_lang_code = track.attrib.get('lang_code')
|
||||||
|
if not origin_lang_code:
|
||||||
|
return
|
||||||
|
return self._get_captions_by_type(
|
||||||
|
video_id, subtitles_id, 'automatic_captions', origin_lang_code)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://docs.google.com/file/d/%s' % video_id, video_id)
|
'http://docs.google.com/file/d/%s' % video_id, video_id)
|
||||||
|
|
||||||
reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
title = self._search_regex(
|
||||||
if reason:
|
r'"title"\s*,\s*"([^"]+)', webpage, 'title',
|
||||||
raise ExtractorError(reason)
|
default=None) or self._og_search_title(webpage)
|
||||||
|
|
||||||
title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title')
|
|
||||||
duration = int_or_none(self._search_regex(
|
duration = int_or_none(self._search_regex(
|
||||||
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None))
|
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds',
|
||||||
fmt_stream_map = self._search_regex(
|
default=None))
|
||||||
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
|
|
||||||
fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
|
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
fmt_stream_map = self._search_regex(
|
||||||
|
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage,
|
||||||
|
'fmt stream map', default='').split(',')
|
||||||
|
fmt_list = self._search_regex(
|
||||||
|
r'"fmt_list"\s*,\s*"([^"]+)', webpage,
|
||||||
|
'fmt_list', default='').split(',')
|
||||||
|
if fmt_stream_map and fmt_list:
|
||||||
resolutions = {}
|
resolutions = {}
|
||||||
for fmt in fmt_list:
|
for fmt in fmt_list:
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
@ -77,7 +197,6 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
resolutions[mobj.group('format_id')] = (
|
resolutions[mobj.group('format_id')] = (
|
||||||
int(mobj.group('width')), int(mobj.group('height')))
|
int(mobj.group('width')), int(mobj.group('height')))
|
||||||
|
|
||||||
formats = []
|
|
||||||
for fmt_stream in fmt_stream_map:
|
for fmt_stream in fmt_stream_map:
|
||||||
fmt_stream_split = fmt_stream.split('|')
|
fmt_stream_split = fmt_stream.split('|')
|
||||||
if len(fmt_stream_split) < 2:
|
if len(fmt_stream_split) < 2:
|
||||||
@ -95,12 +214,64 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
'height': resolution[1],
|
'height': resolution[1],
|
||||||
})
|
})
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
|
||||||
|
source_url = update_url_query(
|
||||||
|
'https://drive.google.com/uc', {
|
||||||
|
'id': video_id,
|
||||||
|
'export': 'download',
|
||||||
|
})
|
||||||
|
urlh = self._request_webpage(
|
||||||
|
source_url, video_id, note='Requesting source file',
|
||||||
|
errnote='Unable to request source file', fatal=False)
|
||||||
|
if urlh:
|
||||||
|
def add_source_format(src_url):
|
||||||
|
formats.append({
|
||||||
|
'url': src_url,
|
||||||
|
'ext': determine_ext(title, 'mp4').lower(),
|
||||||
|
'format_id': 'source',
|
||||||
|
'quality': 1,
|
||||||
|
})
|
||||||
|
if urlh.headers.get('Content-Disposition'):
|
||||||
|
add_source_format(source_url)
|
||||||
|
else:
|
||||||
|
confirmation_webpage = self._webpage_read_content(
|
||||||
|
urlh, url, video_id, note='Downloading confirmation page',
|
||||||
|
errnote='Unable to confirm download', fatal=False)
|
||||||
|
if confirmation_webpage:
|
||||||
|
confirm = self._search_regex(
|
||||||
|
r'confirm=([^&"\']+)', confirmation_webpage,
|
||||||
|
'confirmation code', fatal=False)
|
||||||
|
if confirm:
|
||||||
|
add_source_format(update_url_query(source_url, {
|
||||||
|
'confirm': confirm,
|
||||||
|
}))
|
||||||
|
|
||||||
|
if not formats:
|
||||||
|
reason = self._search_regex(
|
||||||
|
r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
||||||
|
if reason:
|
||||||
|
raise ExtractorError(reason, expected=True)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
hl = self._search_regex(
|
||||||
|
r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None)
|
||||||
|
subtitles_id = None
|
||||||
|
ttsurl = self._search_regex(
|
||||||
|
r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None)
|
||||||
|
if ttsurl:
|
||||||
|
# the video Id for subtitles will be the last value in the ttsurl
|
||||||
|
# query string
|
||||||
|
subtitles_id = ttsurl.encode('utf-8').decode(
|
||||||
|
'unicode_escape').split('=')[-1]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
|
||||||
|
'automatic_captions': self.extract_automatic_captions(
|
||||||
|
video_id, subtitles_id, hl),
|
||||||
}
|
}
|
||||||
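The new source-format path asks `drive.google.com/uc?export=download` for the original file and, when Drive answers with an HTML interstitial instead of a `Content-Disposition` attachment, pulls the `confirm=` token out of that page and retries. A standalone sketch of the same handshake using only the standard library (the endpoint and the `confirm=` parameter come from the diff; error handling is deliberately minimal):

```python
import re
import urllib.parse
import urllib.request

def drive_source_url(video_id):
    """Return a direct-download URL for a Drive file, following the
    confirmation interstitial when one is served instead of the file."""
    base = 'https://drive.google.com/uc?' + urllib.parse.urlencode(
        {'id': video_id, 'export': 'download'})
    with urllib.request.urlopen(base) as resp:
        # An attachment response means we can download straight away.
        if resp.headers.get('Content-Disposition'):
            return base
        page = resp.read().decode('utf-8', 'replace')
    # Otherwise the page embeds a one-shot confirmation code.
    mobj = re.search(r'confirm=([^&"\']+)', page)
    if mobj:
        return base + '&confirm=' + mobj.group(1)
    return None
```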
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
+from .youtube import YoutubeIE
 from ..utils import (
     determine_ext,
     int_or_none,
@@ -25,6 +26,22 @@ class HeiseIE(InfoExtractor):
             'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20',
             'thumbnail': r're:^https?://.*/gallery/$',
         }
+    }, {
+        # YouTube embed
+        'url': 'http://www.heise.de/newsticker/meldung/Netflix-In-20-Jahren-vom-Videoverleih-zum-TV-Revolutionaer-3814130.html',
+        'md5': 'e403d2b43fea8e405e88e3f8623909f1',
+        'info_dict': {
+            'id': '6kmWbXleKW4',
+            'ext': 'mp4',
+            'title': 'NEU IM SEPTEMBER | Netflix',
+            'description': 'md5:2131f3c7525e540d5fd841de938bd452',
+            'upload_date': '20170830',
+            'uploader': 'Netflix Deutschland, Österreich und Schweiz',
+            'uploader_id': 'netflixdach',
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html',
         'only_matching': True,
@@ -40,6 +57,16 @@ class HeiseIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)

+        title = self._html_search_meta('fulltitle', webpage, default=None)
+        if not title or title == "c't":
+            title = self._search_regex(
+                r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
+                webpage, 'title')
+
+        yt_urls = YoutubeIE._extract_urls(webpage)
+        if yt_urls:
+            return self.playlist_from_matches(yt_urls, video_id, title, ie=YoutubeIE.ie_key())
+
         container_id = self._search_regex(
             r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
             webpage, 'container ID')
@@ -47,12 +74,6 @@ class HeiseIE(InfoExtractor):
             r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"',
             webpage, 'sequenz ID')

-        title = self._html_search_meta('fulltitle', webpage, default=None)
-        if not title or title == "c't":
-            title = self._search_regex(
-                r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
-                webpage, 'title')
-
         doc = self._download_xml(
             'http://www.heise.de/videout/feed', video_id, query={
                 'container': container_id,
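The Heise change short-circuits to the YouTube extractor whenever the article page merely embeds a YouTube player. A simplified stand-in for what `YoutubeIE._extract_urls` does (the real helper in youtube_dl covers many more embed variants; the regex here is illustrative):

```python
import re

# Collect YouTube iframe embeds on a page so they can be delegated
# to the YouTube extractor instead of the site's own player feed.
def find_youtube_embeds(webpage):
    return re.findall(
        r'<iframe[^>]+src=["\'](https?://(?:www\.)?youtube\.com/embed/[\w-]{11})',
        webpage)

page = '<iframe src="https://www.youtube.com/embed/6kmWbXleKW4"></iframe>'
print(find_youtube_embeds(page))  # ['https://www.youtube.com/embed/6kmWbXleKW4']
```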
youtube_dl/extractor/kakao.py (new file, 149 lines)
@@ -0,0 +1,149 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    int_or_none,
+    unified_timestamp,
+    update_url_query,
+)
+
+
+class KakaoIE(InfoExtractor):
+    _VALID_URL = r'https?://tv\.kakao\.com/channel/(?P<channel>\d+)/cliplink/(?P<id>\d+)'
+    _API_BASE = 'http://tv.kakao.com/api/v1/ft/cliplinks'
+
+    _TESTS = [{
+        'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083',
+        'md5': '702b2fbdeb51ad82f5c904e8c0766340',
+        'info_dict': {
+            'id': '301965083',
+            'ext': 'mp4',
+            'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動!顔高低差GPも!」 『乃木坂工事中』',
+            'uploader_id': 2671005,
+            'uploader': '그랑그랑이',
+            'timestamp': 1488160199,
+            'upload_date': '20170227',
+        }
+    }, {
+        'url': 'http://tv.kakao.com/channel/2653210/cliplink/300103180',
+        'md5': 'a8917742069a4dd442516b86e7d66529',
+        'info_dict': {
+            'id': '300103180',
+            'ext': 'mp4',
+            'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
+            'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
+            'uploader_id': 2653210,
+            'uploader': '쇼 음악중심',
+            'timestamp': 1485684628,
+            'upload_date': '20170129',
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        player_header = {
+            'Referer': update_url_query(
+                'http://tv.kakao.com/embed/player/cliplink/%s' % video_id, {
+                    'service': 'kakao_tv',
+                    'autoplay': '1',
+                    'profile': 'HIGH',
+                    'wmode': 'transparent',
+                })
+        }
+
+        QUERY_COMMON = {
+            'player': 'monet_html5',
+            'referer': url,
+            'uuid': '',
+            'service': 'kakao_tv',
+            'section': '',
+            'dteType': 'PC',
+        }
+
+        query = QUERY_COMMON.copy()
+        query['fields'] = 'clipLink,clip,channel,hasPlusFriend,-service,-tagList'
+        impress = self._download_json(
+            '%s/%s/impress' % (self._API_BASE, video_id),
+            video_id, 'Downloading video info',
+            query=query, headers=player_header)
+
+        clip_link = impress['clipLink']
+        clip = clip_link['clip']
+
+        title = clip.get('title') or clip_link.get('displayTitle')
+
+        tid = impress.get('tid', '')
+
+        query = QUERY_COMMON.copy()
+        query.update({
+            'tid': tid,
+            'profile': 'HIGH',
+        })
+        raw = self._download_json(
+            '%s/%s/raw' % (self._API_BASE, video_id),
+            video_id, 'Downloading video formats info',
+            query=query, headers=player_header)
+
+        formats = []
+        for fmt in raw.get('outputList', []):
+            try:
+                profile_name = fmt['profile']
+                fmt_url_json = self._download_json(
+                    '%s/%s/raw/videolocation' % (self._API_BASE, video_id),
+                    video_id,
+                    'Downloading video URL for profile %s' % profile_name,
+                    query={
+                        'service': 'kakao_tv',
+                        'section': '',
+                        'tid': tid,
+                        'profile': profile_name
+                    }, headers=player_header, fatal=False)
+
+                if fmt_url_json is None:
+                    continue
+
+                fmt_url = fmt_url_json['url']
+                formats.append({
+                    'url': fmt_url,
+                    'format_id': profile_name,
+                    'width': int_or_none(fmt.get('width')),
+                    'height': int_or_none(fmt.get('height')),
+                    'format_note': fmt.get('label'),
+                    'filesize': int_or_none(fmt.get('filesize'))
+                })
+            except KeyError:
+                pass
+        self._sort_formats(formats)
+
+        thumbs = []
+        for thumb in clip.get('clipChapterThumbnailList', []):
+            thumbs.append({
+                'url': thumb.get('thumbnailUrl'),
+                'id': compat_str(thumb.get('timeInSec')),
+                'preference': -1 if thumb.get('isDefault') else 0
+            })
+        top_thumbnail = clip.get('thumbnailUrl')
+        if top_thumbnail:
+            thumbs.append({
+                'url': top_thumbnail,
+                'preference': 10,
+            })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': clip.get('description'),
+            'uploader': clip_link.get('channel', {}).get('name'),
+            'uploader_id': clip_link.get('channelId'),
+            'thumbnails': thumbs,
+            'timestamp': unified_timestamp(clip_link.get('createTime')),
+            'duration': int_or_none(clip.get('duration')),
+            'view_count': int_or_none(clip.get('playCount')),
+            'like_count': int_or_none(clip.get('likeCount')),
+            'comment_count': int_or_none(clip.get('commentCount')),
+            'formats': formats,
+        }
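The new Kakao extractor works in three JSON calls against the same clip-link API: `impress` for metadata, `raw` for the list of encodes, and `raw/videolocation` once per encode profile for the actual media URL, all sent with an embed-player `Referer`. A rough standard-library sketch of that sequence (endpoints and query fields are taken from the code above; no error handling, and the sample clip id is just the test clip):

```python
import json
import urllib.parse
import urllib.request

API_BASE = 'http://tv.kakao.com/api/v1/ft/cliplinks'

def api_call(video_id, path, query):
    # Every call carries the embed-player Referer the API expects.
    url = '%s/%s/%s?%s' % (API_BASE, video_id, path, urllib.parse.urlencode(query))
    req = urllib.request.Request(url, headers={
        'Referer': 'http://tv.kakao.com/embed/player/cliplink/%s' % video_id,
    })
    with urllib.request.urlopen(req) as resp:
        return json.load(resp)

video_id = '301965083'
common = {'player': 'monet_html5', 'service': 'kakao_tv', 'dteType': 'PC'}
impress = api_call(video_id, 'impress',
                   dict(common, fields='clipLink,clip,channel'))
tid = impress.get('tid', '')
raw = api_call(video_id, 'raw', dict(common, tid=tid, profile='HIGH'))
for fmt in raw.get('outputList', []):
    loc = api_call(video_id, 'raw/videolocation',
                   dict(common, tid=tid, profile=fmt['profile']))
    print(fmt['profile'], loc['url'])
```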
@@ -1,5 +1,6 @@
 from __future__ import unicode_literals

+from .canvas import CanvasIE
 from .common import InfoExtractor


@@ -7,7 +8,7 @@ class KetnetIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'https://www.ketnet.be/kijken/zomerse-filmpjes',
-        'md5': 'd907f7b1814ef0fa285c0475d9994ed7',
+        'md5': '6bdeb65998930251bbd1c510750edba9',
         'info_dict': {
             'id': 'zomerse-filmpjes',
             'ext': 'mp4',
@@ -15,6 +16,20 @@ class KetnetIE(InfoExtractor):
             'description': 'Gluur mee met Ghost Rockers op de filmset',
             'thumbnail': r're:^https?://.*\.jpg$',
         }
+    }, {
+        # mzid in playerConfig instead of sources
+        'url': 'https://www.ketnet.be/kijken/nachtwacht/de-greystook',
+        'md5': '90139b746a0a9bd7bb631283f6e2a64e',
+        'info_dict': {
+            'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
+            'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
+            'ext': 'flv',
+            'title': 'Nachtwacht: De Greystook',
+            'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 1468.03,
+        },
+        'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
     }, {
         'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016',
         'only_matching': True,
@@ -38,6 +53,12 @@ class KetnetIE(InfoExtractor):
             'player config'),
             video_id)

+        mzid = config.get('mzid')
+        if mzid:
+            return self.url_result(
+                'https://mediazone.vrt.be/api/v1/ketnet/assets/%s' % mzid,
+                CanvasIE.ie_key(), video_id=mzid)
+
         title = config['title']

         formats = []
@@ -215,3 +215,21 @@ class Laola1TvIE(Laola1TvEmbedIE):
             'formats': formats,
             'is_live': is_live,
         }
+
+
+class ITTFIE(InfoExtractor):
+    _VALID_URL = r'https?://tv\.ittf\.com/video/[^/]+/(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://tv.ittf.com/video/peng-wang-wei-matsudaira-kenta/951802',
+        'only_matching': True,
+    }
+
+    def _real_extract(self, url):
+        return self.url_result(
+            update_url_query('https://www.laola1.tv/titanplayer.php', {
+                'videoid': self._match_id(url),
+                'type': 'V',
+                'lang': 'en',
+                'portal': 'int',
+                'customer': 1024,
+            }), Laola1TvEmbedIE.ie_key())
@@ -26,14 +26,16 @@ class LimelightBaseIE(InfoExtractor):
             'Channel': 'channel',
             'ChannelList': 'channel_list',
         }
+
+        def smuggle(url):
+            return smuggle_url(url, {'source_url': source_url})
+
         entries = []
         for kind, video_id in re.findall(
                 r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
                 webpage):
             entries.append(cls.url_result(
-                smuggle_url(
-                    'limelight:%s:%s' % (lm[kind], video_id),
-                    {'source_url': source_url}),
+                smuggle('limelight:%s:%s' % (lm[kind], video_id)),
                 'Limelight%s' % kind, video_id))
         for mobj in re.finditer(
                 # As per [1] class attribute should be exactly equal to
@@ -49,10 +51,15 @@ class LimelightBaseIE(InfoExtractor):
                 ''', webpage):
             kind, video_id = mobj.group('kind'), mobj.group('id')
             entries.append(cls.url_result(
-                smuggle_url(
-                    'limelight:%s:%s' % (kind, video_id),
-                    {'source_url': source_url}),
+                smuggle('limelight:%s:%s' % (kind, video_id)),
                 'Limelight%s' % kind.capitalize(), video_id))
+        # http://support.3playmedia.com/hc/en-us/articles/115009517327-Limelight-Embedding-the-Audio-Description-Plugin-with-the-Limelight-Player-on-Your-Web-Page)
+        for video_id in re.findall(
+                r'(?s)LimelightPlayerUtil\.embed\s*\(\s*{.*?\bmediaId["\']\s*:\s*["\'](?P<id>[a-z0-9]{32})',
+                webpage):
+            entries.append(cls.url_result(
+                smuggle('limelight:media:%s' % video_id),
+                LimelightMediaIE.ie_key(), video_id))
         return entries

     def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
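`smuggle_url` piggybacks extra context (here, the embedding page's URL) onto a URL so it survives the hand-off from one extractor to another; the refactor above just wraps it in a local `smuggle` closure so the `source_url` isn't repeated three times. A simplified model of the smuggle/unsmuggle pair, under the assumption (matching youtube_dl's helper) that the data rides in the URL fragment as URL-encoded JSON:

```python
import json
import urllib.parse

def smuggle_url(url, data):
    # Extra context rides in the fragment as URL-encoded JSON.
    return url + '#' + urllib.parse.urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})

def unsmuggle_url(smug_url, default=None):
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, frag = smug_url.partition('#')
    data = json.loads(urllib.parse.parse_qs(frag)['__youtubedl_smuggle'][0])
    return url, data

u = smuggle_url('limelight:media:0123456789abcdef0123456789abcdef',
                {'source_url': 'https://example.com/page'})
print(unsmuggle_url(u))  # original URL plus the smuggled dict
```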
@@ -72,15 +72,20 @@ class LiveLeakIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+    }, {
+        'url': 'https://www.liveleak.com/view?i=677_1439397581',
+        'info_dict': {
+            'id': '677_1439397581',
+            'title': 'Fuel Depot in China Explosion caught on video',
+        },
+        'playlist_count': 3,
     }]

     @staticmethod
-    def _extract_url(webpage):
-        mobj = re.search(
-            r'<iframe[^>]+src="https?://(?:\w+\.)?liveleak\.com/ll_embed\?(?:.*?)i=(?P<id>[\w_]+)(?:.*)',
-            webpage)
-        if mobj:
-            return 'http://www.liveleak.com/view?i=%s' % mobj.group('id')
+    def _extract_urls(webpage):
+        return re.findall(
+            r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[if]=[\w_]+[^"]+)"',
+            webpage)

     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -111,8 +116,7 @@ class LiveLeakIE(InfoExtractor):
             'age_limit': age_limit,
         }

-        info_dict = entries[0]
+        for idx, info_dict in enumerate(entries):
             for a_format in info_dict['formats']:
                 if not a_format.get('height'):
                     a_format['height'] = int_or_none(self._search_regex(
@@ -121,8 +125,13 @@ class LiveLeakIE(InfoExtractor):

             self._sort_formats(info_dict['formats'])

+            # Don't append entry ID for one-video pages to keep backward compatibility
+            if len(entries) > 1:
+                info_dict['id'] = '%s_%s' % (video_id, idx + 1)
+            else:
+                info_dict['id'] = video_id
+
             info_dict.update({
-                'id': video_id,
                 'title': video_title,
                 'description': video_description,
                 'uploader': video_uploader,
@@ -130,4 +139,31 @@ class LiveLeakIE(InfoExtractor):
                 'thumbnail': video_thumbnail,
             })

-        return info_dict
+        return self.playlist_result(entries, video_id, video_title)
+
+
+class LiveLeakEmbedIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[if])=(?P<id>[\w_]+)'
+
+    # See generic.py for actual test cases
+    _TESTS = [{
+        'url': 'https://www.liveleak.com/ll_embed?i=874_1459135191',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.liveleak.com/ll_embed?f=ab065df993c1',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        kind, video_id = mobj.group('kind', 'id')
+
+        if kind == 'f':
+            webpage = self._download_webpage(url, video_id)
+            liveleak_url = self._search_regex(
+                r'logourl\s*:\s*(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
+                webpage, 'LiveLeak URL', group='url')
+        elif kind == 'i':
+            liveleak_url = 'http://www.liveleak.com/view?i=%s' % video_id
+
+        return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())
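The multi-item change keeps single-video pages addressable by their original ID while numbering the entries of multi-video pages, so existing archives don't break. The rule in isolation:

```python
# The entry-ID rule from the LiveLeak change above: only multi-video
# pages get a numeric suffix, so old single-video IDs keep working.
def entry_ids(video_id, n_entries):
    if n_entries > 1:
        return ['%s_%s' % (video_id, idx + 1) for idx in range(n_entries)]
    return [video_id]

print(entry_ids('677_1439397581', 3))  # ['677_1439397581_1', '..._2', '..._3']
print(entry_ids('874_1459135191', 1))  # ['874_1459135191']
```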
@@ -94,7 +94,7 @@ class LyndaBaseIE(InfoExtractor):
 class LyndaIE(LyndaBaseIE):
     IE_NAME = 'lynda'
     IE_DESC = 'lynda.com videos'
-    _VALID_URL = r'https?://(?:www\.)?lynda\.com/(?:[^/]+/[^/]+/(?P<course_id>\d+)|player/embed)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:lynda\.com|educourse\.ga)/(?:[^/]+/[^/]+/(?P<course_id>\d+)|player/embed)/(?P<id>\d+)'

     _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'

@@ -110,6 +110,9 @@ class LyndaIE(LyndaBaseIE):
     }, {
         'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0',
         'only_matching': True,
+    }, {
+        'url': 'https://educourse.ga/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
+        'only_matching': True,
     }]

     def _raise_unavailable(self, video_id):
@@ -253,7 +256,7 @@ class LyndaCourseIE(LyndaBaseIE):

     # Course link equals to welcome/introduction video link of same course
     # We will recognize it as course link
-    _VALID_URL = r'https?://(?:www|m)\.lynda\.com/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html'
+    _VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html'

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
youtube_dl/extractor/manyvids.py (new file, 48 lines)
@@ -0,0 +1,48 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class ManyVidsIE(InfoExtractor):
+    _VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
+        'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
+        'info_dict': {
+            'id': '133957',
+            'ext': 'mp4',
+            'title': 'everthing about me (Preview)',
+            'view_count': int,
+            'like_count': int,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self._search_regex(
+            r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1',
+            webpage, 'video URL', group='url')
+
+        title = '%s (Preview)' % self._html_search_regex(
+            r'<h2[^>]+class="m-a-0"[^>]*>([^<]+)', webpage, 'title')
+
+        like_count = int_or_none(self._search_regex(
+            r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
+        view_count = int_or_none(self._html_search_regex(
+            r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
+            'view count', default=None))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'view_count': view_count,
+            'like_count': like_count,
+            'formats': [{
+                'url': video_url,
+            }],
+        }
youtube_dl/extractor/megaphone.py (new file, 55 lines)
@@ -0,0 +1,55 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class MegaphoneIE(InfoExtractor):
+    IE_NAME = 'megaphone.fm'
+    IE_DESC = 'megaphone.fm embedded players'
+    _VALID_URL = r'https://player\.megaphone\.fm/(?P<id>[A-Z0-9]+)'
+    _TEST = {
+        'url': 'https://player.megaphone.fm/GLT9749789991?"',
+        'md5': '4816a0de523eb3e972dc0dda2c191f96',
+        'info_dict': {
+            'id': 'GLT9749789991',
+            'ext': 'mp3',
+            'title': '#97 What Kind Of Idiot Gets Phished?',
+            'thumbnail': 're:^https://.*\.png.*$',
+            'duration': 1776.26375,
+            'author': 'Reply All',
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_property('audio:title', webpage)
+        author = self._og_search_property('audio:artist', webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        episode_json = self._search_regex(r'(?s)var\s+episode\s*=\s*(\{.+?\});', webpage, 'episode JSON')
+        episode_data = self._parse_json(episode_json, video_id, js_to_json)
+        video_url = self._proto_relative_url(episode_data['mediaUrl'], 'https:')
+
+        formats = [{
+            'url': video_url,
+        }]
+
+        return {
+            'id': video_id,
+            'thumbnail': thumbnail,
+            'title': title,
+            'author': author,
+            'duration': episode_data['duration'],
+            'formats': formats,
+        }
+
+    @classmethod
+    def _extract_urls(cls, webpage):
+        return [m[0] for m in re.findall(
+            r'<iframe[^>]*?\ssrc=["\'](%s)' % cls._VALID_URL, webpage)]
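The extractor's key move is lifting the player's `var episode = {...}` object literal out of the page and parsing it as JSON (via `js_to_json`, since JS object literals are not always strict JSON). A standalone sketch of that step with a strict-JSON payload, so the standard `json` module suffices (the page snippet is fabricated for illustration):

```python
import json
import re

page = '''<script>
var episode = {"mediaUrl": "//dcs.megaphone.fm/GLT9749789991.mp3",
               "duration": 1776.26375};
</script>'''

# Same regex shape as the extractor: grab the object literal bound to `episode`.
episode_json = re.search(r'(?s)var\s+episode\s*=\s*(\{.+?\});', page).group(1)
episode = json.loads(episode_json)

# Protocol-relative URLs get an explicit scheme before download.
media_url = episode['mediaUrl']
if media_url.startswith('//'):
    media_url = 'https:' + media_url
print(media_url, episode['duration'])
```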
@@ -9,14 +9,19 @@ from .common import InfoExtractor
 from ..compat import (
     compat_chr,
     compat_ord,
+    compat_str,
     compat_urllib_parse_unquote,
     compat_urlparse,
+    compat_zip
 )
 from ..utils import (
     clean_html,
     ExtractorError,
+    int_or_none,
     OnDemandPagedList,
     str_to_int,
+    try_get,
+    urljoin,
 )
@@ -53,16 +58,12 @@ class MixcloudIE(InfoExtractor):
         'only_matching': True,
     }]

-    # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js
     @staticmethod
-    def _decrypt_play_info(play_info):
-        KEY = 'pleasedontdownloadourmusictheartistswontgetpaid'
-
-        play_info = base64.b64decode(play_info.encode('ascii'))
-
+    def _decrypt_xor_cipher(key, ciphertext):
+        """Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR."""
         return ''.join([
-            compat_chr(compat_ord(ch) ^ compat_ord(KEY[idx % len(KEY)]))
-            for idx, ch in enumerate(play_info)])
+            compat_chr(compat_ord(ch) ^ compat_ord(k))
+            for ch, k in compat_zip(ciphertext, itertools.cycle(key))])

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -72,19 +73,66 @@ class MixcloudIE(InfoExtractor):

         webpage = self._download_webpage(url, track_id)

+        # Legacy path
+        encrypted_play_info = self._search_regex(
+            r'm-play-info="([^"]+)"', webpage, 'play info', default=None)
+
+        if encrypted_play_info is not None:
+            # Decode
+            encrypted_play_info = base64.b64decode(encrypted_play_info)
+        else:
+            # New path
+            full_info_json = self._parse_json(self._html_search_regex(
+                r'<script id="relay-data" type="text/x-mixcloud">([^<]+)</script>',
+                webpage, 'play info'), 'play info')
+            for item in full_info_json:
+                item_data = try_get(
+                    item, lambda x: x['cloudcast']['data']['cloudcastLookup'],
+                    dict)
+                if try_get(item_data, lambda x: x['streamInfo']['url']):
+                    info_json = item_data
+                    break
+            else:
+                raise ExtractorError('Failed to extract matching stream info')
+
         message = self._html_search_regex(
             r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
             webpage, 'error message', default=None)

-        encrypted_play_info = self._search_regex(
-            r'm-play-info="([^"]+)"', webpage, 'play info')
-        play_info = self._parse_json(
-            self._decrypt_play_info(encrypted_play_info), track_id)
+        js_url = self._search_regex(
+            r'<script[^>]+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/(?:js2/www_js_4|js/www)\.[^>]+\.js)',
+            webpage, 'js url')
+        js = self._download_webpage(js_url, track_id, 'Downloading JS')
+        # Known plaintext attack
+        if encrypted_play_info:
+            kps = ['{"stream_url":']
+            kpa_target = encrypted_play_info
+        else:
+            kps = ['https://', 'http://']
+            kpa_target = base64.b64decode(info_json['streamInfo']['url'])
+        for kp in kps:
+            partial_key = self._decrypt_xor_cipher(kpa_target, kp)
+            for quote in ["'", '"']:
+                key = self._search_regex(
+                    r'{0}({1}[^{0}]*){0}'.format(quote, re.escape(partial_key)),
+                    js, 'encryption key', default=None)
+                if key is not None:
+                    break
+            else:
+                continue
+            break
+        else:
+            raise ExtractorError('Failed to extract encryption key')

+        if encrypted_play_info is not None:
+            play_info = self._parse_json(self._decrypt_xor_cipher(key, encrypted_play_info), 'play info')
             if message and 'stream_url' not in play_info:
                 raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)

             song_url = play_info['stream_url']
+            formats = [{
+                'format_id': 'normal',
+                'url': song_url
+            }]

             title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title')
             thumbnail = self._proto_relative_url(self._html_search_regex(
@@ -100,10 +148,44 @@ class MixcloudIE(InfoExtractor):
                 r'(?:m|data)-tooltip=["\']([\d,.]+) plays'],
                 webpage, 'play count', default=None))

+        else:
+            title = info_json['name']
+            thumbnail = urljoin(
+                'https://thumbnailer.mixcloud.com/unsafe/600x600/',
+                try_get(info_json, lambda x: x['picture']['urlRoot'], compat_str))
+            uploader = try_get(info_json, lambda x: x['owner']['displayName'])
+            uploader_id = try_get(info_json, lambda x: x['owner']['username'])
+            description = try_get(info_json, lambda x: x['description'])
+            view_count = int_or_none(try_get(info_json, lambda x: x['plays']))
+
+            stream_info = info_json['streamInfo']
+            formats = []
+
+            for url_key in ('url', 'hlsUrl', 'dashUrl'):
+                format_url = stream_info.get(url_key)
+                if not format_url:
+                    continue
+                decrypted = self._decrypt_xor_cipher(key, base64.b64decode(format_url))
+                if not decrypted:
+                    continue
+                if url_key == 'hlsUrl':
+                    formats.extend(self._extract_m3u8_formats(
+                        decrypted, track_id, 'mp4', entry_protocol='m3u8_native',
+                        m3u8_id='hls', fatal=False))
+                elif url_key == 'dashUrl':
+                    formats.extend(self._extract_mpd_formats(
+                        decrypted, track_id, mpd_id='dash', fatal=False))
+                else:
+                    formats.append({
+                        'format_id': 'http',
+                        'url': decrypted,
+                    })
+            self._sort_formats(formats)
+
         return {
             'id': track_id,
             'title': title,
-            'url': song_url,
+            'formats': formats,
             'description': description,
             'thumbnail': thumbnail,
             'uploader': uploader,
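The rewritten extractor no longer hardcodes the XOR key: it recovers it with a known-plaintext attack. XOR-ing the ciphertext against a predictable plaintext prefix (`{"stream_url":` on the legacy path, `https://` on the new one) yields the leading bytes of the key stream, which are then located as a string literal in the player's JS bundle. A self-contained sketch of the recovery step, with a toy key and message rather than Mixcloud's:

```python
import itertools

def xor_cipher(key, data):
    # XOR is its own inverse, so this both encrypts and decrypts.
    return ''.join(
        chr(ord(c) ^ ord(k)) for c, k in zip(data, itertools.cycle(key)))

key = 'toykey'                      # hypothetical key
plaintext = '{"stream_url": "https://example.com/a.mp3"}'
ciphertext = xor_cipher(key, plaintext)

# Known-plaintext attack: XOR the ciphertext with the plaintext prefix we
# can predict; the result is the key stream, i.e. the key repeated.
known_prefix = '{"stream_url":'
key_fragment = xor_cipher(known_prefix, ciphertext[:len(known_prefix)])
print(key_fragment)                 # 'toykeytoykeyto' - the key, repeating
assert key_fragment.startswith(key)
```

The fragment is enough to find the full key: the extractor simply searches the downloaded JS for a quoted string that begins with it.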
@@ -15,7 +15,7 @@ class MLBIE(InfoExtractor):
                         (?:[\da-z_-]+\.)*mlb\.com/
                         (?:
                             (?:
-                                (?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|
+                                (?:.*?/)?video/(?:topic/[\da-z_-]+/)?(?:v|.*?/c-)|
                                 (?:
                                     shared/video/embed/(?:embed|m-internal-embed)\.html|
                                     (?:[^/]+/)+(?:play|index)\.jsp|
@@ -84,7 +84,7 @@ class MLBIE(InfoExtractor):
         },
         {
             'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer',
-            'md5': 'b190e70141fb9a1552a85426b4da1b5d',
+            'md5': 'aafaf5b0186fee8f32f20508092f8111',
             'info_dict': {
                 'id': '75609783',
                 'ext': 'mp4',
@@ -94,6 +94,10 @@ class MLBIE(InfoExtractor):
                 'upload_date': '20150415',
             }
         },
+        {
+            'url': 'https://www.mlb.com/video/hargrove-homers-off-caldwell/c-1352023483?tid=67793694',
+            'only_matching': True,
+        },
         {
             'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb',
             'only_matching': True,
@@ -8,8 +8,8 @@ from .common import InfoExtractor

 class MorningstarIE(InfoExtractor):
     IE_DESC = 'morningstar.com'
-    _VALID_URL = r'https?://(?:www\.)?morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:(?:www|news)\.)morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
+    _TESTS = [{
         'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
         'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
         'info_dict': {
@@ -19,7 +19,10 @@ class MorningstarIE(InfoExtractor):
             'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.",
             'thumbnail': r're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$'
         }
-    }
+    }, {
+        'url': 'http://news.morningstar.com/cover/videocenter.aspx?id=825556',
+        'only_matching': True,
+    }]

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -1,62 +0,0 @@
-from __future__ import unicode_literals
-
-
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-
-class MporaIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
-    IE_NAME = 'MPORA'
-
-    _TEST = {
-        'url': 'http://mpora.de/videos/AAdo8okx4wiz/embed?locale=de',
-        'md5': 'a7a228473eedd3be741397cf452932eb',
-        'info_dict': {
-            'id': 'AAdo8okx4wiz',
-            'ext': 'mp4',
-            'title': 'Katy Curd - Winter in the Forest',
-            'duration': 416,
-            'uploader': 'Peter Newman Media',
-        },
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        data_json = self._search_regex(
-            [r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;",
-             r"new\s+FM\.Kaltura\.Player\('[^']+'\s*,\s*({.+?})\);"],
-            webpage, 'json')
-        data = self._parse_json(data_json, video_id)
-
-        uploader = data['info_overlay'].get('username')
-        duration = data['video']['duration'] // 1000
-        thumbnail = data['video']['encodings']['sd']['poster']
-        title = data['info_overlay']['title']
-
-        formats = []
-        for encoding_id, edata in data['video']['encodings'].items():
-            for src in edata['sources']:
-                width_str = self._search_regex(
-                    r'_([0-9]+)\.[a-zA-Z0-9]+$', src['src'],
-                    False, default=None)
-                vcodec = src['type'].partition('/')[2]
-
-                formats.append({
-                    'format_id': encoding_id + '-' + vcodec,
-                    'url': src['src'],
-                    'vcodec': vcodec,
-                    'width': int_or_none(width_str),
-                })
-
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'formats': formats,
-            'uploader': uploader,
-            'duration': duration,
-            'thumbnail': thumbnail,
-        }
@@ -15,7 +15,7 @@ from ..utils import (


 class NBCIE(AdobePassIE):
-    _VALID_URL = r'(?P<permalink>https?://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))'
+    _VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))'

     _TESTS = [
         {
@@ -72,6 +72,7 @@ class NBCIE(AdobePassIE):

     def _real_extract(self, url):
         permalink, video_id = re.match(self._VALID_URL, url).groups()
+        permalink = 'http' + permalink
         video_data = self._download_json(
             'https://api.nbc.com/v3/videos', video_id, query={
                 'filter[permalink]': permalink,
@@ -109,10 +110,10 @@ class NBCSportsVPlayerIE(InfoExtractor):
     _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'

     _TESTS = [{
-        'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI',
+        'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
         'info_dict': {
             'id': '9CsDKds0kvHI',
-            'ext': 'flv',
+            'ext': 'mp4',
             'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
             'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
             'timestamp': 1426270238,
@@ -120,7 +121,7 @@ class NBCSportsVPlayerIE(InfoExtractor):
             'uploader': 'NBCU-SPORTS',
         }
     }, {
-        'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
+        'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/_hqLjQ95yx8Z',
         'only_matching': True,
     }]

@@ -134,7 +135,8 @@ class NBCSportsVPlayerIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        theplatform_url = self._og_search_video_url(webpage)
+        theplatform_url = self._og_search_video_url(webpage).replace(
+            'vplayer.nbcsports.com', 'player.theplatform.com')
         return self.url_result(theplatform_url, 'ThePlatform')
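Moving the scheme outside the `permalink` group lets the extractor accept both `http` and `https` pages while always sending a canonical `http` permalink to the NBC API. The trick in isolation (the sample URL is illustrative, not a real video):

```python
import re

# Scheme stays outside the captured permalink, so http:// and https://
# pages normalize to the same API lookup key.
VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))'

for url in ('http://www.nbc.com/show/video/ep/n12345',
            'https://www.nbc.com/show/video/ep/n12345'):
    permalink, video_id = re.match(VALID_URL, url).groups()
    print('http' + permalink, video_id)  # identical output for both inputs
```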
@@ -75,7 +75,7 @@ class NickIE(MTVServicesInfoExtractor):

 class NickDeIE(MTVServicesInfoExtractor):
     IE_NAME = 'nick.de'
-    _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.de|nickelodeon\.(?:nl|at))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.(?:de|com\.pl)|nickelodeon\.(?:nl|at))/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse',
         'only_matching': True,
@@ -88,6 +88,9 @@ class NickDeIE(MTVServicesInfoExtractor):
     }, {
         'url': 'http://www.nickelodeon.at/playlist/3773-top-videos/videos/episode/77993-das-letzte-gefecht',
         'only_matching': True,
+    }, {
+        'url': 'http://www.nick.com.pl/seriale/474-spongebob-kanciastoporty/wideo/17412-teatr-to-jest-to-rodeo-oszolom',
+        'only_matching': True,
     }]

     def _extract_mrss_url(self, webpage, host):
@ -11,10 +11,15 @@ from ..compat import (
|
|||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
dict_get,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
float_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
remove_start,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
)
|
)
|
||||||
@ -31,12 +36,15 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'id': 'sm22312215',
|
'id': 'sm22312215',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Big Buck Bunny',
|
'title': 'Big Buck Bunny',
|
||||||
|
'thumbnail': r're:https?://.*',
|
||||||
'uploader': 'takuya0301',
|
'uploader': 'takuya0301',
|
||||||
'uploader_id': '2698420',
|
'uploader_id': '2698420',
|
||||||
'upload_date': '20131123',
|
'upload_date': '20131123',
|
||||||
'timestamp': 1385182762,
|
'timestamp': 1385182762,
|
||||||
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
|
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
|
||||||
'duration': 33,
|
'duration': 33,
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
},
|
},
|
||||||
'skip': 'Requires an account',
|
'skip': 'Requires an account',
|
||||||
}, {
|
}, {
|
||||||
@ -48,6 +56,7 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'ext': 'swf',
|
'ext': 'swf',
|
||||||
'title': '【鏡音リン】Dance on media【オリジナル】take2!',
|
'title': '【鏡音リン】Dance on media【オリジナル】take2!',
|
||||||
'description': 'md5:689f066d74610b3b22e0f1739add0f58',
|
'description': 'md5:689f066d74610b3b22e0f1739add0f58',
|
||||||
|
'thumbnail': r're:https?://.*',
|
||||||
'uploader': 'りょうた',
|
'uploader': 'りょうた',
|
||||||
'uploader_id': '18822557',
|
'uploader_id': '18822557',
|
||||||
'upload_date': '20110429',
|
'upload_date': '20110429',
|
||||||
@ -64,9 +73,11 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'ext': 'unknown_video',
|
'ext': 'unknown_video',
|
||||||
'description': 'deleted',
|
'description': 'deleted',
|
||||||
'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>',
|
'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>',
|
||||||
|
'thumbnail': r're:https?://.*',
|
||||||
'upload_date': '20071224',
|
'upload_date': '20071224',
|
||||||
'timestamp': int, # timestamp field has different value if logged in
|
'timestamp': int, # timestamp field has different value if logged in
|
||||||
'duration': 304,
|
'duration': 304,
|
||||||
|
'view_count': int,
|
||||||
},
|
},
|
||||||
'skip': 'Requires an account',
|
'skip': 'Requires an account',
|
||||||
}, {
|
}, {
|
||||||
@ -76,12 +87,51 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '【第1回】RADIOアニメロミックス ラブライブ!~のぞえりRadio Garden~',
|
'title': '【第1回】RADIOアニメロミックス ラブライブ!~のぞえりRadio Garden~',
|
||||||
'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1',
|
'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1',
|
||||||
|
'thumbnail': r're:https?://.*',
|
||||||
'timestamp': 1388851200,
|
'timestamp': 1388851200,
|
||||||
'upload_date': '20140104',
|
'upload_date': '20140104',
|
||||||
'uploader': 'アニメロチャンネル',
|
'uploader': 'アニメロチャンネル',
|
||||||
'uploader_id': '312',
|
'uploader_id': '312',
|
||||||
},
|
},
|
||||||
'skip': 'The viewing period of the video you were searching for has expired.',
|
'skip': 'The viewing period of the video you were searching for has expired.',
|
||||||
|
}, {
|
||||||
|
# video not available via `getflv`; "old" HTML5 video
|
||||||
|
'url': 'http://www.nicovideo.jp/watch/sm1151009',
|
||||||
|
'md5': '8fa81c364eb619d4085354eab075598a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'sm1151009',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'マスターシステム本体内蔵のスペハリのメインテーマ(PSG版)',
|
||||||
|
'description': 'md5:6ee077e0581ff5019773e2e714cdd0b7',
|
||||||
|
'thumbnail': r're:https?://.*',
|
||||||
|
'duration': 184,
|
||||||
|
'timestamp': 1190868283,
|
||||||
|
'upload_date': '20070927',
|
||||||
|
'uploader': 'denden2',
|
||||||
|
'uploader_id': '1392194',
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
},
|
||||||
|
'skip': 'Requires an account',
|
||||||
|
}, {
|
||||||
|
# "New" HTML5 video
|
||||||
|
'url': 'http://www.nicovideo.jp/watch/sm31464864',
|
||||||
|
'md5': '351647b4917660986dc0fa8864085135',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'sm31464864',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '新作TVアニメ「戦姫絶唱シンフォギアAXZ」PV 最高画質',
|
||||||
|
'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
|
||||||
|
'timestamp': 1498514060,
|
||||||
|
'upload_date': '20170626',
|
||||||
|
'uploader': 'ゲス',
|
||||||
|
'uploader_id': '40826363',
|
||||||
|
'thumbnail': r're:https?://.*',
|
||||||
|
'duration': 198,
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
},
|
||||||
|
'skip': 'Requires an account',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
|
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -119,6 +169,84 @@ class NiconicoIE(InfoExtractor):
             self._downloader.report_warning('unable to log in: bad username or password')
         return login_ok

+    def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
+        def yesno(boolean):
+            return 'yes' if boolean else 'no'
+
+        session_api_data = api_data['video']['dmcInfo']['session_api']
+        session_api_endpoint = session_api_data['urls'][0]
+
+        format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
+
+        session_response = self._download_json(
+            session_api_endpoint['url'], video_id,
+            query={'_format': 'json'},
+            headers={'Content-Type': 'application/json'},
+            note='Downloading JSON metadata for %s' % format_id,
+            data=json.dumps({
+                'session': {
+                    'client_info': {
+                        'player_id': session_api_data['player_id'],
+                    },
+                    'content_auth': {
+                        'auth_type': session_api_data['auth_types'][session_api_data['protocols'][0]],
+                        'content_key_timeout': session_api_data['content_key_timeout'],
+                        'service_id': 'nicovideo',
+                        'service_user_id': session_api_data['service_user_id']
+                    },
+                    'content_id': session_api_data['content_id'],
+                    'content_src_id_sets': [{
+                        'content_src_ids': [{
+                            'src_id_to_mux': {
+                                'audio_src_ids': [audio_quality['id']],
+                                'video_src_ids': [video_quality['id']],
+                            }
+                        }]
+                    }],
+                    'content_type': 'movie',
+                    'content_uri': '',
+                    'keep_method': {
+                        'heartbeat': {
+                            'lifetime': session_api_data['heartbeat_lifetime']
+                        }
+                    },
+                    'priority': session_api_data['priority'],
+                    'protocol': {
+                        'name': 'http',
+                        'parameters': {
+                            'http_parameters': {
+                                'parameters': {
+                                    'http_output_download_parameters': {
+                                        'use_ssl': yesno(session_api_endpoint['is_ssl']),
+                                        'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']),
+                                    }
+                                }
+                            }
+                        }
+                    },
+                    'recipe_id': session_api_data['recipe_id'],
+                    'session_operation_auth': {
+                        'session_operation_auth_by_signature': {
+                            'signature': session_api_data['signature'],
+                            'token': session_api_data['token'],
+                        }
+                    },
+                    'timing_constraint': 'unlimited'
+                }
+            }))
+
+        resolution = video_quality.get('resolution', {})
+
+        return {
+            'url': session_response['data']['session']['content_uri'],
+            'format_id': format_id,
+            'ext': 'mp4',  # Session API are used in HTML5, which always serves mp4
+            'abr': float_or_none(audio_quality.get('bitrate'), 1000),
+            'vbr': float_or_none(video_quality.get('bitrate'), 1000),
+            'height': resolution.get('height'),
+            'width': resolution.get('width'),
+        }
+
     def _real_extract(self, url):
         video_id = self._match_id(url)

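Aside: the whole DMC handshake added above boils down to a single JSON POST. A minimal standalone sketch of it, assuming the endpoint layout and field names shown in the diff (`session_api_data` stands for the dict the extractor pulls out of `api_data['video']['dmcInfo']['session_api']`; none of this is a documented, stable API):

```python
import json
from urllib.request import Request, urlopen


def open_dmc_session(session_api_data, audio_src_id, video_src_id):
    # First (and only) session endpoint advertised by the watch page
    endpoint = session_api_data['urls'][0]
    payload = {
        'session': {
            'content_id': session_api_data['content_id'],
            'content_src_id_sets': [{
                'content_src_ids': [{
                    'src_id_to_mux': {
                        'audio_src_ids': [audio_src_id],
                        'video_src_ids': [video_src_id],
                    }
                }]
            }],
            'keep_method': {
                'heartbeat': {'lifetime': session_api_data['heartbeat_lifetime']},
            },
            'recipe_id': session_api_data['recipe_id'],
            'timing_constraint': 'unlimited',
        }
    }
    req = Request(
        endpoint['url'] + '?_format=json',
        data=json.dumps(payload).encode('utf-8'),
        headers={'Content-Type': 'application/json'})
    resp = json.loads(urlopen(req).read().decode('utf-8'))
    # The playable URL comes back inside the created session object
    return resp['data']['session']['content_uri']
```

The real request additionally carries the auth/signature blocks visible in the diff; they are omitted here only to keep the shape of the exchange visible.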
@@ -130,10 +258,16 @@ class NiconicoIE(InfoExtractor):
         if video_id.startswith('so'):
             video_id = self._match_id(handle.geturl())

-        video_info = self._download_xml(
-            'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
-            note='Downloading video info page')
+        api_data = self._parse_json(self._html_search_regex(
+            'data-api-data="([^"]+)"', webpage,
+            'API data', default='{}'), video_id)

+        def _format_id_from_url(video_url):
+            return 'economy' if video_real_url.endswith('low') else 'normal'
+
+        try:
+            video_real_url = api_data['video']['smileInfo']['url']
+        except KeyError:  # Flash videos
             # Get flv info
             flv_info_webpage = self._download_webpage(
                 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
@@ -153,10 +287,55 @@ class NiconicoIE(InfoExtractor):
             else:
                 raise ExtractorError('Unable to find video URL')

+            video_info_xml = self._download_xml(
+                'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
+                video_id, note='Downloading video info page')
+
+            def get_video_info(items):
+                if not isinstance(items, list):
+                    items = [items]
+                for item in items:
+                    ret = xpath_text(video_info_xml, './/' + item)
+                    if ret:
+                        return ret
+
             video_real_url = flv_info['url'][0]
+
+            extension = get_video_info('movie_type')
+            if not extension:
+                extension = determine_ext(video_real_url)
+
+            formats = [{
+                'url': video_real_url,
+                'ext': extension,
+                'format_id': _format_id_from_url(video_real_url),
+            }]
+        else:
+            formats = []
+
+            dmc_info = api_data['video'].get('dmcInfo')
+            if dmc_info:  # "New" HTML5 videos
+                quality_info = dmc_info['quality']
+                for audio_quality in quality_info['audios']:
+                    for video_quality in quality_info['videos']:
+                        if not audio_quality['available'] or not video_quality['available']:
+                            continue
+                        formats.append(self._extract_format_for_quality(
+                            api_data, video_id, audio_quality, video_quality))
+
+                self._sort_formats(formats)
+            else:  # "Old" HTML5 videos
+                formats = [{
+                    'url': video_real_url,
+                    'ext': 'mp4',
+                    'format_id': _format_id_from_url(video_real_url),
+                }]
+
+            def get_video_info(items):
+                return dict_get(api_data['video'], items)
+
         # Start extracting information
-        title = xpath_text(video_info, './/title')
+        title = get_video_info('title')
         if not title:
             title = self._og_search_title(webpage, default=None)
         if not title:
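Note that the format list for DMC videos is the cross-product of every available audio and video stream. A toy restatement of that nested loop (the quality ids are made up):

```python
# Pair every available audio stream with every available video stream,
# exactly like the nested loop in the diff above.
audios = [{'id': 'archive_aac_64kbps', 'available': True}]
videos = [{'id': 'archive_h264_360p', 'available': True},
          {'id': 'archive_h264_720p', 'available': False}]

for a in audios:
    for v in videos:
        if not (a['available'] and v['available']):
            continue
        # remove_start() in youtube-dl strips the 'archive_' prefix
        print('-'.join(s['id'].split('archive_')[-1] for s in (v, a)))
# -> h264_360p-aac_64kbps
```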
@@ -170,18 +349,15 @@ class NiconicoIE(InfoExtractor):
         watch_api_data = self._parse_json(watch_api_data_string, video_id) if watch_api_data_string else {}
         video_detail = watch_api_data.get('videoDetail', {})

-        extension = xpath_text(video_info, './/movie_type')
-        if not extension:
-            extension = determine_ext(video_real_url)

         thumbnail = (
-            xpath_text(video_info, './/thumbnail_url') or
+            get_video_info(['thumbnail_url', 'thumbnailURL']) or
             self._html_search_meta('image', webpage, 'thumbnail', default=None) or
             video_detail.get('thumbnail'))

-        description = xpath_text(video_info, './/description')
+        description = get_video_info('description')

-        timestamp = parse_iso8601(xpath_text(video_info, './/first_retrieve'))
+        timestamp = (parse_iso8601(get_video_info('first_retrieve')) or
+                     unified_timestamp(get_video_info('postedDateTime')))
         if not timestamp:
             match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
             if match:
@@ -191,7 +367,7 @@ class NiconicoIE(InfoExtractor):
                     video_detail['postedAt'].replace('/', '-'),
                     delimiter=' ', timezone=datetime.timedelta(hours=9))

-        view_count = int_or_none(xpath_text(video_info, './/view_counter'))
+        view_count = int_or_none(get_video_info(['view_counter', 'viewCount']))
         if not view_count:
             match = self._html_search_regex(
                 r'>Views: <strong[^>]*>([^<]+)</strong>',
@@ -200,38 +376,33 @@ class NiconicoIE(InfoExtractor):
                 view_count = int_or_none(match.replace(',', ''))
         view_count = view_count or video_detail.get('viewCount')

-        comment_count = int_or_none(xpath_text(video_info, './/comment_num'))
+        comment_count = (int_or_none(get_video_info('comment_num')) or
+                         video_detail.get('commentCount') or
+                         try_get(api_data, lambda x: x['thread']['commentCount']))
         if not comment_count:
             match = self._html_search_regex(
                 r'>Comments: <strong[^>]*>([^<]+)</strong>',
                 webpage, 'comment count', default=None)
             if match:
                 comment_count = int_or_none(match.replace(',', ''))
-        comment_count = comment_count or video_detail.get('commentCount')

         duration = (parse_duration(
-            xpath_text(video_info, './/length') or
+            get_video_info('length') or
             self._html_search_meta(
                 'video:duration', webpage, 'video duration', default=None)) or
-            video_detail.get('length'))
+            video_detail.get('length') or
+            get_video_info('duration'))

-        webpage_url = xpath_text(video_info, './/watch_url') or url
+        webpage_url = get_video_info('watch_url') or url

-        if video_info.find('.//ch_id') is not None:
-            uploader_id = video_info.find('.//ch_id').text
-            uploader = video_info.find('.//ch_name').text
-        elif video_info.find('.//user_id') is not None:
-            uploader_id = video_info.find('.//user_id').text
-            uploader = video_info.find('.//user_nickname').text
-        else:
-            uploader_id = uploader = None
+        owner = api_data.get('owner', {})
+        uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id')
+        uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname')

         return {
             'id': video_id,
-            'url': video_real_url,
             'title': title,
-            'ext': extension,
-            'format_id': 'economy' if video_real_url.endswith('low') else 'normal',
+            'formats': formats,
             'thumbnail': thumbnail,
             'description': description,
             'uploader': uploader,
@@ -6,6 +6,7 @@ from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
     int_or_none,
+    js_to_json,
     smuggle_url,
     try_get,
 )
@@ -24,8 +25,6 @@ class NoovoIE(InfoExtractor):
             'timestamp': 1491399228,
             'upload_date': '20170405',
             'uploader_id': '618566855001',
-            'creator': 'vtele',
-            'view_count': int,
             'series': 'RPM+',
         },
         'params': {
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5395865725001',
|
'id': '5395865725001',
|
||||||
'title': 'Épisode 13 : Les retrouvailles',
|
'title': 'Épisode 13 : Les retrouvailles',
|
||||||
'description': 'md5:336d5ebc5436534e61d16e63ddfca327',
|
'description': 'md5:888c3330f0c1b4476c5bc99a1c040473',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'timestamp': 1492019320,
|
'timestamp': 1492019320,
|
||||||
'upload_date': '20170412',
|
'upload_date': '20170412',
|
||||||
'uploader_id': '618566855001',
|
'uploader_id': '618566855001',
|
||||||
'creator': 'vtele',
|
|
||||||
'view_count': int,
|
|
||||||
'series': "L'amour est dans le pré",
|
'series': "L'amour est dans le pré",
|
||||||
'season_number': 5,
|
'season_number': 5,
|
||||||
'episode': 'Épisode 13',
|
'episode': 'Épisode 13',
|
||||||
@ -58,40 +55,46 @@ class NoovoIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
data = self._download_json(
|
webpage = self._download_webpage(url, video_id)
|
||||||
'http://api.noovo.ca/api/v1/pages/single-episode/%s' % video_id,
|
|
||||||
video_id)['data']
|
|
||||||
|
|
||||||
content = try_get(data, lambda x: x['contents'][0])
|
bc_url = BrightcoveNewIE._extract_url(self, webpage)
|
||||||
|
|
||||||
brightcove_id = data.get('brightcoveId') or content['brightcoveId']
|
data = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'(?s)dataLayer\.push\(\s*({.+?})\s*\);', webpage, 'data',
|
||||||
|
default='{}'),
|
||||||
|
video_id, transform_source=js_to_json, fatal=False)
|
||||||
|
|
||||||
|
title = try_get(
|
||||||
|
data, lambda x: x['video']['nom'],
|
||||||
|
compat_str) or self._html_search_meta(
|
||||||
|
'dcterms.Title', webpage, 'title', fatal=True)
|
||||||
|
|
||||||
|
description = self._html_search_meta(
|
||||||
|
('dcterms.Description', 'description'), webpage, 'description')
|
||||||
|
|
||||||
series = try_get(
|
series = try_get(
|
||||||
data, (
|
data, lambda x: x['emission']['nom']) or self._search_regex(
|
||||||
lambda x: x['show']['title'],
|
r'<div[^>]+class="banner-card__subtitle h4"[^>]*>([^<]+)',
|
||||||
lambda x: x['season']['show']['title']),
|
webpage, 'series', default=None)
|
||||||
compat_str)
|
|
||||||
|
|
||||||
episode = None
|
season_el = try_get(data, lambda x: x['emission']['saison'], dict) or {}
|
||||||
og = data.get('og')
|
season = try_get(season_el, lambda x: x['nom'], compat_str)
|
||||||
if isinstance(og, dict) and og.get('type') == 'video.episode':
|
season_number = int_or_none(try_get(season_el, lambda x: x['numero']))
|
||||||
episode = og.get('title')
|
|
||||||
|
|
||||||
video = content or data
|
episode_el = try_get(season_el, lambda x: x['episode'], dict) or {}
|
||||||
|
episode = try_get(episode_el, lambda x: x['nom'], compat_str)
|
||||||
|
episode_number = int_or_none(try_get(episode_el, lambda x: x['numero']))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'ie_key': BrightcoveNewIE.ie_key(),
|
'ie_key': BrightcoveNewIE.ie_key(),
|
||||||
'url': smuggle_url(
|
'url': smuggle_url(bc_url, {'geo_countries': ['CA']}),
|
||||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
'title': title,
|
||||||
{'geo_countries': ['CA']}),
|
'description': description,
|
||||||
'id': brightcove_id,
|
|
||||||
'title': video.get('title'),
|
|
||||||
'creator': video.get('source'),
|
|
||||||
'view_count': int_or_none(video.get('viewsCount')),
|
|
||||||
'series': series,
|
'series': series,
|
||||||
'season_number': int_or_none(try_get(
|
'season': season,
|
||||||
data, lambda x: x['season']['seasonNumber'])),
|
'season_number': season_number,
|
||||||
'episode': episode,
|
'episode': episode,
|
||||||
'episode_number': int_or_none(data.get('episodeNumber')),
|
'episode_number': episode_number,
|
||||||
}
|
}
|
||||||
|
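The rewritten extractor scrapes its metadata out of a `dataLayer.push()` call instead of the retired JSON API. A sketch of that step with a made-up HTML snippet (youtube-dl additionally runs the capture through `js_to_json()` before parsing, which the valid-JSON snippet below does not need):

```python
import json
import re

html = '''<script>dataLayer.push({
    "video": {"nom": "Episode 13 : Les retrouvailles"},
    "emission": {"nom": "L'amour est dans le pre",
                 "saison": {"nom": "Saison 5", "numero": "5",
                            "episode": {"nom": "Episode 13", "numero": "13"}}}
});</script>'''

# Same regex as in the diff above
m = re.search(r'(?s)dataLayer\.push\(\s*({.+?})\s*\);', html)
data = json.loads(m.group(1))
print(data['emission']['saison']['numero'])  # -> 5
```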
@@ -237,7 +237,7 @@ class NRKTVIE(NRKBaseIE):
                         (?:/\d{2}-\d{2}-\d{4})?
                         (?:\#del=(?P<part_id>\d+))?
                     ''' % _EPISODE_RE
-    _API_HOST = 'psapi-we.nrk.no'
+    _API_HOST = 'psapi-ne.nrk.no'

     _TESTS = [{
         'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
@@ -1,14 +1,244 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import json
+import os
 import re
+import subprocess
+import tempfile

 from .common import InfoExtractor
-from ..compat import compat_chr
-from ..utils import (
-    determine_ext,
-    ExtractorError,
+from ..compat import (
+    compat_urlparse,
+    compat_kwargs,
 )
+from ..utils import (
+    check_executable,
+    determine_ext,
+    encodeArgument,
+    ExtractorError,
+    get_element_by_id,
+    get_exe_version,
+    is_outdated_version,
+    std_headers,
+)
+
+
+def cookie_to_dict(cookie):
+    cookie_dict = {
+        'name': cookie.name,
+        'value': cookie.value,
+    }
+    if cookie.port_specified:
+        cookie_dict['port'] = cookie.port
+    if cookie.domain_specified:
+        cookie_dict['domain'] = cookie.domain
+    if cookie.path_specified:
+        cookie_dict['path'] = cookie.path
+    if cookie.expires is not None:
+        cookie_dict['expires'] = cookie.expires
+    if cookie.secure is not None:
+        cookie_dict['secure'] = cookie.secure
+    if cookie.discard is not None:
+        cookie_dict['discard'] = cookie.discard
+    try:
+        if (cookie.has_nonstandard_attr('httpOnly') or
+                cookie.has_nonstandard_attr('httponly') or
+                cookie.has_nonstandard_attr('HttpOnly')):
+            cookie_dict['httponly'] = True
+    except TypeError:
+        pass
+    return cookie_dict
+
+
+def cookie_jar_to_list(cookie_jar):
+    return [cookie_to_dict(cookie) for cookie in cookie_jar]
+
+
+class PhantomJSwrapper(object):
+    """PhantomJS wrapper class
+
+    This class is experimental.
+    """
+
+    _TEMPLATE = r'''
+        phantom.onError = function(msg, trace) {{
+          var msgStack = ['PHANTOM ERROR: ' + msg];
+          if(trace && trace.length) {{
+            msgStack.push('TRACE:');
+            trace.forEach(function(t) {{
+              msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line
+                + (t.function ? ' (in function ' + t.function +')' : ''));
+            }});
+          }}
+          console.error(msgStack.join('\n'));
+          phantom.exit(1);
+        }};
+        var page = require('webpage').create();
+        var fs = require('fs');
+        var read = {{ mode: 'r', charset: 'utf-8' }};
+        var write = {{ mode: 'w', charset: 'utf-8' }};
+        JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{
+          phantom.addCookie(x);
+        }});
+        page.settings.resourceTimeout = {timeout};
+        page.settings.userAgent = "{ua}";
+        page.onLoadStarted = function() {{
+          page.evaluate(function() {{
+            delete window._phantom;
+            delete window.callPhantom;
+          }});
+        }};
+        var saveAndExit = function() {{
+          fs.write("{html}", page.content, write);
+          fs.write("{cookies}", JSON.stringify(phantom.cookies), write);
+          phantom.exit();
+        }};
+        page.onLoadFinished = function(status) {{
+          if(page.url === "") {{
+            page.setContent(fs.read("{html}", read), "{url}");
+          }}
+          else {{
+            {jscode}
+          }}
+        }};
+        page.open("");
+    '''
+
+    _TMP_FILE_NAMES = ['script', 'html', 'cookies']
+
+    @staticmethod
+    def _version():
+        return get_exe_version('phantomjs', version_re=r'([0-9.]+)')
+
+    def __init__(self, extractor, required_version=None, timeout=10000):
+        self.exe = check_executable('phantomjs', ['-v'])
+        if not self.exe:
+            raise ExtractorError('PhantomJS executable not found in PATH, '
+                                 'download it from http://phantomjs.org',
+                                 expected=True)
+
+        self.extractor = extractor
+
+        if required_version:
+            version = self._version()
+            if is_outdated_version(version, required_version):
+                self.extractor._downloader.report_warning(
+                    'Your copy of PhantomJS is outdated, update it to version '
+                    '%s or newer if you encounter any errors.' % required_version)
+
+        self.options = {
+            'timeout': timeout,
+        }
+        self._TMP_FILES = {}
+        for name in self._TMP_FILE_NAMES:
+            tmp = tempfile.NamedTemporaryFile(delete=False)
+            tmp.close()
+            self._TMP_FILES[name] = tmp
+
+    def __del__(self):
+        for name in self._TMP_FILE_NAMES:
+            try:
+                os.remove(self._TMP_FILES[name].name)
+            except:
+                pass
+
+    def _save_cookies(self, url):
+        cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar)
+        for cookie in cookies:
+            if 'path' not in cookie:
+                cookie['path'] = '/'
+            if 'domain' not in cookie:
+                cookie['domain'] = compat_urlparse.urlparse(url).netloc
+        with open(self._TMP_FILES['cookies'].name, 'wb') as f:
+            f.write(json.dumps(cookies).encode('utf-8'))
+
+    def _load_cookies(self):
+        with open(self._TMP_FILES['cookies'].name, 'rb') as f:
+            cookies = json.loads(f.read().decode('utf-8'))
+        for cookie in cookies:
+            if cookie['httponly'] is True:
+                cookie['rest'] = {'httpOnly': None}
+            if 'expiry' in cookie:
+                cookie['expire_time'] = cookie['expiry']
+            self.extractor._set_cookie(**compat_kwargs(cookie))
+
+    def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'):
+        """
+        Downloads webpage (if needed) and executes JS
+
+        Params:
+            url: website url
+            html: optional, html code of website
+            video_id: video id
+            note: optional, displayed when downloading webpage
+            note2: optional, displayed when executing JS
+            headers: custom http headers
+            jscode: code to be executed when page is loaded
+
+        Returns tuple with:
+            * downloaded website (after JS execution)
+            * anything you print with `console.log` (but not inside `page.execute`!)
+
+        In most cases you don't need to add any `jscode`.
+        It is executed in `page.onLoadFinished`.
+        `saveAndExit();` is mandatory, use it instead of `phantom.exit()`
+        It is possible to wait for some element on the webpage, for example:
+            var check = function() {
+              var elementFound = page.evaluate(function() {
+                return document.querySelector('#b.done') !== null;
+              });
+              if(elementFound)
+                saveAndExit();
+              else
+                window.setTimeout(check, 500);
+            }
+
+            page.evaluate(function(){
+              document.querySelector('#a').click();
+            });
+            check();
+        """
+        if 'saveAndExit();' not in jscode:
+            raise ExtractorError('`saveAndExit();` not found in `jscode`')
+        if not html:
+            html = self.extractor._download_webpage(url, video_id, note=note, headers=headers)
+        with open(self._TMP_FILES['html'].name, 'wb') as f:
+            f.write(html.encode('utf-8'))
+
+        self._save_cookies(url)
+
+        replaces = self.options
+        replaces['url'] = url
+        user_agent = headers.get('User-Agent') or std_headers['User-Agent']
+        replaces['ua'] = user_agent.replace('"', '\\"')
+        replaces['jscode'] = jscode
+
+        for x in self._TMP_FILE_NAMES:
+            replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"')
+
+        with open(self._TMP_FILES['script'].name, 'wb') as f:
+            f.write(self._TEMPLATE.format(**replaces).encode('utf-8'))
+
+        if video_id is None:
+            self.extractor.to_screen('%s' % (note2,))
+        else:
+            self.extractor.to_screen('%s: %s' % (video_id, note2))
+
+        p = subprocess.Popen([
+            self.exe, '--ssl-protocol=any',
+            self._TMP_FILES['script'].name
+        ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        out, err = p.communicate()
+        if p.returncode != 0:
+            raise ExtractorError(
+                'Executing JS failed\n:' + encodeArgument(err))
+        with open(self._TMP_FILES['html'].name, 'rb') as f:
+            html = f.read().decode('utf-8')
+
+        self._load_cookies()
+
+        return (html, encodeArgument(out))
+
+
 class OpenloadIE(InfoExtractor):
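How an extractor is meant to drive the new wrapper, per the `get()` docstring above (a sketch for inside `_real_extract()`, so `self`, `url` and `video_id` are assumed to be in scope; the `#player` selector is made up):

```python
phantom = PhantomJSwrapper(self, required_version='2.0')

# jscode runs in page.onLoadFinished and must end by calling
# saveAndExit() instead of phantom.exit()
jscode = '''
    var check = function() {
      var found = page.evaluate(function() {
        return document.querySelector('#player') !== null;
      });
      if(found)
        saveAndExit();
      else
        window.setTimeout(check, 500);
    }
    check();
'''
webpage, logs = phantom.get(url, video_id=video_id, jscode=jscode)
```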
@@ -58,6 +288,8 @@ class OpenloadIE(InfoExtractor):
         'only_matching': True,
     }]

+    _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
+
     @staticmethod
     def _extract_urls(webpage):
         return re.findall(
@@ -66,47 +298,22 @@ class OpenloadIE(InfoExtractor):

     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id)
+        url = 'https://openload.co/embed/%s/' % video_id
+        headers = {
+            'User-Agent': self._USER_AGENT,
+        }
+
+        webpage = self._download_webpage(url, video_id, headers=headers)
+
         if 'File not found' in webpage or 'deleted by the owner' in webpage:
-            raise ExtractorError('File not found', expected=True)
+            raise ExtractorError('File not found', expected=True, video_id=video_id)

-        ol_id = self._search_regex(
-            '<span[^>]+id="[^"]+"[^>]*>([0-9A-Za-z]+)</span>',
-            webpage, 'openload ID')
+        phantom = PhantomJSwrapper(self, required_version='2.0')
+        webpage, _ = phantom.get(url, html=webpage, video_id=video_id, headers=headers)

-        decoded = ''
-        a = ol_id[0:24]
-        b = []
-        for i in range(0, len(a), 8):
-            b.append(int(a[i:i + 8] or '0', 16))
-        ol_id = ol_id[24:]
-        j = 0
-        k = 0
-        while j < len(ol_id):
-            c = 128
-            d = 0
-            e = 0
-            f = 0
-            _more = True
-            while _more:
-                if j + 1 >= len(ol_id):
-                    c = 143
-                f = int(ol_id[j:j + 2] or '0', 16)
-                j += 2
-                d += (f & 127) << e
-                e += 7
-                _more = f >= c
-            g = d ^ b[k % 3]
-            for i in range(4):
-                char_dec = (g >> 8 * i) & (c + 127)
-                char = compat_chr(char_dec)
-                if char != '#':
-                    decoded += char
-            k += 1
+        decoded_id = get_element_by_id('streamurl', webpage)

-        video_url = 'https://openload.co/stream/%s?mime=true'
-        video_url = video_url % decoded
+        video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id

         title = self._og_search_title(webpage, default=None) or self._search_regex(
             r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
@@ -114,15 +321,17 @@ class OpenloadIE(InfoExtractor):
             'description', webpage, 'title', fatal=True)

         entries = self._parse_html5_media_entries(url, webpage, video_id)
-        subtitles = entries[0]['subtitles'] if entries else None
+        entry = entries[0] if entries else {}
+        subtitles = entry.get('subtitles')

         info_dict = {
             'id': video_id,
             'title': title,
-            'thumbnail': self._og_search_thumbnail(webpage, default=None),
+            'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
             'url': video_url,
             # Seems all videos have extensions in their titles
             'ext': determine_ext(title, 'mp4'),
             'subtitles': subtitles,
+            'http_headers': headers,
         }
         return info_dict
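With PhantomJS having executed the page, the stream id is simply read out of the rendered DOM; the deleted byte-twiddling decoder is gone entirely. The readout in isolation (the markup below is made up):

```python
from youtube_dl.utils import get_element_by_id

webpage = '<span id="streamurl">SOMEIDVALUE</span>'  # hypothetical rendered DOM
decoded_id = get_element_by_id('streamurl', webpage)
print('https://openload.co/stream/%s?mime=true' % decoded_id)
```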
@@ -6,14 +6,15 @@ import re
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
-    HEADRequest,
-    unified_strdate,
-    strip_jsonp,
-    int_or_none,
-    float_or_none,
     determine_ext,
+    float_or_none,
+    HEADRequest,
+    int_or_none,
+    orderedSet,
     remove_end,
+    strip_jsonp,
     unescapeHTML,
+    unified_strdate,
 )


@@ -307,3 +308,108 @@ class ORFIPTVIE(InfoExtractor):
             'upload_date': upload_date,
             'formats': formats,
         }
+
+
+class ORFFM4StoryIE(InfoExtractor):
+    IE_NAME = 'orf:fm4:story'
+    IE_DESC = 'fm4.orf.at stories'
+    _VALID_URL = r'https?://fm4\.orf\.at/stories/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://fm4.orf.at/stories/2865738/',
+        'playlist': [{
+            'md5': 'e1c2c706c45c7b34cf478bbf409907ca',
+            'info_dict': {
+                'id': '547792',
+                'ext': 'flv',
+                'title': 'Manu Delago und Inner Tongue live',
+                'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
+                'duration': 1748.52,
+                'thumbnail': r're:^https?://.*\.jpg$',
+                'upload_date': '20170913',
+            },
+        }, {
+            'md5': 'c6dd2179731f86f4f55a7b49899d515f',
+            'info_dict': {
+                'id': '547798',
+                'ext': 'flv',
+                'title': 'Manu Delago und Inner Tongue live (2)',
+                'duration': 1504.08,
+                'thumbnail': r're:^https?://.*\.jpg$',
+                'upload_date': '20170913',
+                'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
+            },
+        }],
+    }
+
+    def _real_extract(self, url):
+        story_id = self._match_id(url)
+        webpage = self._download_webpage(url, story_id)
+
+        entries = []
+        all_ids = orderedSet(re.findall(r'data-video(?:id)?="(\d+)"', webpage))
+        for idx, video_id in enumerate(all_ids):
+            data = self._download_json(
+                'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
+                video_id)[0]
+
+            duration = float_or_none(data['duration'], 1000)
+
+            video = data['sources']['q8c']
+            load_balancer_url = video['loadBalancerUrl']
+            abr = int_or_none(video.get('audioBitrate'))
+            vbr = int_or_none(video.get('bitrate'))
+            fps = int_or_none(video.get('videoFps'))
+            width = int_or_none(video.get('videoWidth'))
+            height = int_or_none(video.get('videoHeight'))
+            thumbnail = video.get('preview')
+
+            rendition = self._download_json(
+                load_balancer_url, video_id, transform_source=strip_jsonp)
+
+            f = {
+                'abr': abr,
+                'vbr': vbr,
+                'fps': fps,
+                'width': width,
+                'height': height,
+            }
+
+            formats = []
+            for format_id, format_url in rendition['redirect'].items():
+                if format_id == 'rtmp':
+                    ff = f.copy()
+                    ff.update({
+                        'url': format_url,
+                        'format_id': format_id,
+                    })
+                    formats.append(ff)
+                elif determine_ext(format_url) == 'f4m':
+                    formats.extend(self._extract_f4m_formats(
+                        format_url, video_id, f4m_id=format_id))
+                elif determine_ext(format_url) == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        format_url, video_id, 'mp4', m3u8_id=format_id))
+                else:
+                    continue
+            self._sort_formats(formats)
+
+            title = remove_end(self._og_search_title(webpage), ' - fm4.ORF.at')
+            if idx >= 1:
+                # Titles are duplicates, make them unique
+                title += ' (' + str(idx + 1) + ')'
+            description = self._og_search_description(webpage)
+            upload_date = unified_strdate(self._html_search_meta(
+                'dc.date', webpage, 'upload date'))
+
+            entries.append({
+                'id': video_id,
+                'title': title,
+                'description': description,
+                'duration': duration,
+                'thumbnail': thumbnail,
+                'upload_date': upload_date,
+                'formats': formats,
+            })
+
+        return self.playlist_result(entries)
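All videos in an FM4 story share one og:title, so the extractor numbers the duplicates. The `idx` logic in isolation:

```python
def unique_titles(base, count):
    # First entry keeps the page title, later ones get ' (2)', ' (3)', ...
    return [base if i == 0 else '%s (%d)' % (base, i + 1) for i in range(count)]

print(unique_titles('Manu Delago und Inner Tongue live', 2))
# ['Manu Delago und Inner Tongue live', 'Manu Delago und Inner Tongue live (2)']
```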
@@ -189,7 +189,7 @@ class PBSIE(InfoExtractor):
            # Direct video URL
            (?:%s)/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
            # Article with embedded player (or direct video)
-           (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
+           (?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
            # Player
            (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
        )
@@ -345,6 +345,21 @@ class PBSIE(InfoExtractor):
                 'formats': 'mincount:8',
             },
         },
+        {
+            # https://github.com/rg3/youtube-dl/issues/13801
+            'url': 'https://www.pbs.org/video/pbs-newshour-full-episode-july-31-2017-1501539057/',
+            'info_dict': {
+                'id': '3003333873',
+                'ext': 'mp4',
+                'title': 'PBS NewsHour - full episode July 31, 2017',
+                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+                'duration': 3265,
+                'thumbnail': r're:^https?://.*\.jpg$',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
         {
             'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
             'only_matching': True,
@@ -433,6 +448,9 @@ class PBSIE(InfoExtractor):
                 if url:
                     break

+        if not url:
+            url = self._og_search_url(webpage)
+
         mobj = re.match(self._VALID_URL, url)

         player_id = mobj.group('player_id')
@@ -80,18 +80,24 @@ class PeriscopeIE(PeriscopeBaseIE):
         stream = self._call_api(
             'getAccessPublic', {'broadcast_id': token}, token)

+        video_urls = set()
         formats = []
-        for format_id in ('replay', 'rtmp', 'hls', 'https_hls'):
+        for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'):
             video_url = stream.get(format_id + '_url')
-            if not video_url:
+            if not video_url or video_url in video_urls:
                 continue
-            f = {
+            video_urls.add(video_url)
+            if format_id != 'rtmp':
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, token, 'mp4',
+                    entry_protocol='m3u8_native'
+                    if state in ('ended', 'timed_out') else 'm3u8',
+                    m3u8_id=format_id, fatal=False))
+                continue
+            formats.append({
                 'url': video_url,
                 'ext': 'flv' if format_id == 'rtmp' else 'mp4',
-            }
-            if format_id != 'rtmp':
-                f['protocol'] = 'm3u8_native' if state in ('ended', 'timed_out') else 'm3u8'
-            formats.append(f)
+            })
         self._sort_formats(formats)

         return {
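The protocol choice folded into `_extract_m3u8_formats()` above, as a one-liner: finished replays can use the native HLS downloader, anything still live needs ffmpeg's:

```python
def hls_protocol(state):
    # state comes from the getAccessPublic API response
    return 'm3u8_native' if state in ('ended', 'timed_out') else 'm3u8'

assert hls_protocol('ended') == 'm3u8_native'
assert hls_protocol('running') == 'm3u8'
```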
@@ -18,6 +18,7 @@ from ..utils import (
     parse_duration,
     qualities,
     srt_subtitles_timecode,
+    try_get,
     update_url_query,
     urlencode_postdata,
 )
@@ -26,6 +27,39 @@ from ..utils import (
 class PluralsightBaseIE(InfoExtractor):
     _API_BASE = 'https://app.pluralsight.com'

+    def _download_course(self, course_id, url, display_id):
+        try:
+            return self._download_course_rpc(course_id, url, display_id)
+        except ExtractorError:
+            # Old API fallback
+            return self._download_json(
+                'https://app.pluralsight.com/player/user/api/v1/player/payload',
+                display_id, data=urlencode_postdata({'courseId': course_id}),
+                headers={'Referer': url})
+
+    def _download_course_rpc(self, course_id, url, display_id):
+        response = self._download_json(
+            '%s/player/functions/rpc' % self._API_BASE, display_id,
+            'Downloading course JSON',
+            data=json.dumps({
+                'fn': 'bootstrapPlayer',
+                'payload': {
+                    'courseId': course_id,
+                },
+            }).encode('utf-8'),
+            headers={
+                'Content-Type': 'application/json;charset=utf-8',
+                'Referer': url,
+            })
+
+        course = try_get(response, lambda x: x['payload']['course'], dict)
+        if course:
+            return course
+
+        raise ExtractorError(
+            '%s said: %s' % (self.IE_NAME, response['error']['message']),
+            expected=True)
+
+
 class PluralsightIE(PluralsightBaseIE):
     IE_NAME = 'pluralsight'
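Shape of the `bootstrapPlayer` RPC the new helper tries first, before falling back to the old payload endpoint. A standalone sketch (endpoint and body copied from the diff; `requests` is used here purely for brevity, the extractor itself goes through `_download_json`):

```python
import requests


def download_course_rpc(course_id, referer):
    r = requests.post(
        'https://app.pluralsight.com/player/functions/rpc',
        json={'fn': 'bootstrapPlayer', 'payload': {'courseId': course_id}},
        headers={'Referer': referer})
    body = r.json()
    course = (body.get('payload') or {}).get('course')
    if course:
        return course
    raise RuntimeError(body['error']['message'])
```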
@@ -162,10 +196,7 @@ class PluralsightIE(PluralsightBaseIE):

         display_id = '%s-%s' % (name, clip_id)

-        course = self._download_json(
-            'https://app.pluralsight.com/player/user/api/v1/player/payload',
-            display_id, data=urlencode_postdata({'courseId': course_name}),
-            headers={'Referer': url})
+        course = self._download_course(course_name, url, display_id)

         collection = course['modules']

@@ -224,6 +255,7 @@ class PluralsightIE(PluralsightBaseIE):
             req_format_split = req_format.split('-', 1)
             if len(req_format_split) > 1:
                 req_ext, req_quality = req_format_split
+                req_quality = '-'.join(req_quality.split('-')[:2])
                 for allowed_quality in ALLOWED_QUALITIES:
                     if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities:
                         return (AllowedQuality(req_ext, (req_quality, )), )
@@ -330,18 +362,7 @@ class PluralsightCourseIE(PluralsightBaseIE):

         # TODO: PSM cookie

-        course = self._download_json(
-            '%s/player/functions/rpc' % self._API_BASE, course_id,
-            'Downloading course JSON',
-            data=json.dumps({
-                'fn': 'bootstrapPlayer',
-                'payload': {
-                    'courseId': course_id,
-                }
-            }).encode('utf-8'),
-            headers={
-                'Content-Type': 'application/json;charset=utf-8'
-            })['payload']['course']
+        course = self._download_course(course_id, url, course_id)

         title = course['title']
         course_name = course['name']
@@ -9,10 +9,16 @@ from ..utils import int_or_none

 class PodomaticIE(InfoExtractor):
     IE_NAME = 'podomatic'
-    _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'
+    _VALID_URL = r'''(?x)
+                    (?P<proto>https?)://
+                        (?:
+                            (?P<channel>[^.]+)\.podomatic\.com/entry|
+                            (?:www\.)?podomatic\.com/podcasts/(?P<channel_2>[^/]+)/episodes
+                        )/
+                    (?P<id>[^/?#&]+)
+                '''

-    _TESTS = [
-        {
+    _TESTS = [{
         'url': 'http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00',
         'md5': '84bb855fcf3429e6bf72460e1eed782d',
         'info_dict': {
@@ -23,8 +29,7 @@ class PodomaticIE(InfoExtractor):
             'title': '64. When the Moon Hits Your Eye',
             'duration': 446,
         }
-    },
-    {
+    }, {
         'url': 'http://ostbahnhof.podomatic.com/entry/2013-11-15T16_31_21-08_00',
         'md5': 'd2cf443931b6148e27638650e2638297',
         'info_dict': {
@@ -35,13 +40,15 @@ class PodomaticIE(InfoExtractor):
             'title': 'Einunddreizig',
             'duration': 3799,
         }
-    },
-]
+    }, {
+        'url': 'https://www.podomatic.com/podcasts/scienceteachingtips/episodes/2009-01-02T16_03_35-08_00',
+        'only_matching': True,
+    }]

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
-        channel = mobj.group('channel')
+        channel = mobj.group('channel') or mobj.group('channel_2')

         json_url = (('%s://%s.podomatic.com/entry/embed_params/%s' +
                      '?permalink=true&rtmp=0') %
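Quick smoke test that the widened `_VALID_URL` accepts both URL shapes (regex copied verbatim from the diff):

```python
import re

_VALID_URL = r'''(?x)
                (?P<proto>https?)://
                    (?:
                        (?P<channel>[^.]+)\.podomatic\.com/entry|
                        (?:www\.)?podomatic\.com/podcasts/(?P<channel_2>[^/]+)/episodes
                    )/
                (?P<id>[^/?#&]+)
            '''

for u in ('http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00',
          'https://www.podomatic.com/podcasts/scienceteachingtips/episodes/2009-01-02T16_03_35-08_00'):
    m = re.match(_VALID_URL, u)
    print(m.group('channel') or m.group('channel_2'), m.group('id'))
```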
youtube_dl/extractor/popcorntv.py (new file, 78 lines)

@@ -0,0 +1,78 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    extract_attributes,
+    int_or_none,
+    unified_timestamp,
+)
+
+
+class PopcornTVIE(InfoExtractor):
+    _VALID_URL = r'https?://[^/]+\.popcorntv\.it/guarda/(?P<display_id>[^/]+)/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://animemanga.popcorntv.it/guarda/food-wars-battaglie-culinarie-episodio-01/9183',
+        'md5': '47d65a48d147caf692ab8562fe630b45',
+        'info_dict': {
+            'id': '9183',
+            'display_id': 'food-wars-battaglie-culinarie-episodio-01',
+            'ext': 'mp4',
+            'title': 'Food Wars, Battaglie Culinarie | Episodio 01',
+            'description': 'md5:b8bea378faae4651d3b34c6e112463d0',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'timestamp': 1497610857,
+            'upload_date': '20170616',
+            'duration': 1440,
+            'view_count': int,
+        },
+    }, {
+        'url': 'https://cinema.popcorntv.it/guarda/smash-cut/10433',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id, video_id = mobj.group('display_id', 'id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        m3u8_url = extract_attributes(
+            self._search_regex(
+                r'(<link[^>]+itemprop=["\'](?:content|embed)Url[^>]*>)',
+                webpage, 'content'
+            ))['href']
+
+        formats = self._extract_m3u8_formats(
+            m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native',
+            m3u8_id='hls')
+
+        title = self._search_regex(
+            r'<h1[^>]+itemprop=["\']name[^>]*>([^<]+)', webpage,
+            'title', default=None) or self._og_search_title(webpage)
+
+        description = self._html_search_regex(
+            r'(?s)<article[^>]+itemprop=["\']description[^>]*>(.+?)</article>',
+            webpage, 'description', fatal=False)
+        thumbnail = self._og_search_thumbnail(webpage)
+        timestamp = unified_timestamp(self._html_search_meta(
+            'uploadDate', webpage, 'timestamp'))
+        print(self._html_search_meta(
+            'duration', webpage))
+        duration = int_or_none(self._html_search_meta(
+            'duration', webpage), invscale=60)
+        view_count = int_or_none(self._html_search_meta(
+            'interactionCount', webpage, 'view count'))
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'duration': duration,
+            'view_count': view_count,
+            'formats': formats,
+        }
@@ -54,7 +54,7 @@ class PornHdIE(InfoExtractor):
             r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')

         sources = self._parse_json(js_to_json(self._search_regex(
-            r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]",
+            r"(?s)sources'?\s*[:=]\s*(\{.+?\})",
             webpage, 'sources', default='{}')), video_id)

         if not sources:
@@ -82,7 +82,8 @@ class PornHdIE(InfoExtractor):
         view_count = int_or_none(self._html_search_regex(
             r'(\d+) views\s*<', webpage, 'view count', fatal=False))
         thumbnail = self._search_regex(
-            r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
+            r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
+            'thumbnail', fatal=False, group='url')

         return {
             'id': video_id,
@@ -186,7 +186,7 @@ class PornHubIE(InfoExtractor):
         title, thumbnail, duration = [None] * 3

         video_uploader = self._html_search_regex(
-            r'(?s)From:&nbsp;.+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
+            r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:user|channel)s/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
             webpage, 'uploader', fatal=False)

         view_count = self._extract_count(
@@ -227,20 +227,6 @@ class PornHubIE(InfoExtractor):

 class PornHubPlaylistBaseIE(InfoExtractor):
     def _extract_entries(self, webpage):
-        return [
-            self.url_result(
-                'http://www.pornhub.com/%s' % video_url,
-                PornHubIE.ie_key(), video_title=title)
-            for video_url, title in orderedSet(re.findall(
-                r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
-                webpage))
-        ]
-
-    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, playlist_id)
-
         # Only process container div with main playlist content skipping
         # drop-down menu that uses similar pattern for videos (see
         # https://github.com/rg3/youtube-dl/issues/11594).
@@ -248,7 +234,21 @@ class PornHubPlaylistBaseIE(InfoExtractor):
             r'(?s)(<div[^>]+class=["\']container.+)', webpage,
             'container', default=webpage)

-        entries = self._extract_entries(container)
+        return [
+            self.url_result(
+                'http://www.pornhub.com/%s' % video_url,
+                PornHubIE.ie_key(), video_title=title)
+            for video_url, title in orderedSet(re.findall(
+                r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
+                container))
+        ]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = self._extract_entries(webpage)

         playlist = self._parse_json(
             self._search_regex(
@@ -2,38 +2,37 @@
 from __future__ import unicode_literals

 import random
-import time
 import re
+import time

 from .common import InfoExtractor
 from ..utils import (
-    sanitized_Request,
-    strip_jsonp,
-    unescapeHTML,
     clean_html,
     ExtractorError,
+    strip_jsonp,
+    unescapeHTML,
 )


 class QQMusicIE(InfoExtractor):
     IE_NAME = 'qqmusic'
     IE_DESC = 'QQ音乐'
-    _VALID_URL = r'https?://y\.qq\.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
+    _VALID_URL = r'https?://y\.qq\.com/n/yqq/song/(?P<id>[0-9A-Za-z]+)\.html'
     _TESTS = [{
-        'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
-        'md5': '9ce1c1c8445f561506d2e3cfb0255705',
+        'url': 'https://y.qq.com/n/yqq/song/004295Et37taLD.html',
+        'md5': '5f1e6cea39e182857da7ffc5ef5e6bb8',
         'info_dict': {
             'id': '004295Et37taLD',
             'ext': 'mp3',
             'title': '可惜没如果',
             'release_date': '20141227',
             'creator': '林俊杰',
-            'description': 'md5:d327722d0361576fde558f1ac68a7065',
+            'description': 'md5:d85afb3051952ecc50a1ee8a286d1eac',
             'thumbnail': r're:^https?://.*\.jpg$',
         }
     }, {
         'note': 'There is no mp3-320 version of this song.',
-        'url': 'http://y.qq.com/#type=song&mid=004MsGEo3DdNxV',
+        'url': 'https://y.qq.com/n/yqq/song/004MsGEo3DdNxV.html',
         'md5': 'fa3926f0c585cda0af8fa4f796482e3e',
         'info_dict': {
             'id': '004MsGEo3DdNxV',
@@ -46,14 +45,14 @@ class QQMusicIE(InfoExtractor):
         }
     }, {
         'note': 'lyrics not in .lrc format',
-        'url': 'http://y.qq.com/#type=song&mid=001JyApY11tIp6',
+        'url': 'https://y.qq.com/n/yqq/song/001JyApY11tIp6.html',
         'info_dict': {
             'id': '001JyApY11tIp6',
             'ext': 'mp3',
             'title': 'Shadows Over Transylvania',
             'release_date': '19970225',
             'creator': 'Dark Funeral',
-            'description': 'md5:ed14d5bd7ecec19609108052c25b2c11',
+            'description': 'md5:c9b20210587cbcd6836a1c597bab4525',
             'thumbnail': r're:^https?://.*\.jpg$',
         },
         'params': {
@@ -105,7 +104,7 @@ class QQMusicIE(InfoExtractor):
             [r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'],
             detail_info_page, 'album mid', default=None)
         if albummid:
-            thumbnail_url = "http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg" \
+            thumbnail_url = 'http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg' \
                 % (albummid[-2:-1], albummid[-1], albummid)

         guid = self.m_r_get_ruin()
@@ -156,15 +155,39 @@ class QQPlaylistBaseIE(InfoExtractor):
     def qq_static_url(category, mid):
         return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid)

-    @classmethod
-    def get_entries_from_page(cls, page):
+    def get_singer_all_songs(self, singmid, num):
+        return self._download_webpage(
+            r'https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg', singmid,
+            query={
+                'format': 'json',
+                'inCharset': 'utf8',
+                'outCharset': 'utf-8',
+                'platform': 'yqq',
+                'needNewCode': 0,
+                'singermid': singmid,
+                'order': 'listen',
+                'begin': 0,
+                'num': num,
+                'songstatus': 1,
+            })
+
+    def get_entries_from_page(self, singmid):
         entries = []
-        for item in re.findall(r'class="data"[^<>]*>([^<>]+)</', page):
-            song_mid = unescapeHTML(item).split('|')[-5]
-            entries.append(cls.url_result(
-                'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic',
-                song_mid))
+
+        default_num = 1
+        json_text = self.get_singer_all_songs(singmid, default_num)
+        json_obj_all_songs = self._parse_json(json_text, singmid)
+
+        if json_obj_all_songs['code'] == 0:
+            total = json_obj_all_songs['data']['total']
+            json_text = self.get_singer_all_songs(singmid, total)
+            json_obj_all_songs = self._parse_json(json_text, singmid)
+
+        for item in json_obj_all_songs['data']['list']:
+            if item['musicData'].get('songmid') is not None:
+                songmid = item['musicData']['songmid']
+                entries.append(self.url_result(
+                    r'https://y.qq.com/n/yqq/song/%s.html' % songmid, 'QQMusic', songmid))
+
         return entries

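The rewritten `get_entries_from_page` above replaces HTML scraping with a two-pass call against the singer-track API: a first request with `num=1` is only a probe to read the `total` counter, and a second request then asks for all `total` songs in one page. A minimal sketch of that probe-then-fetch pattern, assuming a caller-supplied `fetch_page(num)` helper that performs the HTTP request and returns the decoded JSON (a hypothetical stand-in for `_download_webpage` plus `_parse_json`):

```python
def fetch_all_song_mids(fetch_page):
    """Probe-then-fetch pagination: learn the total from a 1-item request,
    then request everything at once.

    fetch_page(num) -> dict is an assumed helper that queries the API with
    'num' as the page size and returns the parsed JSON response.
    """
    probe = fetch_page(1)                 # cheap request; 'total' is filled in anyway
    if probe.get('code') != 0:
        return []
    total = probe['data']['total']
    full = fetch_page(total)              # one request sized to the whole catalogue
    return [item['musicData']['songmid']
            for item in full['data']['list']
            if item['musicData'].get('songmid')]
```

The trade-off is one extra round trip in exchange for never guessing a page size or looping over offsets.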
@@ -172,42 +195,32 @@ class QQPlaylistBaseIE(InfoExtractor):
 class QQMusicSingerIE(QQPlaylistBaseIE):
     IE_NAME = 'qqmusic:singer'
     IE_DESC = 'QQ音乐 - 歌手'
-    _VALID_URL = r'https?://y\.qq\.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
+    _VALID_URL = r'https?://y\.qq\.com/n/yqq/singer/(?P<id>[0-9A-Za-z]+)\.html'
     _TEST = {
-        'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2',
+        'url': 'https://y.qq.com/n/yqq/singer/001BLpXF2DyJe2.html',
         'info_dict': {
             'id': '001BLpXF2DyJe2',
             'title': '林俊杰',
             'description': 'md5:870ec08f7d8547c29c93010899103751',
         },
-        'playlist_count': 12,
+        'playlist_mincount': 12,
     }

     def _real_extract(self, url):
         mid = self._match_id(url)

-        singer_page = self._download_webpage(
-            self.qq_static_url('singer', mid), mid, 'Download singer page')
-
-        entries = self.get_entries_from_page(singer_page)
-
+        entries = self.get_entries_from_page(mid)
+        singer_page = self._download_webpage(url, mid, 'Download singer page')
         singer_name = self._html_search_regex(
-            r"singername\s*:\s*'([^']+)'", singer_page, 'singer name',
-            default=None)
-
-        singer_id = self._html_search_regex(
-            r"singerid\s*:\s*'([0-9]+)'", singer_page, 'singer id',
-            default=None)
-
+            r"singername\s*:\s*'(.*?)'", singer_page, 'singer name', default=None)
         singer_desc = None

-        if singer_id:
-            req = sanitized_Request(
-                'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singerid=%s' % singer_id)
-            req.add_header(
-                'Referer', 'http://s.plcloud.music.qq.com/xhr_proxy_utf8.html')
+        if mid:
             singer_desc_page = self._download_xml(
-                req, mid, 'Donwload singer description XML')
+                'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg', mid,
+                'Download singer description XML',
+                query={'utf8': 1, 'outCharset': 'utf-8', 'format': 'xml', 'singermid': mid},
+                headers={'Referer': 'https://y.qq.com/n/yqq/singer/'})

             singer_desc = singer_desc_page.find('./data/info/desc').text

@@ -217,10 +230,10 @@ class QQMusicSingerIE(QQPlaylistBaseIE):
 class QQMusicAlbumIE(QQPlaylistBaseIE):
     IE_NAME = 'qqmusic:album'
     IE_DESC = 'QQ音乐 - 专辑'
-    _VALID_URL = r'https?://y\.qq\.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
+    _VALID_URL = r'https?://y\.qq\.com/n/yqq/album/(?P<id>[0-9A-Za-z]+)\.html'

     _TESTS = [{
-        'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1',
+        'url': 'https://y.qq.com/n/yqq/album/000gXCTb2AhRR1.html',
         'info_dict': {
             'id': '000gXCTb2AhRR1',
             'title': '我们都是这样长大的',
@@ -228,7 +241,7 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
         },
         'playlist_count': 4,
     }, {
-        'url': 'http://y.qq.com/#type=album&mid=002Y5a3b3AlCu3',
+        'url': 'https://y.qq.com/n/yqq/album/002Y5a3b3AlCu3.html',
         'info_dict': {
             'id': '002Y5a3b3AlCu3',
             'title': '그리고...',
@@ -246,7 +259,7 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):

         entries = [
             self.url_result(
-                'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid']
+                'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid']
             ) for song in album['list']
         ]
         album_name = album.get('name')
@@ -260,31 +273,30 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
 class QQMusicToplistIE(QQPlaylistBaseIE):
     IE_NAME = 'qqmusic:toplist'
     IE_DESC = 'QQ音乐 - 排行榜'
-    _VALID_URL = r'https?://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)'
+    _VALID_URL = r'https?://y\.qq\.com/n/yqq/toplist/(?P<id>[0-9]+)\.html'

     _TESTS = [{
-        'url': 'http://y.qq.com/#type=toplist&p=global_123',
+        'url': 'https://y.qq.com/n/yqq/toplist/123.html',
         'info_dict': {
-            'id': 'global_123',
+            'id': '123',
             'title': '美国iTunes榜',
-        },
-        'playlist_count': 10,
-    }, {
-        'url': 'http://y.qq.com/#type=toplist&p=top_3',
-        'info_dict': {
-            'id': 'top_3',
-            'title': '巅峰榜·欧美',
-            'description': 'QQ音乐巅峰榜·欧美根据用户收听行为自动生成,集结当下最流行的欧美新歌!:更新时间:每周四22点|统'
-                           '计周期:一周(上周四至本周三)|统计对象:三个月内发行的欧美歌曲|统计数量:100首|统计算法:根据'
-                           '歌曲在一周内的有效播放次数,由高到低取前100名(同一歌手最多允许5首歌曲同时上榜)|有效播放次数:'
-                           '登录用户完整播放一首歌曲,记为一次有效播放;同一用户收听同一首歌曲,每天记录为1次有效播放'
+            'description': 'md5:89db2335fdbb10678dee2d43fe9aba08',
         },
         'playlist_count': 100,
     }, {
-        'url': 'http://y.qq.com/#type=toplist&p=global_106',
+        'url': 'https://y.qq.com/n/yqq/toplist/3.html',
         'info_dict': {
-            'id': 'global_106',
+            'id': '3',
+            'title': '巅峰榜·欧美',
+            'description': 'md5:5a600d42c01696b26b71f8c4d43407da',
+        },
+        'playlist_count': 100,
+    }, {
+        'url': 'https://y.qq.com/n/yqq/toplist/106.html',
+        'info_dict': {
+            'id': '106',
             'title': '韩国Mnet榜',
+            'description': 'md5:cb84b325215e1d21708c615cac82a6e7',
         },
         'playlist_count': 50,
     }]
@@ -292,18 +304,15 @@ class QQMusicToplistIE(QQPlaylistBaseIE):
     def _real_extract(self, url):
         list_id = self._match_id(url)

-        list_type, num_id = list_id.split("_")
-
         toplist_json = self._download_json(
-            'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg?type=%s&topid=%s&format=json'
-            % (list_type, num_id),
-            list_id, 'Download toplist page')
+            'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg', list_id,
+            note='Download toplist page',
+            query={'type': 'toplist', 'topid': list_id, 'format': 'json'})

-        entries = [
-            self.url_result(
-                'http://y.qq.com/#type=song&mid=' + song['data']['songmid'], 'QQMusic', song['data']['songmid']
-            ) for song in toplist_json['songlist']
-        ]
+        entries = [self.url_result(
+            'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + '.html', 'QQMusic',
+            song['data']['songmid'])
+            for song in toplist_json['songlist']]

         topinfo = toplist_json.get('topinfo', {})
         list_name = topinfo.get('ListName')
@@ -314,10 +323,10 @@ class QQMusicToplistIE(QQPlaylistBaseIE):
 class QQMusicPlaylistIE(QQPlaylistBaseIE):
     IE_NAME = 'qqmusic:playlist'
     IE_DESC = 'QQ音乐 - 歌单'
-    _VALID_URL = r'https?://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://y\.qq\.com/n/yqq/playlist/(?P<id>[0-9]+)\.html'

     _TESTS = [{
-        'url': 'http://y.qq.com/#type=taoge&id=3462654915',
+        'url': 'http://y.qq.com/n/yqq/playlist/3462654915.html',
         'info_dict': {
             'id': '3462654915',
             'title': '韩国5月新歌精选下旬',
@@ -326,7 +335,7 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):
         'playlist_count': 40,
         'skip': 'playlist gone',
     }, {
-        'url': 'http://y.qq.com/#type=taoge&id=1374105607',
+        'url': 'https://y.qq.com/n/yqq/playlist/1374105607.html',
         'info_dict': {
             'id': '1374105607',
             'title': '易入人心的华语民谣',
@@ -339,8 +348,9 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):
         list_id = self._match_id(url)

         list_json = self._download_json(
-            'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?type=1&json=1&utf8=1&onlysong=0&disstid=%s'
-            % list_id, list_id, 'Download list page',
+            'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg',
+            list_id, 'Download list page',
+            query={'type': 1, 'json': 1, 'utf8': 1, 'onlysong': 0, 'disstid': list_id},
             transform_source=strip_jsonp)
         if not len(list_json.get('cdlist', [])):
             if list_json.get('code'):
@@ -350,11 +360,9 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):
             raise ExtractorError('Unable to get playlist info')

         cdlist = list_json['cdlist'][0]
-        entries = [
-            self.url_result(
-                'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid']
-            ) for song in cdlist['songlist']
-        ]
+        entries = [self.url_result(
+            'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid'])
+            for song in cdlist['songlist']]

         list_name = cdlist.get('dissname')
         list_description = clean_html(unescapeHTML(cdlist.get('desc')))
youtube_dl/extractor/radiocanada.py

@@ -20,7 +20,8 @@ from ..utils import (
 class RadioCanadaIE(InfoExtractor):
     IE_NAME = 'radiocanada'
     _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
-    _TEST = {
+    _TESTS = [
+        {
         'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
         'info_dict': {
             'id': '7184272',
@@ -29,11 +30,27 @@ class RadioCanadaIE(InfoExtractor):
             'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
             'upload_date': '20141023',
         },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            }
+        },
+        {
+            # empty Title
+            'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/',
+            'info_dict': {
+                'id': '7754998',
+                'ext': 'mp4',
+                'title': 'letelejournal22h',
+                'description': 'INTEGRALE WEB 22H-TJ',
+                'upload_date': '20170720',
+            },
         'params': {
             # m3u8 download
             'skip_download': True,
         },
     }
+    ]

     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
@@ -59,6 +76,7 @@ class RadioCanadaIE(InfoExtractor):
             device_types.append('android')

         formats = []
+        error = None
         # TODO: extract f4m formats
         # f4m formats can be extracted using flashhd device_type but they produce unplayable file
         for device_type in device_types:
@@ -84,8 +102,8 @@ class RadioCanadaIE(InfoExtractor):
                 if not v_url:
                     continue
                 if v_url == 'null':
-                    raise ExtractorError('%s said: %s' % (
-                        self.IE_NAME, xpath_text(v_data, 'message')), expected=True)
+                    error = xpath_text(v_data, 'message')
+                    continue
                 ext = determine_ext(v_url)
                 if ext == 'm3u8':
                     formats.extend(self._extract_m3u8_formats(
@@ -129,6 +147,9 @@ class RadioCanadaIE(InfoExtractor):
             formats.extend(self._extract_f4m_formats(
                 base_url + '/manifest.f4m', video_id,
                 f4m_id='hds', fatal=False))
+        if not formats and error:
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, error), expected=True)
         self._sort_formats(formats)

         subtitles = {}
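Taken together, the two hunks above swap an eager `raise` for a deferred one: a per-device-type failure message is remembered in `error` and the extractor keeps trying the remaining device types, raising only if the whole loop produced no formats at all. A small sketch of that collect-then-decide pattern, with `probe(source)` as an assumed stand-in for one device-type attempt:

```python
class ExtractionError(Exception):
    pass

def collect_formats(sources, probe):
    """Try every source; keep the last error but only surface it if
    nothing at all worked.

    probe(source) -> (formats, error_message) is an assumed helper modelling
    one device-type request; error_message is None on success.
    """
    formats, error = [], None
    for source in sources:
        found, message = probe(source)
        if message:
            error = message       # remember, but keep trying other sources
            continue
        formats.extend(found)
    if not formats and error:
        # only now does the stored message become fatal
        raise ExtractionError(error)
    return formats
```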
@@ -141,7 +162,7 @@ class RadioCanadaIE(InfoExtractor):

         return {
             'id': video_id,
-            'title': get_meta('Title'),
+            'title': get_meta('Title') or get_meta('AV-nomEmission'),
             'description': get_meta('Description') or get_meta('ShortDescription'),
             'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
             'duration': int_or_none(get_meta('length')),
youtube_dl/extractor/rai.py

@@ -345,11 +345,11 @@ class RaiIE(RaiBaseIE):
         media_type = media['type']
         if 'Audio' in media_type:
             relinker_info = {
-                'formats': {
+                'formats': [{
                     'format_id': media.get('formatoAudio'),
                     'url': media['audioUrl'],
                     'ext': media.get('formatoAudio'),
-                }
+                }]
             }
         elif 'Video' in media_type:
             relinker_info = self._extract_relinker_info(media['mediaUri'], content_id)
youtube_dl/extractor/reddit.py (new file, 114 lines)

@@ -0,0 +1,114 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    float_or_none,
+)
+
+
+class RedditIE(InfoExtractor):
+    _VALID_URL = r'https?://v\.redd\.it/(?P<id>[^/?#&]+)'
+    _TEST = {
+        # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
+        'url': 'https://v.redd.it/zv89llsvexdz',
+        'md5': '655d06ace653ea3b87bccfb1b27ec99d',
+        'info_dict': {
+            'id': 'zv89llsvexdz',
+            'ext': 'mp4',
+            'title': 'zv89llsvexdz',
+        },
+        'params': {
+            'format': 'bestvideo',
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        formats = self._extract_m3u8_formats(
+            'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id, video_id,
+            'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+
+        formats.extend(self._extract_mpd_formats(
+            'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id,
+            mpd_id='dash', fatal=False))
+
+        return {
+            'id': video_id,
+            'title': video_id,
+            'formats': formats,
+        }
+
+
+class RedditRIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/]+)'
+    _TESTS = [{
+        'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
+        'info_dict': {
+            'id': 'zv89llsvexdz',
+            'ext': 'mp4',
+            'title': 'That small heart attack.',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'timestamp': 1501941939,
+            'upload_date': '20170805',
+            'uploader': 'Antw87',
+            'like_count': int,
+            'dislike_count': int,
+            'comment_count': int,
+            'age_limit': 0,
+        },
+        'params': {
+            'format': 'bestvideo',
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
+        'only_matching': True,
+    }, {
+        # imgur
+        'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/',
+        'only_matching': True,
+    }, {
+        # streamable
+        'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/',
+        'only_matching': True,
+    }, {
+        # youtube
+        'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        data = self._download_json(
+            url + '.json', video_id)[0]['data']['children'][0]['data']
+
+        video_url = data['url']
+
+        # Avoid recursing into the same reddit URL
+        if 'reddit.com/' in video_url and '/%s/' % video_id in video_url:
+            raise ExtractorError('No media found', expected=True)
+
+        over_18 = data.get('over_18')
+        if over_18 is True:
+            age_limit = 18
+        elif over_18 is False:
+            age_limit = 0
+        else:
+            age_limit = None
+
+        return {
+            '_type': 'url_transparent',
+            'url': video_url,
+            'title': data.get('title'),
+            'thumbnail': data.get('thumbnail'),
+            'timestamp': float_or_none(data.get('created_utc')),
+            'uploader': data.get('author'),
+            'like_count': int_or_none(data.get('ups')),
+            'dislike_count': int_or_none(data.get('downs')),
+            'comment_count': int_or_none(data.get('num_comments')),
+            'age_limit': age_limit,
+        }
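`RedditRIE` leans on a long-standing reddit convention: appending `.json` to almost any comment-page URL returns the same data as JSON, with the submission object at `[0]['data']['children'][0]['data']`. A hedged, standalone sketch of that lookup using only the standard library (the URL is the one from the test above; the field names follow the public reddit API):

```python
import json
from urllib.request import Request, urlopen

def fetch_reddit_post(url):
    """Fetch a reddit comment page as JSON and return the submission data.

    Appending '.json' to the page URL is reddit's own convention; a custom
    User-Agent is set because reddit is known to throttle default clients.
    """
    req = Request(url.rstrip('/') + '.json',
                  headers={'User-Agent': 'reddit-json-sketch/0.1'})
    with urlopen(req) as resp:
        tree = json.load(resp)
    # listing 0 is the submission itself, listing 1 holds the comments
    return tree[0]['data']['children'][0]['data']

post = fetch_reddit_post(
    'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/')
print(post.get('title'), post.get('url'))
```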
youtube_dl/extractor/redtube.py

@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     int_or_none,
@@ -62,7 +63,23 @@ class RedTubeIE(InfoExtractor):
                     'format_id': format_id,
                     'height': int_or_none(format_id),
                 })
-        else:
+        medias = self._parse_json(
+            self._search_regex(
+                r'mediaDefinition\s*:\s*(\[.+?\])', webpage,
+                'media definitions', default='{}'),
+            video_id, fatal=False)
+        if medias and isinstance(medias, list):
+            for media in medias:
+                format_url = media.get('videoUrl')
+                if not format_url or not isinstance(format_url, compat_str):
+                    continue
+                format_id = media.get('quality')
+                formats.append({
+                    'url': format_url,
+                    'format_id': format_id,
+                    'height': int_or_none(format_id),
+                })
+        if not formats:
             video_url = self._html_search_regex(
                 r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
             formats.append({'url': video_url})
@@ -73,7 +90,7 @@ class RedTubeIE(InfoExtractor):
             r'<span[^>]+class="added-time"[^>]*>ADDED ([^<]+)<',
             webpage, 'upload date', fatal=False))
         duration = int_or_none(self._search_regex(
-            r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
+            r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
         view_count = str_to_int(self._search_regex(
             r'<span[^>]*>VIEWS</span></td>\s*<td>([\d,.]+)',
             webpage, 'view count', fatal=False))
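The new `mediaDefinition` branch is written defensively: the regex search has `default='{}'`, the JSON parse is `fatal=False`, and both the list and each URL are type-checked before use, so a site-side markup change degrades to the old `<source>` fallback instead of crashing. A sketch of the same tolerant-extraction idea outside the extractor framework, with the regex and field names taken from the diff:

```python
import json
import re

def extract_media_formats(webpage):
    """Pull the mediaDefinition JSON out of a page, tolerating its absence.

    Every step has a safe default, so a missing or malformed block yields []
    rather than an exception.
    """
    match = re.search(r'mediaDefinition\s*:\s*(\[.+?\])', webpage)
    raw = match.group(1) if match else '{}'
    try:
        medias = json.loads(raw)
    except ValueError:
        medias = None                      # equivalent of fatal=False
    formats = []
    if medias and isinstance(medias, list):
        for media in medias:
            format_url = media.get('videoUrl')
            if not format_url or not isinstance(format_url, str):
                continue                   # skip entries with no usable URL
            formats.append({'url': format_url, 'format_id': media.get('quality')})
    return formats
```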
youtube_dl/extractor/rtve.py

@@ -10,6 +10,7 @@ from ..compat import (
     compat_struct_unpack,
 )
 from ..utils import (
+    determine_ext,
     ExtractorError,
     float_or_none,
     remove_end,
@@ -84,6 +85,18 @@ class RTVEALaCartaIE(InfoExtractor):
             'title': 'TODO',
         },
         'skip': 'The f4m manifest can\'t be used yet',
+    }, {
+        'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
+        'md5': 'e55e162379ad587e9640eda4f7353c0f',
+        'info_dict': {
+            'id': '4236788',
+            'ext': 'mp4',
+            'title': 'Servir y proteger - Capítulo 104 ',
+            'duration': 3222.0,
+        },
+        'params': {
+            'skip_download': True,  # requires ffmpeg
+        },
     }, {
         'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
         'only_matching': True,
@@ -107,24 +120,41 @@ class RTVEALaCartaIE(InfoExtractor):
             video_id)['page']['items'][0]
         if info['state'] == 'DESPU':
             raise ExtractorError('The video is no longer available', expected=True)
+        title = info['title']
         png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
         png_request = sanitized_Request(png_url)
         png_request.add_header('Referer', url)
         png = self._download_webpage(png_request, video_id, 'Downloading url information')
         video_url = _decrypt_url(png)
-        if not video_url.endswith('.f4m'):
+        ext = determine_ext(video_url)
+
+        formats = []
+        if not video_url.endswith('.f4m') and ext != 'm3u8':
             if '?' not in video_url:
                 video_url = video_url.replace('resources/', 'auth/resources/')
             video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve')
+
+        if ext == 'm3u8':
+            formats.extend(self._extract_m3u8_formats(
+                video_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+                m3u8_id='hls', fatal=False))
+        elif ext == 'f4m':
+            formats.extend(self._extract_f4m_formats(
+                video_url, video_id, f4m_id='hds', fatal=False))
+        else:
+            formats.append({
+                'url': video_url,
+            })
+        self._sort_formats(formats)

         subtitles = None
         if info.get('sbtFile') is not None:
             subtitles = self.extract_subtitles(video_id, info['sbtFile'])

         return {
             'id': video_id,
-            'title': info['title'],
-            'url': video_url,
+            'title': title,
+            'formats': formats,
             'thumbnail': info.get('image'),
             'page_url': url,
             'subtitles': subtitles,
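The extraction now branches on `determine_ext`: `.m3u8` goes to the HLS helper, `.f4m` to the HDS helper, and anything else becomes a single direct format. A compact sketch of that dispatch, where the two extract helpers are assumed to exist (they model `_extract_m3u8_formats` and `_extract_f4m_formats`, and the `determine_ext` here is only a rough stand-in for the youtube-dl utility):

```python
import os
from urllib.parse import urlparse

def determine_ext(url):
    # rough stand-in: the extension of the URL path, lower-cased
    return os.path.splitext(urlparse(url).path)[1].lstrip('.').lower()

def build_formats(video_url, extract_hls, extract_hds):
    """Dispatch on the URL extension; unknown types become one direct format.

    extract_hls(url) / extract_hds(url) -> list of dicts are assumed helpers.
    """
    ext = determine_ext(video_url)
    if ext == 'm3u8':
        return extract_hls(video_url)      # HLS master playlist
    if ext == 'f4m':
        return extract_hds(video_url)      # Adobe HDS manifest
    return [{'url': video_url}]            # plain progressive file
```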
youtube_dl/extractor/rutube.py

@@ -7,43 +7,84 @@ import itertools
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
+    compat_parse_qs,
+    compat_urllib_parse_urlparse,
 )
 from ..utils import (
     determine_ext,
-    unified_strdate,
+    bool_or_none,
+    int_or_none,
+    try_get,
+    unified_timestamp,
 )


-class RutubeIE(InfoExtractor):
+class RutubeBaseIE(InfoExtractor):
+    def _extract_video(self, video, video_id=None, require_title=True):
+        title = video['title'] if require_title else video.get('title')
+
+        age_limit = video.get('is_adult')
+        if age_limit is not None:
+            age_limit = 18 if age_limit is True else 0
+
+        uploader_id = try_get(video, lambda x: x['author']['id'])
+        category = try_get(video, lambda x: x['category']['name'])
+
+        return {
+            'id': video.get('id') or video_id,
+            'title': title,
+            'description': video.get('description'),
+            'thumbnail': video.get('thumbnail_url'),
+            'duration': int_or_none(video.get('duration')),
+            'uploader': try_get(video, lambda x: x['author']['name']),
+            'uploader_id': compat_str(uploader_id) if uploader_id else None,
+            'timestamp': unified_timestamp(video.get('created_ts')),
+            'category': [category] if category else None,
+            'age_limit': age_limit,
+            'view_count': int_or_none(video.get('hits')),
+            'comment_count': int_or_none(video.get('comments_count')),
+            'is_live': bool_or_none(video.get('is_livestream')),
+        }
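The new `RutubeBaseIE._extract_video` centralises the API-response-to-info-dict mapping so the single-video and playlist extractors below can share it; `try_get` guards the nested `author`/`category` lookups. A sketch of that defensive mapping with a plain-Python `try_get`, assuming the same Rutube field names as in the diff:

```python
def try_get(src, getter):
    """Return getter(src), or None if any key or attribute along the way is missing."""
    try:
        return getter(src)
    except (AttributeError, KeyError, TypeError, IndexError):
        return None

def extract_video(video, video_id=None):
    """Map a Rutube-style API 'video' object to a flat metadata dict."""
    uploader_id = try_get(video, lambda x: x['author']['id'])
    category = try_get(video, lambda x: x['category']['name'])
    return {
        'id': video.get('id') or video_id,
        'title': video.get('title'),
        'duration': video.get('duration'),
        'uploader': try_get(video, lambda x: x['author']['name']),
        'uploader_id': str(uploader_id) if uploader_id else None,
        'categories': [category] if category else None,
    }

# A video dict without an 'author' block simply yields uploader=None:
print(extract_video({'id': 'abc', 'title': 'clip'}))
```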
+
+
+class RutubeIE(RutubeBaseIE):
     IE_NAME = 'rutube'
     IE_DESC = 'Rutube videos'
     _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})'

     _TESTS = [{
         'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
+        'md5': '79938ade01294ef7e27574890d0d3769',
         'info_dict': {
             'id': '3eac3b4561676c17df9132a9a1e62e3e',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': 'Раненный кенгуру забежал в аптеку',
             'description': 'http://www.ntdtv.ru ',
             'duration': 80,
             'uploader': 'NTDRussian',
             'uploader_id': '29790',
+            'timestamp': 1381943602,
             'upload_date': '20131016',
             'age_limit': 0,
         },
-        'params': {
-            # It requires ffmpeg (m3u8 download)
-            'skip_download': True,
-        },
     }, {
         'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
         'only_matching': True,
     }, {
         'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
         'only_matching': True,
+    }, {
+        'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252',
+        'only_matching': True,
+    }, {
+        'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source',
+        'only_matching': True,
     }]

+    @classmethod
+    def suitable(cls, url):
+        return False if RutubePlaylistIE.suitable(url) else super(RutubeIE, cls).suitable(url)
+
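The `suitable` override added above routes URLs between extractors: a Rutube video URL that also carries a `pl_id` parameter should be claimed by `RutubePlaylistIE`, so `RutubeIE` declines it. A toy sketch of that claim-ordering idea (the two extractor classes here are illustrative, not the real ones):

```python
import re

class PlaylistExtractor:
    _VALID_URL = r'https?://example\.com/video/\w+/\?.*\bpl_id=(?P<id>\d+)'

    @classmethod
    def suitable(cls, url):
        return re.match(cls._VALID_URL, url) is not None

class VideoExtractor:
    _VALID_URL = r'https?://example\.com/video/(?P<id>\w+)'

    @classmethod
    def suitable(cls, url):
        # defer to the playlist extractor when the URL is really a playlist view
        if PlaylistExtractor.suitable(url):
            return False
        return re.match(cls._VALID_URL, url) is not None

print(VideoExtractor.suitable('https://example.com/video/abc'))            # True
print(VideoExtractor.suitable('https://example.com/video/abc/?pl_id=42'))  # False
```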
     @staticmethod
     def _extract_urls(webpage):
         return [mobj.group('url') for mobj in re.finditer(
@@ -52,12 +93,12 @@ class RutubeIE(InfoExtractor):

     def _real_extract(self, url):
         video_id = self._match_id(url)

         video = self._download_json(
             'http://rutube.ru/api/video/%s/?format=json' % video_id,
             video_id, 'Downloading video JSON')

-        # Some videos don't have the author field
-        author = video.get('author') or {}
+        info = self._extract_video(video, video_id)

         options = self._download_json(
             'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
@@ -79,19 +120,8 @@ class RutubeIE(InfoExtractor):
             })
         self._sort_formats(formats)

-        return {
-            'id': video['id'],
-            'title': video['title'],
-            'description': video['description'],
-            'duration': video['duration'],
-            'view_count': video['hits'],
-            'formats': formats,
-            'thumbnail': video['thumbnail_url'],
-            'uploader': author.get('name'),
-            'uploader_id': compat_str(author['id']) if author else None,
-            'upload_date': unified_strdate(video['created_ts']),
-            'age_limit': 18 if video['is_adult'] else 0,
-        }
+        info['formats'] = formats
+        return info

 class RutubeEmbedIE(InfoExtractor):
@@ -103,7 +133,8 @@ class RutubeEmbedIE(InfoExtractor):
         'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
         'info_dict': {
             'id': 'a10e53b86e8f349080f718582ce4c661',
-            'ext': 'mp4',
+            'ext': 'flv',
+            'timestamp': 1387830582,
             'upload_date': '20131223',
             'uploader_id': '297833',
             'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
@@ -111,7 +142,7 @@ class RutubeEmbedIE(InfoExtractor):
             'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
         },
         'params': {
-            'skip_download': 'Requires ffmpeg',
+            'skip_download': True,
         },
     }, {
         'url': 'http://rutube.ru/play/embed/8083783',
@@ -125,10 +156,51 @@ class RutubeEmbedIE(InfoExtractor):
         canonical_url = self._html_search_regex(
             r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage,
             'Canonical URL')
-        return self.url_result(canonical_url, 'Rutube')
+        return self.url_result(canonical_url, RutubeIE.ie_key())


-class RutubeChannelIE(InfoExtractor):
+class RutubePlaylistBaseIE(RutubeBaseIE):
+    def _next_page_url(self, page_num, playlist_id, *args, **kwargs):
+        return self._PAGE_TEMPLATE % (playlist_id, page_num)
+
+    def _entries(self, playlist_id, *args, **kwargs):
+        next_page_url = None
+        for pagenum in itertools.count(1):
+            page = self._download_json(
+                next_page_url or self._next_page_url(
+                    pagenum, playlist_id, *args, **kwargs),
+                playlist_id, 'Downloading page %s' % pagenum)
+
+            results = page.get('results')
+            if not results or not isinstance(results, list):
+                break
+
+            for result in results:
+                video_url = result.get('video_url')
+                if not video_url or not isinstance(video_url, compat_str):
+                    continue
+                entry = self._extract_video(result, require_title=False)
+                entry.update({
+                    '_type': 'url',
+                    'url': video_url,
+                    'ie_key': RutubeIE.ie_key(),
+                })
+                yield entry
+
+            next_page_url = page.get('next')
+            if not next_page_url or not page.get('has_next'):
+                break
+
+    def _extract_playlist(self, playlist_id, *args, **kwargs):
+        return self.playlist_result(
+            self._entries(playlist_id, *args, **kwargs),
+            playlist_id, kwargs.get('playlist_name'))
+
+    def _real_extract(self, url):
+        return self._extract_playlist(self._match_id(url))
class RutubeChannelIE(RutubePlaylistBaseIE):
|
||||||
IE_NAME = 'rutube:channel'
|
IE_NAME = 'rutube:channel'
|
||||||
IE_DESC = 'Rutube channels'
|
IE_DESC = 'Rutube channels'
|
||||||
_VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'
|
_VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'
|
||||||
@ -142,27 +214,8 @@ class RutubeChannelIE(InfoExtractor):
|
|||||||
|
|
||||||
_PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
|
_PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
|
||||||
|
|
||||||
def _extract_videos(self, channel_id, channel_title=None):
|
|
||||||
entries = []
|
|
||||||
for pagenum in itertools.count(1):
|
|
||||||
page = self._download_json(
|
|
||||||
self._PAGE_TEMPLATE % (channel_id, pagenum),
|
|
||||||
channel_id, 'Downloading page %s' % pagenum)
|
|
||||||
results = page['results']
|
|
||||||
if not results:
|
|
||||||
break
|
|
||||||
entries.extend(self.url_result(result['video_url'], 'Rutube') for result in results)
|
|
||||||
if not page['has_next']:
|
|
||||||
break
|
|
||||||
return self.playlist_result(entries, channel_id, channel_title)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
class RutubeMovieIE(RutubePlaylistBaseIE):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
channel_id = mobj.group('id')
|
|
||||||
return self._extract_videos(channel_id)
|
|
||||||
|
|
||||||
|
|
||||||
class RutubeMovieIE(RutubeChannelIE):
|
|
||||||
IE_NAME = 'rutube:movie'
|
IE_NAME = 'rutube:movie'
|
||||||
IE_DESC = 'Rutube movies'
|
IE_DESC = 'Rutube movies'
|
||||||
_VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
|
_VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
|
||||||
@ -176,11 +229,11 @@ class RutubeMovieIE(RutubeChannelIE):
|
|||||||
movie = self._download_json(
|
movie = self._download_json(
|
||||||
self._MOVIE_TEMPLATE % movie_id, movie_id,
|
self._MOVIE_TEMPLATE % movie_id, movie_id,
|
||||||
'Downloading movie JSON')
|
'Downloading movie JSON')
|
||||||
movie_name = movie['name']
|
return self._extract_playlist(
|
||||||
return self._extract_videos(movie_id, movie_name)
|
movie_id, playlist_name=movie.get('name'))
|
||||||
|
|
||||||
|
|
||||||
class RutubePersonIE(RutubeChannelIE):
|
class RutubePersonIE(RutubePlaylistBaseIE):
|
||||||
IE_NAME = 'rutube:person'
|
IE_NAME = 'rutube:person'
|
||||||
IE_DESC = 'Rutube person videos'
|
IE_DESC = 'Rutube person videos'
|
||||||
_VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'
|
_VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'
|
||||||
@ -193,3 +246,37 @@ class RutubePersonIE(RutubeChannelIE):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
_PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
|
_PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
|
||||||
|
|
||||||
|
|
||||||
|
class RutubePlaylistIE(RutubePlaylistBaseIE):
|
||||||
|
IE_NAME = 'rutube:playlist'
|
||||||
|
IE_DESC = 'Rutube playlists'
|
||||||
|
_VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.*?\bpl_id=(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3097',
|
||||||
|
},
|
||||||
|
'playlist_count': 27,
|
||||||
|
}, {
|
||||||
|
'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
_PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json'
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
if not super(RutubePlaylistIE, cls).suitable(url):
|
||||||
|
return False
|
||||||
|
params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||||
|
return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0])
|
||||||
|
|
||||||
|
def _next_page_url(self, page_num, playlist_id, item_kind):
|
||||||
|
return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||||
|
playlist_kind = qs['pl_type'][0]
|
||||||
|
playlist_id = qs['pl_id'][0]
|
||||||
|
return self._extract_playlist(playlist_id, item_kind=playlist_kind)
|
||||||
|
youtube_dl/extractor/soundcloud.py

@@ -1,8 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import re
 import itertools
+import re

 from .common import (
     InfoExtractor,
@@ -17,6 +17,7 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     unified_strdate,
+    update_url_query,
 )


@@ -31,6 +32,7 @@ class SoundcloudIE(InfoExtractor):

     _VALID_URL = r'''(?x)^(?:https?://)?
                     (?:(?:(?:www\.|m\.)?soundcloud\.com/
+                        (?!stations/track)
                         (?P<uploader>[\w\d-]+)/
                         (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
                         (?P<title>[\w\d-]+)/?
@@ -119,9 +121,24 @@ class SoundcloudIE(InfoExtractor):
                 'license': 'cc-by-sa',
             },
         },
+        # private link, downloadable format
+        {
+            'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
+            'md5': '64a60b16e617d41d0bef032b7f55441e',
+            'info_dict': {
+                'id': '340344461',
+                'ext': 'wav',
+                'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
+                'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
+                'uploader': 'Ori Uplift Music',
+                'upload_date': '20170831',
+                'duration': 7449,
+                'license': 'all-rights-reserved',
+            },
+        },
     ]

-    _CLIENT_ID = '2t9loNQH90kzJcsFCODdigxfp325aq4z'
+    _CLIENT_ID = 'JlZIsxg2hY5WnBgtn3jfS0UYCl0K8DOg'
     _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'

     @staticmethod
@@ -159,11 +176,13 @@ class SoundcloudIE(InfoExtractor):
             'license': info.get('license'),
         }
         formats = []
+        query = {'client_id': self._CLIENT_ID}
+        if secret_token is not None:
+            query['secret_token'] = secret_token
         if info.get('downloadable', False):
             # We can build a direct link to the song
-            format_url = (
-                'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
-                    track_id, self._CLIENT_ID))
+            format_url = update_url_query(
+                'https://api.soundcloud.com/tracks/%s/download' % track_id, query)
             formats.append({
                 'format_id': 'download',
                 'ext': info.get('original_format', 'mp3'),
@@ -175,10 +194,7 @@ class SoundcloudIE(InfoExtractor):
             # We have to retrieve the url
             format_dict = self._download_json(
                 'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
-                track_id, 'Downloading track url', query={
-                    'client_id': self._CLIENT_ID,
-                    'secret_token': secret_token,
-                })
+                track_id, 'Downloading track url', query=query)

             for key, stream_url in format_dict.items():
                 abr = int_or_none(self._search_regex(
@@ -215,7 +231,7 @@ class SoundcloudIE(InfoExtractor):
             # cannot be always used, sometimes it can give an HTTP 404 error
             formats.append({
                 'format_id': 'fallback',
-                'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
+                'url': update_url_query(info['stream_url'], query),
                 'ext': ext,
             })
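Several call sites above now funnel `client_id` (and, for private links, `secret_token`) through a single `query` dict and apply it with `update_url_query` instead of string concatenation, which also works when the URL already carries a query string. A sketch of what such a merge does, built on the standard library (`update_url_query` itself is a youtube-dl utility; this is only its core idea):

```python
from urllib.parse import parse_qs, urlencode, urlparse, urlunparse

def update_url_query(url, extra):
    """Merge extra query parameters into a URL, overriding duplicates."""
    parts = urlparse(url)
    qs = parse_qs(parts.query)
    qs.update({k: [str(v)] for k, v in extra.items()})
    return urlunparse(parts._replace(query=urlencode(qs, doseq=True)))

query = {'client_id': 'XYZ'}          # placeholder values for illustration
secret_token = 's-AyZUd'
if secret_token is not None:
    query['secret_token'] = secret_token

# safe whether or not the stream URL already carries parameters
print(update_url_query('https://api.soundcloud.com/tracks/1/stream?foo=1', query))
```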
@@ -330,7 +346,63 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
         }


-class SoundcloudUserIE(SoundcloudPlaylistBaseIE):
+class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
+    _API_BASE = 'https://api.soundcloud.com'
+    _API_V2_BASE = 'https://api-v2.soundcloud.com'
+
+    def _extract_playlist(self, base_url, playlist_id, playlist_title):
+        COMMON_QUERY = {
+            'limit': 50,
+            'client_id': self._CLIENT_ID,
+            'linked_partitioning': '1',
+        }
+
+        query = COMMON_QUERY.copy()
+        query['offset'] = 0
+
+        next_href = base_url + '?' + compat_urllib_parse_urlencode(query)
+
+        entries = []
+        for i in itertools.count():
+            response = self._download_json(
+                next_href, playlist_id, 'Downloading track page %s' % (i + 1))
+
+            collection = response['collection']
+            if not collection:
+                break
+
+            def resolve_permalink_url(candidates):
+                for cand in candidates:
+                    if isinstance(cand, dict):
+                        permalink_url = cand.get('permalink_url')
+                        entry_id = self._extract_id(cand)
+                        if permalink_url and permalink_url.startswith('http'):
+                            return permalink_url, entry_id
+
+            for e in collection:
+                permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
+                if permalink_url:
+                    entries.append(self.url_result(permalink_url, video_id=entry_id))
+
+            next_href = response.get('next_href')
+            if not next_href:
+                break
+
+            parsed_next_href = compat_urlparse.urlparse(response['next_href'])
+            qs = compat_urlparse.parse_qs(parsed_next_href.query)
+            qs.update(COMMON_QUERY)
+            next_href = compat_urlparse.urlunparse(
+                parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True)))
+
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'title': playlist_title,
+            'entries': entries,
+        }
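The paging logic hoisted into `_extract_playlist` above follows the API's `next_href`, but re-applies the `limit`/`client_id`/`linked_partitioning` parameters to each continuation URL because the server-provided link does not always preserve them. A sketch of that follow-and-reassert loop, with `download_json(url)` as an assumed HTTP helper and a placeholder client id:

```python
import itertools
from urllib.parse import parse_qs, urlencode, urlparse, urlunparse

COMMON_QUERY = {'limit': 50, 'client_id': 'XYZ', 'linked_partitioning': '1'}

def collect_collection(download_json, base_url):
    """Walk a linked-partitioned SoundCloud-style listing to the end.

    download_json(url) -> dict is an assumed HTTP helper.
    """
    next_href = base_url + '?' + urlencode(COMMON_QUERY)
    entries = []
    for _page in itertools.count(1):
        response = download_json(next_href)
        collection = response.get('collection')
        if not collection:
            break
        entries.extend(collection)
        next_href = response.get('next_href')
        if not next_href:
            break
        # re-assert the common parameters on the server-provided link
        parsed = urlparse(next_href)
        qs = parse_qs(parsed.query)
        qs.update({k: [str(v)] for k, v in COMMON_QUERY.items()})
        next_href = urlunparse(parsed._replace(query=urlencode(qs, doseq=True)))
    return entries
```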
+
+
+class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
     _VALID_URL = r'''(?x)
                     https?://
                         (?:(?:www|m)\.)?soundcloud\.com/
@@ -385,16 +457,13 @@ class SoundcloudUserIE(SoundcloudPlaylistBaseIE):
         'playlist_mincount': 1,
     }]

-    _API_BASE = 'https://api.soundcloud.com'
-    _API_V2_BASE = 'https://api-v2.soundcloud.com'
-
     _BASE_URL_MAP = {
-        'all': '%s/profile/soundcloud:users:%%s' % _API_V2_BASE,
-        'tracks': '%s/users/%%s/tracks' % _API_BASE,
-        'sets': '%s/users/%%s/playlists' % _API_V2_BASE,
-        'reposts': '%s/profile/soundcloud:users:%%s/reposts' % _API_V2_BASE,
-        'likes': '%s/users/%%s/likes' % _API_V2_BASE,
-        'spotlight': '%s/users/%%s/spotlight' % _API_V2_BASE,
+        'all': '%s/profile/soundcloud:users:%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
+        'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_BASE,
+        'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
+        'reposts': '%s/profile/soundcloud:users:%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
+        'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
+        'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
     }

     _TITLE_MAP = {
@@ -416,57 +485,36 @@ class SoundcloudUserIE(SoundcloudPlaylistBaseIE):
             resolv_url, uploader, 'Downloading user info')

         resource = mobj.group('rsrc') or 'all'
-        base_url = self._BASE_URL_MAP[resource] % user['id']
-
-        COMMON_QUERY = {
-            'limit': 50,
-            'client_id': self._CLIENT_ID,
-            'linked_partitioning': '1',
-        }
-
-        query = COMMON_QUERY.copy()
-        query['offset'] = 0
-
-        next_href = base_url + '?' + compat_urllib_parse_urlencode(query)
-
-        entries = []
-        for i in itertools.count():
-            response = self._download_json(
-                next_href, uploader, 'Downloading track page %s' % (i + 1))
-
-            collection = response['collection']
-            if not collection:
-                break
-
-            def resolve_permalink_url(candidates):
-                for cand in candidates:
-                    if isinstance(cand, dict):
-                        permalink_url = cand.get('permalink_url')
-                        entry_id = self._extract_id(cand)
-                        if permalink_url and permalink_url.startswith('http'):
-                            return permalink_url, entry_id
-
-            for e in collection:
-                permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
-                if permalink_url:
-                    entries.append(self.url_result(permalink_url, video_id=entry_id))
-
-            next_href = response.get('next_href')
-            if not next_href:
-                break
-
-            parsed_next_href = compat_urlparse.urlparse(response['next_href'])
-            qs = compat_urlparse.parse_qs(parsed_next_href.query)
-            qs.update(COMMON_QUERY)
-            next_href = compat_urlparse.urlunparse(
-                parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True)))
-
-        return {
-            '_type': 'playlist',
-            'id': compat_str(user['id']),
-            'title': '%s (%s)' % (user['username'], self._TITLE_MAP[resource]),
-            'entries': entries,
-        }
+        return self._extract_playlist(
+            self._BASE_URL_MAP[resource] % user['id'], compat_str(user['id']),
+            '%s (%s)' % (user['username'], self._TITLE_MAP[resource]))
+
+
+class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
+    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)'
+    IE_NAME = 'soundcloud:trackstation'
+    _TESTS = [{
+        'url': 'https://soundcloud.com/stations/track/officialsundial/your-text',
+        'info_dict': {
+            'id': '286017854',
+            'title': 'Track station: your-text',
+        },
+        'playlist_mincount': 47,
+    }]
+
+    def _real_extract(self, url):
+        track_name = self._match_id(url)
+
+        webpage = self._download_webpage(url, track_name)
+
+        track_id = self._search_regex(
+            r'soundcloud:track-stations:(\d+)', webpage, 'track id')
+
+        return self._extract_playlist(
+            '%s/stations/soundcloud:track-stations:%s/tracks'
+            % (self._API_V2_BASE, track_id),
+            track_id, 'Track station: %s' % track_name)


 class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
youtube_dl/extractor/svt.py

@@ -181,7 +181,8 @@ class SVTPlayIE(SVTBaseIE):

         if video_id:
             data = self._download_json(
-                'http://www.svt.se/videoplayer-api/video/%s' % video_id, video_id)
+                'https://api.svt.se/videoplayer-api/video/%s' % video_id,
+                video_id, headers=self.geo_verification_headers())
             info_dict = self._extract_video(data, video_id)
             if not info_dict.get('title'):
                 info_dict['title'] = re.sub(
youtube_dl/extractor/teamfourstar.py (deleted)

@@ -1,48 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from .jwplatform import JWPlatformIE
-from ..utils import unified_strdate
-
-
-class TeamFourStarIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/(?P<id>[a-z0-9\-]+)'
-    _TEST = {
-        'url': 'http://teamfourstar.com/tfs-abridged-parody-episode-1-2/',
-        'info_dict': {
-            'id': '0WdZO31W',
-            'title': 'TFS Abridged Parody Episode 1',
-            'description': 'md5:d60bc389588ebab2ee7ad432bda953ae',
-            'ext': 'mp4',
-            'timestamp': 1394168400,
-            'upload_date': '20080508',
-        },
-    }
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-
-        jwplatform_url = JWPlatformIE._extract_url(webpage)
-
-        video_title = self._html_search_regex(
-            r'<h1[^>]+class="entry-title"[^>]*>(?P<title>.+?)</h1>',
-            webpage, 'title')
-        video_date = unified_strdate(self._html_search_regex(
-            r'<span[^>]+class="meta-date date updated"[^>]*>(?P<date>.+?)</span>',
-            webpage, 'date', fatal=False))
-        video_description = self._html_search_regex(
-            r'(?s)<div[^>]+class="content-inner"[^>]*>.*?(?P<description><p>.+?)</div>',
-            webpage, 'description', fatal=False)
-        video_thumbnail = self._og_search_thumbnail(webpage)
-
-        return {
-            '_type': 'url_transparent',
-            'display_id': display_id,
-            'title': video_title,
-            'description': video_description,
-            'upload_date': video_date,
-            'thumbnail': video_thumbnail,
-            'url': jwplatform_url,
-        }
youtube_dl/extractor/toutv.py

@@ -5,7 +5,6 @@ from .common import InfoExtractor
 from ..utils import (
     int_or_none,
     js_to_json,
-    ExtractorError,
     urlencode_postdata,
     extract_attributes,
     smuggle_url,
@@ -78,8 +77,10 @@ class TouTvIE(InfoExtractor):
     def _real_extract(self, url):
         path = self._match_id(url)
         metadata = self._download_json('http://ici.tou.tv/presentation/%s' % path, path)
+        # IsDrm does not necessarily mean the video is DRM protected (see
+        # https://github.com/rg3/youtube-dl/issues/13994).
        if metadata.get('IsDrm'):
-            raise ExtractorError('This video is DRM protected.', expected=True)
+            self.report_warning('This video is probably DRM protected.', path)
         video_id = metadata['IdMedia']
         details = metadata['Details']
         title = details['OriginalTitle']
|
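The TouTv hunk above deliberately softens the DRM check: the `IsDrm` flag can be set on videos that are not actually protected (see the linked issue #13994), so aborting with `ExtractorError` threw away playable videos. A minimal sketch of the behavioural difference, using a made-up `metadata` dict in place of the real API response:

```python
metadata = {'IsDrm': True, 'IdMedia': 'abc-123'}  # hypothetical API response

# Old behaviour: a hard, "expected" failure that stopped extraction here.
# raise ExtractorError('This video is DRM protected.', expected=True)

# New behaviour: warn and carry on, so flagged-but-playable videos
# still get their formats extracted further down.
if metadata.get('IsDrm'):
    print('WARNING: This video is probably DRM protected.')
video_id = metadata['IdMedia']
print(video_id)
```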
@@ -18,7 +18,7 @@ class TV4IE(InfoExtractor):
             tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
             tv4play\.se/
             (?:
-                (?:program|barn)/(?:[^\?]+)\?video_id=|
+                (?:program|barn)/(?:[^/]+/|(?:[^\?]+)\?video_id=)|
                 iframe/video/|
                 film/|
                 sport/|
|
|||||||
'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412',
|
'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.tv4play.se/program/farang/3922081',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
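In the TV4 hunk above, the new `(?:[^/]+/|(?:[^\?]+)\?video_id=)` alternative accepts path-style programme URLs alongside the older `?video_id=` form, which is what the added `farang` test exercises. A small sketch, using a simplified pattern rather than the extractor's full `_VALID_URL`:

```python
import re

# Simplified: only the tv4play.se branch changed by the diff is included.
PATTERN = re.compile(r'''(?x)https?://(?:www\.)?tv4play\.se/
    (?:program|barn)/(?:[^/]+/|(?:[^\?]+)\?video_id=)(?P<id>\d+)''')

for url in ('http://www.tv4play.se/program/farang/3922081',
            'http://www.tv4play.se/barn/looney-tunes?video_id=3062412'):
    m = PATTERN.match(url)
    print(url, '->', m.group('id') if m else None)
```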
@@ -9,7 +9,7 @@ from ..utils import (


 class TVN24IE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)\.html'
+    _VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html',
         'md5': 'fbdec753d7bc29d96036808275f2130c',
@@ -18,7 +18,7 @@ class TVN24IE(InfoExtractor):
             'ext': 'mp4',
             'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"',
             'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".',
-            'thumbnail': 're:http://.*[.]jpeg',
+            'thumbnail': 're:https?://.*[.]jpeg',
         }
     }, {
         'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html',
@@ -29,6 +29,9 @@ class TVN24IE(InfoExtractor):
     }, {
         'url': 'http://tvn24bis.pl/poranek,146,m/gen-koziej-w-tvn24-bis-wracamy-do-czasow-zimnej-wojny,715660.html',
         'only_matching': True,
+    }, {
+        'url': 'https://www.tvn24.pl/magazyn-tvn24/angie-w-jednej-czwartej-polka-od-szarej-myszki-do-cesarzowej-europy,119,2158',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
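The loosened TVN24 pattern above simply drops the trailing `\.html` requirement, which is what lets the new extension-less magazine URL match. The pattern is short enough to try directly:

```python
import re

_VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)'

url = ('https://www.tvn24.pl/magazyn-tvn24/angie-w-jednej-czwartej-polka'
       '-od-szarej-myszki-do-cesarzowej-europy,119,2158')
# The last path segment becomes the id; older ".html" news URLs still match.
print(re.match(_VALID_URL, url).group('id'))
```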
@@ -15,16 +15,16 @@ from ..utils import (
 class TVPIE(InfoExtractor):
     IE_NAME = 'tvp'
     IE_DESC = 'Telewizja Polska'
-    _VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)'
+    _VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:video/(?:[^,\s]*,)*|(?:(?!\d+/)[^/]+/)*)(?P<id>\d+)'

     _TESTS = [{
-        'url': 'http://vod.tvp.pl/194536/i-seria-odc-13',
+        'url': 'https://vod.tvp.pl/video/czas-honoru,i-seria-odc-13,194536',
         'md5': '8aa518c15e5cc32dfe8db400dc921fbb',
         'info_dict': {
             'id': '194536',
             'ext': 'mp4',
             'title': 'Czas honoru, I seria – odc. 13',
-            'description': 'md5:76649d2014f65c99477be17f23a4dead',
+            'description': 'md5:381afa5bca72655fe94b05cfe82bf53d',
         },
     }, {
         'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
@@ -37,12 +37,13 @@ class TVPIE(InfoExtractor):
         },
     }, {
         # page id is not the same as video id(#7799)
-        'url': 'http://vod.tvp.pl/22704887/08122015-1500',
-        'md5': 'cf6a4705dfd1489aef8deb168d6ba742',
+        'url': 'https://wiadomosci.tvp.pl/33908820/28092017-1930',
+        'md5': '84cd3c8aec4840046e5ab712416b73d0',
         'info_dict': {
-            'id': '22680786',
+            'id': '33908820',
             'ext': 'mp4',
-            'title': 'Wiadomości, 08.12.2015, 15:00',
+            'title': 'Wiadomości, 28.09.2017, 19:30',
+            'description': 'Wydanie główne codziennego serwisu informacyjnego.'
         },
     }, {
         'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
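The new `video/(?:[^,\s]*,)*` branch in the TVP pattern above accepts the comma-separated slugs of the redesigned vod.tvp.pl, while the old path-style branch keeps matching page URLs. Both test URLs from the hunks can be checked directly:

```python
import re

_VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:video/(?:[^,\s]*,)*|(?:(?!\d+/)[^/]+/)*)(?P<id>\d+)'

for url in ('https://vod.tvp.pl/video/czas-honoru,i-seria-odc-13,194536',
            'https://wiadomosci.tvp.pl/33908820/28092017-1930'):
    # Prints 194536 and 33908820: one per branch of the alternation.
    print(re.match(_VALID_URL, url).group('id'))
```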
@@ -15,7 +15,9 @@ from ..utils import (
     int_or_none,
     parse_iso8601,
     qualities,
+    smuggle_url,
     try_get,
+    unsmuggle_url,
     update_url_query,
 )

@@ -224,6 +226,9 @@ class TVPlayIE(InfoExtractor):
     ]

     def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, {})
+        self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+
         video_id = self._match_id(url)
         geo_country = self._search_regex(
             r'https?://[^/]+\.([a-z]{2})', url,
@@ -426,4 +431,9 @@ class ViafreeIE(InfoExtractor):
             r'currentVideo["\']\s*:\s*.+?["\']id["\']\s*:\s*["\'](\d{6,})',
             webpage, 'video id')

-        return self.url_result('mtg:%s' % video_id, TVPlayIE.ie_key())
+        return self.url_result(
+            smuggle_url(
+                'mtg:%s' % video_id,
+                {'geo_countries': [
+                    compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]]}),
+            ie=TVPlayIE.ie_key(), video_id=video_id)
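The ViafreeIE change above relies on youtube-dl's URL "smuggling" helpers: `smuggle_url()` serialises extra data into the URL fragment and `unsmuggle_url()` recovers it on the other side, which is how the country code (derived from the page URL's TLD) reaches `TVPlayIE._real_extract()` for geo bypass. A round-trip sketch, assuming `youtube_dl` is importable and using 'se' as an example country:

```python
from youtube_dl.utils import smuggle_url, unsmuggle_url

# Hide the geo data in the URL fragment, then recover it.
smuggled = smuggle_url('mtg:3922081', {'geo_countries': ['se']})
url, data = unsmuggle_url(smuggled, {})
print(url)   # mtg:3922081
print(data)  # {'geo_countries': ['se']}
```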
@@ -14,7 +14,7 @@ from ..utils import (

 class TwentyFourVideoIE(InfoExtractor):
     IE_NAME = '24video'
-    _VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sex|tube))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
+    _VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sex|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'

     _TESTS = [{
         'url': 'http://www.24video.net/video/view/1044982',
@@ -60,8 +60,8 @@ class TwentyFourVideoIE(InfoExtractor):
         duration = int_or_none(self._og_search_property(
             'duration', webpage, 'duration', fatal=False))
         timestamp = parse_iso8601(self._search_regex(
-            r'<time id="video-timeago" datetime="([^"]+)" itemprop="uploadDate">',
-            webpage, 'upload date'))
+            r'<time[^>]+\bdatetime="([^"]+)"[^>]+itemprop="uploadDate"',
+            webpage, 'upload date', fatal=False))

         uploader = self._html_search_regex(
             r'class="video-uploaded"[^>]*>\s*<a href="/jsecUser/movies/[^"]+"[^>]*>([^<]+)</a>',
@@ -72,7 +72,7 @@ class TwentyFourVideoIE(InfoExtractor):
             webpage, 'view count', fatal=False))
         comment_count = int_or_none(self._html_search_regex(
             r'<a[^>]+href="#tab-comments"[^>]*>(\d+) комментари',
-            webpage, 'comment count', fatal=False))
+            webpage, 'comment count', default=None))

         # Sets some cookies
         self._download_xml(
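The last 24video hunk swaps `fatal=False` for `default=None` on the comment count. The practical distinction: with `fatal=False` a failed match still prints a warning before returning None, while an explicit `default` is returned silently. A simplified model of the `_search_regex` semantics being relied on (not the actual common.py code):

```python
import re

NO_DEFAULT = object()  # sentinel: "no default was supplied"


def search_regex(pattern, string, name, default=NO_DEFAULT, fatal=True):
    """Simplified model of InfoExtractor._search_regex semantics."""
    m = re.search(pattern, string)
    if m:
        return m.group(1)
    if default is not NO_DEFAULT:
        return default  # default=None: fail silently
    if fatal:
        raise ValueError('Unable to extract ' + name)
    print('WARNING: unable to extract ' + name)  # fatal=False: warn, return None


html = '<span class="views">1234</span>'
print(search_regex(r'class="views">(\d+)<', html, 'view count', fatal=False))
print(search_regex(r'class="comments">(\d+)<', html, 'comment count', default=None))
```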
@@ -50,7 +50,7 @@ class TwentyMinutenIE(InfoExtractor):
     @staticmethod
     def _extract_urls(webpage):
         return [m.group('url') for m in re.finditer(
-            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1',
+            r'<iframe[^>]+src=(["\'])(?P<url>(?:(?:https?:)?//)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1',
             webpage)]

     def _real_extract(self, url):
@@ -28,7 +28,7 @@ from ..utils import (


 class TwitchBaseIE(InfoExtractor):
-    _VALID_URL_BASE = r'https?://(?:www\.)?twitch\.tv'
+    _VALID_URL_BASE = r'https?://(?:(?:www|go)\.)?twitch\.tv'

     _API_BASE = 'https://api.twitch.tv'
     _USHER_BASE = 'https://usher.ttvnw.net'
@@ -217,7 +217,7 @@ class TwitchVodIE(TwitchItemBaseIE):
     _VALID_URL = r'''(?x)
                     https?://
                     (?:
-                        (?:www\.)?twitch\.tv/(?:[^/]+/v|videos)/|
+                        (?:(?:www|go)\.)?twitch\.tv/(?:[^/]+/v|videos)/|
                         player\.twitch\.tv/\?.*?\bvideo=v
                     )
                     (?P<id>\d+)
@@ -458,7 +458,7 @@ class TwitchStreamIE(TwitchBaseIE):
     _VALID_URL = r'''(?x)
                     https?://
                     (?:
-                        (?:www\.)?twitch\.tv/|
+                        (?:(?:www|go)\.)?twitch\.tv/|
                         player\.twitch\.tv/\?.*?\bchannel=
                     )
                     (?P<id>[^/#?]+)
@@ -489,6 +489,9 @@ class TwitchStreamIE(TwitchBaseIE):
     }, {
         'url': 'https://player.twitch.tv/?channel=lotsofs',
         'only_matching': True,
+    }, {
+        'url': 'https://go.twitch.tv/food',
+        'only_matching': True,
     }]

     @classmethod
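All the Twitch hunks above make the same substitution, `(?:www\.)?` becoming `(?:(?:www|go)\.)?`, so the go.twitch.tv beta domain is accepted by the base, VOD and stream patterns alike. A quick check against the VOD pattern, trimmed to the part shown in the diff and using a made-up video id:

```python
import re

_VALID_URL = r'''(?x)
    https?://
    (?:
        (?:(?:www|go)\.)?twitch\.tv/(?:[^/]+/v|videos)/|
        player\.twitch\.tv/\?.*?\bvideo=v
    )
    (?P<id>\d+)
'''

for url in ('https://www.twitch.tv/videos/123456',
            'https://go.twitch.tv/videos/123456'):
    print(re.match(_VALID_URL, url).group('id'))  # 123456 both times
```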
@@ -229,7 +229,7 @@ class TwitterCardIE(TwitterBaseIE):

         title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
         thumbnail = config.get('posterImageUrl') or config.get('image_src')
-        duration = float_or_none(config.get('duration')) or duration
+        duration = float_or_none(config.get('duration'), scale=1000) or duration

         return {
             'id': video_id,
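The one-line TwitterCard fix above passes `scale=1000` to `float_or_none()`, which divides the raw value; the card config evidently reports duration in milliseconds, and the tests below expect seconds (e.g. 12.922). Assuming `youtube_dl` is importable:

```python
from youtube_dl.utils import float_or_none

print(float_or_none(12922, scale=1000))  # 12.922 (ms -> s)
print(float_or_none(None, scale=1000))   # None: missing values pass through
```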
@@ -242,8 +242,9 @@ class TwitterCardIE(TwitterBaseIE):

 class TwitterIE(InfoExtractor):
     IE_NAME = 'twitter'
-    _VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?P<user_id>[^/]+)/status/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?:i/web|(?P<user_id>[^/]+))/status/(?P<id>\d+)'
     _TEMPLATE_URL = 'https://twitter.com/%s/status/%s'
+    _TEMPLATE_STATUSES_URL = 'https://twitter.com/statuses/%s'

     _TESTS = [{
         'url': 'https://twitter.com/freethenipple/status/643211948184596480',
@@ -255,6 +256,7 @@ class TwitterIE(InfoExtractor):
             'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"',
             'uploader': 'FREE THE NIPPLE',
             'uploader_id': 'freethenipple',
+            'duration': 12.922,
         },
         'params': {
             'skip_download': True,  # requires ffmpeg
@@ -305,11 +307,12 @@ class TwitterIE(InfoExtractor):
         'info_dict': {
             'id': '700207533655363584',
             'ext': 'mp4',
-            'title': 'Donte - BEAT PROD: @suhmeduh #Damndaniel',
-            'description': 'Donte on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
+            'title': 'あかさ - BEAT PROD: @suhmeduh #Damndaniel',
+            'description': 'あかさ on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
             'thumbnail': r're:^https?://.*\.jpg',
-            'uploader': 'Donte',
+            'uploader': 'あかさ',
             'uploader_id': 'jaydingeer',
+            'duration': 30.0,
         },
         'params': {
             'skip_download': True,  # requires ffmpeg
@@ -320,9 +323,9 @@ class TwitterIE(InfoExtractor):
         'info_dict': {
             'id': 'MIOxnrUteUd',
             'ext': 'mp4',
-            'title': 'FilmDrunk - Vine of the day',
-            'description': 'FilmDrunk on Twitter: "Vine of the day https://t.co/xmTvRdqxWf"',
-            'uploader': 'FilmDrunk',
+            'title': 'Vince Mancini - Vine of the day',
+            'description': 'Vince Mancini on Twitter: "Vine of the day https://t.co/xmTvRdqxWf"',
+            'uploader': 'Vince Mancini',
             'uploader_id': 'Filmdrunk',
             'timestamp': 1402826626,
             'upload_date': '20140615',
@@ -337,6 +340,7 @@ class TwitterIE(InfoExtractor):
             'description': 'Captain America on Twitter: "@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI"',
             'uploader_id': 'captainamerica',
             'uploader': 'Captain America',
+            'duration': 3.17,
         },
         'params': {
             'skip_download': True,  # requires ffmpeg
@@ -364,10 +368,26 @@ class TwitterIE(InfoExtractor):
             'description': 'عالم الأخبار on Twitter: "كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN"',
             'uploader': 'عالم الأخبار',
             'uploader_id': 'news_al3alm',
+            'duration': 277.4,
         },
         'params': {
             'format': 'best[format_id^=http-]',
         },
+    }, {
+        'url': 'https://twitter.com/i/web/status/910031516746514432',
+        'info_dict': {
+            'id': '910031516746514432',
+            'ext': 'mp4',
+            'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'description': 'Préfet de Guadeloupe on Twitter: "[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo"',
+            'uploader': 'Préfet de Guadeloupe',
+            'uploader_id': 'Prefet971',
+            'duration': 47.48,
+        },
+        'params': {
+            'skip_download': True,  # requires ffmpeg
+        },
     }]

     def _real_extract(self, url):
@@ -376,11 +396,15 @@ class TwitterIE(InfoExtractor):
         twid = mobj.group('id')

         webpage, urlh = self._download_webpage_handle(
-            self._TEMPLATE_URL % (user_id, twid), twid)
+            self._TEMPLATE_STATUSES_URL % twid, twid)

         if 'twitter.com/account/suspended' in urlh.geturl():
             raise ExtractorError('Account suspended by Twitter.', expected=True)

+        if user_id is None:
+            mobj = re.match(self._VALID_URL, urlh.geturl())
+            user_id = mobj.group('user_id')
+
         username = remove_end(self._og_search_title(webpage), ' on Twitter')

         title = description = self._og_search_description(webpage).strip('').replace('\n', ' ').strip('“”')
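The Twitter changes above add support for `/i/web/status/<id>` permalinks, which carry no username: the `i/web` alternative wins, `user_id` comes back as None, and `_real_extract()` re-matches the post-redirect URL to recover it. The new pattern can be exercised directly:

```python
import re

_VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?:i/web|(?P<user_id>[^/]+))/status/(?P<id>\d+)'

m = re.match(_VALID_URL, 'https://twitter.com/i/web/status/910031516746514432')
print(m.group('id'), m.group('user_id'))  # 910031516746514432 None

m = re.match(_VALID_URL, 'https://twitter.com/freethenipple/status/643211948184596480')
print(m.group('id'), m.group('user_id'))  # 643211948184596480 freethenipple
```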
@@ -15,6 +15,7 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     int_or_none,
+    js_to_json,
     sanitized_Request,
     unescapeHTML,
     urlencode_postdata,
@@ -73,7 +74,7 @@ class UdemyIE(InfoExtractor):
         return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url

     checkout_url = unescapeHTML(self._search_regex(
-        r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/payment/checkout/.+?)\1',
+        r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/(?:payment|cart)/checkout/.+?)\1',
         webpage, 'checkout url', group='url', default=None))
     if checkout_url:
         raise ExtractorError(
@@ -268,6 +269,25 @@ class UdemyIE(InfoExtractor):
                 f = add_output_format_meta(f, format_id)
                 formats.append(f)

+        def extract_subtitles(track_list):
+            if not isinstance(track_list, list):
+                return
+            for track in track_list:
+                if not isinstance(track, dict):
+                    continue
+                if track.get('kind') != 'captions':
+                    continue
+                src = track.get('src')
+                if not src or not isinstance(src, compat_str):
+                    continue
+                lang = track.get('language') or track.get(
+                    'srclang') or track.get('label')
+                sub_dict = automatic_captions if track.get(
+                    'autogenerated') is True else subtitles
+                sub_dict.setdefault(lang, []).append({
+                    'url': src,
+                })
+
         download_urls = asset.get('download_urls')
         if isinstance(download_urls, dict):
             extract_formats(download_urls.get('Video'))
@@ -315,23 +335,16 @@ class UdemyIE(InfoExtractor):
                 extract_formats(data.get('sources'))
                 if not duration:
                     duration = int_or_none(data.get('duration'))
-                tracks = data.get('tracks')
-                if isinstance(tracks, list):
-                    for track in tracks:
-                        if not isinstance(track, dict):
-                            continue
-                        if track.get('kind') != 'captions':
-                            continue
-                        src = track.get('src')
-                        if not src or not isinstance(src, compat_str):
-                            continue
-                        lang = track.get('language') or track.get(
-                            'srclang') or track.get('label')
-                        sub_dict = automatic_captions if track.get(
-                            'autogenerated') is True else subtitles
-                        sub_dict.setdefault(lang, []).append({
-                            'url': src,
-                        })
+                extract_subtitles(data.get('tracks'))
+
+        if not subtitles and not automatic_captions:
+            text_tracks = self._parse_json(
+                self._search_regex(
+                    r'text-tracks=(["\'])(?P<data>\[.+?\])\1', view_html,
+                    'text tracks', default='{}', group='data'), video_id,
+                transform_source=lambda s: js_to_json(unescapeHTML(s)),
+                fatal=False)
+            extract_subtitles(text_tracks)

         self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
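The reworked Udemy subtitle handling factors the track parsing into `extract_subtitles()` and adds a fallback that reads an HTML-escaped `text-tracks` attribute via `js_to_json(unescapeHTML(...))`. A round-trip sketch with hypothetical attribute data, assuming `youtube_dl` is importable:

```python
import json

from youtube_dl.utils import js_to_json, unescapeHTML

# Hypothetical HTML-escaped value of the "text-tracks" attribute that
# the fallback parses when the asset JSON carries no tracks.
raw = ('[{&quot;kind&quot;:&quot;captions&quot;,'
       '&quot;src&quot;:&quot;https://example.com/en.vtt&quot;,'
       '&quot;language&quot;:&quot;en&quot;}]')
tracks = json.loads(js_to_json(unescapeHTML(raw)))
print(tracks[0]['language'], tracks[0]['src'])  # en https://example.com/en.vtt
```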
Some files were not shown because too many files have changed in this diff