mirror of
https://github.com/l1ving/youtube-dl
synced 2025-01-24 03:02:56 +08:00
Merge remote-tracking branch 'upstream/master'
This commit is contained in:
commit
b92da88117
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.12.14*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.12.14**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.01.21*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.01.21**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2017.12.14
|
||||
[debug] youtube-dl version 2018.01.21
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
13
.travis.yml
13
.travis.yml
@ -7,16 +7,21 @@ python:
|
||||
- "3.4"
|
||||
- "3.5"
|
||||
- "3.6"
|
||||
- "pypy"
|
||||
- "pypy3"
|
||||
sudo: false
|
||||
env:
|
||||
- YTDL_TEST_SET=core
|
||||
- YTDL_TEST_SET=download
|
||||
matrix:
|
||||
include:
|
||||
- env: JYTHON=true; YTDL_TEST_SET=core
|
||||
- env: JYTHON=true; YTDL_TEST_SET=download
|
||||
fast_finish: true
|
||||
allow_failures:
|
||||
- env: YTDL_TEST_SET=download
|
||||
- env: JYTHON=true; YTDL_TEST_SET=core
|
||||
- env: JYTHON=true; YTDL_TEST_SET=download
|
||||
before_install:
|
||||
- if [ "$JYTHON" == "true" ]; then ./devscripts/install_jython.sh; export PATH="$HOME/jython/bin:$PATH"; fi
|
||||
script: ./devscripts/run_tests.sh
|
||||
notifications:
|
||||
email:
|
||||
- filippo.valsorda@gmail.com
|
||||
- yasoob.khld@gmail.com
|
||||
|
2
AUTHORS
2
AUTHORS
@ -231,3 +231,5 @@ John Dong
|
||||
Tatsuyuki Ishi
|
||||
Daniel Weber
|
||||
Kay Bouché
|
||||
Yang Hongbo
|
||||
Lei Wang
|
||||
|
145
ChangeLog
145
ChangeLog
@ -1,3 +1,144 @@
|
||||
version 2018.01.21
|
||||
|
||||
Core
|
||||
* [extractor/common] Improve jwplayer DASH formats extraction (#9242, #15187)
|
||||
* [utils] Improve scientific notation handling in js_to_json (#14789)
|
||||
|
||||
Extractors
|
||||
+ [southparkdk] Add support for southparkstudios.nu
|
||||
+ [southpark] Add support for collections (#14803)
|
||||
* [franceinter] Fix upload date extraction (#14996)
|
||||
+ [rtvs] Add support for rtvs.sk (#9242, #15187)
|
||||
* [restudy] Fix extraction and extend URL regular expression (#15347)
|
||||
* [youtube:live] Improve live detection (#15365)
|
||||
+ [springboardplatform] Add support for springboardplatform.com
|
||||
* [prosiebensat1] Add another clip id regular expression (#15290)
|
||||
- [ringtv] Remove extractor (#15345)
|
||||
|
||||
|
||||
version 2018.01.18
|
||||
|
||||
Extractors
|
||||
* [soundcloud] Update client id (#15306)
|
||||
- [kamcord] Remove extractor (#15322)
|
||||
+ [spiegel] Add support for nexx videos (#15285)
|
||||
* [twitch] Fix authentication and error capture (#14090, #15264)
|
||||
* [vk] Detect more errors due to copyright complaints (#15259)
|
||||
|
||||
|
||||
version 2018.01.14
|
||||
|
||||
Extractors
|
||||
* [youtube] Fix live streams extraction (#15202)
|
||||
* [wdr] Bypass geo restriction
|
||||
* [wdr] Rework extractors (#14598)
|
||||
+ [wdr] Add support for wdrmaus.de/elefantenseite (#14598)
|
||||
+ [gamestar] Add support for gamepro.de (#3384)
|
||||
* [viafree] Skip rtmp formats (#15232)
|
||||
+ [pandoratv] Add support for mobile URLs (#12441)
|
||||
+ [pandoratv] Add support for new URL format (#15131)
|
||||
+ [ximalaya] Add support for ximalaya.com (#14687)
|
||||
+ [digg] Add support for digg.com (#15214)
|
||||
* [limelight] Tolerate empty pc formats (#15150, #15151, #15207)
|
||||
* [ndr:embed:base] Make separate formats extraction non fatal (#15203)
|
||||
+ [weibo] Add extractor (#15079)
|
||||
+ [ok] Add support for live streams
|
||||
* [canalplus] Fix extraction (#15072)
|
||||
* [bilibili] Fix extraction (#15188)
|
||||
|
||||
|
||||
version 2018.01.07
|
||||
|
||||
Core
|
||||
* [utils] Fix youtube-dl under PyPy3 on Windows
|
||||
* [YoutubeDL] Output python implementation in debug header
|
||||
|
||||
Extractors
|
||||
+ [jwplatform] Add support for multiple embeds (#15192)
|
||||
* [mitele] Fix extraction (#15186)
|
||||
+ [motherless] Add support for groups (#15124)
|
||||
* [lynda] Relax URL regular expression (#15185)
|
||||
* [soundcloud] Fallback to avatar picture for thumbnail (#12878)
|
||||
* [youku] Fix list extraction (#15135)
|
||||
* [openload] Fix extraction (#15166)
|
||||
* [lynda] Skip invalid subtitles (#15159)
|
||||
* [twitch] Pass video id to url_result when extracting playlist (#15139)
|
||||
* [rtve.es:alacarta] Fix extraction of some new URLs
|
||||
* [acast] Fix extraction (#15147)
|
||||
|
||||
|
||||
version 2017.12.31
|
||||
|
||||
Core
|
||||
+ [extractor/common] Add container meta field for formats extracted
|
||||
in _parse_mpd_formats (#13616)
|
||||
+ [downloader/hls] Use HTTP headers for key request
|
||||
* [common] Use AACL as the default fourcc when AudioTag is 255
|
||||
* [extractor/common] Fix extraction of DASH formats with the same
|
||||
representation id (#15111)
|
||||
|
||||
Extractors
|
||||
+ [slutload] Add support for mobile URLs (#14806)
|
||||
* [abc:iview] Bypass geo restriction
|
||||
* [abc:iview] Fix extraction (#14711, #14782, #14838, #14917, #14963, #14985,
|
||||
#15035, #15057, #15061, #15071, #15095, #15106)
|
||||
* [openload] Fix extraction (#15118)
|
||||
- [sandia] Remove extractor
|
||||
- [collegerama] Remove extractor
|
||||
+ [mediasite] Add support for sites based on Mediasite Video Platform (#5428,
|
||||
#11185, #14343)
|
||||
+ [ufctv] Add support for ufc.tv (#14520)
|
||||
* [pluralsight] Fix missing first line of subtitles (#11118)
|
||||
* [openload] Fallback on f-page extraction (#14665, #14879)
|
||||
* [vimeo] Improve password protected videos extraction (#15114)
|
||||
* [aws] Fix canonical/signed headers generation on python 2 (#15102)
|
||||
|
||||
|
||||
version 2017.12.28
|
||||
|
||||
Extractors
|
||||
+ [internazionale] Add support for internazionale.it (#14973)
|
||||
* [playtvak] Relax video regular expression and make description optional
|
||||
(#15037)
|
||||
+ [filmweb] Add support for filmweb.no (#8773, #10368)
|
||||
+ [23video] Add support for 23video.com
|
||||
+ [espn] Add support for fivethirtyeight.com (#6864)
|
||||
+ [umg:de] Add support for universal-music.de (#11582, #11584)
|
||||
+ [espn] Add support for espnfc and extract more formats (#8053)
|
||||
* [youku] Update ccode (#14880)
|
||||
+ [openload] Add support for oload.stream (#15070)
|
||||
* [youku] Fix list extraction (#15065)
|
||||
|
||||
|
||||
version 2017.12.23
|
||||
|
||||
Core
|
||||
* [extractor/common] Move X-Forwarded-For setup code into _request_webpage
|
||||
+ [YoutubeDL] Add support for playlist_uploader and playlist_uploader_id in
|
||||
output template (#11427, #15018)
|
||||
+ [extractor/common] Introduce uploader, uploader_id and uploader_url
|
||||
meta fields for playlists (#11427, #15018)
|
||||
* [downloader/fragment] Encode filename of fragment being removed (#15020)
|
||||
+ [utils] Add another date format pattern (#14999)
|
||||
|
||||
Extractors
|
||||
+ [kaltura] Add another embed pattern for entry_id
|
||||
+ [7plus] Add support for 7plus.com.au (#15043)
|
||||
* [animeondemand] Relax login error regular expression
|
||||
+ [shahid] Add support for show pages (#7401)
|
||||
+ [youtube] Extract uploader, uploader_id and uploader_url for playlists
|
||||
(#11427, #15018)
|
||||
* [afreecatv] Improve format extraction (#15019)
|
||||
+ [cspan] Add support for audio only pages and catch page errors (#14995)
|
||||
+ [mailru] Add support for embed URLs (#14904)
|
||||
* [crunchyroll] Future-proof XML element checks (#15013)
|
||||
* [cbslocal] Fix timestamp extraction (#14999, #15000)
|
||||
* [discoverygo] Correct TTML subtitle extension
|
||||
* [vk] Make view count optional (#14979)
|
||||
* [disney] Skip Apple FairPlay formats (#14982)
|
||||
* [voot] Fix format extraction (#14758)
|
||||
|
||||
|
||||
version 2017.12.14
|
||||
|
||||
Core
|
||||
@ -148,8 +289,8 @@ Extractors
|
||||
+ [fxnetworks] Extract series metadata (#14603)
|
||||
+ [younow] Add support for younow.com (#9255, #9432, #12436)
|
||||
* [dctptv] Fix extraction (#14599)
|
||||
* [youtube] Restrict embed regex (#14600)
|
||||
* [vimeo] Restrict iframe embed regex (#14600)
|
||||
* [youtube] Restrict embed regular expression (#14600)
|
||||
* [vimeo] Restrict iframe embed regular expression (#14600)
|
||||
* [soundgasm] Improve extraction (#14588)
|
||||
- [myvideo] Remove extractor (#8557)
|
||||
+ [nbc] Add support for classic-tv videos (#14575)
|
||||
|
@ -1,7 +1,9 @@
|
||||
include README.md
|
||||
include test/*.py
|
||||
include test/*.json
|
||||
include LICENSE
|
||||
include AUTHORS
|
||||
include ChangeLog
|
||||
include youtube-dl.bash-completion
|
||||
include youtube-dl.fish
|
||||
include youtube-dl.1
|
||||
recursive-include docs Makefile conf.py *.rst
|
||||
recursive-include test *
|
||||
|
4
Makefile
4
Makefile
@ -110,7 +110,7 @@ _EXTRACTOR_FILES = $(shell find youtube_dl/extractor -iname '*.py' -and -not -in
|
||||
youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
|
||||
$(PYTHON) devscripts/make_lazy_extractors.py $@
|
||||
|
||||
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog
|
||||
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog AUTHORS
|
||||
@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
|
||||
--exclude '*.DS_Store' \
|
||||
--exclude '*.kate-swp' \
|
||||
@ -122,7 +122,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
|
||||
--exclude 'docs/_build' \
|
||||
-- \
|
||||
bin devscripts test youtube_dl docs \
|
||||
ChangeLog LICENSE README.md README.txt \
|
||||
ChangeLog AUTHORS LICENSE README.md README.txt \
|
||||
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \
|
||||
youtube-dl.zsh youtube-dl.fish setup.py setup.cfg \
|
||||
youtube-dl
|
||||
|
@ -46,7 +46,7 @@ Or with [MacPorts](https://www.macports.org/):
|
||||
Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html).
|
||||
|
||||
# DESCRIPTION
|
||||
**youtube-dl** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on Mac OS X. It is released to the public domain, which means you can modify it, redistribute it or use it however you like.
|
||||
**youtube-dl** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on macOS. It is released to the public domain, which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
youtube-dl [OPTIONS] URL [URL...]
|
||||
|
||||
@ -863,7 +863,7 @@ Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
|
||||
|
||||
In order to extract cookies from a browser, use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox).
|
||||
|
||||
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, Mac OS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have the correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and, if necessary, convert newlines to match your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of an invalid newline format.
|
||||
|
||||
Passing cookies to youtube-dl is a good way to work around login when a particular extractor does not implement it explicitly. Another use case is working around the [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) that some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare).
|
||||
|
||||
|
5
devscripts/install_jython.sh
Executable file
5
devscripts/install_jython.sh
Executable file
@ -0,0 +1,5 @@
|
||||
#!/bin/bash
# Download the Jython 2.7.1 installer, install it silently into
# $HOME/jython and install the nose package with Jython's pip.

# Stop at the first failing step so that a failed download does not
# cascade into misleading java/pip errors.
set -e

# Maven Central is served over HTTPS only; the plain-HTTP
# central.maven.org host is no longer available.
wget https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar
java -jar jython-installer-2.7.1.jar -s -d "$HOME/jython"
# Quote the path so a $HOME containing whitespace still works.
"$HOME/jython/bin/jython" -m pip install nose
|
@ -3,6 +3,7 @@
|
||||
- **1up.com**
|
||||
- **20min**
|
||||
- **220.ro**
|
||||
- **23video**
|
||||
- **24video**
|
||||
- **3qsdn**: 3Q SDN
|
||||
- **3sat**
|
||||
@ -10,6 +11,7 @@
|
||||
- **56.com**
|
||||
- **5min**
|
||||
- **6play**
|
||||
- **7plus**
|
||||
- **8tracks**
|
||||
- **91porn**
|
||||
- **9c9media**
|
||||
@ -126,7 +128,7 @@
|
||||
- **CamdemyFolder**
|
||||
- **CamWithHer**
|
||||
- **canalc2.tv**
|
||||
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
||||
- **Canalplus**: mycanal.fr and piwiplus.fr
|
||||
- **Canvas**
|
||||
- **CanvasEen**: canvas.be and een.be
|
||||
- **CarambaTV**
|
||||
@ -169,7 +171,6 @@
|
||||
- **CNN**
|
||||
- **CNNArticle**
|
||||
- **CNNBlogs**
|
||||
- **CollegeRama**
|
||||
- **ComCarCoff**
|
||||
- **ComedyCentral**
|
||||
- **ComedyCentralFullEpisodes**
|
||||
@ -209,6 +210,7 @@
|
||||
- **defense.gouv.fr**
|
||||
- **democracynow**
|
||||
- **DHM**: Filmarchiv - Deutsches Historisches Museum
|
||||
- **Digg**
|
||||
- **DigitallySpeaking**
|
||||
- **Digiteka**
|
||||
- **Discovery**
|
||||
@ -268,6 +270,8 @@
|
||||
- **Fczenit**
|
||||
- **filmon**
|
||||
- **filmon:channel**
|
||||
- **Filmweb**
|
||||
- **FiveThirtyEight**
|
||||
- **FiveTV**
|
||||
- **Flickr**
|
||||
- **Flipagram**
|
||||
@ -358,6 +362,7 @@
|
||||
- **InfoQ**
|
||||
- **Instagram**
|
||||
- **instagram:user**: Instagram user profile
|
||||
- **Internazionale**
|
||||
- **InternetVideoArchive**
|
||||
- **IPrima**
|
||||
- **iqiyi**: 爱奇艺
|
||||
@ -378,7 +383,6 @@
|
||||
- **JWPlatform**
|
||||
- **Kakao**
|
||||
- **Kaltura**
|
||||
- **Kamcord**
|
||||
- **KanalPlay**: Kanal 5/9/11 Play
|
||||
- **Kankan**
|
||||
- **Karaoketv**
|
||||
@ -444,6 +448,7 @@
|
||||
- **media.ccc.de**
|
||||
- **Medialaan**
|
||||
- **Mediaset**
|
||||
- **Mediasite**
|
||||
- **Medici**
|
||||
- **megaphone.fm**: megaphone.fm embedded players
|
||||
- **Meipai**: 美拍
|
||||
@ -473,6 +478,7 @@
|
||||
- **Moniker**: allmyvideos.net and vidspot.net
|
||||
- **Morningstar**: morningstar.com
|
||||
- **Motherless**
|
||||
- **MotherlessGroup**
|
||||
- **Motorsport**: motorsport.com
|
||||
- **MovieClips**
|
||||
- **MovieFap**
|
||||
@ -676,7 +682,6 @@
|
||||
- **revision**
|
||||
- **revision3:embed**
|
||||
- **RICE**
|
||||
- **RingTV**
|
||||
- **RMCDecouverte**
|
||||
- **RockstarGames**
|
||||
- **RoosterTeeth**
|
||||
@ -697,6 +702,7 @@
|
||||
- **rtve.es:live**: RTVE.es live streams
|
||||
- **rtve.es:television**
|
||||
- **RTVNH**
|
||||
- **RTVS**
|
||||
- **Rudo**
|
||||
- **RUHD**
|
||||
- **RulePorn**
|
||||
@ -712,7 +718,6 @@
|
||||
- **safari**: safaribooksonline.com online video
|
||||
- **safari:api**
|
||||
- **safari:course**: safaribooksonline.com online courses
|
||||
- **Sandia**: Sandia National Laboratories
|
||||
- **Sapo**: SAPO Vídeos
|
||||
- **savefrom.net**
|
||||
- **SBS**: sbs.com.au
|
||||
@ -728,6 +733,7 @@
|
||||
- **Servus**
|
||||
- **Sexu**
|
||||
- **Shahid**
|
||||
- **ShahidShow**
|
||||
- **Shared**: shared.sx
|
||||
- **ShowRoomLive**
|
||||
- **Sina**
|
||||
@ -767,7 +773,7 @@
|
||||
- **Sport5**
|
||||
- **SportBoxEmbed**
|
||||
- **SportDeutschland**
|
||||
- **Sportschau**
|
||||
- **SpringboardPlatform**
|
||||
- **Sprout**
|
||||
- **sr:mediathek**: Saarländischer Rundfunk
|
||||
- **SRGSSR**
|
||||
@ -886,7 +892,9 @@
|
||||
- **udemy**
|
||||
- **udemy:course**
|
||||
- **UDNEmbed**: 聯合影音
|
||||
- **UFCTV**
|
||||
- **UKTVPlay**
|
||||
- **umg:de**: Universal Music Deutschland
|
||||
- **Unistra**
|
||||
- **Unity**
|
||||
- **uol.com.br**
|
||||
@ -994,10 +1002,14 @@
|
||||
- **WatchIndianPorn**: Watch Indian Porn
|
||||
- **WDR**
|
||||
- **wdr:mobile**
|
||||
- **WDRElefant**
|
||||
- **WDRPage**
|
||||
- **Webcaster**
|
||||
- **WebcasterFeed**
|
||||
- **WebOfStories**
|
||||
- **WebOfStoriesPlaylist**
|
||||
- **Weibo**
|
||||
- **WeiboMobile**
|
||||
- **WeiqiTV**: WQTV
|
||||
- **wholecloud**: WholeCloud
|
||||
- **Wimp**
|
||||
@ -1017,6 +1029,8 @@
|
||||
- **xiami:artist**: 虾米音乐 - 歌手
|
||||
- **xiami:collection**: 虾米音乐 - 精选集
|
||||
- **xiami:song**: 虾米音乐
|
||||
- **ximalaya**: 喜马拉雅FM
|
||||
- **ximalaya:album**: 喜马拉雅FM 专辑
|
||||
- **XMinus**
|
||||
- **XNXX**
|
||||
- **Xstream**
|
||||
|
1
setup.py
1
setup.py
@ -109,6 +109,7 @@ setup(
|
||||
author_email='ytdl@yt-dl.org',
|
||||
maintainer='Sergey M.',
|
||||
maintainer_email='dstftw@gmail.com',
|
||||
license='Unlicense',
|
||||
packages=[
|
||||
'youtube_dl',
|
||||
'youtube_dl.extractor', 'youtube_dl.downloader',
|
||||
|
@ -493,9 +493,20 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||
_TEST_CASES = [
|
||||
(
|
||||
# https://github.com/rg3/youtube-dl/issues/13919
|
||||
# Also tests duplicate representation ids, see
|
||||
# https://github.com/rg3/youtube-dl/issues/15111
|
||||
'float_duration',
|
||||
'http://unknown/manifest.mpd',
|
||||
[{
|
||||
'manifest_url': 'http://unknown/manifest.mpd',
|
||||
'ext': 'm4a',
|
||||
'format_id': '318597',
|
||||
'format_note': 'DASH audio',
|
||||
'protocol': 'http_dash_segments',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'none',
|
||||
'tbr': 61.587,
|
||||
}, {
|
||||
'manifest_url': 'http://unknown/manifest.mpd',
|
||||
'ext': 'mp4',
|
||||
'format_id': '318597',
|
||||
|
@ -92,8 +92,8 @@ class TestDownload(unittest.TestCase):
|
||||
def generator(test_case, tname):
|
||||
|
||||
def test_template(self):
|
||||
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
|
||||
other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
|
||||
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])()
|
||||
other_ies = [get_info_extractor(ie_key)() for ie_key in test_case.get('add_ie', [])]
|
||||
is_playlist = any(k.startswith('playlist') for k in test_case)
|
||||
test_cases = test_case.get(
|
||||
'playlist', [] if is_playlist else [test_case])
|
||||
|
@ -814,6 +814,9 @@ class TestUtil(unittest.TestCase):
|
||||
inp = '''{"duration": "00:01:07"}'''
|
||||
self.assertEqual(js_to_json(inp), '''{"duration": "00:01:07"}''')
|
||||
|
||||
inp = '''{segments: [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}'''
|
||||
self.assertEqual(js_to_json(inp), '''{"segments": [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''')
|
||||
|
||||
def test_js_to_json_edgecases(self):
|
||||
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
|
||||
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
|
||||
@ -885,6 +888,13 @@ class TestUtil(unittest.TestCase):
|
||||
on = js_to_json('{/*comment\n*/42/*comment\n*/:/*comment\n*/42/*comment\n*/}')
|
||||
self.assertEqual(json.loads(on), {'42': 42})
|
||||
|
||||
on = js_to_json('{42:4.2e1}')
|
||||
self.assertEqual(json.loads(on), {'42': 42.0})
|
||||
|
||||
def test_js_to_json_malformed(self):
|
||||
self.assertEqual(js_to_json('42a1'), '42"a1"')
|
||||
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
|
||||
|
||||
def test_extract_attributes(self):
|
||||
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
|
||||
|
@ -2233,8 +2233,16 @@ class YoutubeDL(object):
|
||||
sys.exc_clear()
|
||||
except Exception:
|
||||
pass
|
||||
self._write_string('[debug] Python version %s - %s\n' % (
|
||||
platform.python_version(), platform_name()))
|
||||
|
||||
def python_implementation():
    """Return the name of the running Python implementation.

    The name comes from ``platform.python_implementation()``.  On PyPy,
    when ``sys.pypy_version_info`` is available, the first three fields
    of that tuple are appended as ``' version X.Y.Z'``.
    """
    impl_name = platform.python_implementation()
    if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
        return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
    return impl_name
|
||||
|
||||
self._write_string('[debug] Python version %s (%s) - %s\n' % (
|
||||
platform.python_version(), python_implementation(),
|
||||
platform_name()))
|
||||
|
||||
exe_versions = FFmpegPostProcessor.get_versions(self)
|
||||
exe_versions['rtmpdump'] = rtmpdump_version()
|
||||
|
@ -1,8 +1,8 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
from math import ceil
|
||||
|
||||
from .compat import compat_b64decode
|
||||
from .utils import bytes_to_intlist, intlist_to_bytes
|
||||
|
||||
BLOCK_SIZE_BYTES = 16
|
||||
@ -180,7 +180,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
|
||||
"""
|
||||
NONCE_LENGTH_BYTES = 8
|
||||
|
||||
data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
|
||||
data = bytes_to_intlist(compat_b64decode(data))
|
||||
password = bytes_to_intlist(password.encode('utf-8'))
|
||||
|
||||
key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
|
||||
|
@ -1,14 +1,17 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import binascii
|
||||
import collections
|
||||
import ctypes
|
||||
import email
|
||||
import getpass
|
||||
import io
|
||||
import itertools
|
||||
import optparse
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import shlex
|
||||
import shutil
|
||||
@ -2906,14 +2909,44 @@ except ImportError: # not 2.6+ or is 3.x
|
||||
except ImportError:
|
||||
compat_zip = zip
|
||||
|
||||
|
||||
# base64.b64decode accepts ASCII-only text (unicode) strings only from
# Python 3.3 on; for older interpreters encode text input to bytes first.
if sys.version_info < (3, 3):
    def compat_b64decode(s, *args, **kwargs):
        """Decode Base64 data given either as bytes or as ``compat_str``.

        Text input is encoded as ASCII before being handed to
        ``base64.b64decode``; any extra arguments are forwarded unchanged.
        """
        if isinstance(s, compat_str):
            s = s.encode('ascii')
        return base64.b64decode(s, *args, **kwargs)
else:
    compat_b64decode = base64.b64decode
|
||||
|
||||
|
||||
if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
    # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
    # names, see the original PyPy issue [1] and the youtube-dl one [2].
    # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
    # 2. https://github.com/rg3/youtube-dl/pull/4392
    def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
        """Return a ``ctypes.WINFUNCTYPE`` prototype wrapper.

        The returned callable takes the same arguments as the real
        prototype, but passes the function name through ``str()`` first.
        """
        real = ctypes.WINFUNCTYPE(*args, **kwargs)

        def resf(tpl, *args, **kwargs):
            # tpl is the (function name, dll) pair given to the prototype.
            funcname, dll = tpl
            return real((str(funcname), dll), *args, **kwargs)

        return resf
else:
    def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
        """Forward all arguments unchanged to ``ctypes.WINFUNCTYPE``."""
        return ctypes.WINFUNCTYPE(*args, **kwargs)
|
||||
|
||||
|
||||
__all__ = [
|
||||
'compat_HTMLParseError',
|
||||
'compat_HTMLParser',
|
||||
'compat_HTTPError',
|
||||
'compat_b64decode',
|
||||
'compat_basestring',
|
||||
'compat_chr',
|
||||
'compat_cookiejar',
|
||||
'compat_cookies',
|
||||
'compat_ctypes_WINFUNCTYPE',
|
||||
'compat_etree_fromstring',
|
||||
'compat_etree_register_namespace',
|
||||
'compat_expanduser',
|
||||
|
@ -1,12 +1,12 @@
|
||||
from __future__ import division, unicode_literals
|
||||
|
||||
import base64
|
||||
import io
|
||||
import itertools
|
||||
import time
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_etree_fromstring,
|
||||
compat_urlparse,
|
||||
compat_urllib_error,
|
||||
@ -312,7 +312,7 @@ class F4mFD(FragmentFD):
|
||||
boot_info = self._get_bootstrap_from_url(bootstrap_url)
|
||||
else:
|
||||
bootstrap_url = None
|
||||
bootstrap = base64.b64decode(node.text.encode('ascii'))
|
||||
bootstrap = compat_b64decode(node.text)
|
||||
boot_info = read_bootstrap_info(bootstrap)
|
||||
return boot_info, bootstrap_url
|
||||
|
||||
@ -349,7 +349,7 @@ class F4mFD(FragmentFD):
|
||||
live = boot_info['live']
|
||||
metadata_node = media.find(_add_ns('metadata'))
|
||||
if metadata_node is not None:
|
||||
metadata = base64.b64decode(metadata_node.text.encode('ascii'))
|
||||
metadata = compat_b64decode(metadata_node.text)
|
||||
else:
|
||||
metadata = None
|
||||
|
||||
|
@ -163,7 +163,8 @@ class HlsFD(FragmentFD):
|
||||
return False
|
||||
if decrypt_info['METHOD'] == 'AES-128':
|
||||
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
|
||||
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(decrypt_info['URI']).read()
|
||||
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
|
||||
self._prepare_url(info_dict, decrypt_info['URI'])).read()
|
||||
frag_content = AES.new(
|
||||
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
|
||||
self._append_fragment(ctx, frag_content)
|
||||
|
@ -1,6 +1,9 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
@ -10,6 +13,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@ -101,21 +105,24 @@ class ABCIE(InfoExtractor):
|
||||
class ABCIViewIE(InfoExtractor):
|
||||
IE_NAME = 'abc.net.au:iview'
|
||||
_VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)'
|
||||
_GEO_COUNTRIES = ['AU']
|
||||
|
||||
# ABC iview programs are normally available for 14 days only.
|
||||
_TESTS = [{
|
||||
'url': 'http://iview.abc.net.au/programs/diaries-of-a-broken-mind/ZX9735A001S00',
|
||||
'url': 'http://iview.abc.net.au/programs/call-the-midwife/ZW0898A003S00',
|
||||
'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
|
||||
'info_dict': {
|
||||
'id': 'ZX9735A001S00',
|
||||
'id': 'ZW0898A003S00',
|
||||
'ext': 'mp4',
|
||||
'title': 'Diaries Of A Broken Mind',
|
||||
'description': 'md5:7de3903874b7a1be279fe6b68718fc9e',
|
||||
'upload_date': '20161010',
|
||||
'uploader_id': 'abc2',
|
||||
'timestamp': 1476064920,
|
||||
'title': 'Series 5 Ep 3',
|
||||
'description': 'md5:e0ef7d4f92055b86c4f33611f180ed79',
|
||||
'upload_date': '20171228',
|
||||
'uploader_id': 'abc1',
|
||||
'timestamp': 1514499187,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -126,20 +133,30 @@ class ABCIViewIE(InfoExtractor):
|
||||
title = video_params.get('title') or video_params['seriesTitle']
|
||||
stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
|
||||
|
||||
format_urls = [
|
||||
try_get(stream, lambda x: x['hds-unmetered'], compat_str)]
|
||||
house_number = video_params.get('episodeHouseNumber')
|
||||
path = '/auth/hls/sign?ts={0}&hn={1}&d=android-mobile'.format(
|
||||
int(time.time()), house_number)
|
||||
sig = hmac.new(
|
||||
'android.content.res.Resources'.encode('utf-8'),
|
||||
path.encode('utf-8'), hashlib.sha256).hexdigest()
|
||||
token = self._download_webpage(
|
||||
'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
|
||||
|
||||
# May have higher quality video
|
||||
sd_url = try_get(
|
||||
stream, lambda x: x['streams']['hds']['sd'], compat_str)
|
||||
if sd_url:
|
||||
format_urls.append(sd_url.replace('metered', 'um'))
|
||||
def tokenize_url(url, token):
    """Return the result of ``update_url_query`` for *url* with *token*
    supplied under the ``hdnea`` query key."""
    return update_url_query(url, {
        'hdnea': token,
    })
|
||||
|
||||
formats = []
|
||||
for format_url in format_urls:
|
||||
if format_url:
|
||||
formats.extend(
|
||||
self._extract_akamai_formats(format_url, video_id))
|
||||
for sd in ('sd', 'sd-low'):
|
||||
sd_url = try_get(
|
||||
stream, lambda x: x['streams']['hls'][sd], compat_str)
|
||||
if not sd_url:
|
||||
continue
|
||||
formats = self._extract_m3u8_formats(
|
||||
tokenize_url(sd_url, token), video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
|
||||
if formats:
|
||||
break
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
|
@ -8,7 +8,7 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
unified_timestamp,
|
||||
OnDemandPagedList,
|
||||
)
|
||||
|
||||
@ -32,7 +32,7 @@ class ACastIE(InfoExtractor):
|
||||
}, {
|
||||
# test with multiple blings
|
||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||
'md5': '55c0097badd7095f494c99a172f86501',
|
||||
'md5': 'e87d5b8516cd04c0d81b6ee1caca28d0',
|
||||
'info_dict': {
|
||||
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'ext': 'mp3',
|
||||
@ -40,23 +40,24 @@ class ACastIE(InfoExtractor):
|
||||
'timestamp': 1477346700,
|
||||
'upload_date': '20161024',
|
||||
'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
|
||||
'duration': 2797,
|
||||
'duration': 2766,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel, display_id = re.match(self._VALID_URL, url).groups()
|
||||
cast_data = self._download_json(
|
||||
'https://embed.acast.com/api/acasts/%s/%s' % (channel, display_id), display_id)
|
||||
'https://play-api.acast.com/splash/%s/%s' % (channel, display_id), display_id)
|
||||
e = cast_data['result']['episode']
|
||||
return {
|
||||
'id': compat_str(cast_data['id']),
|
||||
'id': compat_str(e['id']),
|
||||
'display_id': display_id,
|
||||
'url': [b['audio'] for b in cast_data['blings'] if b['type'] == 'BlingAudio'][0],
|
||||
'title': cast_data['name'],
|
||||
'description': cast_data.get('description'),
|
||||
'thumbnail': cast_data.get('image'),
|
||||
'timestamp': parse_iso8601(cast_data.get('publishingDate')),
|
||||
'duration': int_or_none(cast_data.get('duration')),
|
||||
'url': e['mediaUrl'],
|
||||
'title': e['name'],
|
||||
'description': e.get('description'),
|
||||
'thumbnail': e.get('image'),
|
||||
'timestamp': unified_timestamp(e.get('publishingDate')),
|
||||
'duration': int_or_none(e.get('duration')),
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,13 +1,15 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt
|
||||
from ..compat import compat_ord
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_ord,
|
||||
)
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
ExtractorError,
|
||||
@ -48,9 +50,9 @@ class ADNIE(InfoExtractor):
|
||||
|
||||
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
||||
bytes_to_intlist(base64.b64decode(enc_subtitles[24:])),
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
|
||||
bytes_to_intlist(b'\x1b\xe0\x29\x61\x38\x94\x24\x00\x12\xbd\xc5\x80\xac\xce\xbe\xb0'),
|
||||
bytes_to_intlist(base64.b64decode(enc_subtitles[:24]))
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
|
||||
))
|
||||
subtitles_json = self._parse_json(
|
||||
dec_subtitles[:-compat_ord(dec_subtitles[-1])].decode(),
|
||||
|
@ -85,8 +85,8 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
|
||||
if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
|
||||
error = self._search_regex(
|
||||
r'<p class="alert alert-danger">(.+?)</p>',
|
||||
response, 'error', default=None)
|
||||
r'<p[^>]+\bclass=(["\'])(?:(?!\1).)*\balert\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</p>',
|
||||
response, 'error', default=None, group='error')
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
@ -21,11 +21,11 @@ class AWSIE(InfoExtractor):
|
||||
'Accept': 'application/json',
|
||||
'Host': self._AWS_PROXY_HOST,
|
||||
'X-Amz-Date': amz_date,
|
||||
'X-Api-Key': self._AWS_API_KEY
|
||||
}
|
||||
session_token = aws_dict.get('session_token')
|
||||
if session_token:
|
||||
headers['X-Amz-Security-Token'] = session_token
|
||||
headers['X-Api-Key'] = self._AWS_API_KEY
|
||||
|
||||
def aws_hash(s):
|
||||
return hashlib.sha256(s.encode('utf-8')).hexdigest()
|
||||
@ -33,9 +33,9 @@ class AWSIE(InfoExtractor):
|
||||
# Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
|
||||
canonical_querystring = compat_urllib_parse_urlencode(query)
|
||||
canonical_headers = ''
|
||||
for header_name, header_value in headers.items():
|
||||
for header_name, header_value in sorted(headers.items()):
|
||||
canonical_headers += '%s:%s\n' % (header_name.lower(), header_value)
|
||||
signed_headers = ';'.join([header.lower() for header in headers.keys()])
|
||||
signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())])
|
||||
canonical_request = '\n'.join([
|
||||
'GET',
|
||||
aws_dict['uri'],
|
||||
|
@ -1,11 +1,13 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
|
||||
|
||||
class BigflixIE(InfoExtractor):
|
||||
@ -39,8 +41,8 @@ class BigflixIE(InfoExtractor):
|
||||
webpage, 'title')
|
||||
|
||||
def decode_url(quoted_b64_url):
|
||||
return base64.b64decode(compat_urllib_parse_unquote(
|
||||
quoted_b64_url).encode('ascii')).decode('utf-8')
|
||||
return compat_b64decode(compat_urllib_parse_unquote(
|
||||
quoted_b64_url)).decode('utf-8')
|
||||
|
||||
formats = []
|
||||
for height, encoded_url in re.findall(
|
||||
|
@ -102,6 +102,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
|
||||
headers = {
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
'Referer': url
|
||||
}
|
||||
headers.update(self.geo_verification_headers())
|
||||
|
||||
@ -116,10 +117,15 @@ class BiliBiliIE(InfoExtractor):
|
||||
payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
|
||||
sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
|
||||
|
||||
headers = {
|
||||
'Referer': url
|
||||
}
|
||||
headers.update(self.geo_verification_headers())
|
||||
|
||||
video_info = self._download_json(
|
||||
'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
|
||||
video_id, note='Downloading video info page',
|
||||
headers=self.geo_verification_headers())
|
||||
headers=headers)
|
||||
|
||||
if 'durl' not in video_info:
|
||||
self._report_error(video_info)
|
||||
|
@ -464,7 +464,7 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
'timestamp': 1441391203,
|
||||
'upload_date': '20150904',
|
||||
'uploader_id': '929656772001',
|
||||
'formats': 'mincount:22',
|
||||
'formats': 'mincount:20',
|
||||
},
|
||||
}, {
|
||||
# with rtmp streams
|
||||
@ -478,7 +478,7 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
'timestamp': 1433556729,
|
||||
'upload_date': '20150606',
|
||||
'uploader_id': '4036320279001',
|
||||
'formats': 'mincount:41',
|
||||
'formats': 'mincount:39',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@ -564,59 +564,7 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
|
||||
return entries
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
|
||||
|
||||
account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://players.brightcove.net/%s/%s_%s/index.min.js'
|
||||
% (account_id, player_id, embed), video_id)
|
||||
|
||||
policy_key = None
|
||||
|
||||
catalog = self._search_regex(
|
||||
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
|
||||
if catalog:
|
||||
catalog = self._parse_json(
|
||||
js_to_json(catalog), video_id, fatal=False)
|
||||
if catalog:
|
||||
policy_key = catalog.get('policyKey')
|
||||
|
||||
if not policy_key:
|
||||
policy_key = self._search_regex(
|
||||
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
|
||||
webpage, 'policy key', group='pk')
|
||||
|
||||
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
|
||||
try:
|
||||
json_data = self._download_json(api_url, video_id, headers={
|
||||
'Accept': 'application/json;pk=%s' % policy_key
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
|
||||
message = json_data.get('message') or json_data['error_code']
|
||||
if json_data.get('error_subcode') == 'CLIENT_GEO':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
raise ExtractorError(message, expected=True)
|
||||
raise
|
||||
|
||||
errors = json_data.get('errors')
|
||||
if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
|
||||
custom_fields = json_data['custom_fields']
|
||||
tve_token = self._extract_mvpd_auth(
|
||||
smuggled_data['source_url'], video_id,
|
||||
custom_fields['bcadobepassrequestorid'],
|
||||
custom_fields['bcadobepassresourceid'])
|
||||
json_data = self._download_json(
|
||||
api_url, video_id, headers={
|
||||
'Accept': 'application/json;pk=%s' % policy_key
|
||||
}, query={
|
||||
'tveToken': tve_token,
|
||||
})
|
||||
|
||||
def _parse_brightcove_metadata(self, json_data, video_id):
|
||||
title = json_data['name'].strip()
|
||||
|
||||
formats = []
|
||||
@ -682,6 +630,7 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
errors = json_data.get('errors')
|
||||
if not formats and errors:
|
||||
error = errors[0]
|
||||
raise ExtractorError(
|
||||
@ -708,9 +657,64 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
|
||||
'duration': duration,
|
||||
'timestamp': parse_iso8601(json_data.get('published_at')),
|
||||
'uploader_id': account_id,
|
||||
'uploader_id': json_data.get('account_id'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'tags': json_data.get('tags', []),
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
|
||||
|
||||
account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://players.brightcove.net/%s/%s_%s/index.min.js'
|
||||
% (account_id, player_id, embed), video_id)
|
||||
|
||||
policy_key = None
|
||||
|
||||
catalog = self._search_regex(
|
||||
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
|
||||
if catalog:
|
||||
catalog = self._parse_json(
|
||||
js_to_json(catalog), video_id, fatal=False)
|
||||
if catalog:
|
||||
policy_key = catalog.get('policyKey')
|
||||
|
||||
if not policy_key:
|
||||
policy_key = self._search_regex(
|
||||
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
|
||||
webpage, 'policy key', group='pk')
|
||||
|
||||
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
|
||||
try:
|
||||
json_data = self._download_json(api_url, video_id, headers={
|
||||
'Accept': 'application/json;pk=%s' % policy_key
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
|
||||
message = json_data.get('message') or json_data['error_code']
|
||||
if json_data.get('error_subcode') == 'CLIENT_GEO':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
raise ExtractorError(message, expected=True)
|
||||
raise
|
||||
|
||||
errors = json_data.get('errors')
|
||||
if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
|
||||
custom_fields = json_data['custom_fields']
|
||||
tve_token = self._extract_mvpd_auth(
|
||||
smuggled_data['source_url'], video_id,
|
||||
custom_fields['bcadobepassrequestorid'],
|
||||
custom_fields['bcadobepassresourceid'])
|
||||
json_data = self._download_json(
|
||||
api_url, video_id, headers={
|
||||
'Accept': 'application/json;pk=%s' % policy_key
|
||||
}, query={
|
||||
'tveToken': tve_token,
|
||||
})
|
||||
|
||||
return self._parse_brightcove_metadata(json_data, video_id)
|
||||
|
@ -4,59 +4,36 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
# ExtractorError,
|
||||
# HEADRequest,
|
||||
int_or_none,
|
||||
qualities,
|
||||
remove_end,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class CanalplusIE(InfoExtractor):
|
||||
IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:
|
||||
(?:(?:www|m)\.)?canalplus\.fr|
|
||||
(?:www\.)?piwiplus\.fr|
|
||||
(?:www\.)?d8\.tv|
|
||||
(?:www\.)?c8\.fr|
|
||||
(?:www\.)?d17\.tv|
|
||||
(?:(?:football|www)\.)?cstar\.fr|
|
||||
(?:www\.)?itele\.fr
|
||||
)/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
|
||||
player\.canalplus\.fr/#/(?P<id>\d+)
|
||||
)
|
||||
|
||||
'''
|
||||
IE_DESC = 'mycanal.fr and piwiplus.fr'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>mycanal|piwiplus)\.fr/(?:[^/]+/)*(?P<display_id>[^?/]+)(?:\.html\?.*\bvid=|/p/)(?P<id>\d+)'
|
||||
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json'
|
||||
_SITE_ID_MAP = {
|
||||
'canalplus': 'cplus',
|
||||
'mycanal': 'cplus',
|
||||
'piwiplus': 'teletoon',
|
||||
'd8': 'd8',
|
||||
'c8': 'd8',
|
||||
'd17': 'd17',
|
||||
'cstar': 'd17',
|
||||
'itele': 'itele',
|
||||
}
|
||||
|
||||
# Only works for direct mp4 URLs
|
||||
_GEO_COUNTRIES = ['FR']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',
|
||||
'url': 'https://www.mycanal.fr/d17-emissions/lolywood/p/1397061',
|
||||
'info_dict': {
|
||||
'id': '1405510',
|
||||
'display_id': 'pid1830-c-zapping',
|
||||
'id': '1397061',
|
||||
'display_id': 'lolywood',
|
||||
'ext': 'mp4',
|
||||
'title': 'Zapping - 02/07/2016',
|
||||
'description': 'Le meilleur de toutes les chaînes, tous les jours',
|
||||
'upload_date': '20160702',
|
||||
'title': 'Euro 2016 : Je préfère te prévenir - Lolywood - Episode 34',
|
||||
'description': 'md5:7d97039d455cb29cdba0d652a0efaa5e',
|
||||
'upload_date': '20160602',
|
||||
},
|
||||
}, {
|
||||
# geo restricted, bypassed
|
||||
@ -70,64 +47,12 @@ class CanalplusIE(InfoExtractor):
|
||||
'upload_date': '20140724',
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
}, {
|
||||
# geo restricted, bypassed
|
||||
'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html?vid=1443684',
|
||||
'md5': 'bb6f9f343296ab7ebd88c97b660ecf8d',
|
||||
'info_dict': {
|
||||
'id': '1443684',
|
||||
'display_id': 'pid6318-videos-integrales',
|
||||
'ext': 'mp4',
|
||||
'title': 'Guess my iep ! - TPMP - 07/04/2017',
|
||||
'description': 'md5:6f005933f6e06760a9236d9b3b5f17fa',
|
||||
'upload_date': '20170407',
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
}, {
|
||||
'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
|
||||
'info_dict': {
|
||||
'id': '1420176',
|
||||
'display_id': 'rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
|
||||
'ext': 'mp4',
|
||||
'title': 'L\'invité de Michaël Darmon du 14/10/2016 - ',
|
||||
'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.',
|
||||
'upload_date': '20161014',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://football.cstar.fr/cstar-minisite-foot/pid7566-feminines-videos.html?vid=1416769',
|
||||
'info_dict': {
|
||||
'id': '1416769',
|
||||
'display_id': 'pid7566-feminines-videos',
|
||||
'ext': 'mp4',
|
||||
'title': 'France - Albanie : les temps forts de la soirée - 20/09/2016',
|
||||
'description': 'md5:c3f30f2aaac294c1c969b3294de6904e',
|
||||
'upload_date': '20160921',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://m.canalplus.fr/?vid=1398231',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.d17.tv/emissions/pid8303-lolywood.html?vid=1397061',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
site, display_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]]
|
||||
|
||||
# Beware, some subclasses do not define an id group
|
||||
display_id = remove_end(dict_get(mobj.groupdict(), ('display_id', 'id', 'vid')), '.html')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
[r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)',
|
||||
r'id=["\']canal_video_player(?P<id>\d+)',
|
||||
r'data-video=["\'](?P<id>\d+)'],
|
||||
webpage, 'video id', default=mobj.group('vid'), group='id')
|
||||
site_id = self._SITE_ID_MAP[site]
|
||||
|
||||
info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
|
||||
video_data = self._download_json(info_url, video_id, 'Downloading video JSON')
|
||||
@ -161,7 +86,7 @@ class CanalplusIE(InfoExtractor):
|
||||
format_url + '?hdcore=2.11.3', video_id, f4m_id=format_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
# the secret extracted ya function in http://player.canalplus.fr/common/js/canalPlayer.js
|
||||
# the secret extracted from ya function in http://player.canalplus.fr/common/js/canalPlayer.js
|
||||
'url': format_url + '?secret=pqzerjlsmdkjfoiuerhsdlfknaes',
|
||||
'format_id': format_id,
|
||||
'preference': preference(format_id),
|
||||
|
@ -1,11 +1,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError
|
||||
@ -58,7 +58,7 @@ class ChilloutzoneIE(InfoExtractor):
|
||||
|
||||
base64_video_info = self._html_search_regex(
|
||||
r'var cozVidData = "(.+?)";', webpage, 'video data')
|
||||
decoded_video_info = base64.b64decode(base64_video_info.encode('utf-8')).decode('utf-8')
|
||||
decoded_video_info = compat_b64decode(base64_video_info).decode('utf-8')
|
||||
video_info_dict = json.loads(decoded_video_info)
|
||||
|
||||
# get video information from dict
|
||||
|
@ -1,10 +1,10 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import parse_duration
|
||||
|
||||
|
||||
@ -44,8 +44,7 @@ class ChirbitIE(InfoExtractor):
|
||||
|
||||
# Reverse engineered from https://chirb.it/js/chirbit.player.js (look
|
||||
# for soundURL)
|
||||
audio_url = base64.b64decode(
|
||||
data_fd[::-1].encode('ascii')).decode('utf-8')
|
||||
audio_url = compat_b64decode(data_fd[::-1]).decode('utf-8')
|
||||
|
||||
title = self._search_regex(
|
||||
r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title')
|
||||
|
@ -1,93 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class CollegeRamaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://collegerama\.tudelft\.nl/Mediasite/Play/(?P<id>[\da-f]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://collegerama.tudelft.nl/Mediasite/Play/585a43626e544bdd97aeb71a0ec907a01d',
|
||||
'md5': '481fda1c11f67588c0d9d8fbdced4e39',
|
||||
'info_dict': {
|
||||
'id': '585a43626e544bdd97aeb71a0ec907a01d',
|
||||
'ext': 'mp4',
|
||||
'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.',
|
||||
'description': '',
|
||||
'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
|
||||
'duration': 7713.088,
|
||||
'timestamp': 1413309600,
|
||||
'upload_date': '20141014',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://collegerama.tudelft.nl/Mediasite/Play/86a9ea9f53e149079fbdb4202b521ed21d?catalog=fd32fd35-6c99-466c-89d4-cd3c431bc8a4',
|
||||
'md5': 'ef1fdded95bdf19b12c5999949419c92',
|
||||
'info_dict': {
|
||||
'id': '86a9ea9f53e149079fbdb4202b521ed21d',
|
||||
'ext': 'wmv',
|
||||
'title': '64ste Vakantiecursus: Afvalwater',
|
||||
'description': 'md5:7fd774865cc69d972f542b157c328305',
|
||||
'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
|
||||
'duration': 10853,
|
||||
'timestamp': 1326446400,
|
||||
'upload_date': '20120113',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
player_options_request = {
|
||||
'getPlayerOptionsRequest': {
|
||||
'ResourceId': video_id,
|
||||
'QueryString': '',
|
||||
}
|
||||
}
|
||||
|
||||
request = sanitized_Request(
|
||||
'http://collegerama.tudelft.nl/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions',
|
||||
json.dumps(player_options_request))
|
||||
request.add_header('Content-Type', 'application/json')
|
||||
|
||||
player_options = self._download_json(request, video_id)
|
||||
|
||||
presentation = player_options['d']['Presentation']
|
||||
title = presentation['Title']
|
||||
description = presentation.get('Description')
|
||||
thumbnail = None
|
||||
duration = float_or_none(presentation.get('Duration'), 1000)
|
||||
timestamp = int_or_none(presentation.get('UnixTime'), 1000)
|
||||
|
||||
formats = []
|
||||
for stream in presentation['Streams']:
|
||||
for video in stream['VideoUrls']:
|
||||
thumbnail_url = stream.get('ThumbnailUrl')
|
||||
if thumbnail_url:
|
||||
thumbnail = 'http://collegerama.tudelft.nl' + thumbnail_url
|
||||
format_id = video['MediaType']
|
||||
if format_id == 'SS':
|
||||
continue
|
||||
formats.append({
|
||||
'url': video['Location'],
|
||||
'format_id': format_id,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
}
|
@ -495,6 +495,16 @@ class InfoExtractor(object):
|
||||
self.to_screen('%s' % (note,))
|
||||
else:
|
||||
self.to_screen('%s: %s' % (video_id, note))
|
||||
|
||||
# Some sites check X-Forwarded-For HTTP header in order to figure out
|
||||
# the origin of the client behind proxy. This allows bypassing geo
|
||||
# restriction by faking this header's value to IP that belongs to some
|
||||
# geo unrestricted country. We will do so once we encounter any
|
||||
# geo restriction error.
|
||||
if self._x_forwarded_for_ip:
|
||||
if 'X-Forwarded-For' not in headers:
|
||||
headers['X-Forwarded-For'] = self._x_forwarded_for_ip
|
||||
|
||||
if isinstance(url_or_request, compat_urllib_request.Request):
|
||||
url_or_request = update_Request(
|
||||
url_or_request, data=data, headers=headers, query=query)
|
||||
@ -524,15 +534,6 @@ class InfoExtractor(object):
|
||||
if isinstance(url_or_request, (compat_str, str)):
|
||||
url_or_request = url_or_request.partition('#')[0]
|
||||
|
||||
# Some sites check X-Forwarded-For HTTP header in order to figure out
|
||||
# the origin of the client behind proxy. This allows bypassing geo
|
||||
# restriction by faking this header's value to IP that belongs to some
|
||||
# geo unrestricted country. We will do so once we encounter any
|
||||
# geo restriction error.
|
||||
if self._x_forwarded_for_ip:
|
||||
if 'X-Forwarded-For' not in headers:
|
||||
headers['X-Forwarded-For'] = self._x_forwarded_for_ip
|
||||
|
||||
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
|
||||
if urlh is False:
|
||||
assert not fatal
|
||||
@ -1879,6 +1880,7 @@ class InfoExtractor(object):
|
||||
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
|
||||
'format_note': 'DASH %s' % content_type,
|
||||
'filesize': filesize,
|
||||
'container': mimetype2ext(mime_type) + '_dash',
|
||||
}
|
||||
f.update(parse_codecs(representation_attrib.get('codecs')))
|
||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||
@ -2006,16 +2008,14 @@ class InfoExtractor(object):
|
||||
f['url'] = initialization_url
|
||||
f['fragments'].append({location_key(initialization_url): initialization_url})
|
||||
f['fragments'].extend(representation_ms_info['fragments'])
|
||||
try:
|
||||
existing_format = next(
|
||||
fo for fo in formats
|
||||
if fo['format_id'] == representation_id)
|
||||
except StopIteration:
|
||||
full_info = formats_dict.get(representation_id, {}).copy()
|
||||
full_info.update(f)
|
||||
formats.append(full_info)
|
||||
else:
|
||||
existing_format.update(f)
|
||||
# According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
|
||||
# is not necessarily unique within a Period thus formats with
|
||||
# the same `format_id` are quite possible. There are numerous examples
|
||||
# of such manifests (see https://github.com/rg3/youtube-dl/issues/15111,
|
||||
# https://github.com/rg3/youtube-dl/issues/13919)
|
||||
full_info = formats_dict.get(representation_id, {}).copy()
|
||||
full_info.update(f)
|
||||
formats.append(full_info)
|
||||
else:
|
||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||
return formats
|
||||
@ -2055,7 +2055,7 @@ class InfoExtractor(object):
|
||||
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
|
||||
stream_name = stream.get('Name')
|
||||
for track in stream.findall('QualityLevel'):
|
||||
fourcc = track.get('FourCC')
|
||||
fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
|
||||
# TODO: add support for WVC1 and WMAP
|
||||
if fourcc not in ('H264', 'AVC1', 'AACL'):
|
||||
self.report_warning('%s is not a supported codec' % fourcc)
|
||||
@ -2404,7 +2404,7 @@ class InfoExtractor(object):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=m3u8_id, fatal=False))
|
||||
elif ext == 'mpd':
|
||||
elif source_type == 'dash' or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
source_url, video_id, mpd_id=mpd_id, fatal=False))
|
||||
elif ext == 'smil':
|
||||
|
@ -3,13 +3,13 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import base64
|
||||
import zlib
|
||||
|
||||
from hashlib import sha1
|
||||
from math import pow, sqrt, floor
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_etree_fromstring,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_request,
|
||||
@ -272,8 +272,8 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
}
|
||||
|
||||
def _decrypt_subtitles(self, data, iv, id):
|
||||
data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
|
||||
iv = bytes_to_intlist(base64.b64decode(iv.encode('utf-8')))
|
||||
data = bytes_to_intlist(compat_b64decode(data))
|
||||
iv = bytes_to_intlist(compat_b64decode(iv))
|
||||
id = int(id)
|
||||
|
||||
def obfuscate_key_aux(count, modulo, start):
|
||||
|
@ -10,6 +10,7 @@ from ..aes import (
|
||||
aes_cbc_decrypt,
|
||||
aes_cbc_encrypt,
|
||||
)
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
bytes_to_long,
|
||||
@ -93,7 +94,7 @@ class DaisukiMottoIE(InfoExtractor):
|
||||
|
||||
rtn = self._parse_json(
|
||||
intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(
|
||||
base64.b64decode(encrypted_rtn)),
|
||||
compat_b64decode(encrypted_rtn)),
|
||||
aes_key, iv)).decode('utf-8').rstrip('\0'),
|
||||
video_id)
|
||||
|
||||
|
56
youtube_dl/extractor/digg.py
Normal file
56
youtube_dl/extractor/digg.py
Normal file
@ -0,0 +1,56 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class DiggIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?digg\.com/video/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
# JWPlatform via provider
|
||||
'url': 'http://digg.com/video/sci-fi-short-jonah-daniel-kaluuya-get-out',
|
||||
'info_dict': {
|
||||
'id': 'LcqvmS0b',
|
||||
'ext': 'mp4',
|
||||
'title': "'Get Out' Star Daniel Kaluuya Goes On 'Moby Dick'-Like Journey In Sci-Fi Short 'Jonah'",
|
||||
'description': 'md5:541bb847648b6ee3d6514bc84b82efda',
|
||||
'upload_date': '20180109',
|
||||
'timestamp': 1515530551,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# Youtube via provider
|
||||
'url': 'http://digg.com/video/dog-boat-seal-play',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# vimeo as regular embed
|
||||
'url': 'http://digg.com/video/dream-girl-short-film',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
info = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)video_info\s*=\s*({.+?});\n', webpage, 'video info',
|
||||
default='{}'), display_id, transform_source=js_to_json,
|
||||
fatal=False)
|
||||
|
||||
video_id = info.get('video_id')
|
||||
|
||||
if video_id:
|
||||
provider = info.get('provider_name')
|
||||
if provider == 'youtube':
|
||||
return self.url_result(
|
||||
video_id, ie='Youtube', video_id=video_id)
|
||||
elif provider == 'jwplayer':
|
||||
return self.url_result(
|
||||
'jwplatform:%s' % video_id, ie='JWPlatform',
|
||||
video_id=video_id)
|
||||
|
||||
return self.url_result(url, 'Generic')
|
@ -1,10 +1,10 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
qualities,
|
||||
sanitized_Request,
|
||||
@ -42,7 +42,7 @@ class DumpertIE(InfoExtractor):
|
||||
r'data-files="([^"]+)"', webpage, 'data files')
|
||||
|
||||
files = self._parse_json(
|
||||
base64.b64decode(files_base64.encode('utf-8')).decode('utf-8'),
|
||||
compat_b64decode(files_base64).decode('utf-8'),
|
||||
video_id)
|
||||
|
||||
quality = qualities(['flv', 'mobile', 'tablet', '720p'])
|
||||
|
@ -1,13 +1,13 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_b64decode,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
@ -36,9 +36,9 @@ class EinthusanIE(InfoExtractor):
|
||||
|
||||
# reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
|
||||
def _decrypt(self, encrypted_data, video_id):
|
||||
return self._parse_json(base64.b64decode((
|
||||
return self._parse_json(compat_b64decode((
|
||||
encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1]
|
||||
).encode('ascii')).decode('utf-8'), video_id)
|
||||
)).decode('utf-8'), video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@ -1,6 +1,9 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .once import OnceIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
@ -9,22 +12,27 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class ESPNIE(InfoExtractor):
|
||||
class ESPNIE(OnceIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:\w+\.)+)?espn\.go|
|
||||
(?:www\.)?espn
|
||||
)\.com/
|
||||
(?:
|
||||
(?:
|
||||
video/clip|
|
||||
watch/player
|
||||
)
|
||||
(?:
|
||||
\?.*?\bid=|
|
||||
/_/id/
|
||||
)
|
||||
(?:
|
||||
(?:(?:\w+\.)+)?espn\.go|
|
||||
(?:www\.)?espn
|
||||
)\.com/
|
||||
(?:
|
||||
(?:
|
||||
video/(?:clip|iframe/twitter)|
|
||||
watch/player
|
||||
)
|
||||
(?:
|
||||
.*?\?.*?\bid=|
|
||||
/_/id/
|
||||
)
|
||||
)
|
||||
)|
|
||||
(?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
@ -77,6 +85,15 @@ class ESPNIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.espn.com/video/clip/_/id/17989860',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.espnfc.us/video/espn-fc-tv/86/video/3319154/nashville-unveiled-as-the-newest-club-in-mls',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -93,7 +110,9 @@ class ESPNIE(InfoExtractor):
|
||||
|
||||
def traverse_source(source, base_source_id=None):
|
||||
for source_id, source in source.items():
|
||||
if isinstance(source, compat_str):
|
||||
if source_id == 'alert':
|
||||
continue
|
||||
elif isinstance(source, compat_str):
|
||||
extract_source(source, base_source_id)
|
||||
elif isinstance(source, dict):
|
||||
traverse_source(
|
||||
@ -106,7 +125,9 @@ class ESPNIE(InfoExtractor):
|
||||
return
|
||||
format_urls.add(source_url)
|
||||
ext = determine_ext(source_url)
|
||||
if ext == 'smil':
|
||||
if OnceIE.suitable(source_url):
|
||||
formats.extend(self._extract_once_formats(source_url))
|
||||
elif ext == 'smil':
|
||||
formats.extend(self._extract_smil_formats(
|
||||
source_url, video_id, fatal=False))
|
||||
elif ext == 'f4m':
|
||||
@ -117,12 +138,24 @@ class ESPNIE(InfoExtractor):
|
||||
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=source_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
f = {
|
||||
'url': source_url,
|
||||
'format_id': source_id,
|
||||
})
|
||||
}
|
||||
mobj = re.search(r'(\d+)p(\d+)_(\d+)k\.', source_url)
|
||||
if mobj:
|
||||
f.update({
|
||||
'height': int(mobj.group(1)),
|
||||
'fps': int(mobj.group(2)),
|
||||
'tbr': int(mobj.group(3)),
|
||||
})
|
||||
if source_id == 'mezzanine':
|
||||
f['preference'] = 1
|
||||
formats.append(f)
|
||||
|
||||
traverse_source(clip['links']['source'])
|
||||
links = clip.get('links', {})
|
||||
traverse_source(links.get('source', {}))
|
||||
traverse_source(links.get('mobile', {}))
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = clip.get('caption') or clip.get('description')
|
||||
@ -144,9 +177,6 @@ class ESPNIE(InfoExtractor):
|
||||
class ESPNArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://espn.go.com/nba/recap?gameId=400793786',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
@ -175,3 +205,34 @@ class ESPNArticleIE(InfoExtractor):
|
||||
|
||||
return self.url_result(
|
||||
'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())
|
||||
|
||||
|
||||
class FiveThirtyEightIE(InfoExtractor):
    # FiveThirtyEight feature articles embed an ESPN clip; this extractor only
    # locates the numeric ESPN clip id in the page and hands over to ESPNIE.
    _VALID_URL = r'https?://(?:www\.)?fivethirtyeight\.com/features/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/',
        'info_dict': {
            'id': '21846851',
            'ext': 'mp4',
            'title': 'FiveThirtyEight: The Raiders can still make the playoffs',
            'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.',
            'timestamp': 1513960621,
            'upload_date': '20171222',
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }

    def _real_extract(self, url):
        # The URL slug is only used to label the page download.
        article_slug = self._match_id(url)
        page = self._download_webpage(url, article_slug)

        # The id handed to ESPNIE is the value of the data-video-id attribute.
        espn_clip_id = self._search_regex(
            r'data-video-id=["\'](?P<id>\d+)',
            page, 'video id', group='id')

        clip_url = 'http://espn.go.com/video/clip?id=%s' % espn_clip_id
        return self.url_result(clip_url, ESPNIE.ie_key())
|
||||
|
@ -205,7 +205,6 @@ from .cnn import (
|
||||
CNNArticleIE,
|
||||
)
|
||||
from .coub import CoubIE
|
||||
from .collegerama import CollegeRamaIE
|
||||
from .comedycentral import (
|
||||
ComedyCentralFullEpisodesIE,
|
||||
ComedyCentralIE,
|
||||
@ -260,6 +259,7 @@ from .deezer import DeezerPlaylistIE
|
||||
from .democracynow import DemocracynowIE
|
||||
from .dfb import DFBIE
|
||||
from .dhm import DHMIE
|
||||
from .digg import DiggIE
|
||||
from .dotsub import DotsubIE
|
||||
from .douyutv import (
|
||||
DouyuShowIE,
|
||||
@ -322,6 +322,7 @@ from .escapist import EscapistIE
|
||||
from .espn import (
|
||||
ESPNIE,
|
||||
ESPNArticleIE,
|
||||
FiveThirtyEightIE,
|
||||
)
|
||||
from .esri import EsriVideoIE
|
||||
from .etonline import ETOnlineIE
|
||||
@ -344,6 +345,7 @@ from .filmon import (
|
||||
FilmOnIE,
|
||||
FilmOnChannelIE,
|
||||
)
|
||||
from .filmweb import FilmwebIE
|
||||
from .firsttv import FirstTVIE
|
||||
from .fivemin import FiveMinIE
|
||||
from .fivetv import FiveTVIE
|
||||
@ -464,6 +466,7 @@ from .indavideo import (
|
||||
)
|
||||
from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE, InstagramUserIE
|
||||
from .internazionale import InternazionaleIE
|
||||
from .internetvideoarchive import InternetVideoArchiveIE
|
||||
from .iprima import IPrimaIE
|
||||
from .iqiyi import IqiyiIE
|
||||
@ -487,7 +490,6 @@ from .jwplatform import JWPlatformIE
|
||||
from .jpopsukitv import JpopsukiIE
|
||||
from .kakao import KakaoIE
|
||||
from .kaltura import KalturaIE
|
||||
from .kamcord import KamcordIE
|
||||
from .kanalplay import KanalPlayIE
|
||||
from .kankan import KankanIE
|
||||
from .karaoketv import KaraoketvIE
|
||||
@ -573,6 +575,7 @@ from .massengeschmacktv import MassengeschmackTVIE
|
||||
from .matchtv import MatchTVIE
|
||||
from .mdr import MDRIE
|
||||
from .mediaset import MediasetIE
|
||||
from .mediasite import MediasiteIE
|
||||
from .medici import MediciIE
|
||||
from .megaphone import MegaphoneIE
|
||||
from .meipai import MeipaiIE
|
||||
@ -606,7 +609,10 @@ from .mofosex import MofosexIE
|
||||
from .mojvideo import MojvideoIE
|
||||
from .moniker import MonikerIE
|
||||
from .morningstar import MorningstarIE
|
||||
from .motherless import MotherlessIE
|
||||
from .motherless import (
|
||||
MotherlessIE,
|
||||
MotherlessGroupIE
|
||||
)
|
||||
from .motorsport import MotorsportIE
|
||||
from .movieclips import MovieClipsIE
|
||||
from .moviezine import MoviezineIE
|
||||
@ -875,7 +881,6 @@ from .revision3 import (
|
||||
Revision3IE,
|
||||
)
|
||||
from .rice import RICEIE
|
||||
from .ringtv import RingTVIE
|
||||
from .rmcdecouverte import RMCDecouverteIE
|
||||
from .ro220 import Ro220IE
|
||||
from .rockstargames import RockstarGamesIE
|
||||
@ -895,6 +900,7 @@ from .rtp import RTPIE
|
||||
from .rts import RTSIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
|
||||
from .rtvnh import RTVNHIE
|
||||
from .rtvs import RTVSIE
|
||||
from .rudo import RudoIE
|
||||
from .ruhd import RUHDIE
|
||||
from .ruleporn import RulePornIE
|
||||
@ -909,7 +915,6 @@ from .rutube import (
|
||||
from .rutv import RUTVIE
|
||||
from .ruutu import RuutuIE
|
||||
from .ruv import RuvIE
|
||||
from .sandia import SandiaIE
|
||||
from .safari import (
|
||||
SafariIE,
|
||||
SafariApiIE,
|
||||
@ -926,6 +931,7 @@ from .senateisvp import SenateISVPIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .servus import ServusIE
|
||||
from .sevenplus import SevenPlusIE
|
||||
from .sexu import SexuIE
|
||||
from .seznamzpravy import (
|
||||
SeznamZpravyIE,
|
||||
@ -988,7 +994,7 @@ from .stitcher import StitcherIE
|
||||
from .sport5 import Sport5IE
|
||||
from .sportbox import SportBoxEmbedIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .sportschau import SportschauIE
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
from .sprout import SproutIE
|
||||
from .srgssr import (
|
||||
SRGSSRIE,
|
||||
@ -1044,7 +1050,6 @@ from .theplatform import (
|
||||
ThePlatformFeedIE,
|
||||
)
|
||||
from .thescene import TheSceneIE
|
||||
from .thesixtyone import TheSixtyOneIE
|
||||
from .thestar import TheStarIE
|
||||
from .thesun import TheSunIE
|
||||
from .theweatherchannel import TheWeatherChannelIE
|
||||
@ -1122,6 +1127,7 @@ from .tvplayer import TVPlayerIE
|
||||
from .tweakers import TweakersIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
from .twentythreevideo import TwentyThreeVideoIE
|
||||
from .twitch import (
|
||||
TwitchVideoIE,
|
||||
TwitchChapterIE,
|
||||
@ -1144,8 +1150,10 @@ from .udemy import (
|
||||
UdemyCourseIE
|
||||
)
|
||||
from .udn import UDNEmbedIE
|
||||
from .ufctv import UFCTVIE
|
||||
from .uktvplay import UKTVPlayIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .umg import UMGDeIE
|
||||
from .unistra import UnistraIE
|
||||
from .unity import UnityIE
|
||||
from .uol import UOLIE
|
||||
@ -1283,6 +1291,8 @@ from .watchbox import WatchBoxIE
|
||||
from .watchindianporn import WatchIndianPornIE
|
||||
from .wdr import (
|
||||
WDRIE,
|
||||
WDRPageIE,
|
||||
WDRElefantIE,
|
||||
WDRMobileIE,
|
||||
)
|
||||
from .webcaster import (
|
||||
@ -1293,6 +1303,10 @@ from .webofstories import (
|
||||
WebOfStoriesIE,
|
||||
WebOfStoriesPlaylistIE,
|
||||
)
|
||||
from .weibo import (
|
||||
WeiboIE,
|
||||
WeiboMobileIE
|
||||
)
|
||||
from .weiqitv import WeiqiTVIE
|
||||
from .wimp import WimpIE
|
||||
from .wistia import WistiaIE
|
||||
@ -1318,6 +1332,10 @@ from .xiami import (
|
||||
XiamiArtistIE,
|
||||
XiamiCollectionIE
|
||||
)
|
||||
from .ximalaya import (
|
||||
XimalayaIE,
|
||||
XimalayaAlbumIE
|
||||
)
|
||||
from .xminus import XMinusIE
|
||||
from .xnxx import XNXXIE
|
||||
from .xstream import XstreamIE
|
||||
|
42
youtube_dl/extractor/filmweb.py
Normal file
42
youtube_dl/extractor/filmweb.py
Normal file
@ -0,0 +1,42 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class FilmwebIE(InfoExtractor):
    # Resolves filmweb.no trailer/news articles to the embedded player iframe
    # and delegates the actual extraction to the TwentyThreeVideo extractor.
    _VALID_URL = r'https?://(?:www\.)?filmweb\.no/(?P<type>trailere|filmnytt)/article(?P<id>\d+)\.ece'
    _TEST = {
        'url': 'http://www.filmweb.no/trailere/article1264921.ece',
        'md5': 'e353f47df98e557d67edaceda9dece89',
        'info_dict': {
            'id': '13033574',
            'ext': 'mp4',
            'title': 'Det som en gang var',
            'upload_date': '20160316',
            'timestamp': 1458140101,
            'uploader_id': '12639966',
            'uploader': 'Live Roaldset',
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        article_type = mobj.group('type')
        article_id = mobj.group('id')

        if article_type == 'filmnytt':
            # For "filmnytt" pages the id used for the embed lookup is read
            # from the page's data-videoid attribute instead of the URL.
            page = self._download_webpage(url, article_id)
            article_id = self._search_regex(
                r'data-videoid="(\d+)"', page, 'article id')

        embed_info = self._download_json(
            'https://www.filmweb.no/template_v2/ajax/json_trailerEmbed.jsp',
            article_id, query={
                'articleId': article_id,
            })
        embed_code = embed_info['embedCode']

        # The embed code is an HTML snippet; pull the iframe src out of it.
        iframe_src = self._search_regex(
            r'<iframe[^>]+src="([^"]+)', embed_code, 'iframe url')
        iframe_url = self._proto_relative_url(iframe_src)

        return {
            '_type': 'url_transparent',
            'id': article_id,
            'url': iframe_url,
            'ie_key': 'TwentyThreeVideo',
        }
|
@ -33,7 +33,7 @@ class FranceInterIE(InfoExtractor):
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
upload_date_str = self._search_regex(
|
||||
r'class=["\']cover-emission-period["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
|
||||
r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
|
||||
webpage, 'upload date', fatal=False)
|
||||
if upload_date_str:
|
||||
upload_date_list = upload_date_str.split()
|
||||
|
@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
@ -9,27 +11,34 @@ from ..utils import (
|
||||
|
||||
|
||||
class GameStarIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?game(?P<site>pro|star)\.de/videos/.*,(?P<id>[0-9]+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html',
|
||||
'md5': '96974ecbb7fd8d0d20fca5a00810cea7',
|
||||
'md5': 'ee782f1f8050448c95c5cacd63bc851c',
|
||||
'info_dict': {
|
||||
'id': '76110',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil',
|
||||
'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den...',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1406542020,
|
||||
'timestamp': 1406542380,
|
||||
'upload_date': '20140728',
|
||||
'duration': 17
|
||||
'duration': 17,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.gamepro.de/videos/top-10-indie-spiele-fuer-nintendo-switch-video-tolle-nindies-games-zum-download,95316.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.gamestar.de/videos/top-10-indie-spiele-fuer-nintendo-switch-video-tolle-nindies-games-zum-download,95316.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
site = mobj.group('site')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# TODO: there are multiple ld+json objects in the webpage,
|
||||
# while _search_json_ld finds only the first one
|
||||
@ -37,16 +46,17 @@ class GameStarIE(InfoExtractor):
|
||||
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>[^<]+VideoObject[^<]+)</script>',
|
||||
webpage, 'JSON-LD', group='json_ld'), video_id)
|
||||
info_dict = self._json_ld(json_ld, video_id)
|
||||
info_dict['title'] = remove_end(info_dict['title'], ' - GameStar')
|
||||
info_dict['title'] = remove_end(
|
||||
info_dict['title'], ' - Game%s' % site.title())
|
||||
|
||||
view_count = json_ld.get('interactionCount')
|
||||
view_count = int_or_none(json_ld.get('interactionCount'))
|
||||
comment_count = int_or_none(self._html_search_regex(
|
||||
r'([0-9]+) Kommentare</span>', webpage, 'comment_count',
|
||||
fatal=False))
|
||||
r'<span>Kommentare</span>\s*<span[^>]+class=["\']count[^>]+>\s*\(\s*([0-9]+)',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'url': url,
|
||||
'url': 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id,
|
||||
'ext': 'mp4',
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count
|
||||
|
@ -100,6 +100,8 @@ from .megaphone import MegaphoneIE
|
||||
from .vzaar import VzaarIE
|
||||
from .channel9 import Channel9IE
|
||||
from .vshare import VShareIE
|
||||
from .mediasite import MediasiteIE
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@ -1925,6 +1927,33 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'vl14062007715967',
|
||||
'ext': 'mp4',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.heidelberg-laureate-forum.org/blog/video/lecture-friday-september-23-2016-sir-c-antony-r-hoare/',
|
||||
'md5': 'aecd089f55b1cb5a59032cb049d3a356',
|
||||
'info_dict': {
|
||||
'id': '90227f51a80c4d8f86c345a7fa62bd9a1d',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lecture: Friday, September 23, 2016 - Sir Tony Hoare',
|
||||
'description': 'md5:5a51db84a62def7b7054df2ade403c6c',
|
||||
'timestamp': 1474354800,
|
||||
'upload_date': '20160920',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton',
|
||||
'info_dict': {
|
||||
'id': '1731611',
|
||||
'ext': 'mp4',
|
||||
'title': 'Official Trailer | TROLLS: THE BEAT GOES ON!',
|
||||
'description': 'md5:eb5f23826a027ba95277d105f248b825',
|
||||
'timestamp': 1516100691,
|
||||
'upload_date': '20180116',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [SpringboardPlatformIE.ie_key()],
|
||||
}
|
||||
# {
|
||||
# # TODO: find another test
|
||||
@ -2695,9 +2724,9 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(viewlift_url)
|
||||
|
||||
# Look for JWPlatform embeds
|
||||
jwplatform_url = JWPlatformIE._extract_url(webpage)
|
||||
if jwplatform_url:
|
||||
return self.url_result(jwplatform_url, 'JWPlatform')
|
||||
jwplatform_urls = JWPlatformIE._extract_urls(webpage)
|
||||
if jwplatform_urls:
|
||||
return self.playlist_from_matches(jwplatform_urls, video_id, video_title, ie=JWPlatformIE.ie_key())
|
||||
|
||||
# Look for Digiteka embeds
|
||||
digiteka_url = DigitekaIE._extract_url(webpage)
|
||||
@ -2883,6 +2912,22 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
vshare_urls, video_id, video_title, ie=VShareIE.ie_key())
|
||||
|
||||
# Look for Mediasite embeds
|
||||
mediasite_urls = MediasiteIE._extract_urls(webpage)
|
||||
if mediasite_urls:
|
||||
entries = [
|
||||
self.url_result(smuggle_url(
|
||||
compat_urlparse.urljoin(url, mediasite_url),
|
||||
{'UrlReferrer': url}), ie=MediasiteIE.ie_key())
|
||||
for mediasite_url in mediasite_urls]
|
||||
return self.playlist_result(entries, video_id, video_title)
|
||||
|
||||
springboardplatform_urls = SpringboardPlatformIE._extract_urls(webpage)
|
||||
if springboardplatform_urls:
|
||||
return self.playlist_from_matches(
|
||||
springboardplatform_urls, video_id, video_title,
|
||||
ie=SpringboardPlatformIE.ie_key())
|
||||
|
||||
def merge_dicts(dict1, dict2):
|
||||
merged = {}
|
||||
for k, v in dict1.items():
|
||||
|
@ -1,8 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
@ -48,7 +47,7 @@ class HotNewHipHopIE(InfoExtractor):
|
||||
if 'mediaKey' not in mkd:
|
||||
raise ExtractorError('Did not get a media key')
|
||||
|
||||
redirect_url = base64.b64decode(video_url_base64).decode('utf-8')
|
||||
redirect_url = compat_b64decode(video_url_base64).decode('utf-8')
|
||||
redirect_req = HEADRequest(redirect_url)
|
||||
req = self._request_webpage(
|
||||
redirect_req, video_id,
|
||||
|
@ -2,9 +2,8 @@
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
@ -61,7 +60,7 @@ class InfoQIE(BokeCCBaseIE):
|
||||
encoded_id = self._search_regex(
|
||||
r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id', default=None)
|
||||
|
||||
real_id = compat_urllib_parse_unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
|
||||
real_id = compat_urllib_parse_unquote(compat_b64decode(encoded_id).decode('utf-8'))
|
||||
playpath = 'mp4:' + real_id
|
||||
|
||||
return [{
|
||||
|
64
youtube_dl/extractor/internazionale.py
Normal file
64
youtube_dl/extractor/internazionale.py
Normal file
@ -0,0 +1,64 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_timestamp
|
||||
|
||||
|
||||
class InternazionaleIE(InfoExtractor):
    # Extractor for internazionale.it video pages: reads data-* attributes
    # from the page and builds HLS and DASH manifest URLs from them.
    _VALID_URL = r'https?://(?:www\.)?internazionale\.it/video/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.internazionale.it/video/2015/02/19/richard-linklater-racconta-una-scena-di-boyhood',
        'md5': '3e39d32b66882c1218e305acbf8348ca',
        'info_dict': {
            'id': '265968',
            'display_id': 'richard-linklater-racconta-una-scena-di-boyhood',
            'ext': 'mp4',
            'title': 'Richard Linklater racconta una scena di Boyhood',
            'description': 'md5:efb7e5bbfb1a54ae2ed5a4a015f0e665',
            'timestamp': 1424354635,
            'upload_date': '20150219',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            'format': 'bestvideo',
        },
    }

    def _real_extract(self, url):
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)

        # Template for a quoted data-<name>="..." attribute; the value is
        # captured in the named group "value".
        data_attr_re = r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1'

        def data_attr(attr_name, note, **kwargs):
            return self._search_regex(
                data_attr_re % attr_name, page, note, group='value', **kwargs)

        # Prefer the data-video-title attribute, fall back to the og:title.
        title = data_attr('video-title', 'title', default=None)
        if not title:
            title = self._og_search_title(page)

        video_id = data_attr('job-id', 'video id')
        video_path = data_attr('video-path', 'video path')

        # Both manifests share the same base; only the extension differs.
        manifest_base = 'https://video.internazionale.it/%s/%s.' % (video_path, video_id)

        formats = []
        formats.extend(self._extract_m3u8_formats(
            manifest_base + 'm3u8', display_id, 'mp4',
            entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
        formats.extend(self._extract_mpd_formats(
            manifest_base + 'mpd', display_id, mpd_id='dash', fatal=False))
        self._sort_formats(formats)

        published = self._html_search_meta(
            'article:published_time', page, 'timestamp')
        timestamp = unified_timestamp(published)

        thumbnail = self._og_search_thumbnail(page)
        description = self._og_search_description(page)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'thumbnail': thumbnail,
            'description': description,
            'timestamp': timestamp,
            'formats': formats,
        }
|
@ -23,11 +23,14 @@ class JWPlatformIE(InfoExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
mobj = re.search(
|
||||
r'<(?:script|iframe)[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})',
|
||||
urls = JWPlatformIE._extract_urls(webpage)
|
||||
return urls[0] if urls else None
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<(?:script|iframe)[^>]+?src=["\']((?:https?:)?//content\.jwplatform\.com/players/[a-zA-Z0-9]{8})',
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@ -125,9 +125,12 @@ class KalturaIE(InfoExtractor):
|
||||
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
|
||||
(?P=q1).*?
|
||||
(?:
|
||||
entry_?[Ii]d|
|
||||
(?P<q2>["'])entry_?[Ii]d(?P=q2)
|
||||
)\s*:\s*
|
||||
(?:
|
||||
entry_?[Ii]d|
|
||||
(?P<q2>["'])entry_?[Ii]d(?P=q2)
|
||||
)\s*:\s*|
|
||||
\[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s*
|
||||
)
|
||||
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
|
||||
''', webpage) or
|
||||
re.search(
|
||||
|
@ -1,71 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class KamcordIE(InfoExtractor):
    # Extractor for kamcord.com video pages: parses the JSON assigned to
    # window.__props in the page and reads the HLS stream and metadata from it.
    _VALID_URL = r'https?://(?:www\.)?kamcord\.com/v/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.kamcord.com/v/hNYRduDgWb4',
        'md5': 'c3180e8a9cfac2e86e1b88cb8751b54c',
        'info_dict': {
            'id': 'hNYRduDgWb4',
            'ext': 'mp4',
            'title': 'Drinking Madness',
            'uploader': 'jacksfilms',
            'uploader_id': '3044562',
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # The page assigns a JSON object to window.__props; its "video" entry
        # carries everything used below.
        props_json = self._search_regex(
            r'window\.__props\s*=\s*({.+?});?(?:\n|\s*</script)',
            page, 'video')
        video = self._parse_json(props_json, video_id)['video']

        title = video['title']

        formats = self._extract_m3u8_formats(
            video['play']['hls'], video_id, 'mp4', entry_protocol='m3u8_native')
        self._sort_formats(formats)

        user = video.get('user', {})
        uploader = user.get('username')
        uploader_id = user.get('id')

        # Rank thumbnails by their size label.
        rank_thumbnail = qualities(('small', 'medium', 'large'))

        thumbnails = []
        for thumb_id, thumb_url in (video.get('thumbnail') or {}).items():
            # Skip entries whose key or value is not a string.
            if not isinstance(thumb_id, compat_str):
                continue
            if not isinstance(thumb_url, compat_str):
                continue
            thumbnails.append({
                'url': thumb_url,
                'id': thumb_id,
                'preference': rank_thumbnail(thumb_id),
            })

        return {
            'id': video_id,
            'title': title,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'view_count': int_or_none(video.get('viewCount')),
            'like_count': int_or_none(video.get('heartCount')),
            'comment_count': int_or_none(video.get('messageCount')),
            'thumbnails': thumbnails,
            'formats': formats,
        }
|
@ -1,7 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import datetime
|
||||
import hashlib
|
||||
import re
|
||||
@ -9,6 +8,7 @@ import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_ord,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
@ -329,7 +329,7 @@ class LetvCloudIE(InfoExtractor):
|
||||
raise ExtractorError('Letv cloud returned an unknwon error')
|
||||
|
||||
def b64decode(s):
|
||||
return base64.b64decode(s.encode('utf-8')).decode('utf-8')
|
||||
return compat_b64decode(s).decode('utf-8')
|
||||
|
||||
formats = []
|
||||
for media in play_json['data']['video_info']['media'].values():
|
||||
|
@ -10,6 +10,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
unsmuggle_url,
|
||||
ExtractorError,
|
||||
)
|
||||
@ -220,6 +221,12 @@ class LimelightBaseIE(InfoExtractor):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _extract_info_helper(self, pc, mobile, i, metadata):
    # Look up the i-th item's stream lists in both playlist payloads; a
    # lookup that does not yield a list is replaced by an empty list, so
    # _extract_info always receives lists.
    pc_streams = try_get(
        pc, lambda x: x['playlistItems'][i]['streams'], list)
    if not pc_streams:
        pc_streams = []

    mobile_urls = try_get(
        mobile, lambda x: x['mediaList'][i]['mobileUrls'], list)
    if not mobile_urls:
        mobile_urls = []

    return self._extract_info(pc_streams, mobile_urls, metadata)
|
||||
|
||||
|
||||
class LimelightMediaIE(LimelightBaseIE):
|
||||
IE_NAME = 'limelight'
|
||||
@ -282,10 +289,7 @@ class LimelightMediaIE(LimelightBaseIE):
|
||||
'getMobilePlaylistByMediaId', 'properties',
|
||||
smuggled_data.get('source_url'))
|
||||
|
||||
return self._extract_info(
|
||||
pc['playlistItems'][0].get('streams', []),
|
||||
mobile['mediaList'][0].get('mobileUrls', []) if mobile else [],
|
||||
metadata)
|
||||
return self._extract_info_helper(pc, mobile, 0, metadata)
|
||||
|
||||
|
||||
class LimelightChannelIE(LimelightBaseIE):
|
||||
@ -326,10 +330,7 @@ class LimelightChannelIE(LimelightBaseIE):
|
||||
'media', smuggled_data.get('source_url'))
|
||||
|
||||
entries = [
|
||||
self._extract_info(
|
||||
pc['playlistItems'][i].get('streams', []),
|
||||
mobile['mediaList'][i].get('mobileUrls', []) if mobile else [],
|
||||
medias['media_list'][i])
|
||||
self._extract_info_helper(pc, mobile, i, medias['media_list'][i])
|
||||
for i in range(len(medias['media_list']))]
|
||||
|
||||
return self.playlist_result(entries, channel_id, pc['title'])
|
||||
|
@ -94,7 +94,15 @@ class LyndaBaseIE(InfoExtractor):
|
||||
class LyndaIE(LyndaBaseIE):
|
||||
IE_NAME = 'lynda'
|
||||
IE_DESC = 'lynda.com videos'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:lynda\.com|educourse\.ga)/(?:[^/]+/[^/]+/(?P<course_id>\d+)|player/embed)/(?P<id>\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?(?:lynda\.com|educourse\.ga)/
|
||||
(?:
|
||||
(?:[^/]+/){2,3}(?P<course_id>\d+)|
|
||||
player/embed
|
||||
)/
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
|
||||
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
|
||||
|
||||
@ -113,6 +121,9 @@ class LyndaIE(LyndaBaseIE):
|
||||
}, {
|
||||
'url': 'https://educourse.ga/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Willkommen-Grundlagen-guten-Gestaltung/393570/393572-4.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _raise_unavailable(self, video_id):
|
||||
@ -244,8 +255,9 @@ class LyndaIE(LyndaBaseIE):
|
||||
def _get_subtitles(self, video_id):
|
||||
url = 'https://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
|
||||
subs = self._download_json(url, None, False)
|
||||
if subs:
|
||||
return {'en': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]}
|
||||
fixed_subs = self._fix_subtitles(subs)
|
||||
if fixed_subs:
|
||||
return {'en': [{'ext': 'srt', 'data': fixed_subs}]}
|
||||
else:
|
||||
return {}
|
||||
|
||||
@ -256,7 +268,15 @@ class LyndaCourseIE(LyndaBaseIE):
|
||||
|
||||
# The course link is the same as the welcome/introduction video link of the same course
|
||||
# We will recognize it as a course link
|
||||
_VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html'
|
||||
_VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P<coursepath>(?:[^/]+/){2,3}(?P<courseid>\d+))-2\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.lynda.com/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@ -1,13 +1,12 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class MangomoloBaseIE(InfoExtractor):
|
||||
@ -51,4 +50,4 @@ class MangomoloLiveIE(MangomoloBaseIE):
|
||||
_IS_LIVE = True
|
||||
|
||||
def _get_real_id(self, page_id):
|
||||
return base64.b64decode(compat_urllib_parse_unquote(page_id).encode()).decode()
|
||||
return compat_b64decode(compat_urllib_parse_unquote(page_id)).decode()
|
||||
|
214
youtube_dl/extractor/mediasite.py
Normal file
214
youtube_dl/extractor/mediasite.py
Normal file
@ -0,0 +1,214 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
mimetype2ext,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class MediasiteIE(InfoExtractor):
    """Extractor for Mediasite player pages (``.../Mediasite/Play/<resource id>``)."""

    _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/Play/(?P<id>[0-9a-f]{32,34})(?P<query>\?[^#]+|)'
    _TESTS = [
        {
            'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d',
            'info_dict': {
                'id': '2db6c271681e4f199af3c60d1f82869b1d',
                'ext': 'mp4',
                'title': 'Lecture: Tuesday, September 20, 2016 - Sir Andrew Wiles',
                'description': 'Sir Andrew Wiles: “Equations in arithmetic”\\n\\nI will describe some of the interactions between modern number theory and the problem of solving equations in rational numbers or integers\\u0027.',
                'timestamp': 1474268400.0,
                'upload_date': '20160919',
            },
        },
        {
            'url': 'http://mediasite.uib.no/Mediasite/Play/90bb363295d945d6b548c867d01181361d?catalog=a452b7df-9ae1-46b7-a3ba-aceeb285f3eb',
            'info_dict': {
                'id': '90bb363295d945d6b548c867d01181361d',
                'ext': 'mp4',
                'upload_date': '20150429',
                'title': '5) IT-forum 2015-Dag 1 - Dungbeetle - How and why Rain created a tiny bug tracker for Unity',
                'timestamp': 1430311380.0,
            },
        },
        {
            'url': 'https://collegerama.tudelft.nl/Mediasite/Play/585a43626e544bdd97aeb71a0ec907a01d',
            'md5': '481fda1c11f67588c0d9d8fbdced4e39',
            'info_dict': {
                'id': '585a43626e544bdd97aeb71a0ec907a01d',
                'ext': 'mp4',
                'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.',
                'description': '',
                'thumbnail': r're:^https?://.*\.jpg(?:\?.*)?$',
                'duration': 7713.088,
                'timestamp': 1413309600,
                'upload_date': '20141014',
            },
        },
        {
            'url': 'https://collegerama.tudelft.nl/Mediasite/Play/86a9ea9f53e149079fbdb4202b521ed21d?catalog=fd32fd35-6c99-466c-89d4-cd3c431bc8a4',
            'md5': 'ef1fdded95bdf19b12c5999949419c92',
            'info_dict': {
                'id': '86a9ea9f53e149079fbdb4202b521ed21d',
                'ext': 'wmv',
                'title': '64ste Vakantiecursus: Afvalwater',
                'description': 'md5:7fd774865cc69d972f542b157c328305',
                'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
                'duration': 10853,
                'timestamp': 1326446400,
                'upload_date': '20120113',
            },
        },
        {
            'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d',
            'md5': '9422edc9b9a60151727e4b6d8bef393d',
            'info_dict': {
                'id': '24aace4429fc450fb5b38cdbf424a66e1d',
                'ext': 'mp4',
                'title': 'Xyce Software Training - Section 1',
                'description': r're:(?s)SAND Number: SAND 2013-7800.{200,}',
                'upload_date': '20120409',
                'timestamp': 1333983600,
                'duration': 7794,
            }
        }
    ]

    # look in Mediasite.Core.js (Mediasite.ContentStreamType[*])
    _STREAM_TYPES = {
        0: 'video1',  # the main video
        2: 'slide',
        3: 'presentation',
        4: 'video2',  # screencast?
        5: 'video3',
    }

    @staticmethod
    def _extract_urls(webpage):
        """Return the HTML-unescaped ``src`` of every Mediasite player iframe in *webpage*.

        The returned URLs may be absolute, protocol-relative or host-relative,
        exactly as they appear in the page.
        """
        return [
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(
                r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/[0-9a-f]{32,34}(?:\?.*?)?)\1',
                webpage)]

    def _real_extract(self, url):
        """Build the info dict for a single Mediasite presentation.

        Downloads the player page, asks the player service for the
        presentation's options and turns every stream's video URLs into
        formats (ISM, DASH or direct) and every stream thumbnail into a
        thumbnail entry.

        Raises ExtractorError (expected) carrying the service's status
        message when the service returns no presentation.
        """
        url, data = unsmuggle_url(url, {})
        mobj = re.match(self._VALID_URL, url)
        resource_id = mobj.group('id')
        query = mobj.group('query')

        webpage, urlh = self._download_webpage_handle(url, resource_id)  # XXX: add UrlReferrer?
        redirect_url = compat_str(urlh.geturl())

        # XXX: might have also extracted UrlReferrer and QueryString from the html
        service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex(
            r'<div[^>]+\bid=["\']ServicePath[^>]+>(.+?)</div>', webpage, resource_id,
            default='/Mediasite/PlayerService/PlayerService.svc/json'))

        player_options = self._download_json(
            '%s/GetPlayerOptions' % service_path, resource_id,
            headers={
                'Content-type': 'application/json; charset=utf-8',
                'X-Requested-With': 'XMLHttpRequest',
            },
            data=json.dumps({
                'getPlayerOptionsRequest': {
                    'ResourceId': resource_id,
                    'QueryString': query,
                    'UrlReferrer': data.get('UrlReferrer', ''),
                    'UseScreenReader': False,
                }
            }).encode('utf-8'))['d']

        presentation = player_options['Presentation']
        # The None check has to come before any subscript on the presentation;
        # otherwise a TypeError would hide the service's own status message.
        if presentation is None:
            raise ExtractorError(
                'Mediasite says: %s' % player_options['PlayerPresentationStatusMessage'],
                expected=True)

        title = presentation['Title']

        thumbnails = []
        formats = []
        for snum, stream in enumerate(presentation['Streams']):
            stream_type = stream.get('StreamType')
            if stream_type is None:
                continue

            video_urls = stream.get('VideoUrls')
            if not isinstance(video_urls, list):
                video_urls = []

            # Unknown stream types still get a stable, readable identifier.
            stream_id = self._STREAM_TYPES.get(
                stream_type, 'type%u' % stream_type)

            stream_formats = []
            for unum, video_url_info in enumerate(video_urls):
                video_url = video_url_info.get('Location')
                if not video_url or not isinstance(video_url, compat_str):
                    continue
                # XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS

                format_id = '%s-%u.%u' % (stream_id, snum, unum)
                media_type = video_url_info.get('MediaType')
                if media_type == 'SS':
                    stream_formats.extend(self._extract_ism_formats(
                        video_url, resource_id,
                        ism_id=format_id,
                        fatal=False))
                elif media_type == 'Dash':
                    stream_formats.extend(self._extract_mpd_formats(
                        video_url, resource_id,
                        mpd_id=format_id,
                        fatal=False))
                else:
                    stream_formats.append({
                        'format_id': format_id,
                        'url': video_url,
                        'ext': mimetype2ext(video_url_info.get('MimeType')),
                    })

            # TODO: if Stream['HasSlideContent']:
            # synthesise an MJPEG video stream '%s-%u.slides' % (stream_type, snum)
            # from Stream['Slides']
            # this will require writing a custom downloader...

            # disprefer 'secondary' streams
            if stream_type != 0:
                for fmt in stream_formats:
                    fmt['preference'] = -1

            thumbnail_url = stream.get('ThumbnailUrl')
            if thumbnail_url:
                thumbnails.append({
                    'id': '%s-%u' % (stream_id, snum),
                    'url': urljoin(redirect_url, thumbnail_url),
                    'preference': -1 if stream_type != 0 else 0,
                })
            formats.extend(stream_formats)

        self._sort_formats(formats)

        # XXX: Presentation['Presenters']
        # XXX: Presentation['Transcript']

        return {
            'id': resource_id,
            'title': title,
            'description': presentation.get('Description'),
            # Duration and UnixTime are divided by 1000 before being returned.
            'duration': float_or_none(presentation.get('Duration'), 1000),
            'timestamp': float_or_none(presentation.get('UnixTime'), 1000),
            'formats': formats,
            'thumbnails': thumbnails,
        }
|
@ -1,13 +1,13 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@ -42,31 +42,33 @@ class MiTeleBaseIE(InfoExtractor):
|
||||
duration = int_or_none(mmc.get('duration'))
|
||||
for location in mmc['locations']:
|
||||
gat = self._proto_relative_url(location.get('gat'), 'http:')
|
||||
bas = location.get('bas')
|
||||
loc = location.get('loc')
|
||||
gcp = location.get('gcp')
|
||||
ogn = location.get('ogn')
|
||||
if None in (gat, bas, loc, ogn):
|
||||
if None in (gat, gcp, ogn):
|
||||
continue
|
||||
token_data = {
|
||||
'bas': bas,
|
||||
'icd': loc,
|
||||
'gcp': gcp,
|
||||
'ogn': ogn,
|
||||
'sta': '0',
|
||||
'sta': 0,
|
||||
}
|
||||
media = self._download_json(
|
||||
'%s/?%s' % (gat, compat_urllib_parse_urlencode(token_data)),
|
||||
video_id, 'Downloading %s JSON' % location['loc'])
|
||||
file_ = media.get('file')
|
||||
if not file_:
|
||||
gat, video_id, data=json.dumps(token_data).encode('utf-8'),
|
||||
headers={
|
||||
'Content-Type': 'application/json;charset=utf-8',
|
||||
'Referer': url,
|
||||
})
|
||||
stream = media.get('stream') or media.get('file')
|
||||
if not stream:
|
||||
continue
|
||||
ext = determine_ext(file_)
|
||||
ext = determine_ext(stream)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
|
||||
stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
file_, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
stream, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@ -1,12 +1,12 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import functools
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_chr,
|
||||
compat_ord,
|
||||
compat_str,
|
||||
@ -79,7 +79,7 @@ class MixcloudIE(InfoExtractor):
|
||||
|
||||
if encrypted_play_info is not None:
|
||||
# Decode
|
||||
encrypted_play_info = base64.b64decode(encrypted_play_info)
|
||||
encrypted_play_info = compat_b64decode(encrypted_play_info)
|
||||
else:
|
||||
# New path
|
||||
full_info_json = self._parse_json(self._html_search_regex(
|
||||
@ -109,7 +109,7 @@ class MixcloudIE(InfoExtractor):
|
||||
kpa_target = encrypted_play_info
|
||||
else:
|
||||
kps = ['https://', 'http://']
|
||||
kpa_target = base64.b64decode(info_json['streamInfo']['url'])
|
||||
kpa_target = compat_b64decode(info_json['streamInfo']['url'])
|
||||
for kp in kps:
|
||||
partial_key = self._decrypt_xor_cipher(kpa_target, kp)
|
||||
for quote in ["'", '"']:
|
||||
@ -165,7 +165,7 @@ class MixcloudIE(InfoExtractor):
|
||||
format_url = stream_info.get(url_key)
|
||||
if not format_url:
|
||||
continue
|
||||
decrypted = self._decrypt_xor_cipher(key, base64.b64decode(format_url))
|
||||
decrypted = self._decrypt_xor_cipher(key, compat_b64decode(format_url))
|
||||
if not decrypted:
|
||||
continue
|
||||
if url_key == 'hlsUrl':
|
||||
|
@ -4,8 +4,11 @@ import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
InAdvancePagedList,
|
||||
orderedSet,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
@ -114,3 +117,86 @@ class MotherlessIE(InfoExtractor):
|
||||
'age_limit': age_limit,
|
||||
'url': video_url,
|
||||
}
|
||||
|
||||
|
||||
class MotherlessGroupIE(InfoExtractor):
    """Playlist extractor for motherless.com group pages (``/g/<id>`` and ``/gv/<id>``)."""

    # Raw string: the pattern contains regex escapes such as ``\.`` that are
    # invalid escape sequences in a normal string literal.
    _VALID_URL = r'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
    _TESTS = [{
        'url': 'http://motherless.com/g/movie_scenes',
        'info_dict': {
            'id': 'movie_scenes',
            'title': 'Movie Scenes',
            'description': 'Hot and sexy scenes from "regular" movies... '
                           'Beautiful actresses fully nude... A looot of '
                           'skin! :)Enjoy!',
        },
        'playlist_mincount': 662,
    }, {
        'url': 'http://motherless.com/gv/sex_must_be_funny',
        'info_dict': {
            'id': 'sex_must_be_funny',
            'title': 'Sex must be funny',
            'description': 'Sex can be funny. Wide smiles,laugh, games, fun of '
                           'any kind!'
        },
        'playlist_mincount': 9,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline any URL that MotherlessIE already claims; otherwise apply _VALID_URL."""
        return (False if MotherlessIE.suitable(url)
                else super(MotherlessGroupIE, cls).suitable(url))

    def _extract_entries(self, webpage, base):
        """Return url_result entries for every video linked from *webpage*.

        Links are resolved against *base* and kept only when MotherlessIE
        accepts them. If no such link is found, entries are built from the
        ``data-codename`` attributes present in the page instead.
        """
        entries = []
        for mobj in re.finditer(
                r'href="(?P<href>/[^"]+)"[^>]*>(?:\s*<img[^>]+alt="[^-]+-\s(?P<title>[^"]+)")?',
                webpage):
            video_url = compat_urlparse.urljoin(base, mobj.group('href'))
            if not MotherlessIE.suitable(video_url):
                continue
            video_id = MotherlessIE._match_id(video_url)
            title = mobj.group('title')
            entries.append(self.url_result(
                video_url, ie=MotherlessIE.ie_key(), video_id=video_id,
                video_title=title))
        # Alternative fallback
        if not entries:
            entries = [
                self.url_result(
                    compat_urlparse.urljoin(base, '/' + video_id),
                    ie=MotherlessIE.ie_key(), video_id=video_id)
                for video_id in orderedSet(re.findall(
                    r'data-codename=["\']([A-Z0-9]+)', webpage))]
        return entries

    def _real_extract(self, url):
        """Return a paged playlist covering all pages of the group's ``/gv/`` listing."""
        group_id = self._match_id(url)
        page_url = compat_urlparse.urljoin(url, '/gv/%s' % group_id)
        webpage = self._download_webpage(page_url, group_id)
        title = self._search_regex(
            r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
        description = self._html_search_meta(
            'description', webpage, fatal=False)
        # The number just before the NEXT link is taken as the page count.
        page_count = self._int(self._search_regex(
            r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT',
            webpage, 'page_count'), 'page_count')
        PAGE_SIZE = 80

        def _get_page(idx):
            # idx is zero-based; the site's ``page`` query parameter is one-based.
            webpage = self._download_webpage(
                page_url, group_id, query={'page': idx + 1},
                note='Downloading page %d/%d' % (idx + 1, page_count)
            )
            for entry in self._extract_entries(webpage, url):
                yield entry

        playlist = InAdvancePagedList(_get_page, page_count, PAGE_SIZE)

        return {
            '_type': 'playlist',
            'id': group_id,
            'title': title,
            'description': description,
            'entries': playlist
        }
|
||||
|
@ -190,10 +190,12 @@ class NDREmbedBaseIE(InfoExtractor):
|
||||
ext = determine_ext(src, None)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
src + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, f4m_id='hds'))
|
||||
src + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
|
||||
f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native'))
|
||||
src, video_id, 'mp4', m3u8_id='hls',
|
||||
entry_protocol='m3u8_native', fatal=False))
|
||||
else:
|
||||
quality = f.get('quality')
|
||||
ff = {
|
||||
|
@ -19,11 +19,11 @@ from ..utils import (
|
||||
|
||||
|
||||
class OdnoklassnikiIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|m|mobile)\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|m|mobile)\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer|live)/(?P<id>[\d-]+)'
|
||||
_TESTS = [{
|
||||
# metadata in JSON
|
||||
'url': 'http://ok.ru/video/20079905452',
|
||||
'md5': '6ba728d85d60aa2e6dd37c9e70fdc6bc',
|
||||
'md5': '0b62089b479e06681abaaca9d204f152',
|
||||
'info_dict': {
|
||||
'id': '20079905452',
|
||||
'ext': 'mp4',
|
||||
@ -35,7 +35,6 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'age_limit': 0,
|
||||
},
|
||||
'skip': 'Video has been blocked',
|
||||
}, {
|
||||
# metadataUrl
|
||||
'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
|
||||
@ -99,6 +98,9 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://mobile.ok.ru/video/20079905452',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ok.ru/live/484531969818',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -184,6 +186,10 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
})
|
||||
return info
|
||||
|
||||
assert title
|
||||
if provider == 'LIVE_TV_APP':
|
||||
info['title'] = self._live_title(title)
|
||||
|
||||
quality = qualities(('4', '0', '1', '2', '3', '5'))
|
||||
|
||||
formats = [{
|
||||
@ -210,6 +216,20 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
if fmt_type:
|
||||
fmt['quality'] = quality(fmt_type)
|
||||
|
||||
# Live formats
|
||||
m3u8_url = metadata.get('hlsMasterPlaylistUrl')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol='m3u8',
|
||||
m3u8_id='hls', fatal=False))
|
||||
rtmp_url = metadata.get('rtmpUrl')
|
||||
if rtmp_url:
|
||||
formats.append({
|
||||
'url': rtmp_url,
|
||||
'format_id': 'rtmp',
|
||||
'ext': 'flv',
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
info['formats'] = formats
|
||||
|
@ -1,9 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
@ -12,7 +16,6 @@ from ..utils import (
|
||||
try_get,
|
||||
unsmuggle_url,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
|
||||
|
||||
class OoyalaBaseIE(InfoExtractor):
|
||||
@ -44,7 +47,7 @@ class OoyalaBaseIE(InfoExtractor):
|
||||
url_data = try_get(stream, lambda x: x['url']['data'], compat_str)
|
||||
if not url_data:
|
||||
continue
|
||||
s_url = base64.b64decode(url_data.encode('ascii')).decode('utf-8')
|
||||
s_url = compat_b64decode(url_data).decode('utf-8')
|
||||
if not s_url or s_url in urls:
|
||||
continue
|
||||
urls.append(s_url)
|
||||
|
@ -112,6 +112,8 @@ class PhantomJSwrapper(object):
|
||||
return get_exe_version('phantomjs', version_re=r'([0-9.]+)')
|
||||
|
||||
def __init__(self, extractor, required_version=None, timeout=10000):
|
||||
self._TMP_FILES = {}
|
||||
|
||||
self.exe = check_executable('phantomjs', ['-v'])
|
||||
if not self.exe:
|
||||
raise ExtractorError('PhantomJS executable not found in PATH, '
|
||||
@ -130,7 +132,6 @@ class PhantomJSwrapper(object):
|
||||
self.options = {
|
||||
'timeout': timeout,
|
||||
}
|
||||
self._TMP_FILES = {}
|
||||
for name in self._TMP_FILE_NAMES:
|
||||
tmp = tempfile.NamedTemporaryFile(delete=False)
|
||||
tmp.close()
|
||||
@ -140,7 +141,7 @@ class PhantomJSwrapper(object):
|
||||
for name in self._TMP_FILE_NAMES:
|
||||
try:
|
||||
os.remove(self._TMP_FILES[name].name)
|
||||
except (IOError, OSError):
|
||||
except (IOError, OSError, KeyError):
|
||||
pass
|
||||
|
||||
def _save_cookies(self, url):
|
||||
@ -242,7 +243,7 @@ class PhantomJSwrapper(object):
|
||||
|
||||
|
||||
class OpenloadIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.tv)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://openload.co/f/kUEfGclsU9o',
|
||||
@ -283,12 +284,20 @@ class OpenloadIE(InfoExtractor):
|
||||
# for title and ext
|
||||
'url': 'https://openload.co/embed/Sxz5sADo82g/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# unavailable via https://openload.co/embed/e-Ixz9ZR5L0/ but available
|
||||
# via https://openload.co/f/e-Ixz9ZR5L0/
|
||||
'url': 'https://openload.co/f/e-Ixz9ZR5L0/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://oload.tv/embed/KnG-kKZdcfY/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.openload.link/f/KnG-kKZdcfY',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://oload.stream/f/KnG-kKZdcfY',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
|
||||
@ -301,20 +310,34 @@ class OpenloadIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
url = 'https://openload.co/embed/%s/' % video_id
|
||||
url_pattern = 'https://openload.co/%%s/%s/' % video_id
|
||||
headers = {
|
||||
'User-Agent': self._USER_AGENT,
|
||||
}
|
||||
|
||||
webpage = self._download_webpage(url, video_id, headers=headers)
|
||||
|
||||
if 'File not found' in webpage or 'deleted by the owner' in webpage:
|
||||
raise ExtractorError('File not found', expected=True, video_id=video_id)
|
||||
for path in ('embed', 'f'):
|
||||
page_url = url_pattern % path
|
||||
last = path == 'f'
|
||||
webpage = self._download_webpage(
|
||||
page_url, video_id, 'Downloading %s webpage' % path,
|
||||
headers=headers, fatal=last)
|
||||
if not webpage:
|
||||
continue
|
||||
if 'File not found' in webpage or 'deleted by the owner' in webpage:
|
||||
if not last:
|
||||
continue
|
||||
raise ExtractorError('File not found', expected=True, video_id=video_id)
|
||||
break
|
||||
|
||||
phantom = PhantomJSwrapper(self, required_version='2.0')
|
||||
webpage, _ = phantom.get(url, html=webpage, video_id=video_id, headers=headers)
|
||||
webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers)
|
||||
|
||||
decoded_id = get_element_by_id('streamurl', webpage)
|
||||
decoded_id = (get_element_by_id('streamurl', webpage) or
|
||||
get_element_by_id('streamuri', webpage) or
|
||||
get_element_by_id('streamurj', webpage))
|
||||
|
||||
if not decoded_id:
|
||||
raise ExtractorError('Can\'t find stream URL', video_id=video_id)
|
||||
|
||||
video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id
|
||||
|
||||
@ -323,7 +346,7 @@ class OpenloadIE(InfoExtractor):
|
||||
'title', default=None) or self._html_search_meta(
|
||||
'description', webpage, 'title', fatal=True)
|
||||
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id)
|
||||
entries = self._parse_html5_media_entries(page_url, webpage, video_id)
|
||||
entry = entries[0] if entries else {}
|
||||
subtitles = entry.get('subtitles')
|
||||
|
||||
|
@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
@ -18,7 +20,14 @@ from ..utils import (
|
||||
class PandoraTVIE(InfoExtractor):
|
||||
IE_NAME = 'pandora.tv'
|
||||
IE_DESC = '판도라TV'
|
||||
_VALID_URL = r'https?://(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?pandora\.tv/view/(?P<user_id>[^/]+)/(?P<id>\d+)| # new format
|
||||
(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?| # old format
|
||||
m\.pandora\.tv/?\? # mobile
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2',
|
||||
'info_dict': {
|
||||
@ -53,14 +62,25 @@ class PandoraTVIE(InfoExtractor):
|
||||
# Test metadata only
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.pandora.tv/view/mikakim/53294230#36797454_new',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://m.pandora.tv/?c=view&ch_userid=mikakim&prgid=54600346',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
video_id = qs.get('prgid', [None])[0]
|
||||
user_id = qs.get('ch_userid', [None])[0]
|
||||
if any(not f for f in (video_id, user_id,)):
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
user_id = mobj.group('user_id')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
if not user_id or not video_id:
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
video_id = qs.get('prgid', [None])[0]
|
||||
user_id = qs.get('ch_userid', [None])[0]
|
||||
if any(not f for f in (video_id, user_id,)):
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
data = self._download_json(
|
||||
'http://m.pandora.tv/?c=view&m=viewJsonApi&ch_userid=%s&prgid=%s'
|
||||
|
@ -24,7 +24,7 @@ class PlaytvakIE(InfoExtractor):
|
||||
'id': 'A150730_150323_hodinovy-manzel_kuko',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vyžeňte vosy a sršně ze zahrady',
|
||||
'description': 'md5:f93d398691044d303bc4a3de62f3e976',
|
||||
'description': 'md5:4436e61b7df227a093778efb7e373571',
|
||||
'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
|
||||
'duration': 279,
|
||||
'timestamp': 1438732860,
|
||||
@ -36,9 +36,19 @@ class PlaytvakIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'A150624_164934_planespotting_cat',
|
||||
'ext': 'flv',
|
||||
'title': 're:^Přímý přenos iDNES.cz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'title': 're:^Planespotting [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': 'Sledujte provoz na ranveji Letiště Václava Havla v Praze',
|
||||
'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires rtmpdump
|
||||
},
|
||||
}, { # another live stream, this one without Misc.videoFLV
|
||||
'url': 'https://slowtv.playtvak.cz/zive-sledujte-vlaky-v-primem-prenosu-dwi-/hlavni-nadrazi.aspx?c=A151218_145728_hlavni-nadrazi_plap',
|
||||
'info_dict': {
|
||||
'id': 'A151218_145728_hlavni-nadrazi_plap',
|
||||
'ext': 'flv',
|
||||
'title': 're:^Hlavní nádraží [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
@ -95,7 +105,7 @@ class PlaytvakIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
info_url = self._html_search_regex(
|
||||
r'Misc\.videoFLV\(\s*{\s*data\s*:\s*"([^"]+)"', webpage, 'info url')
|
||||
r'Misc\.video(?:FLV)?\(\s*{\s*data\s*:\s*"([^"]+)"', webpage, 'info url')
|
||||
|
||||
parsed_url = compat_urlparse.urlparse(info_url)
|
||||
|
||||
@ -160,7 +170,7 @@ class PlaytvakIE(InfoExtractor):
|
||||
if is_live:
|
||||
title = self._live_title(title)
|
||||
description = self._og_search_description(webpage, default=None) or self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
'description', webpage, 'description', default=None)
|
||||
timestamp = None
|
||||
duration = None
|
||||
if not is_live:
|
||||
|
@ -171,12 +171,12 @@ class PluralsightIE(PluralsightBaseIE):
|
||||
for num, current in enumerate(subs):
|
||||
current = subs[num]
|
||||
start, text = (
|
||||
float_or_none(dict_get(current, TIME_OFFSET_KEYS)),
|
||||
float_or_none(dict_get(current, TIME_OFFSET_KEYS, skip_false_values=False)),
|
||||
dict_get(current, TEXT_KEYS))
|
||||
if start is None or text is None:
|
||||
continue
|
||||
end = duration if num == len(subs) - 1 else float_or_none(
|
||||
dict_get(subs[num + 1], TIME_OFFSET_KEYS))
|
||||
dict_get(subs[num + 1], TIME_OFFSET_KEYS, skip_false_values=False))
|
||||
if end is None:
|
||||
continue
|
||||
srt += os.linesep.join(
|
||||
|
@ -344,6 +344,8 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||
r'clip[iI]d=(\d+)',
|
||||
r'clip[iI]d\s*=\s*["\'](\d+)',
|
||||
r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
|
||||
r'proMamsId"\s*:\s*"(\d+)',
|
||||
r'proMamsId"\s*:\s*"(\d+)',
|
||||
]
|
||||
_TITLE_REGEXES = [
|
||||
r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
|
||||
|
@ -5,8 +5,8 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class RestudyIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?restudy\.dk/video/play/id/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:(?:www|portal)\.)?restudy\.dk/video/[^/]+/id/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.restudy.dk/video/play/id/1637',
|
||||
'info_dict': {
|
||||
'id': '1637',
|
||||
@ -18,7 +18,10 @@ class RestudyIE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://portal.restudy.dk/video/leiden-frosteffekt/id/1637',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@ -29,7 +32,7 @@ class RestudyIE(InfoExtractor):
|
||||
description = self._og_search_description(webpage).strip()
|
||||
|
||||
formats = self._extract_smil_formats(
|
||||
'https://www.restudy.dk/awsmedia/SmilDirectory/video_%s.xml' % video_id,
|
||||
'https://cdn.portal.restudy.dk/dynamic/themes/front/awsmedia/SmilDirectory/video_%s.xml' % video_id,
|
||||
video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@ -1,44 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class RingTVIE(InfoExtractor):
    """Extractor for ringtv.craveonline.com news articles and video pages."""

    _VALID_URL = r'https?://(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30',
        'md5': 'd25945f5df41cdca2d2587165ac28720',
        'info_dict': {
            'id': '857645',
            'ext': 'mp4',
            'title': 'Video: Luis Collazo says Victor Ortiz "better not quit on Jan. 30" - Ring TV',
            'description': 'Luis Collazo is excited about his Jan. 30 showdown with fellow former welterweight titleholder Victor Ortiz at Barclays Center in his hometown of Brooklyn. The SuperBowl week fight headlines a Golden Boy Live! card on Fox Sports 1.',
        }
    }

    def _real_extract(self, url):
        """Return the info dict (direct mp4 URL, snapshot thumbnail, title, description)."""
        url_match = re.match(self._VALID_URL, url)
        page_type = url_match.group('type')
        # The text before the first dash of the URL slug is the provisional ID.
        slug_prefix = url_match.group('id').split('-')[0]
        video_id = slug_prefix
        webpage = self._download_webpage(url, slug_prefix)

        # For news pages the ID is replaced by the one in the embedded
        # springboardplatform iframe URL.
        if page_type == 'news':
            video_id = self._search_regex(
                r'''(?x)<iframe[^>]+src="http://cms\.springboardplatform\.com/
                        embed_iframe/[0-9]+/video/([0-9]+)/''',
                webpage, 'real video ID')

        info = {'title': self._og_search_title(webpage)}
        info['description'] = self._html_search_regex(
            r'addthis:description="([^"]+)"',
            webpage, 'description', fatal=False)
        info['id'] = video_id
        info['url'] = 'http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/conversion/%s.mp4' % video_id
        info['thumbnail'] = 'http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/snapshots/%s.jpg' % video_id
        return info
|
@ -1,12 +1,12 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_ord,
|
||||
compat_str,
|
||||
)
|
||||
@ -142,11 +142,11 @@ class RTL2YouIE(RTL2YouBaseIE):
|
||||
stream_data = self._download_json(
|
||||
self._BACKWERK_BASE_URL + 'stream/video/' + video_id, video_id)
|
||||
|
||||
data, iv = base64.b64decode(stream_data['streamUrl']).decode().split(':')
|
||||
data, iv = compat_b64decode(stream_data['streamUrl']).decode().split(':')
|
||||
stream_url = intlist_to_bytes(aes_cbc_decrypt(
|
||||
bytes_to_intlist(base64.b64decode(data)),
|
||||
bytes_to_intlist(compat_b64decode(data)),
|
||||
bytes_to_intlist(self._AES_KEY),
|
||||
bytes_to_intlist(base64.b64decode(iv))
|
||||
bytes_to_intlist(compat_b64decode(iv))
|
||||
))
|
||||
if b'rtl2_you_video_not_found' in stream_url:
|
||||
raise ExtractorError('video not found', expected=True)
|
||||
|
@ -7,6 +7,7 @@ import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_struct_unpack,
|
||||
)
|
||||
from ..utils import (
|
||||
@ -21,7 +22,7 @@ from ..utils import (
|
||||
|
||||
|
||||
def _decrypt_url(png):
|
||||
encrypted_data = base64.b64decode(png.encode('utf-8'))
|
||||
encrypted_data = compat_b64decode(png)
|
||||
text_index = encrypted_data.find(b'tEXt')
|
||||
text_chunk = encrypted_data[text_index - 4:]
|
||||
length = compat_struct_unpack('!I', text_chunk[:4])[0]
|
||||
@ -31,6 +32,9 @@ def _decrypt_url(png):
|
||||
hash_index = data.index('#')
|
||||
alphabet_data = data[:hash_index]
|
||||
url_data = data[hash_index + 1:]
|
||||
if url_data[0] == 'H' and url_data[3] == '%':
|
||||
# remove useless HQ%% at the start
|
||||
url_data = url_data[4:]
|
||||
|
||||
alphabet = []
|
||||
e = 0
|
||||
|
47
youtube_dl/extractor/rtvs.py
Normal file
47
youtube_dl/extractor/rtvs.py
Normal file
@ -0,0 +1,47 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class RTVSIE(InfoExtractor):
    """Extractor for rtvs.sk radio and television archive pages."""

    _VALID_URL = r'https?://(?:www\.)?rtvs\.sk/(?:radio|televizia)/archiv/\d+/(?P<id>\d+)'
    _TESTS = [{
        # radio archive
        'url': 'http://www.rtvs.sk/radio/archiv/11224/414872',
        'md5': '134d5d6debdeddf8a5d761cbc9edacb8',
        'info_dict': {
            'id': '414872',
            'ext': 'mp3',
            'title': 'Ostrov pokladov 1 časť.mp3'
        },
        'params': {
            'skip_download': True,
        }
    }, {
        # tv archive
        'url': 'http://www.rtvs.sk/televizia/archiv/8249/63118',
        'md5': '85e2c55cf988403b70cac24f5c086dc6',
        'info_dict': {
            'id': '63118',
            'ext': 'mp4',
            'title': 'Amaro Džives - Náš deň',
            'description': 'Galavečer pri príležitosti Medzinárodného dňa Rómov.'
        },
        'params': {
            'skip_download': True,
        }
    }]

    def _real_extract(self, url):
        """Locate the playlist URL in the page and parse its first entry."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # The page embeds the playlist location as a quoted `playlist: '...'`
        # value; the back-reference keeps the closing quote consistent.
        playlist_url = self._search_regex(
            r'playlist["\']?\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', page,
            'playlist url', group='url')

        playlist = self._download_json(
            playlist_url, video_id, 'Downloading playlist')
        # Only the first playlist entry is used.
        first_entry = playlist[0]
        return self._parse_jwplayer_data(first_entry, video_id=video_id)
|
@ -1,65 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
)
|
||||
|
||||
|
||||
class SandiaIE(InfoExtractor):
    """Extractor for Mediasite presentations on digitalops.sandia.gov."""

    IE_DESC = 'Sandia National Laboratories'
    _VALID_URL = r'https?://digitalops\.sandia\.gov/Mediasite/Play/(?P<id>[0-9a-f]+)'
    _TEST = {
        'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d',
        'md5': '9422edc9b9a60151727e4b6d8bef393d',
        'info_dict': {
            'id': '24aace4429fc450fb5b38cdbf424a66e1d',
            'ext': 'mp4',
            'title': 'Xyce Software Training - Section 1',
            'description': 're:(?s)SAND Number: SAND 2013-7800.{200,}',
            'upload_date': '20120409',
            'timestamp': 1333983600,
            'duration': 7794,
        }
    }

    def _real_extract(self, url):
        """Query the player service for a presentation and build its formats.

        Returns an info dict with id, title, description, formats, timestamp
        and duration. Raises KeyError if the response lacks the expected
        'd' / 'Presentation' / 'Title' keys.
        """
        video_id = self._match_id(url)

        # The POST body must be bytes: Python 3's urllib raises TypeError for
        # a text payload, so the serialized JSON is encoded explicitly.
        request_body = json.dumps({
            'getPlayerOptionsRequest': {
                'ResourceId': video_id,
                'QueryString': '',
            }
        }).encode('utf-8')

        presentation_data = self._download_json(
            'http://digitalops.sandia.gov/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions',
            video_id, data=request_body, headers={
                'Content-Type': 'application/json; charset=utf-8',
            })['d']['Presentation']

        title = presentation_data['Title']

        formats = []
        for stream in presentation_data.get('Streams', []):
            for fd in stream.get('VideoUrls', []):
                formats.append({
                    'format_id': fd['MediaType'],
                    # Sub-type part of the MIME type, e.g. 'mp4' for 'video/mp4'.
                    'format_note': fd['MimeType'].partition('/')[2],
                    'ext': mimetype2ext(fd['MimeType']),
                    'url': fd['Location'],
                    'protocol': 'f4m' if fd['MimeType'] == 'video/x-mp4-fragmented' else None,
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': presentation_data.get('Description'),
            'formats': formats,
            # Both values are divided by 1000 (second argument of int_or_none).
            'timestamp': int_or_none(presentation_data.get('UnixTime'), 1000),
            'duration': int_or_none(presentation_data.get('Duration'), 1000),
        }
|
67
youtube_dl/extractor/sevenplus.py
Normal file
67
youtube_dl/extractor/sevenplus.py
Normal file
@ -0,0 +1,67 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from ..utils import update_url_query
|
||||
|
||||
|
||||
class SevenPlusIE(BrightcoveNewIE):
    """Extractor for 7plus.com.au episodes identified by `episode-id`."""

    IE_NAME = '7plus'
    _VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P<path>[^?]+\?.*?\bepisode-id=(?P<id>[^&#]+))'
    _TESTS = [{
        'url': 'https://7plus.com.au/BEAT?episode-id=BEAT-001',
        'info_dict': {
            'id': 'BEAT-001',
            'ext': 'mp4',
            'title': 'S1 E1 - Help / Lucy In The Sky With Diamonds',
            'description': 'md5:37718bea20a8eedaca7f7361af566131',
            'uploader_id': '5303576322001',
            'upload_date': '20171031',
            'timestamp': 1509440068,
        },
        'params': {
            'format': 'bestvideo',
            'skip_download': True,
        }
    }, {
        'url': 'https://7plus.com.au/UUUU?episode-id=AUMS43-001',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch playback data, then overlay title/description if available."""
        path, episode_id = re.match(self._VALID_URL, url).groups()

        playback_query = {
            'appId': '7plus',
            'deviceType': 'web',
            'platformType': 'web',
            'accountId': 5303576322001,
            'referenceId': 'ref:' + episode_id,
            'deliveryId': 'csai',
            'videoType': 'vod',
        }
        media = self._download_json(
            'https://videoservice.swm.digital/playback', episode_id,
            query=playback_query)['media']

        # Rewrite every source URL in place so it carries an empty `rule`
        # query parameter before the metadata is parsed.
        for source in media.get('sources', {}):
            src = source.get('src')
            if src:
                source['src'] = update_url_query(src, {'rule': ''})

        info = self._parse_brightcove_metadata(media, episode_id)

        # Optional request: a failure here leaves the parsed metadata as is.
        content = self._download_json(
            'https://component-cdn.swm.digital/content/' + path,
            episode_id, headers={
                'market-id': 4,
            }, fatal=False) or {}

        field_map = (('title', 'title'), ('shortSynopsis', 'description'))
        for item in content.get('items', {}):
            component = item.get('componentData', {})
            if component.get('componentType') != 'infoPanel':
                continue
            # Non-empty infoPanel fields override the parsed metadata.
            for src_key, dst_key in field_map:
                value = item.get(src_key)
                if value:
                    info[dst_key] = value

        return info
|
@ -1,8 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@ -22,8 +21,8 @@ class SharedBaseIE(InfoExtractor):
|
||||
|
||||
video_url = self._extract_video_url(webpage, video_id, url)
|
||||
|
||||
title = base64.b64decode(self._html_search_meta(
|
||||
'full:title', webpage, 'title').encode('utf-8')).decode('utf-8')
|
||||
title = compat_b64decode(self._html_search_meta(
|
||||
'full:title', webpage, 'title')).decode('utf-8')
|
||||
filesize = int_or_none(self._html_search_meta(
|
||||
'full:size', webpage, 'file size', fatal=False))
|
||||
|
||||
@ -92,5 +91,4 @@ class VivoIE(SharedBaseIE):
|
||||
r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'stream', group='url'),
|
||||
video_id,
|
||||
transform_source=lambda x: base64.b64decode(
|
||||
x.encode('ascii')).decode('utf-8'))[0]
|
||||
transform_source=lambda x: compat_b64decode(x).decode('utf-8'))[0]
|
||||
|
@ -1,11 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class SlutloadIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
|
||||
'md5': '868309628ba00fd488cf516a113fd717',
|
||||
'info_dict': {
|
||||
@ -15,11 +17,17 @@ class SlutloadIE(InfoExtractor):
|
||||
'age_limit': 18,
|
||||
'thumbnail': r're:https?://.*?\.jpg'
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# mobile site
|
||||
'url': 'http://mobile.slutload.com/video/masturbation-solo/fviFLmc6kzJ/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
desktop_url = re.sub(r'^(https?://)mobile\.', r'\1', url)
|
||||
webpage = self._download_webpage(desktop_url, video_id)
|
||||
|
||||
video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>',
|
||||
webpage, 'title').strip()
|
||||
|
@ -136,9 +136,28 @@ class SoundcloudIE(InfoExtractor):
|
||||
'license': 'all-rights-reserved',
|
||||
},
|
||||
},
|
||||
# no album art, use avatar pic for thumbnail
|
||||
{
|
||||
'url': 'https://soundcloud.com/garyvee/sideways-prod-mad-real',
|
||||
'md5': '59c7872bc44e5d99b7211891664760c2',
|
||||
'info_dict': {
|
||||
'id': '309699954',
|
||||
'ext': 'mp3',
|
||||
'title': 'Sideways (Prod. Mad Real)',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'uploader': 'garyvee',
|
||||
'upload_date': '20170226',
|
||||
'duration': 207,
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'license': 'all-rights-reserved',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
_CLIENT_ID = 'c6CU49JDMapyrQo06UxU9xouB9ZVzqCn'
|
||||
_CLIENT_ID = 'DQskPX1pntALRzMp4HSxya3Mc0AO66Ro'
|
||||
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
|
||||
|
||||
@staticmethod
|
||||
@ -160,7 +179,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
name = full_title or track_id
|
||||
if quiet:
|
||||
self.report_extraction(name)
|
||||
thumbnail = info.get('artwork_url')
|
||||
thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url')
|
||||
if isinstance(thumbnail, compat_str):
|
||||
thumbnail = thumbnail.replace('-large', '-t500x500')
|
||||
ext = 'mp3'
|
||||
|
@ -6,7 +6,7 @@ from .mtv import MTVServicesInfoExtractor
|
||||
|
||||
class SouthParkIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'southpark.cc.com'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|(?:full-)?episodes)/(?P<id>.+?)(\?|#|$))'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
|
||||
|
||||
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
|
||||
|
||||
@ -20,6 +20,9 @@ class SouthParkIE(MTVServicesInfoExtractor):
|
||||
'timestamp': 1112760000,
|
||||
'upload_date': '20050406',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://southpark.cc.com/collections/7758/fan-favorites/1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
@ -41,7 +44,7 @@ class SouthParkEsIE(SouthParkIE):
|
||||
|
||||
class SouthParkDeIE(SouthParkIE):
|
||||
IE_NAME = 'southpark.de'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:clips|alle-episoden|collections)/(?P<id>.+?)(\?|#|$))'
|
||||
_FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
|
||||
|
||||
_TESTS = [{
|
||||
@ -70,12 +73,15 @@ class SouthParkDeIE(SouthParkIE):
|
||||
'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'http://www.southpark.de/collections/2476/superhero-showdown/1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class SouthParkNlIE(SouthParkIE):
|
||||
IE_NAME = 'southpark.nl'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|(?:full-)?episodes)/(?P<id>.+?)(\?|#|$))'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
|
||||
_FEED_URL = 'http://www.southpark.nl/feeds/video-player/mrss/'
|
||||
|
||||
_TESTS = [{
|
||||
@ -90,7 +96,7 @@ class SouthParkNlIE(SouthParkIE):
|
||||
|
||||
class SouthParkDkIE(SouthParkIE):
|
||||
IE_NAME = 'southparkstudios.dk'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southparkstudios\.dk/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southparkstudios\.(?:dk|nu)/(?:clips|full-episodes|collections)/(?P<id>.+?)(\?|#|$))'
|
||||
_FEED_URL = 'http://www.southparkstudios.dk/feeds/video-player/mrss/'
|
||||
|
||||
_TESTS = [{
|
||||
@ -100,4 +106,10 @@ class SouthParkDkIE(SouthParkIE):
|
||||
'description': 'Butters is convinced he\'s living in a virtual reality.',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
'url': 'http://www.southparkstudios.dk/collections/2476/superhero-showdown/1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.southparkstudios.nu/collections/2476/superhero-showdown/1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
@ -4,7 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .nexx import NexxEmbedIE
|
||||
from .nexx import (
|
||||
NexxIE,
|
||||
NexxEmbedIE,
|
||||
)
|
||||
from .spiegeltv import SpiegeltvIE
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
@ -51,6 +54,10 @@ class SpiegelIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# nexx video
|
||||
'url': 'http://www.spiegel.de/video/spiegel-tv-magazin-ueber-guellekrise-in-schleswig-holstein-video-99012776.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -61,6 +68,14 @@ class SpiegelIE(InfoExtractor):
|
||||
if SpiegeltvIE.suitable(handle.geturl()):
|
||||
return self.url_result(handle.geturl(), 'Spiegeltv')
|
||||
|
||||
nexx_id = self._search_regex(
|
||||
r'nexxOmniaId\s*:\s*(\d+)', webpage, 'nexx id', default=None)
|
||||
if nexx_id:
|
||||
domain_id = NexxIE._extract_domain_id(webpage) or '748'
|
||||
return self.url_result(
|
||||
'nexx:%s:%s' % (domain_id, nexx_id), ie=NexxIE.ie_key(),
|
||||
video_id=nexx_id)
|
||||
|
||||
video_data = extract_attributes(self._search_regex(r'(<div[^>]+id="spVideoElements"[^>]+>)', webpage, 'video element', default=''))
|
||||
|
||||
title = video_data.get('data-video-title') or get_element_by_attribute('class', 'module-title', webpage)
|
||||
|
@ -1,38 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .wdr import WDRBaseIE
|
||||
from ..utils import get_element_by_attribute
|
||||
|
||||
|
||||
class SportschauIE(WDRBaseIE):
    """Extractor for sportschau.de video pages."""

    IE_NAME = 'Sportschau'
    _VALID_URL = r'https?://(?:www\.)?sportschau\.de/(?:[^/]+/)+video-?(?P<id>[^/#?]+)\.html'
    _TEST = {
        'url': 'http://www.sportschau.de/uefaeuro2016/videos/video-dfb-team-geht-gut-gelaunt-ins-spiel-gegen-polen-100.html',
        'info_dict': {
            'id': 'mdb-1140188',
            'display_id': 'dfb-team-geht-gut-gelaunt-ins-spiel-gegen-polen-100',
            'ext': 'mp4',
            'title': 'DFB-Team geht gut gelaunt ins Spiel gegen Polen',
            'description': 'Vor dem zweiten Gruppenspiel gegen Polen herrscht gute Stimmung im deutschen Team. Insbesondere Bastian Schweinsteiger strotzt vor Optimismus nach seinem Tor gegen die Ukraine.',
            'upload_date': '20160615',
        },
        'skip': 'Geo-restricted to Germany',
    }

    def _real_extract(self, url):
        """Extract the video via the base class, then set title/description."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # Page-level metadata is read first and overrides whatever the
        # base-class extraction returns for these two fields.
        headline = get_element_by_attribute('class', 'headline', page)
        summary = self._html_search_meta('description', page, 'description')

        info = self._extract_wdr_video(page, video_id)
        info['title'] = headline
        info['description'] = summary
        return info
|
125
youtube_dl/extractor/springboardplatform.py
Normal file
125
youtube_dl/extractor/springboardplatform.py
Normal file
@ -0,0 +1,125 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
xpath_attr,
|
||||
xpath_text,
|
||||
xpath_element,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class SpringboardPlatformIE(InfoExtractor):
    """Extractor for videos served from cms.springboardplatform.com."""

    _VALID_URL = r'''(?x)
                    https?://
                        cms\.springboardplatform\.com/
                        (?:
                            (?:previews|embed_iframe)/(?P<index>\d+)/video/(?P<id>\d+)|
                            xml_feeds_advanced/index/(?P<index_2>\d+)/rss3/(?P<id_2>\d+)
                        )
                    '''
    _TESTS = [{
        'url': 'http://cms.springboardplatform.com/previews/159/video/981017/0/0/1',
        'md5': '5c3cb7b5c55740d482561099e920f192',
        'info_dict': {
            'id': '981017',
            'ext': 'mp4',
            'title': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX',
            'description': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX',
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1409132328,
            'upload_date': '20140827',
            'duration': 193,
        },
    }, {
        'url': 'http://cms.springboardplatform.com/embed_iframe/159/video/981017/rab007/rapbasement.com/1/1',
        'only_matching': True,
    }, {
        'url': 'http://cms.springboardplatform.com/embed_iframe/20/video/1731611/ki055/kidzworld.com/10',
        'only_matching': True,
    }, {
        'url': 'http://cms.springboardplatform.com/xml_feeds_advanced/index/159/rss3/981017/0/0/1/',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the src URLs of all Springboard embed iframes in webpage."""
        iframe_re = r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cms\.springboardplatform\.com/embed_iframe/\d+/video/\d+.*?)\1'
        return [m.group('url') for m in re.finditer(iframe_re, webpage)]

    def _real_extract(self, url):
        """Read the video's RSS feed item and build HTTP and HLS formats.

        Raises ExtractorError (expected) when the feed points at the
        placeholder 'error_video.mp4'.
        """
        mobj = re.match(self._VALID_URL, url)
        # Either URL flavour matched; take whichever named groups are set.
        video_id = mobj.group('id') or mobj.group('id_2')
        index = mobj.group('index') or mobj.group('index_2')

        feed = self._download_xml(
            'http://cms.springboardplatform.com/xml_feeds_advanced/index/%s/rss3/%s'
            % (index, video_id), video_id)

        item = xpath_element(feed, './/item', 'item', fatal=True)
        content = xpath_element(
            item, './{http://search.yahoo.com/mrss/}content', 'content',
            fatal=True)
        title = unescapeHTML(xpath_text(item, './title', 'title', fatal=True))

        video_url = content.attrib['url']
        if 'error_video.mp4' in video_url:
            raise ExtractorError(
                'Video %s no longer exists' % video_id, expected=True)

        http_format = {
            'url': video_url,
            'format_id': 'http',
            'tbr': int_or_none(content.get('bitrate')),
            'filesize': int_or_none(content.get('fileSize')),
            'width': int_or_none(content.get('width')),
            'height': int_or_none(content.get('height')),
        }

        # The HLS variant reuses the HTTP format's attributes; its URL is
        # the video URL with a leading 'cdn.' host label swapped for 'hls.'
        # and '.m3u8' appended.
        hls_format = dict(http_format)
        hls_format.update({
            'url': re.sub(r'(https?://)cdn\.', r'\1hls.', video_url) + '.m3u8',
            'ext': 'mp4',
            'format_id': 'hls',
            'protocol': 'm3u8_native',
        })

        formats = [http_format, hls_format]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': unescapeHTML(xpath_text(
                item, './description', 'description')),
            'thumbnail': xpath_attr(
                item, './{http://search.yahoo.com/mrss/}thumbnail', 'url',
                'thumbnail'),
            'timestamp': unified_timestamp(xpath_text(
                item, './{http://cms.springboardplatform.com/namespaces.html}created',
                'timestamp')),
            'duration': int_or_none(content.get('duration')),
            'formats': formats,
        }
|
@ -58,7 +58,7 @@ class TBSIE(TurnerBaseIE):
|
||||
continue
|
||||
if stream_data.get('playlistProtection') == 'spe':
|
||||
m3u8_url = self._add_akamai_spe_token(
|
||||
'http://www.%s.com/service/token_spe' % site,
|
||||
'http://token.vgtf.net/token/token_spe',
|
||||
m3u8_url, media_id, {
|
||||
'url': url,
|
||||
'site_name': site[:3].upper(),
|
||||
|
@ -1,18 +1,20 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import binascii
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_ord,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
qualities,
|
||||
determine_ext,
|
||||
)
|
||||
from ..compat import compat_ord
|
||||
|
||||
|
||||
class TeamcocoIE(InfoExtractor):
|
||||
@ -97,7 +99,7 @@ class TeamcocoIE(InfoExtractor):
|
||||
for i in range(len(cur_fragments)):
|
||||
cur_sequence = (''.join(cur_fragments[i:] + cur_fragments[:i])).encode('ascii')
|
||||
try:
|
||||
raw_data = base64.b64decode(cur_sequence)
|
||||
raw_data = compat_b64decode(cur_sequence)
|
||||
if compat_ord(raw_data[0]) == compat_ord('{'):
|
||||
return json.loads(raw_data.decode('utf-8'))
|
||||
except (TypeError, binascii.Error, UnicodeDecodeError, ValueError):
|
||||
|
@ -1,106 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class TheSixtyOneIE(InfoExtractor):
    """Extractor for songs on thesixtyone.com."""

    _VALID_URL = r'''(?x)https?://(?:www\.)?thesixtyone\.com/
        (?:.*?/)*
        (?:
            s|
            song/comments/list|
            song
        )/(?:[^/]+/)?(?P<id>[A-Za-z0-9]+)/?$'''
    _SONG_URL_TEMPLATE = 'http://thesixtyone.com/s/{0:}'
    _SONG_FILE_URL_TEMPLATE = 'http://{audio_server:}/thesixtyone_production/audio/{0:}_stream'
    _THUMBNAIL_URL_TEMPLATE = '{photo_base_url:}_desktop'
    _TESTS = [
        {
            'url': 'http://www.thesixtyone.com/s/SrE3zD7s1jt/',
            'md5': '821cc43b0530d3222e3e2b70bb4622ea',
            'info_dict': {
                'id': 'SrE3zD7s1jt',
                'ext': 'mp3',
                'title': 'CASIO - Unicorn War Mixtape',
                'thumbnail': 're:^https?://.*_desktop$',
                'upload_date': '20071217',
                'duration': 3208,
            }
        },
        {
            'url': 'http://www.thesixtyone.com/song/comments/list/SrE3zD7s1jt',
            'only_matching': True,
        },
        {
            'url': 'http://www.thesixtyone.com/s/ULoiyjuJWli#/s/SrE3zD7s1jt/',
            'only_matching': True,
        },
        {
            'url': 'http://www.thesixtyone.com/#/s/SrE3zD7s1jt/',
            'only_matching': True,
        },
        {
            'url': 'http://www.thesixtyone.com/song/SrE3zD7s1jt/',
            'only_matching': True,
        },
        {
            'url': 'http://www.thesixtyone.com/maryatmidnight/song/StrawberriesandCream/yvWtLp0c4GQ/',
            'only_matching': True,
        },
    ]

    # Character substitution applied to the song key; characters not listed
    # here are kept unchanged.
    _DECODE_MAP = {
        'x': 'a',
        'm': 'b',
        'w': 'c',
        'q': 'd',
        'n': 'e',
        'p': 'f',
        'a': '0',
        'h': '1',
        'e': '2',
        'u': '3',
        's': '4',
        'i': '5',
        'o': '6',
        'y': '7',
        'r': '8',
        'c': '9'
    }

    def _real_extract(self, url):
        """Parse the song's JSON record from its page and build the audio URL."""
        song_id = self._match_id(url)

        page = self._download_webpage(
            self._SONG_URL_TEMPLATE.format(song_id), song_id)

        song_data = self._parse_json(self._search_regex(
            r'"%s":\s(\{.*?\})' % song_id, page, 'song_data'), song_id)

        # A marker in the page selects the audio host: S3 when present,
        # otherwise the song's own server under thesixtyone.com.
        s3_marker = self._search_regex(
            r'(t61\.s3_audio_load\s*=\s*1\.0;)', page,
            's3_audio_load marker', default=None)
        if s3_marker:
            song_data['audio_server'] = 's3.amazonaws.com'
        else:
            song_data['audio_server'] += '.thesixtyone.com'

        # The file name is the song key, substituted through _DECODE_MAP
        # and reversed.
        decoded_key = ''.join(
            self._DECODE_MAP.get(ch, ch) for ch in reversed(song_data['key']))
        audio_url = self._SONG_FILE_URL_TEMPLATE.format(
            decoded_key, **song_data)

        return {
            'id': song_id,
            'title': '{artist:} - {name:}'.format(**song_data),
            'formats': [{
                'format_id': 'sd',
                'url': audio_url,
                'ext': 'mp3',
            }],
            'comment_count': song_data.get('comments_count'),
            'duration': song_data.get('play_time'),
            'like_count': song_data.get('score'),
            'thumbnail': self._THUMBNAIL_URL_TEMPLATE.format(**song_data),
            'upload_date': unified_strdate(song_data.get('publish_date')),
        }
|
@ -1,9 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_parse_qs
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_parse_qs,
|
||||
)
|
||||
|
||||
|
||||
class TutvIE(InfoExtractor):
|
||||
@ -26,7 +27,7 @@ class TutvIE(InfoExtractor):
|
||||
|
||||
data_content = self._download_webpage(
|
||||
'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, video_id, 'Downloading video info')
|
||||
video_url = base64.b64decode(compat_parse_qs(data_content)['kpt'][0].encode('utf-8')).decode('utf-8')
|
||||
video_url = compat_b64decode(compat_parse_qs(data_content)['kpt'][0]).decode('utf-8')
|
||||
|
||||
return {
|
||||
'id': internal_id,
|
||||
|
@ -273,6 +273,8 @@ class TVPlayIE(InfoExtractor):
|
||||
'ext': ext,
|
||||
}
|
||||
if video_url.startswith('rtmp'):
|
||||
if smuggled_data.get('skip_rtmp'):
|
||||
continue
|
||||
m = re.search(
|
||||
r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
|
||||
if not m:
|
||||
@ -434,6 +436,10 @@ class ViafreeIE(InfoExtractor):
|
||||
return self.url_result(
|
||||
smuggle_url(
|
||||
'mtg:%s' % video_id,
|
||||
{'geo_countries': [
|
||||
compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]]}),
|
||||
{
|
||||
'geo_countries': [
|
||||
compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]],
|
||||
# rtmp host mtgfs.fplive.net for viafree is unresolvable
|
||||
'skip_rtmp': True,
|
||||
}),
|
||||
ie=TVPlayIE.ie_key(), video_id=video_id)
|
||||
|
77
youtube_dl/extractor/twentythreevideo.py
Normal file
77
youtube_dl/extractor/twentythreevideo.py
Normal file
@ -0,0 +1,77 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class TwentyThreeVideoIE(InfoExtractor):
    """Extractor for 23video player pages (twentythree.net and related hosts)."""

    IE_NAME = '23video'
    _VALID_URL = r'https?://video\.(?P<domain>twentythree\.net|23video\.com|filmweb\.no)/v\.ihtml/player\.html\?(?P<query>.*?\bphoto(?:_|%5f)id=(?P<id>\d+).*)'
    _TEST = {
        'url': 'https://video.twentythree.net/v.ihtml/player.html?showDescriptions=0&source=site&photo%5fid=20448876&autoPlay=1',
        'md5': '75fcf216303eb1dae9920d651f85ced4',
        'info_dict': {
            'id': '20448876',
            'ext': 'mp4',
            'title': 'Video Marketing Minute: Personalized Video',
            'timestamp': 1513855354,
            'upload_date': '20171221',
            'uploader_id': '12258964',
            'uploader': 'Rasmus Bysted',
        }
    }

    def _real_extract(self, url):
        """Query the photo API and collect formats and thumbnails."""
        domain, query, photo_id = re.match(self._VALID_URL, url).groups()
        base_url = 'https://video.%s' % domain

        def extract_json_object(s):
            # Keep only the outermost {...} span of the response before
            # it is parsed as JSON.
            return self._search_regex(r'(?s)({.+})', s, 'photo data')

        photo_data = self._download_json(
            base_url + '/api/photo/list?' + query, photo_id, query={
                'format': 'json',
            }, transform_source=extract_json_object)['photo']
        title = photo_data['title']

        formats = []

        audio_path = photo_data.get('audio_download')
        if audio_path:
            formats.append({
                'format_id': 'audio',
                'url': base_url + audio_path,
                'filesize': int_or_none(photo_data.get('audio_size')),
                'vcodec': 'none',
            })

        def append_entry(target, template, id_field, id_value):
            # Each variant is described by '<prefix>download', '<prefix>width',
            # '<prefix>height' and '<prefix>size' keys; variants without a
            # download path are skipped.
            prefix = template % id_value
            path = photo_data.get(prefix + 'download')
            if path:
                target.append({
                    id_field: id_value,
                    'url': base_url + path,
                    'width': int_or_none(photo_data.get(prefix + 'width')),
                    'height': int_or_none(photo_data.get(prefix + 'height')),
                    'filesize': int_or_none(photo_data.get(prefix + 'size')),
                })

        for format_id in ('mobile_high', 'medium', 'hd', '1080p', '4k'):
            append_entry(formats, 'video_%s_', 'format_id', format_id)

        thumbnails = []
        for thumbnail_id in ('quad16', 'quad50', 'quad75', 'quad100', 'small', 'portrait', 'standard', 'medium', 'large', 'original'):
            append_entry(thumbnails, '%s_', 'id', thumbnail_id)

        return {
            'id': photo_id,
            'title': title,
            'timestamp': int_or_none(photo_data.get('creation_date_epoch')),
            'duration': int_or_none(photo_data.get('video_length')),
            'view_count': int_or_none(photo_data.get('view_count')),
            'comment_count': int_or_none(photo_data.get('number_of_comments')),
            'uploader_id': photo_data.get('user_id'),
            'uploader': photo_data.get('display_name'),
            'thumbnails': thumbnails,
            'formats': formats,
        }
|
@ -85,10 +85,15 @@ class TwitchBaseIE(InfoExtractor):
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||
response = self._parse_json(
|
||||
e.cause.read().decode('utf-8'), None)
|
||||
fail(response['message'])
|
||||
fail(response.get('message') or response['errors'][0])
|
||||
raise
|
||||
|
||||
redirect_url = urljoin(post_url, response['redirect'])
|
||||
if 'Authenticated successfully' in response.get('message', ''):
|
||||
return None, None
|
||||
|
||||
redirect_url = urljoin(
|
||||
post_url,
|
||||
response.get('redirect') or response['redirect_path'])
|
||||
return self._download_webpage_handle(
|
||||
redirect_url, None, 'Downloading login redirect page',
|
||||
headers=headers)
|
||||
@ -106,6 +111,10 @@ class TwitchBaseIE(InfoExtractor):
|
||||
'password': password,
|
||||
})
|
||||
|
||||
# Successful login
|
||||
if not redirect_page:
|
||||
return
|
||||
|
||||
if re.search(r'(?i)<form[^>]+id="two-factor-submit"', redirect_page) is not None:
|
||||
# TODO: Add mechanism to request an SMS or phone call
|
||||
tfa_token = self._get_tfa_info('two-factor authentication token')
|
||||
@ -358,9 +367,16 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
|
||||
break
|
||||
offset += limit
|
||||
return self.playlist_result(
|
||||
[self.url_result(entry) for entry in orderedSet(entries)],
|
||||
[self._make_url_result(entry) for entry in orderedSet(entries)],
|
||||
channel_id, channel_name)
|
||||
|
||||
def _make_url_result(self, url):
|
||||
try:
|
||||
video_id = 'v%s' % TwitchVodIE._match_id(url)
|
||||
return self.url_result(url, TwitchVodIE.ie_key(), video_id=video_id)
|
||||
except AssertionError:
|
||||
return self.url_result(url)
|
||||
|
||||
def _extract_playlist_page(self, response):
|
||||
videos = response.get('videos')
|
||||
return [video['url'] for video in videos] if videos else []
|
||||
|
55
youtube_dl/extractor/ufctv.py
Normal file
55
youtube_dl/extractor/ufctv.py
Normal file
@ -0,0 +1,55 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class UFCTVIE(InfoExtractor):
    # Extractor for single video pages on ufc.tv.
    _VALID_URL = r'https?://(?:www\.)?ufc\.tv/video/(?P<id>[^/]+)'
    _TEST = {
        'url': 'https://www.ufc.tv/video/ufc-219-countdown-full-episode',
        'info_dict': {
            'id': '34167',
            'ext': 'mp4',
            'title': 'UFC 219 Countdown: Full Episode',
            'description': 'md5:26d4e8bf4665ae5878842d7050c3c646',
            'timestamp': 1513962360,
            'upload_date': '20171222',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        # The URL slug is only used as a label until the numeric id is known.
        slug = self._match_id(url)

        # The video page itself is requested with ?format=json to obtain
        # the metadata document.
        metadata = self._download_json(
            url, slug, query={'format': 'json'})
        video_id = str(metadata['id'])
        title = metadata['name']

        # The publish point is queried with an iPhone User-Agent; the
        # response carries the playlist location under "path".
        publish_point = self._download_json(
            'https://www.ufc.tv/service/publishpoint', video_id,
            query={
                'type': 'video',
                'format': 'json',
                'id': video_id,
            },
            headers={
                'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1',
            })
        # Strip the "_iphone" marker from the playlist name before fetching it.
        playlist_url = publish_point['path'].replace('_iphone.', '.')

        formats = self._extract_m3u8_formats(playlist_url, video_id, 'mp4')
        self._sort_formats(formats)

        description = metadata.get('description')
        duration = parse_duration(metadata.get('runtime'))
        timestamp = parse_iso8601(metadata.get('releaseDate'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
        }
|
103
youtube_dl/extractor/umg.py
Normal file
103
youtube_dl/extractor/umg.py
Normal file
@ -0,0 +1,103 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_filesize,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class UMGDeIE(InfoExtractor):
    # Extractor for video pages on universal-music.de; metadata comes from
    # the site's GraphQL endpoint.
    IE_NAME = 'umg:de'
    IE_DESC = 'Universal Music Deutschland'
    _VALID_URL = r'https?://(?:www\.)?universal-music\.de/[^/]+/videos/[^/?#]+-(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.universal-music.de/sido/videos/jedes-wort-ist-gold-wert-457803',
        'md5': 'ebd90f48c80dcc82f77251eb1902634f',
        'info_dict': {
            'id': '457803',
            'ext': 'mp4',
            'title': 'Jedes Wort ist Gold wert',
            'timestamp': 1513591800,
            'upload_date': '20171218',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for one video.

        Formats listed by the API are split into thumbnails (type "Image")
        and video formats (type "Video"); for MP4 entries an HLS playlist
        derived from the format id is probed as well.  If nothing usable
        was listed, a few fixed format ids are probed for HLS instead.
        """
        video_id = self._match_id(url)
        video_data = self._download_json(
            'https://api.universal-music.de/graphql',
            video_id, query={
                'query': '''{
universalMusic(channel:16) {
video(id:%s) {
headline
formats {
formatId
url
type
width
height
mimeType
fileSize
}
duration
createdDate
}
}
}''' % video_id})['data']['universalMusic']['video']

        title = video_data['headline']
        # The storage path spells the video id one character per directory
        # level; the remaining %s placeholder takes the format id.
        hls_url_template = 'http://mediadelivery.universal-music-services.de/vod/mp4:autofill/storage/' + '/'.join(video_id) + '/content/%s/file/playlist.m3u8'

        thumbnails = []
        formats = []

        def add_m3u8_format(format_id):
            """Probe the HLS playlist for format_id and keep its formats."""
            # The playlist is optional, so failures must not abort the
            # extraction.  fatal has to be the boolean False: the string
            # 'False' is truthy and would make every failed probe fatal.
            m3u8_formats = self._extract_m3u8_formats(
                hls_url_template % format_id, video_id, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False)
            # Only playlists whose first entry reports a height are kept.
            if m3u8_formats and m3u8_formats[0].get('height'):
                formats.extend(m3u8_formats)

        # "formats" may be absent or null in the response; treat both as empty.
        for f in video_data.get('formats') or []:
            f_url = f.get('url')
            mime_type = f.get('mimeType')
            if not f_url or mime_type == 'application/mxf':
                continue
            fmt = {
                'url': f_url,
                'width': int_or_none(f.get('width')),
                'height': int_or_none(f.get('height')),
                'filesize': parse_filesize(f.get('fileSize')),
            }
            f_type = f.get('type')
            if f_type == 'Image':
                thumbnails.append(fmt)
            elif f_type == 'Video':
                format_id = f.get('formatId')
                if format_id:
                    fmt['format_id'] = format_id
                    if mime_type == 'video/mp4':
                        add_m3u8_format(format_id)
                # Peek at the first byte of the file and drop the format
                # unless it is b'F' or b'\x00' (presumably the leading byte
                # of the expected container formats - TODO confirm).  When
                # the request itself fails the format is kept unverified.
                urlh = self._request_webpage(f_url, video_id, fatal=False)
                if urlh:
                    first_byte = urlh.read(1)
                    if first_byte not in (b'F', b'\x00'):
                        continue
                formats.append(fmt)
        if not formats:
            # Nothing usable was listed: fall back to probing fixed format ids.
            for format_id in (867, 836, 940):
                add_m3u8_format(format_id)
        self._sort_formats(formats, ('width', 'height', 'filesize', 'tbr'))

        return {
            'id': video_id,
            'title': title,
            'duration': int_or_none(video_data.get('duration')),
            'timestamp': parse_iso8601(video_data.get('createdDate'), ' '),
            'thumbnails': thumbnails,
            'formats': formats,
        }
|
@ -468,11 +468,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
request = sanitized_Request(url, headers=headers)
|
||||
try:
|
||||
webpage, urlh = self._download_webpage_handle(request, video_id)
|
||||
redirect_url = compat_str(urlh.geturl())
|
||||
# Some URLs redirect to ondemand can't be extracted with
|
||||
# this extractor right away thus should be passed through
|
||||
# ondemand extractor (e.g. https://vimeo.com/73445910)
|
||||
if VimeoOndemandIE.suitable(urlh.geturl()):
|
||||
return self.url_result(urlh.geturl(), VimeoOndemandIE.ie_key())
|
||||
if VimeoOndemandIE.suitable(redirect_url):
|
||||
return self.url_result(redirect_url, VimeoOndemandIE.ie_key())
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
|
||||
errmsg = ee.cause.read()
|
||||
@ -541,15 +542,15 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
|
||||
if '_video_password_verified' in data:
|
||||
raise ExtractorError('video password verification failed!')
|
||||
self._verify_video_password(url, video_id, webpage)
|
||||
self._verify_video_password(redirect_url, video_id, webpage)
|
||||
return self._real_extract(
|
||||
smuggle_url(url, {'_video_password_verified': 'verified'}))
|
||||
smuggle_url(redirect_url, {'_video_password_verified': 'verified'}))
|
||||
else:
|
||||
raise ExtractorError('Unable to extract info section',
|
||||
cause=e)
|
||||
else:
|
||||
if config.get('view') == 4:
|
||||
config = self._verify_player_video_password(url, video_id)
|
||||
config = self._verify_player_video_password(redirect_url, video_id)
|
||||
|
||||
def is_rented():
|
||||
if '>You rented this title.<' in webpage:
|
||||
|
@ -318,9 +318,14 @@ class VKIE(VKBaseIE):
|
||||
'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
|
||||
expected=True)
|
||||
|
||||
ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.'
|
||||
|
||||
ERRORS = {
|
||||
r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
|
||||
'Video %s has been removed from public access due to rightholder complaint.',
|
||||
ERROR_COPYRIGHT,
|
||||
|
||||
r'>The video .*? was removed from public access by request of the copyright holder.<':
|
||||
ERROR_COPYRIGHT,
|
||||
|
||||
r'<!>Please log in or <':
|
||||
'Video %s is only available for registered users, '
|
||||
|
@ -16,7 +16,7 @@ class VootIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/1/360558/is-this-the-end-of-kamini-/441353',
|
||||
'info_dict': {
|
||||
'id': '441353',
|
||||
'id': '0_8ledb18o',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ishq Ka Rang Safed - Season 01 - Episode 340',
|
||||
'description': 'md5:06291fbbbc4dcbe21235c40c262507c1',
|
||||
@ -59,9 +59,10 @@ class VootIE(InfoExtractor):
|
||||
|
||||
media = media_info['assets']
|
||||
|
||||
entry_id = media['EntryId']
|
||||
title = media['MediaName']
|
||||
formats = self._extract_m3u8_formats(
|
||||
'https://cdnapisec.kaltura.com/p/1982551/playManifest/pt/https/f/applehttp/t/web/e/' + media['EntryId'],
|
||||
'https://cdnapisec.kaltura.com/p/1982551/playManifest/pt/https/f/applehttp/t/web/e/' + entry_id,
|
||||
video_id, 'mp4', m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
@ -83,7 +84,8 @@ class VootIE(InfoExtractor):
|
||||
episode_number = int_or_none(value)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'extractor_key': 'Kaltura',
|
||||
'id': entry_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'series': series,
|
||||
|
@ -4,49 +4,50 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
strip_jsonp,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
update_url_query,
|
||||
urlhandle_detect_ext,
|
||||
)
|
||||
|
||||
|
||||
class WDRBaseIE(InfoExtractor):
|
||||
def _extract_wdr_video(self, webpage, display_id):
|
||||
# for wdr.de the data-extension is in a tag with the class "mediaLink"
|
||||
# for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn"
|
||||
# for wdrmaus, in a tag with the class "videoButton" (previously a link
|
||||
# to the page in a multiline "videoLink"-tag)
|
||||
json_metadata = self._html_search_regex(
|
||||
r'''(?sx)class=
|
||||
(?:
|
||||
(["\'])(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b.*?\1[^>]+|
|
||||
(["\'])videoLink\b.*?\2[\s]*>\n[^\n]*
|
||||
)data-extension=(["\'])(?P<data>(?:(?!\3).)+)\3
|
||||
''',
|
||||
webpage, 'media link', default=None, group='data')
|
||||
class WDRIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://deviceids-medp\.wdr\.de/ondemand/\d+/(?P<id>\d+)\.js'
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
_TEST = {
|
||||
'url': 'http://deviceids-medp.wdr.de/ondemand/155/1557833.js',
|
||||
'info_dict': {
|
||||
'id': 'mdb-1557833',
|
||||
'ext': 'mp4',
|
||||
'title': 'Biathlon-Staffel verpasst Podest bei Olympia-Generalprobe',
|
||||
'upload_date': '20180112',
|
||||
},
|
||||
}
|
||||
|
||||
if not json_metadata:
|
||||
return
|
||||
|
||||
media_link_obj = self._parse_json(json_metadata, display_id,
|
||||
transform_source=js_to_json)
|
||||
jsonp_url = media_link_obj['mediaObj']['url']
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
metadata = self._download_json(
|
||||
jsonp_url, display_id, transform_source=strip_jsonp)
|
||||
url, video_id, transform_source=strip_jsonp)
|
||||
|
||||
metadata_tracker_data = metadata['trackerData']
|
||||
metadata_media_resource = metadata['mediaResource']
|
||||
is_live = metadata.get('mediaType') == 'live'
|
||||
|
||||
tracker_data = metadata['trackerData']
|
||||
media_resource = metadata['mediaResource']
|
||||
|
||||
formats = []
|
||||
|
||||
# check if the metadata contains a direct URL to a file
|
||||
for kind, media_resource in metadata_media_resource.items():
|
||||
for kind, media_resource in media_resource.items():
|
||||
if kind not in ('dflt', 'alt'):
|
||||
continue
|
||||
|
||||
@ -57,13 +58,13 @@ class WDRBaseIE(InfoExtractor):
|
||||
ext = determine_ext(medium_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
medium_url, display_id, 'mp4', 'm3u8_native',
|
||||
medium_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls'))
|
||||
elif ext == 'f4m':
|
||||
manifest_url = update_url_query(
|
||||
medium_url, {'hdcore': '3.2.0', 'plugin': 'aasp-3.2.0.77.18'})
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
manifest_url, display_id, f4m_id='hds', fatal=False))
|
||||
manifest_url, video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'smil':
|
||||
formats.extend(self._extract_smil_formats(
|
||||
medium_url, 'stream', fatal=False))
|
||||
@ -73,7 +74,7 @@ class WDRBaseIE(InfoExtractor):
|
||||
}
|
||||
if ext == 'unknown_video':
|
||||
urlh = self._request_webpage(
|
||||
medium_url, display_id, note='Determining extension')
|
||||
medium_url, video_id, note='Determining extension')
|
||||
ext = urlhandle_detect_ext(urlh)
|
||||
a_format['ext'] = ext
|
||||
formats.append(a_format)
|
||||
@ -81,30 +82,30 @@ class WDRBaseIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
caption_url = metadata_media_resource.get('captionURL')
|
||||
caption_url = media_resource.get('captionURL')
|
||||
if caption_url:
|
||||
subtitles['de'] = [{
|
||||
'url': caption_url,
|
||||
'ext': 'ttml',
|
||||
}]
|
||||
|
||||
title = metadata_tracker_data['trackerClipTitle']
|
||||
title = tracker_data['trackerClipTitle']
|
||||
|
||||
return {
|
||||
'id': metadata_tracker_data.get('trackerClipId', display_id),
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'alt_title': metadata_tracker_data.get('trackerClipSubcategory'),
|
||||
'id': tracker_data.get('trackerClipId', video_id),
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'alt_title': tracker_data.get('trackerClipSubcategory'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'upload_date': unified_strdate(metadata_tracker_data.get('trackerClipAirTime')),
|
||||
'upload_date': unified_strdate(tracker_data.get('trackerClipAirTime')),
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
|
||||
class WDRIE(WDRBaseIE):
|
||||
class WDRPageIE(InfoExtractor):
|
||||
_CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5'
|
||||
_PAGE_REGEX = r'/(?:mediathek/)?[^/]+/(?P<type>[^/]+)/(?P<display_id>.+)\.html'
|
||||
_VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
|
||||
_PAGE_REGEX = r'/(?:mediathek/)?(?:[^/]+/)*(?P<display_id>[^/]+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\d?\.)?(?:wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@ -124,6 +125,7 @@ class WDRIE(WDRBaseIE):
|
||||
'ext': 'ttml',
|
||||
}]},
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
},
|
||||
{
|
||||
'url': 'http://www1.wdr.de/mediathek/audio/wdr3/wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100.html',
|
||||
@ -139,19 +141,17 @@ class WDRIE(WDRBaseIE):
|
||||
'is_live': False,
|
||||
'subtitles': {}
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
},
|
||||
{
|
||||
'url': 'http://www1.wdr.de/mediathek/video/live/index.html',
|
||||
'info_dict': {
|
||||
'id': 'mdb-103364',
|
||||
'id': 'mdb-1406149',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'index',
|
||||
'title': r're:^WDR Fernsehen im Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'title': r're:^WDR Fernsehen im Livestream \(nur in Deutschland erreichbar\) [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'alt_title': 'WDR Fernsehen Live',
|
||||
'upload_date': None,
|
||||
'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
|
||||
'upload_date': '20150101',
|
||||
'is_live': True,
|
||||
'subtitles': {}
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
@ -159,19 +159,18 @@ class WDRIE(WDRBaseIE):
|
||||
},
|
||||
{
|
||||
'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html',
|
||||
'playlist_mincount': 8,
|
||||
'playlist_mincount': 7,
|
||||
'info_dict': {
|
||||
'id': 'aktuelle-stunde/aktuelle-stunde-120',
|
||||
'id': 'aktuelle-stunde-120',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
|
||||
'info_dict': {
|
||||
'id': 'mdb-1323501',
|
||||
'id': 'mdb-1552552',
|
||||
'ext': 'mp4',
|
||||
'upload_date': 're:^[0-9]{8}$',
|
||||
'title': 're:^Die Sendung mit der Maus vom [0-9.]{10}$',
|
||||
'description': 'Die Seite mit der Maus -',
|
||||
},
|
||||
'skip': 'The id changes from week to week because of the new episode'
|
||||
},
|
||||
@ -183,7 +182,6 @@ class WDRIE(WDRBaseIE):
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20130919',
|
||||
'title': 'Sachgeschichte - Achterbahn ',
|
||||
'description': 'Die Seite mit der Maus -',
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -191,52 +189,114 @@ class WDRIE(WDRBaseIE):
|
||||
# Live stream, MD5 unstable
|
||||
'info_dict': {
|
||||
'id': 'mdb-869971',
|
||||
'ext': 'flv',
|
||||
'title': 'COSMO Livestream',
|
||||
'description': 'md5:2309992a6716c347891c045be50992e4',
|
||||
'ext': 'mp4',
|
||||
'title': r're:^COSMO Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'upload_date': '20160101',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.sportschau.de/handballem2018/handball-nationalmannschaft-em-stolperstein-vorrunde-100.html',
|
||||
'info_dict': {
|
||||
'id': 'mdb-1556012',
|
||||
'ext': 'mp4',
|
||||
'title': 'DHB-Vizepräsident Bob Hanning - "Die Weltspitze ist extrem breit"',
|
||||
'upload_date': '20180111',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.sportschau.de/handballem2018/audio-vorschau---die-handball-em-startet-mit-grossem-favoritenfeld-100.html',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
url_type = mobj.group('type')
|
||||
page_url = mobj.group('page_url')
|
||||
display_id = mobj.group('display_id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
info_dict = self._extract_wdr_video(webpage, display_id)
|
||||
entries = []
|
||||
|
||||
if not info_dict:
|
||||
# Article with several videos
|
||||
|
||||
# for wdr.de the data-extension is in a tag with the class "mediaLink"
|
||||
# for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn"
|
||||
# for wdrmaus, in a tag with the class "videoButton" (previously a link
|
||||
# to the page in a multiline "videoLink"-tag)
|
||||
for mobj in re.finditer(
|
||||
r'''(?sx)class=
|
||||
(?:
|
||||
(["\'])(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b.*?\1[^>]+|
|
||||
(["\'])videoLink\b.*?\2[\s]*>\n[^\n]*
|
||||
)data-extension=(["\'])(?P<data>(?:(?!\3).)+)\3
|
||||
''', webpage):
|
||||
media_link_obj = self._parse_json(
|
||||
mobj.group('data'), display_id, transform_source=js_to_json,
|
||||
fatal=False)
|
||||
if not media_link_obj:
|
||||
continue
|
||||
jsonp_url = try_get(
|
||||
media_link_obj, lambda x: x['mediaObj']['url'], compat_str)
|
||||
if jsonp_url:
|
||||
entries.append(self.url_result(jsonp_url, ie=WDRIE.ie_key()))
|
||||
|
||||
# Playlist (e.g. https://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html)
|
||||
if not entries:
|
||||
entries = [
|
||||
self.url_result(page_url + href[0], 'WDR')
|
||||
for href in re.findall(
|
||||
r'<a href="(%s)"[^>]+data-extension=' % self._PAGE_REGEX,
|
||||
webpage)
|
||||
self.url_result(
|
||||
compat_urlparse.urljoin(url, mobj.group('href')),
|
||||
ie=WDRPageIE.ie_key())
|
||||
for mobj in re.finditer(
|
||||
r'<a[^>]+\bhref=(["\'])(?P<href>(?:(?!\1).)+)\1[^>]+\bdata-extension=',
|
||||
webpage) if re.match(self._PAGE_REGEX, mobj.group('href'))
|
||||
]
|
||||
|
||||
if entries: # Playlist page
|
||||
return self.playlist_result(entries, playlist_id=display_id)
|
||||
return self.playlist_result(entries, playlist_id=display_id)
|
||||
|
||||
raise ExtractorError('No downloadable streams found', expected=True)
|
||||
|
||||
is_live = url_type == 'live'
|
||||
class WDRElefantIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)wdrmaus\.de/elefantenseite/#(?P<id>.+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.wdrmaus.de/elefantenseite/#folge_ostern_2015',
|
||||
'info_dict': {
|
||||
'title': 'Folge Oster-Spezial 2015',
|
||||
'id': 'mdb-1088195',
|
||||
'ext': 'mp4',
|
||||
'age_limit': None,
|
||||
'upload_date': '20150406'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
if is_live:
|
||||
info_dict.update({
|
||||
'title': self._live_title(info_dict['title']),
|
||||
'upload_date': None,
|
||||
})
|
||||
elif 'upload_date' not in info_dict:
|
||||
info_dict['upload_date'] = unified_strdate(self._html_search_meta('DC.Date', webpage, 'upload date'))
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
info_dict.update({
|
||||
'description': self._html_search_meta('Description', webpage),
|
||||
'is_live': is_live,
|
||||
})
|
||||
|
||||
return info_dict
|
||||
# Table of Contents seems to always be at this address, so fetch it directly.
|
||||
# The website fetches configurationJS.php5, which links to tableOfContentsJS.php5.
|
||||
table_of_contents = self._download_json(
|
||||
'https://www.wdrmaus.de/elefantenseite/data/tableOfContentsJS.php5',
|
||||
display_id)
|
||||
if display_id not in table_of_contents:
|
||||
raise ExtractorError(
|
||||
'No entry in site\'s table of contents for this URL. '
|
||||
'Is the fragment part of the URL (after the #) correct?',
|
||||
expected=True)
|
||||
xml_metadata_path = table_of_contents[display_id]['xmlPath']
|
||||
xml_metadata = self._download_xml(
|
||||
'https://www.wdrmaus.de/elefantenseite/' + xml_metadata_path,
|
||||
display_id)
|
||||
zmdb_url_element = xml_metadata.find('./movie/zmdb_url')
|
||||
if zmdb_url_element is None:
|
||||
raise ExtractorError(
|
||||
'%s is not a video' % display_id, expected=True)
|
||||
return self.url_result(zmdb_url_element.text, ie=WDRIE.ie_key())
|
||||
|
||||
|
||||
class WDRMobileIE(InfoExtractor):
|
||||
|
140
youtube_dl/extractor/weibo.py
Normal file
140
youtube_dl/extractor/weibo.py
Normal file
@ -0,0 +1,140 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
strip_jsonp,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class WeiboIE(InfoExtractor):
    # Extractor for video posts on the desktop weibo.com site.
    _VALID_URL = r'https?://weibo\.com/[0-9]+/(?P<id>[a-zA-Z0-9]+)'
    _TEST = {
        'url': 'https://weibo.com/6275294458/Fp6RGfbff?type=comment',
        'info_dict': {
            'id': 'Fp6RGfbff',
            'ext': 'mp4',
            'title': 'You should have servants to massage you,... 来自Hosico_猫 - 微博',
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # Keep the response handle: its final URL tells whether the request
        # ended up on passport.weibo.com and is reused as the Referer below.
        webpage, handle = self._download_webpage_handle(url, video_id)
        landing_url = handle.geturl()

        if 'passport.weibo.com' in landing_url:
            # First visit: generate visitor data, run the callback request
            # with it, then download the post page again.
            fingerprint = json.dumps({
                'os': '2',
                'browser': 'Gecko57,0,0,0',
                'fonts': 'undefined',
                'screenInfo': '1440*900*24',
                'plugins': '',
            })
            visitor = self._download_json(
                'https://passport.weibo.com/visitor/genvisitor', video_id,
                note='Generating first-visit data',
                transform_source=strip_jsonp,
                headers={'Referer': landing_url},
                data=urlencode_postdata({
                    'cb': 'gen_callback',
                    'fp': fingerprint,
                }))['data']

            tid = visitor['tid']
            # The confidence value is sent zero-padded to three digits.
            confidence = '%03d' % visitor['confidence']

            callback_query = {
                'a': 'incarnate',
                't': tid,
                'w': 2,
                'c': confidence,
                'cb': 'cross_domain',
                'from': 'weibo',
                '_rand': random.random(),
            }
            self._download_webpage(
                'https://passport.weibo.com/visitor/visitor', video_id,
                note='Running first-visit callback',
                query=callback_query)

            webpage = self._download_webpage(
                url, video_id, note='Revisiting webpage')

        title = self._html_search_regex(
            r'<title>(.+?)</title>', webpage, 'title')

        # The video-sources attribute holds a query string keyed by height.
        raw_sources = self._search_regex(
            r'video-sources=\\\"(.+?)\"', webpage, 'video_sources')
        sources = compat_parse_qs(raw_sources)

        formats = []
        for height in (480, 720):
            candidates = sources.get(compat_str(height))
            # Only a non-empty list of URLs is usable; take its first entry.
            if isinstance(candidates, list) and candidates:
                formats.append({
                    'url': candidates[0],
                    'height': height,
                })

        self._sort_formats(formats)

        uploader = self._og_search_property(
            'nick-name', webpage, 'uploader', default=None)

        return {
            'id': video_id,
            'title': title,
            'uploader': uploader,
            'formats': formats
        }
|
||||
|
||||
|
||||
class WeiboMobileIE(InfoExtractor):
    # Extractor for status pages on the mobile site m.weibo.cn.
    _VALID_URL = r'https?://m\.weibo\.cn/status/(?P<id>[0-9]+)(\?.+)?'
    _TEST = {
        'url': 'https://m.weibo.cn/status/4189191225395228?wm=3333_2001&sourcetype=weixin&featurecode=newtitle&from=singlemessage&isappinstalled=0',
        'info_dict': {
            'id': '4189191225395228',
            'ext': 'mp4',
            'title': '午睡当然是要甜甜蜜蜜的啦',
            'uploader': '柴犬柴犬'
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id, note='visit the page')

        # The status data is embedded in the page as the JavaScript
        # assignment "var $render_data = [{...}][0] || {};".
        render_data_js = self._search_regex(
            r'var\s+\$render_data\s*=\s*\[({.*})\]\[0\]\s*\|\|\s*{};',
            page, 'js_code', flags=re.DOTALL)
        render_data = self._parse_json(
            render_data_js, video_id, transform_source=js_to_json)

        status = render_data.get('status', {})
        media_page = status.get('page_info')
        title = status['status_title']
        uploader = status.get('user', {}).get('screen_name')

        # The stream URL is read last, after the title lookup.
        stream_url = media_page['media_info']['stream_url']

        return {
            'id': video_id,
            'title': title,
            'uploader': uploader,
            'url': stream_url
        }
|
233
youtube_dl/extractor/ximalaya.py
Normal file
233
youtube_dl/extractor/ximalaya.py
Normal file
@ -0,0 +1,233 @@
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class XimalayaBaseIE(InfoExtractor):
    # Common base class for the Ximalaya extractors; it only sets the
    # country list (presumably consumed by InfoExtractor's geo handling -
    # confirm against the base class).
    _GEO_COUNTRIES = ['CN']
|
||||
|
||||
|
||||
class XimalayaIE(XimalayaBaseIE):
    # Extractor for single sound (track) pages on ximalaya.com, both the
    # desktop (www) and the mobile (m) site.
    IE_NAME = 'ximalaya'
    IE_DESC = '喜马拉雅FM'
    _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?P<uid>[0-9]+)/sound/(?P<id>[0-9]+)'
    _USER_URL_FORMAT = '%s://www.ximalaya.com/zhubo/%i/'
    _TESTS = [
        {
            'url': 'http://www.ximalaya.com/61425525/sound/47740352/',
            'info_dict': {
                'id': '47740352',
                'ext': 'm4a',
                'uploader': '小彬彬爱听书',
                'uploader_id': 61425525,
                'uploader_url': 'http://www.ximalaya.com/zhubo/61425525/',
                'title': '261.唐诗三百首.卷八.送孟浩然之广陵.李白',
                'description': "contains:《送孟浩然之广陵》\n作者:李白\n故人西辞黄鹤楼,烟花三月下扬州。\n孤帆远影碧空尽,惟见长江天际流。",
                'thumbnails': [
                    {
                        'name': 'cover_url',
                        'url': r're:^https?://.*\.jpg$',
                    },
                    {
                        'name': 'cover_url_142',
                        'url': r're:^https?://.*\.jpg$',
                        'width': 180,
                        'height': 180
                    }
                ],
                'categories': ['renwen', '人文'],
                'duration': 93,
                'view_count': int,
                'like_count': int,
            }
        },
        {
            'url': 'http://m.ximalaya.com/61425525/sound/47740352/',
            'info_dict': {
                'id': '47740352',
                'ext': 'm4a',
                'uploader': '小彬彬爱听书',
                'uploader_id': 61425525,
                'uploader_url': 'http://www.ximalaya.com/zhubo/61425525/',
                'title': '261.唐诗三百首.卷八.送孟浩然之广陵.李白',
                'description': "contains:《送孟浩然之广陵》\n作者:李白\n故人西辞黄鹤楼,烟花三月下扬州。\n孤帆远影碧空尽,惟见长江天际流。",
                'thumbnails': [
                    {
                        'name': 'cover_url',
                        'url': r're:^https?://.*\.jpg$',
                    },
                    {
                        'name': 'cover_url_142',
                        'url': r're:^https?://.*\.jpg$',
                        'width': 180,
                        'height': 180
                    }
                ],
                'categories': ['renwen', '人文'],
                'duration': 93,
                'view_count': int,
                'like_count': int,
            }
        },
        {
            'url': 'https://www.ximalaya.com/11045267/sound/15705996/',
            'info_dict': {
                'id': '15705996',
                'ext': 'm4a',
                'uploader': '李延隆老师',
                'uploader_id': 11045267,
                'uploader_url': 'https://www.ximalaya.com/zhubo/11045267/',
                'title': 'Lesson 1 Excuse me!',
                'description': "contains:Listen to the tape then answer\xa0this question. Whose handbag is it?\n"
                "听录音,然后回答问题,这是谁的手袋?",
                'thumbnails': [
                    {
                        'name': 'cover_url',
                        'url': r're:^https?://.*\.jpg$',
                    },
                    {
                        'name': 'cover_url_142',
                        'url': r're:^https?://.*\.jpg$',
                        'width': 180,
                        'height': 180
                    }
                ],
                'categories': ['train', '外语'],
                'duration': 40,
                'view_count': int,
                'like_count': int,
            }
        },
    ]

    def _real_extract(self, url):
        # The mobile and desktop pages use different description markup;
        # follow-up requests reuse the scheme of the input URL.
        on_mobile_site = 'm.ximalaya' in url
        if url.startswith('https'):
            scheme = 'https'
        else:
            scheme = 'http'

        audio_id = self._match_id(url)
        webpage = self._download_webpage(
            url, audio_id,
            note='Download sound page for %s' % audio_id,
            errnote='Unable to get sound page')

        # Track metadata is fetched from the mobile host's JSON endpoint.
        info_url = '%s://m.ximalaya.com/tracks/%s.json' % (scheme, audio_id)
        audio_info = self._download_json(
            info_url, audio_id,
            'Downloading info json %s' % info_url,
            'Unable to download info file')

        # One format per play-path field that is present and non-empty.
        formats = [
            {
                'format_id': label,
                'url': audio_info[field],
            }
            for label, field in (('24k', 'play_path_32'), ('64k', 'play_path_64'))
            if audio_info.get(field)
        ]

        # Every key starting with "cover_url" (e.g. cover_url,
        # cover_url_142) becomes a thumbnail; cover_url_142 is additionally
        # tagged with 180x180 dimensions.
        thumbnails = []
        for key, value in audio_info.items():
            if not key.startswith('cover_url'):
                continue
            cover = {'name': key, 'url': value}
            if key == 'cover_url_142':
                cover['width'] = 180
                cover['height'] = 180
            thumbnails.append(cover)

        uploader_id = audio_info.get('uid')

        if on_mobile_site:
            description_re = r'(?s)<section\s+class=["\']content[^>]+>(.+?)</section>'
        else:
            description_re = r'(?s)<div\s+class=["\']rich_intro[^>]*>(.+?</article>)'
        description = self._html_search_regex(
            description_re, webpage, 'audio_description', fatal=False)

        if not description:
            # Nothing found in the page: try the rich_intro endpoint instead.
            intro_url = '%s://www.ximalaya.com/sounds/%s/rich_intro' % (scheme, audio_id)
            description = self._download_webpage(
                intro_url, audio_id,
                note='Downloading description file %s' % intro_url,
                errnote='Unable to download descrip file',
                fatal=False)
            if description:
                description = description.strip()
            else:
                description = None

        if uploader_id:
            uploader_url = self._USER_URL_FORMAT % (scheme, uploader_id)
        else:
            uploader_url = None

        category_candidates = (
            audio_info.get('category_name'), audio_info.get('category_title'))

        return {
            'id': audio_id,
            'uploader': audio_info.get('nickname'),
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
            'title': audio_info['title'],
            'thumbnails': thumbnails,
            'description': description,
            'categories': [name for name in category_candidates if name],
            'duration': audio_info.get('duration'),
            'view_count': audio_info.get('play_count'),
            'like_count': audio_info.get('favorites_count'),
            'formats': formats,
        }
|
||||
|
||||
|
||||
class XimalayaAlbumIE(XimalayaBaseIE):
    """Playlist extractor for ximalaya.com album pages.

    Every sound linked from an album listing page is handed over to
    XimalayaIE as a url_result; additional listing pages are reached by
    following the rel="next" link found in each page.
    """
    IE_NAME = 'ximalaya:album'
    IE_DESC = '喜马拉雅FM 专辑'
    _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?P<uid>[0-9]+)/album/(?P<id>[0-9]+)'
    # Filled with (scheme, uid, album id)
    _TEMPLATE_URL = '%s://www.ximalaya.com/%s/album/%s/'
    # Filled with (scheme, site-relative path)
    _BASE_URL_TEMPL = '%s://www.ximalaya.com%s'
    # The single %s is filled with the uploader id before matching
    _LIST_VIDEO_RE = r'<a[^>]+?href="(?P<url>/%s/sound/(?P<id>\d+)/?)"[^>]+?title="(?P<title>[^>]+)">'
    _TESTS = [{
        'url': 'http://www.ximalaya.com/61425525/album/5534601/',
        'info_dict': {
            'title': '唐诗三百首(含赏析)',
            'id': '5534601',
        },
        'playlist_count': 312,
    }, {
        'url': 'http://m.ximalaya.com/61425525/album/5534601',
        'info_dict': {
            'title': '唐诗三百首(含赏析)',
            'id': '5534601',
        },
        'playlist_count': 312,
    },
    ]

    def _real_extract(self, url):
        """Download the album page for `url` and return a playlist result."""
        # The scheme is also kept on the instance because _entries() and
        # _process_page() read it back when building absolute URLs.
        self.scheme = scheme = 'https' if url.startswith('https') else 'http'

        mobj = re.match(self._VALID_URL, url)
        uid, playlist_id = mobj.group('uid'), mobj.group('id')

        webpage = self._download_webpage(
            self._TEMPLATE_URL % (scheme, uid, playlist_id), playlist_id,
            note='Download album page for %s' % playlist_id,
            errnote='Unable to get album info')

        # The title lookup is requested as non-fatal (fatal=False), i.e. it
        # is treated as optional metadata.
        title = self._html_search_regex(
            r'detailContent_title[^>]*><h1(?:[^>]+)?>([^<]+)</h1>',
            webpage, 'title', fatal=False)

        return self.playlist_result(
            self._entries(webpage, playlist_id, uid), playlist_id, title)

    def _entries(self, page, playlist_id, uid):
        """Yield the entries of `page` and of every following listing page.

        `page` is the already downloaded first listing page; subsequent
        pages are downloaded on demand while the generator is consumed.
        """
        html = page
        # page_num is the 1-based number of the listing page held in `html`
        for page_num in itertools.count(1):
            for entry in self._process_page(html, uid):
                yield entry

            next_url = self._search_regex(
                r'<a\s+href=(["\'])(?P<more>[\S]+)\1[^>]+rel=(["\'])next\3',
                html, 'list_next_url', default=None, group='more')
            if not next_url:
                # No rel="next" link: this was the last listing page
                break

            next_full_url = self._BASE_URL_TEMPL % (self.scheme, next_url)
            html = self._download_webpage(
                next_full_url, playlist_id,
                note='Downloading album page %d' % (page_num + 1))

    def _process_page(self, html, uid):
        """Yield a url_result for each sound of user `uid` linked in `html`."""
        # Only links located after the 'album_soundlist' marker are taken
        # into account.  str.find() is used instead of str.index() so that a
        # page without the marker is scanned from its beginning rather than
        # aborting the whole extraction with a bare ValueError.
        find_from = max(html.find('album_soundlist'), 0)
        for mobj in re.finditer(self._LIST_VIDEO_RE % uid, html[find_from:]):
            yield self.url_result(
                self._BASE_URL_TEMPL % (self.scheme, mobj.group('url')),
                XimalayaIE.ie_key(),
                mobj.group('id'),
                mobj.group('title'))
|
@ -154,7 +154,7 @@ class YoukuIE(InfoExtractor):
|
||||
# request basic data
|
||||
basic_data_params = {
|
||||
'vid': video_id,
|
||||
'ccode': '0501',
|
||||
'ccode': '0507',
|
||||
'client_ip': '192.168.1.1',
|
||||
'utid': cna,
|
||||
'client_ts': time.time() / 1000,
|
||||
@ -241,13 +241,23 @@ class YoukuShowIE(InfoExtractor):
|
||||
# Ongoing playlist. The initial page is the last one
|
||||
'url': 'http://list.youku.com/show/id_za7c275ecd7b411e1a19e.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# No data-id value.
|
||||
'url': 'http://list.youku.com/show/id_zefbfbd61237fefbfbdef.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Wrong number of reload_id.
|
||||
'url': 'http://list.youku.com/show/id_z20eb4acaf5c211e3b2ad.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_entries(self, playlist_data_url, show_id, note, query):
|
||||
query['callback'] = 'cb'
|
||||
playlist_data = self._download_json(
|
||||
playlist_data_url, show_id, query=query, note=note,
|
||||
transform_source=lambda s: js_to_json(strip_jsonp(s)))['html']
|
||||
transform_source=lambda s: js_to_json(strip_jsonp(s))).get('html')
|
||||
if playlist_data is None:
|
||||
return [None, None]
|
||||
drama_list = (get_element_by_class('p-drama-grid', playlist_data) or
|
||||
get_element_by_class('p-drama-half-row', playlist_data))
|
||||
if drama_list is None:
|
||||
@ -276,9 +286,9 @@ class YoukuShowIE(InfoExtractor):
|
||||
r'<div[^>]+id="(reload_\d+)', first_page, 'first page reload id')
|
||||
# The first reload_id has the same items as first_page
|
||||
reload_ids = re.findall('<li[^>]+data-id="([^"]+)">', first_page)
|
||||
entries.extend(initial_entries)
|
||||
for idx, reload_id in enumerate(reload_ids):
|
||||
if reload_id == first_page_reload_id:
|
||||
entries.extend(initial_entries)
|
||||
continue
|
||||
_, new_entries = self._extract_entries(
|
||||
'http://list.youku.com/show/episode', show_id,
|
||||
@ -287,8 +297,8 @@ class YoukuShowIE(InfoExtractor):
|
||||
'id': page_config['showid'],
|
||||
'stage': reload_id,
|
||||
})
|
||||
entries.extend(new_entries)
|
||||
|
||||
if new_entries is not None:
|
||||
entries.extend(new_entries)
|
||||
desc = self._html_search_meta('description', webpage, fatal=False)
|
||||
playlist_title = desc.split(',')[0] if desc else None
|
||||
detail_li = get_element_by_class('p-intro', webpage)
|
||||
|
@ -1810,7 +1810,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'url': video_info['conn'][0],
|
||||
'player_url': player_url,
|
||||
}]
|
||||
elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
|
||||
elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
|
||||
encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
|
||||
if 'rtmpe%3Dyes' in encoded_url_map:
|
||||
raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
|
||||
@ -2530,10 +2530,11 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor):
|
||||
webpage = self._download_webpage(url, channel_id, fatal=False)
|
||||
if webpage:
|
||||
page_type = self._og_search_property(
|
||||
'type', webpage, 'page type', default=None)
|
||||
'type', webpage, 'page type', default='')
|
||||
video_id = self._html_search_meta(
|
||||
'videoId', webpage, 'video id', default=None)
|
||||
if page_type == 'video' and video_id and re.match(r'^[0-9A-Za-z_-]{11}$', video_id):
|
||||
if page_type.startswith('video') and video_id and re.match(
|
||||
r'^[0-9A-Za-z_-]{11}$', video_id):
|
||||
return self.url_result(video_id, YoutubeIE.ie_key())
|
||||
return self.url_result(base_url)
|
||||
|
||||
|
@ -39,6 +39,7 @@ from .compat import (
|
||||
compat_HTMLParser,
|
||||
compat_basestring,
|
||||
compat_chr,
|
||||
compat_ctypes_WINFUNCTYPE,
|
||||
compat_etree_fromstring,
|
||||
compat_expanduser,
|
||||
compat_html_entities,
|
||||
@ -1330,24 +1331,24 @@ def _windows_write_string(s, out):
|
||||
if fileno not in WIN_OUTPUT_IDS:
|
||||
return False
|
||||
|
||||
GetStdHandle = ctypes.WINFUNCTYPE(
|
||||
GetStdHandle = compat_ctypes_WINFUNCTYPE(
|
||||
ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
|
||||
(b'GetStdHandle', ctypes.windll.kernel32))
|
||||
('GetStdHandle', ctypes.windll.kernel32))
|
||||
h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
|
||||
|
||||
WriteConsoleW = ctypes.WINFUNCTYPE(
|
||||
WriteConsoleW = compat_ctypes_WINFUNCTYPE(
|
||||
ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
|
||||
ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
|
||||
ctypes.wintypes.LPVOID)((b'WriteConsoleW', ctypes.windll.kernel32))
|
||||
ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
|
||||
written = ctypes.wintypes.DWORD(0)
|
||||
|
||||
GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b'GetFileType', ctypes.windll.kernel32))
|
||||
GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
|
||||
FILE_TYPE_CHAR = 0x0002
|
||||
FILE_TYPE_REMOTE = 0x8000
|
||||
GetConsoleMode = ctypes.WINFUNCTYPE(
|
||||
GetConsoleMode = compat_ctypes_WINFUNCTYPE(
|
||||
ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
|
||||
ctypes.POINTER(ctypes.wintypes.DWORD))(
|
||||
(b'GetConsoleMode', ctypes.windll.kernel32))
|
||||
('GetConsoleMode', ctypes.windll.kernel32))
|
||||
INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
|
||||
|
||||
def not_a_console(handle):
|
||||
@ -2266,7 +2267,7 @@ def js_to_json(code):
|
||||
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
|
||||
'(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
|
||||
{comment}|,(?={skip}[\]}}])|
|
||||
[a-zA-Z_][.a-zA-Z_0-9]*|
|
||||
(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
|
||||
\b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
|
||||
[0-9]+(?={skip}:)
|
||||
'''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
|
||||
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2017.12.14'
|
||||
__version__ = '2018.01.21'
|
||||
|
Loading…
Reference in New Issue
Block a user