mirror of
https://github.com/l1ving/youtube-dl
synced 2025-02-09 19:12:51 +08:00
Merge remote-tracking branch 'rg3/master'
This commit is contained in:
commit
ab5454db1c
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.10**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.04.17*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.04.17**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2017.03.10
|
||||
[debug] youtube-dl version 2017.04.17
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
3
AUTHORS
3
AUTHORS
@ -209,3 +209,6 @@ Olivier Bilodeau
|
||||
Lars Vierbergen
|
||||
Juanjo Benages
|
||||
Xiao Di Guan
|
||||
Thomas Winant
|
||||
Daniel Twardowski
|
||||
Jeremie Jarosh
|
||||
|
204
ChangeLog
204
ChangeLog
@ -1,8 +1,210 @@
|
||||
version <unreleased>
|
||||
version 2017.04.17
|
||||
|
||||
Extractors
|
||||
* [limelight] Improve extraction of LimelightEmbeddedPlayerFlash media embeds and
|
||||
add support for channel and channelList embeds
|
||||
* [generic] Extract multiple Limelight embeds (#12761)
|
||||
+ [itv] Extract series metadata
|
||||
* [itv] Fix RTMP formats downloading (#12759)
|
||||
* [itv] Use native HLS downloader by default
|
||||
+ [go90] Extract subtitles (#12752)
|
||||
+ [go90] Extract series metadata (#12752)
|
||||
|
||||
|
||||
version 2017.04.16
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Apply expand_path after output template substitution
|
||||
+ [YoutubeDL] Propagate overridden meta fields to extraction results of type
|
||||
url (#11163)
|
||||
|
||||
Extractors
|
||||
+ [generic] Extract RSS entries as url_transparent (#11163)
|
||||
+ [streamango] Add support for streamango.com (#12643)
|
||||
+ [wsj:article] Add support for articles (#12558)
|
||||
* [brightcove] Relax video tag embeds extraction and validate ambiguous embeds'
|
||||
URLs (#9163, #12005, #12178, #12480)
|
||||
+ [udemy] Add support for react rendition (#12744)
|
||||
|
||||
|
||||
version 2017.04.15
|
||||
|
||||
Extractors
|
||||
* [youku] Fix fileid extraction (#12741, #12743)
|
||||
|
||||
|
||||
version 2017.04.14
|
||||
|
||||
Core
|
||||
+ [downloader/hls] Add basic support for EXT-X-BYTERANGE tag (#10955)
|
||||
+ [adobepass] Improve Comcast and Verizon login code (#10803)
|
||||
+ [adobepass] Add support for Verizon (#10803)
|
||||
|
||||
Extractors
|
||||
+ [aenetworks] Add support for specials (#12723)
|
||||
+ [hbo] Extract HLS formats
|
||||
+ [go90] Add support for go90.com (#10127)
|
||||
+ [tv2hu] Add support for tv2.hu (#10509)
|
||||
+ [generic] Exclude URLs with xml ext from valid video URLs (#10768, #11654)
|
||||
* [youtube] Improve HLS formats extraction
|
||||
* [afreecatv] Fix extraction for videos with different key layout (#12718)
|
||||
- [youtube] Remove explicit preference for audio-only and video-only formats in
|
||||
order not to break sorting when new formats appear
|
||||
* [canalplus] Bypass geo restriction
|
||||
|
||||
|
||||
version 2017.04.11
|
||||
|
||||
Extractors
|
||||
* [afreecatv] Fix extraction (#12706)
|
||||
+ [generic] Add support for <object> YouTube embeds (#12637)
|
||||
* [bbccouk] Treat bitrate as audio+video bitrate in media selector
|
||||
+ [bbccouk] Skip unrecognized formats in media selector (#12701)
|
||||
+ [bbccouk] Add support for https protocol in media selector (#12701)
|
||||
* [curiositystream] Fix extraction (#12638)
|
||||
* [adn] Update subtitle decryption key
|
||||
* [chaturbate] Fix extraction (#12665, #12688, #12690)
|
||||
|
||||
|
||||
version 2017.04.09
|
||||
|
||||
Extractors
|
||||
+ [medici] Add support for medici.tv (#3406)
|
||||
+ [rbmaradio] Add support for redbullradio.com URLs (#12687)
|
||||
+ [npo:live] Add support for default URL (#12555)
|
||||
* [mixcloud:playlist] Fix title, description and view count extraction (#12582)
|
||||
+ [thesun] Add support for thesun.co.uk (#11298, #12674)
|
||||
+ [ceskatelevize:porady] Add support for porady (#7411, #12645)
|
||||
* [ceskatelevize] Improve extraction and remove URL replacement hacks
|
||||
+ [kaltura] Add support for iframe embeds (#12679)
|
||||
* [airmozilla] Fix extraction (#12670)
|
||||
* [wshh] Extract html5 entries and delegate to generic extractor (#12676)
|
||||
+ [raiplay] Extract subtitles
|
||||
+ [xfileshare] Add support for vidlo.us (#12660)
|
||||
+ [xfileshare] Add support for vidbom.com (#12661)
|
||||
+ [aenetworks] Add more video URL regular expressions (#12657)
|
||||
+ [odnoklassniki] Fix format sorting for 1080p quality
|
||||
+ [rtl2] Add support for you.rtl2.de (#10257)
|
||||
+ [vshare] Add support for vshare.io (#12278)
|
||||
|
||||
|
||||
version 2017.04.03
|
||||
|
||||
Core
|
||||
+ [extractor/common] Add censorship check for TransTelekom ISP
|
||||
* [extractor/common] Move censorship checks to a separate method
|
||||
|
||||
Extractors
|
||||
+ [discoveryvr] Add support for discoveryvr.com (#12578)
|
||||
+ [tv5mondeplus] Add support for tv5mondeplus.com (#11386)
|
||||
+ [periscope] Add support for pscp.tv URLs (#12618, #12625)
|
||||
|
||||
|
||||
version 2017.04.02
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Return early when extraction of url_transparent fails
|
||||
|
||||
Extractors
|
||||
* [rai] Fix and improve extraction (#11790)
|
||||
+ [vrv] Add support for series pages
|
||||
* [limelight] Improve extraction for audio only formats
|
||||
* [funimation] Fix extraction (#10696, #11773)
|
||||
+ [xfileshare] Add support for vidabc.com (#12589)
|
||||
+ [xfileshare] Improve extraction and extract hls formats
|
||||
+ [crunchyroll] Pass geo verification proxy
|
||||
+ [cwtv] Extract ISM formats
|
||||
+ [tvplay] Bypass geo restriction
|
||||
+ [vrv] Add support for vrv.co
|
||||
+ [packtpub] Add support for packtpub.com (#12610)
|
||||
+ [generic] Pass base_url to _parse_jwplayer_data
|
||||
+ [adn] Add support for animedigitalnetwork.fr (#4866)
|
||||
+ [allocine] Extract more metadata
|
||||
* [allocine] Fix extraction (#12592)
|
||||
* [openload] Fix extraction
|
||||
|
||||
|
||||
version 2017.03.26
|
||||
|
||||
Core
|
||||
* Don't raise an error if JWPlayer config data is not a Javascript object
|
||||
literal. _find_jwplayer_data now returns a dict rather than an str. (#12307)
|
||||
* Expand environment variables for options representing paths (#12556)
|
||||
+ [utils] Introduce expand_path
|
||||
* [downloader/hls] Delegate downloading to ffmpeg immediately for live streams
|
||||
|
||||
Extractors
|
||||
* [afreecatv] Fix extraction (#12179)
|
||||
+ [atvat] Add support for atv.at (#5325)
|
||||
+ [fox] Add metadata extraction (#12391)
|
||||
+ [atresplayer] Extract DASH formats
|
||||
+ [atresplayer] Extract HD manifest (#12548)
|
||||
* [atresplayer] Fix login error detection (#12548)
|
||||
* [franceculture] Fix extraction (#12547)
|
||||
* [youtube] Improve URL regular expression (#12538)
|
||||
* [generic] Do not follow redirects to the same URL
|
||||
|
||||
|
||||
version 2017.03.24
|
||||
|
||||
Extractors
|
||||
- [9c9media] Remove mp4 URL extraction request
|
||||
+ [bellmedia] Add support for etalk.ca and space.ca (#12447)
|
||||
* [channel9] Fix extraction (#11323)
|
||||
* [cloudy] Fix extraction (#12525)
|
||||
+ [hbo] Add support for free episode URLs and new formats extraction (#12519)
|
||||
* [condenast] Fix extraction and style (#12526)
|
||||
* [viu] Relax URL regular expression (#12529)
|
||||
|
||||
|
||||
version 2017.03.22
|
||||
|
||||
Extractors
|
||||
- [pluralsight] Omit module title from video title (#12506)
|
||||
* [pornhub] Decode obfuscated video URL (#12470, #12515)
|
||||
* [senateisvp] Allow https URL scheme for embeds (#12512)
|
||||
|
||||
|
||||
version 2017.03.20
|
||||
|
||||
Core
|
||||
+ [YoutubeDL] Allow multiple input URLs to be used with stdout (-) as
|
||||
output template
|
||||
+ [adobepass] Detect and output error on authz token extraction (#12472)
|
||||
|
||||
Extractors
|
||||
+ [bostonglobe] Add extractor for bostonglobe.com (#12099)
|
||||
+ [toongoggles] Add support for toongoggles.com (#12171)
|
||||
+ [medialaan] Add support for Medialaan sites (#9974, #11912)
|
||||
+ [discoverynetworks] Add support for more domains and bypass geo restriction
|
||||
* [openload] Fix extraction (#10408)
|
||||
|
||||
|
||||
version 2017.03.16
|
||||
|
||||
Core
|
||||
+ [postprocessor/ffmpeg] Add support for flac
|
||||
+ [extractor/common] Extract SMIL formats from jwplayer
|
||||
|
||||
Extractors
|
||||
+ [generic] Add forgotten return for jwplayer formats
|
||||
* [redbulltv] Improve extraction
|
||||
|
||||
|
||||
version 2017.03.15
|
||||
|
||||
Core
|
||||
* Fix missing subtitles if --add-metadata is used (#12423)
|
||||
|
||||
Extractors
|
||||
* [facebook] Make title optional (#12443)
|
||||
+ [mitele] Add support for ooyala videos (#12430)
|
||||
* [openload] Fix extraction (#12435, #12446)
|
||||
* [streamable] Update API URL (#12433)
|
||||
+ [crunchyroll] Extract season name (#12428)
|
||||
* [discoverygo] Bypass geo restriction
|
||||
+ [discoverygo:playlist] Add support for playlists (#12424)
|
||||
|
||||
|
||||
version 2017.03.10
|
||||
|
||||
|
13
README.md
13
README.md
@ -181,10 +181,10 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
-R, --retries RETRIES Number of retries (default is 10), or
|
||||
"infinite".
|
||||
--fragment-retries RETRIES Number of retries for a fragment (default
|
||||
is 10), or "infinite" (DASH and hlsnative
|
||||
only)
|
||||
--skip-unavailable-fragments Skip unavailable fragments (DASH and
|
||||
hlsnative only)
|
||||
is 10), or "infinite" (DASH, hlsnative and
|
||||
ISM)
|
||||
--skip-unavailable-fragments Skip unavailable fragments (DASH, hlsnative
|
||||
and ISM)
|
||||
--abort-on-unavailable-fragment Abort downloading when some fragment is not
|
||||
available
|
||||
--buffer-size SIZE Size of download buffer (e.g. 1024 or 16K)
|
||||
@ -375,8 +375,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
(requires ffmpeg or avconv and ffprobe or
|
||||
avprobe)
|
||||
--audio-format FORMAT Specify audio format: "best", "aac",
|
||||
"vorbis", "mp3", "m4a", "opus", or "wav";
|
||||
"best" by default; No effect without -x
|
||||
"flac", "mp3", "m4a", "opus", "vorbis", or
|
||||
"wav"; "best" by default; No effect without
|
||||
-x
|
||||
--audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert
|
||||
a value between 0 (better) and 9 (worse)
|
||||
for VBR or a specific bitrate like 128K
|
||||
|
@ -28,6 +28,7 @@
|
||||
- **acast**
|
||||
- **acast:channel**
|
||||
- **AddAnime**
|
||||
- **ADN**: Anime Digital Network
|
||||
- **AdobeTV**
|
||||
- **AdobeTVChannel**
|
||||
- **AdobeTVShow**
|
||||
@ -67,6 +68,7 @@
|
||||
- **arte.tv:playlist**
|
||||
- **AtresPlayer**
|
||||
- **ATTTechChannel**
|
||||
- **ATVAt**
|
||||
- **AudiMedia**
|
||||
- **AudioBoom**
|
||||
- **audiomack**
|
||||
@ -108,6 +110,7 @@
|
||||
- **blinkx**
|
||||
- **Bloomberg**
|
||||
- **BokeCC**
|
||||
- **BostonGlobe**
|
||||
- **Bpb**: Bundeszentrale für politische Bildung
|
||||
- **BR**: Bayerischer Rundfunk Mediathek
|
||||
- **BravoTV**
|
||||
@ -124,7 +127,7 @@
|
||||
- **CamWithHer**
|
||||
- **canalc2.tv**
|
||||
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
||||
- **Canvas**
|
||||
- **Canvas**: canvas.be and een.be
|
||||
- **CarambaTV**
|
||||
- **CarambaTVPage**
|
||||
- **CartoonNetwork**
|
||||
@ -142,6 +145,7 @@
|
||||
- **CCTV**: 央视网
|
||||
- **CDA**
|
||||
- **CeskaTelevize**
|
||||
- **CeskaTelevizePorady**
|
||||
- **channel9**: Channel 9
|
||||
- **CharlieRose**
|
||||
- **Chaturbate**
|
||||
@ -208,6 +212,9 @@
|
||||
- **Digiteka**
|
||||
- **Discovery**
|
||||
- **DiscoveryGo**
|
||||
- **DiscoveryGoPlaylist**
|
||||
- **DiscoveryNetworksDe**
|
||||
- **DiscoveryVR**
|
||||
- **Disney**
|
||||
- **Dotsub**
|
||||
- **DouyuTV**: 斗鱼
|
||||
@ -301,6 +308,7 @@
|
||||
- **Globo**
|
||||
- **GloboArticle**
|
||||
- **Go**
|
||||
- **Go90**
|
||||
- **GodTube**
|
||||
- **GodTV**
|
||||
- **Golem**
|
||||
@ -309,8 +317,8 @@
|
||||
- **GPUTechConf**
|
||||
- **Groupon**
|
||||
- **Hark**
|
||||
- **HBO**
|
||||
- **HBOEpisode**
|
||||
- **hbo**
|
||||
- **hbo:episode**
|
||||
- **HearThisAt**
|
||||
- **Heise**
|
||||
- **HellPorno**
|
||||
@ -424,6 +432,8 @@
|
||||
- **MatchTV**
|
||||
- **MDR**: MDR.DE and KiKA
|
||||
- **media.ccc.de**
|
||||
- **Medialaan**
|
||||
- **Medici**
|
||||
- **Meipai**: 美拍
|
||||
- **MelonVOD**
|
||||
- **META**
|
||||
@ -567,6 +577,8 @@
|
||||
- **orf:iptv**: iptv.ORF.at
|
||||
- **orf:oe1**: Radio Österreich 1
|
||||
- **orf:tvthek**: ORF TVthek
|
||||
- **PacktPub**
|
||||
- **PacktPubCourse**
|
||||
- **PandaTV**: 熊猫TV
|
||||
- **pandora.tv**: 판도라TV
|
||||
- **parliamentlive.tv**: UK parliament videos
|
||||
@ -624,7 +636,7 @@
|
||||
- **radiofrance**
|
||||
- **RadioJavan**
|
||||
- **Rai**
|
||||
- **RaiTV**
|
||||
- **RaiPlay**
|
||||
- **RBMARadio**
|
||||
- **RDS**: RDS.ca
|
||||
- **RedBullTV**
|
||||
@ -649,7 +661,9 @@
|
||||
- **rte**: Raidió Teilifís Éireann TV
|
||||
- **rte:radio**: Raidió Teilifís Éireann radio
|
||||
- **rtl.nl**: rtl.nl and rtlxl.nl
|
||||
- **RTL2**
|
||||
- **rtl2**
|
||||
- **rtl2:you**
|
||||
- **rtl2:you:series**
|
||||
- **RTP**
|
||||
- **RTS**: RTS.ch
|
||||
- **rtve.es:alacarta**: RTVE a la carta
|
||||
@ -731,6 +745,7 @@
|
||||
- **Steam**
|
||||
- **Stitcher**
|
||||
- **Streamable**
|
||||
- **Streamango**
|
||||
- **streamcloud.eu**
|
||||
- **StreamCZ**
|
||||
- **StreetVoice**
|
||||
@ -771,17 +786,18 @@
|
||||
- **TheScene**
|
||||
- **TheSixtyOne**
|
||||
- **TheStar**
|
||||
- **TheSun**
|
||||
- **TheWeatherChannel**
|
||||
- **ThisAmericanLife**
|
||||
- **ThisAV**
|
||||
- **ThisOldHouse**
|
||||
- **tinypic**: tinypic.com videos
|
||||
- **tlc.de**
|
||||
- **TMZ**
|
||||
- **TMZArticle**
|
||||
- **TNAFlix**
|
||||
- **TNAFlixNetworkEmbed**
|
||||
- **toggle**
|
||||
- **ToonGoggles**
|
||||
- **Tosh**: Tosh.0
|
||||
- **tou.tv**
|
||||
- **Toypics**: Toypics user profile
|
||||
@ -804,9 +820,11 @@
|
||||
- **Tutv**
|
||||
- **tv.dfb.de**
|
||||
- **TV2**
|
||||
- **tv2.hu**
|
||||
- **TV2Article**
|
||||
- **TV3**
|
||||
- **TV4**: tv4.se and tv4play.se
|
||||
- **TV5MondePlus**: TV5MONDE+
|
||||
- **TVA**
|
||||
- **TVANouvelles**
|
||||
- **TVANouvellesArticle**
|
||||
@ -883,7 +901,7 @@
|
||||
- **vidme:user**
|
||||
- **vidme:user:likes**
|
||||
- **Vidzi**
|
||||
- **vier**
|
||||
- **vier**: vier.be and vijf.be
|
||||
- **vier:videos**
|
||||
- **ViewLift**
|
||||
- **ViewLiftEmbed**
|
||||
@ -920,7 +938,10 @@
|
||||
- **Vporn**
|
||||
- **vpro**: npo.nl and ntr.nl
|
||||
- **Vrak**
|
||||
- **VRT**
|
||||
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
|
||||
- **vrv**
|
||||
- **vrv:series**
|
||||
- **VShare**
|
||||
- **vube**: Vube.com
|
||||
- **VuClip**
|
||||
- **VVVVID**
|
||||
@ -946,9 +967,10 @@
|
||||
- **wrzuta.pl**
|
||||
- **wrzuta.pl:playlist**
|
||||
- **WSJ**: Wall Street Journal
|
||||
- **WSJArticle**
|
||||
- **XBef**
|
||||
- **XboxClips**
|
||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE
|
||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo
|
||||
- **XHamster**
|
||||
- **XHamsterEmbed**
|
||||
- **xiami:album**: 虾米音乐 - 专辑
|
||||
|
@ -8,7 +8,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL
|
||||
from test.helper import FakeYDL, expect_dict
|
||||
from youtube_dl.extractor.common import InfoExtractor
|
||||
from youtube_dl.extractor import YoutubeIE, get_info_extractor
|
||||
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
|
||||
@ -84,6 +84,97 @@ class TestInfoExtractor(unittest.TestCase):
|
||||
self.assertRaises(ExtractorError, self.ie._download_json, uri, None)
|
||||
self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
|
||||
|
||||
def test_extract_jwplayer_data_realworld(self):
|
||||
# from http://www.suffolk.edu/sjc/
|
||||
expect_dict(
|
||||
self,
|
||||
self.ie._extract_jwplayer_data(r'''
|
||||
<script type='text/javascript'>
|
||||
jwplayer('my-video').setup({
|
||||
file: 'rtmp://192.138.214.154/live/sjclive',
|
||||
fallback: 'true',
|
||||
width: '95%',
|
||||
aspectratio: '16:9',
|
||||
primary: 'flash',
|
||||
mediaid:'XEgvuql4'
|
||||
});
|
||||
</script>
|
||||
''', None, require_title=False),
|
||||
{
|
||||
'id': 'XEgvuql4',
|
||||
'formats': [{
|
||||
'url': 'rtmp://192.138.214.154/live/sjclive',
|
||||
'ext': 'flv'
|
||||
}]
|
||||
})
|
||||
|
||||
# from https://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary/
|
||||
expect_dict(
|
||||
self,
|
||||
self.ie._extract_jwplayer_data(r'''
|
||||
<script type="text/javascript">
|
||||
jwplayer("mediaplayer").setup({
|
||||
'videoid': "7564",
|
||||
'width': "100%",
|
||||
'aspectratio': "16:9",
|
||||
'stretching': "exactfit",
|
||||
'autostart': 'false',
|
||||
'flashplayer': "https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf",
|
||||
'file': "https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv",
|
||||
'image': "https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg",
|
||||
'filefallback': "https://cdn.pornoxo.com/key=9ZPsTR5EvPLQrBaak2MUGA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/m_4b2157147afe5efa93ce1978e0265289c193874e02597.mp4",
|
||||
'logo.hide': true,
|
||||
'skin': "https://t04.vipstreamservice.com/jwplayer/skin/modieus-blk.zip",
|
||||
'plugins': "https://t04.vipstreamservice.com/jwplayer/dock/dockableskinnableplugin.swf",
|
||||
'dockableskinnableplugin.piclink': "/index.php?key=ajax-videothumbsn&vid=7564&data=2009-12--14--4b2157147afe5efa93ce1978e0265289c193874e02597.flv--17370",
|
||||
'controlbar': 'bottom',
|
||||
'modes': [
|
||||
{type: 'flash', src: 'https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf'}
|
||||
],
|
||||
'provider': 'http'
|
||||
});
|
||||
//noinspection JSAnnotator
|
||||
invideo.setup({
|
||||
adsUrl: "/banner-iframe/?zoneId=32",
|
||||
adsUrl2: "",
|
||||
autostart: false
|
||||
});
|
||||
</script>
|
||||
''', 'dummy', require_title=False),
|
||||
{
|
||||
'thumbnail': 'https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg',
|
||||
'formats': [{
|
||||
'url': 'https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv',
|
||||
'ext': 'flv'
|
||||
}]
|
||||
})
|
||||
|
||||
# from http://www.indiedb.com/games/king-machine/videos
|
||||
expect_dict(
|
||||
self,
|
||||
self.ie._extract_jwplayer_data(r'''
|
||||
<script>
|
||||
jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/\/www.indiedb.com\/","displaytitle":false,"autostart":false,"repeat":false,"title":"king machine trailer 1","sharing":{"link":"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1","code":"<iframe width=\"560\" height=\"315\" src=\"http:\/\/www.indiedb.com\/media\/iframe\/1522983\" frameborder=\"0\" allowfullscreen><\/iframe><br><a href=\"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1\">king machine trailer 1 - Indie DB<\/a>"},"related":{"file":"http:\/\/rss.indiedb.com\/media\/recommended\/1522983\/feed\/rss.xml","dimensions":"160x120","onclick":"link"},"sources":[{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode_mp4\/king-machine-trailer.mp4","label":"360p SD","default":"true"},{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode720p_mp4\/king-machine-trailer.mp4","label":"720p HD"}],"image":"http:\/\/media.indiedb.com\/cache\/images\/games\/1\/50\/49678\/thumb_620x2000\/king-machine-trailer.mp4.jpg","advertising":{"client":"vast","tag":"http:\/\/ads.intergi.com\/adrawdata\/3.0\/5205\/4251742\/0\/1013\/ADTECH;cors=yes;width=560;height=315;referring_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;content_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;media_id=1522983;title=king+machine+trailer+1;device=__DEVICE__;model=__MODEL__;os=Windows+OS;osversion=__OSVERSION__;ua=__UA__;ip=109.171.17.81;uniqueid=1522983;tags=__TAGS__;number=58cac25928151;time=1489683033"},"width":620,"height":349}).once("play", function(event) {
|
||||
videoAnalytics("play");
|
||||
}).once("complete", function(event) {
|
||||
videoAnalytics("completed");
|
||||
});
|
||||
</script>
|
||||
''', 'dummy'),
|
||||
{
|
||||
'title': 'king machine trailer 1',
|
||||
'thumbnail': 'http://media.indiedb.com/cache/images/games/1/50/49678/thumb_620x2000/king-machine-trailer.mp4.jpg',
|
||||
'formats': [{
|
||||
'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode_mp4/king-machine-trailer.mp4',
|
||||
'height': 360,
|
||||
'ext': 'mp4'
|
||||
}, {
|
||||
'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode720p_mp4/king-machine-trailer.mp4',
|
||||
'height': 720,
|
||||
'ext': 'mp4'
|
||||
}]
|
||||
})
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -755,6 +755,7 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
'_type': 'url_transparent',
|
||||
'url': 'foo2:',
|
||||
'ie_key': 'Foo2',
|
||||
'title': 'foo1 title'
|
||||
}
|
||||
|
||||
class Foo2IE(InfoExtractor):
|
||||
@ -771,7 +772,7 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
_VALID_URL = r'foo3:'
|
||||
|
||||
def _real_extract(self, url):
|
||||
return _make_result([{'url': TEST_URL}])
|
||||
return _make_result([{'url': TEST_URL}], title='foo3 title')
|
||||
|
||||
ydl.add_info_extractor(Foo1IE(ydl))
|
||||
ydl.add_info_extractor(Foo2IE(ydl))
|
||||
@ -779,6 +780,7 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
ydl.extract_info('foo1:')
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['url'], TEST_URL)
|
||||
self.assertEqual(downloaded['title'], 'foo1 title')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -27,11 +27,11 @@ from youtube_dl.compat import (
|
||||
class TestCompat(unittest.TestCase):
|
||||
def test_compat_getenv(self):
|
||||
test_str = 'тест'
|
||||
compat_setenv('YOUTUBE-DL-TEST', test_str)
|
||||
self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str)
|
||||
compat_setenv('YOUTUBE_DL_COMPAT_GETENV', test_str)
|
||||
self.assertEqual(compat_getenv('YOUTUBE_DL_COMPAT_GETENV'), test_str)
|
||||
|
||||
def test_compat_setenv(self):
|
||||
test_var = 'YOUTUBE-DL-TEST'
|
||||
test_var = 'YOUTUBE_DL_COMPAT_SETENV'
|
||||
test_str = 'тест'
|
||||
compat_setenv(test_var, test_str)
|
||||
compat_getenv(test_var)
|
||||
|
@ -71,6 +71,18 @@ class TestDownload(unittest.TestCase):
|
||||
|
||||
maxDiff = None
|
||||
|
||||
def __str__(self):
|
||||
"""Identify each test with the `add_ie` attribute, if available."""
|
||||
|
||||
def strclass(cls):
|
||||
"""From 2.7's unittest; 2.6 had _strclass so we can't import it."""
|
||||
return '%s.%s' % (cls.__module__, cls.__name__)
|
||||
|
||||
add_ie = getattr(self, self._testMethodName).add_ie
|
||||
return '%s (%s)%s:' % (self._testMethodName,
|
||||
strclass(self.__class__),
|
||||
' [%s]' % add_ie if add_ie else '')
|
||||
|
||||
def setUp(self):
|
||||
self.defs = defs
|
||||
|
||||
@ -139,7 +151,7 @@ def generator(test_case, tname):
|
||||
try_num = 1
|
||||
while True:
|
||||
try:
|
||||
# We're not using .download here sine that is just a shim
|
||||
# We're not using .download here since that is just a shim
|
||||
# for outside error handling, and returns the exit code
|
||||
# instead of the result dict.
|
||||
res_dict = ydl.extract_info(
|
||||
@ -187,7 +199,16 @@ def generator(test_case, tname):
|
||||
self.assertEqual(
|
||||
test_case['playlist_duration_sum'], got_duration)
|
||||
|
||||
for tc in test_cases:
|
||||
# Generalize both playlists and single videos to unified format for
|
||||
# simplicity
|
||||
if 'entries' not in res_dict:
|
||||
res_dict['entries'] = [res_dict]
|
||||
|
||||
for tc_num, tc in enumerate(test_cases):
|
||||
tc_res_dict = res_dict['entries'][tc_num]
|
||||
# First, check test cases' data against extracted data alone
|
||||
expect_info_dict(self, tc_res_dict, tc.get('info_dict', {}))
|
||||
# Now, check downloaded file consistency
|
||||
tc_filename = get_tc_filename(tc)
|
||||
if not test_case.get('params', {}).get('skip_download', False):
|
||||
self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
|
||||
@ -205,13 +226,14 @@ def generator(test_case, tname):
|
||||
if 'md5' in tc:
|
||||
md5_for_file = _file_md5(tc_filename)
|
||||
self.assertEqual(md5_for_file, tc['md5'])
|
||||
# Finally, check test cases' data again but this time against
|
||||
# extracted data from info JSON file written during processing
|
||||
info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
|
||||
self.assertTrue(
|
||||
os.path.exists(info_json_fn),
|
||||
'Missing info file %s' % info_json_fn)
|
||||
with io.open(info_json_fn, encoding='utf-8') as infof:
|
||||
info_dict = json.load(infof)
|
||||
|
||||
expect_info_dict(self, info_dict, tc.get('info_dict', {}))
|
||||
finally:
|
||||
try_rm_tcs_files()
|
||||
@ -233,6 +255,8 @@ for n, test_case in enumerate(defs):
|
||||
i += 1
|
||||
test_method = generator(test_case, tname)
|
||||
test_method.__name__ = str(tname)
|
||||
ie_list = test_case.get('add_ie')
|
||||
test_method.add_ie = ie_list and ','.join(ie_list)
|
||||
setattr(TestDownload, test_method.__name__, test_method)
|
||||
del test_method
|
||||
|
||||
|
@ -21,7 +21,7 @@ from youtube_dl.extractor import (
|
||||
NPOIE,
|
||||
ComedyCentralIE,
|
||||
NRKTVIE,
|
||||
RaiTVIE,
|
||||
RaiPlayIE,
|
||||
VikiIE,
|
||||
ThePlatformIE,
|
||||
ThePlatformFeedIE,
|
||||
@ -258,9 +258,9 @@ class TestNRKSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
|
||||
|
||||
|
||||
class TestRaiSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
|
||||
IE = RaiTVIE
|
||||
class TestRaiPlaySubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
|
||||
IE = RaiPlayIE
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
|
@ -56,6 +56,7 @@ from youtube_dl.utils import (
|
||||
read_batch_urls,
|
||||
sanitize_filename,
|
||||
sanitize_path,
|
||||
expand_path,
|
||||
prepend_extension,
|
||||
replace_extension,
|
||||
remove_start,
|
||||
@ -95,6 +96,8 @@ from youtube_dl.utils import (
|
||||
from youtube_dl.compat import (
|
||||
compat_chr,
|
||||
compat_etree_fromstring,
|
||||
compat_getenv,
|
||||
compat_setenv,
|
||||
compat_urlparse,
|
||||
compat_parse_qs,
|
||||
)
|
||||
@ -214,6 +217,18 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(sanitize_path('./abc'), 'abc')
|
||||
self.assertEqual(sanitize_path('./../abc'), '..\\abc')
|
||||
|
||||
def test_expand_path(self):
|
||||
def env(var):
|
||||
return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
|
||||
|
||||
compat_setenv('YOUTUBE_DL_EXPATH_PATH', 'expanded')
|
||||
self.assertEqual(expand_path(env('YOUTUBE_DL_EXPATH_PATH')), 'expanded')
|
||||
self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME'))
|
||||
self.assertEqual(expand_path('~'), compat_getenv('HOME'))
|
||||
self.assertEqual(
|
||||
expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')),
|
||||
'%s/expanded' % compat_getenv('HOME'))
|
||||
|
||||
def test_prepend_extension(self):
|
||||
self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
|
||||
self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
|
||||
|
@ -29,7 +29,6 @@ import random
|
||||
from .compat import (
|
||||
compat_basestring,
|
||||
compat_cookiejar,
|
||||
compat_expanduser,
|
||||
compat_get_terminal_size,
|
||||
compat_http_client,
|
||||
compat_kwargs,
|
||||
@ -54,6 +53,7 @@ from .utils import (
|
||||
encode_compat_str,
|
||||
encodeFilename,
|
||||
error_to_compat_str,
|
||||
expand_path,
|
||||
ExtractorError,
|
||||
format_bytes,
|
||||
formatSeconds,
|
||||
@ -672,8 +672,7 @@ class YoutubeDL(object):
|
||||
FORMAT_RE.format(numeric_field),
|
||||
r'%({0})s'.format(numeric_field), outtmpl)
|
||||
|
||||
tmpl = compat_expanduser(outtmpl)
|
||||
filename = tmpl % template_dict
|
||||
filename = expand_path(outtmpl % template_dict)
|
||||
# Temporary fix for #4787
|
||||
# 'Treat' all problem characters by passing filename through preferredencoding
|
||||
# to workaround encoding issues with subprocess on python2 @ Windows
|
||||
@ -837,6 +836,12 @@ class YoutubeDL(object):
|
||||
ie_result['url'], ie_key=ie_result.get('ie_key'),
|
||||
extra_info=extra_info, download=False, process=False)
|
||||
|
||||
# extract_info may return None when ignoreerrors is enabled and
|
||||
# extraction failed with an error, don't crash and return early
|
||||
# in this case
|
||||
if not info:
|
||||
return info
|
||||
|
||||
force_properties = dict(
|
||||
(k, v) for k, v in ie_result.items() if v is not None)
|
||||
for f in ('_type', 'url', 'ie_key'):
|
||||
@ -845,11 +850,18 @@ class YoutubeDL(object):
|
||||
new_result = info.copy()
|
||||
new_result.update(force_properties)
|
||||
|
||||
assert new_result.get('_type') != 'url_transparent'
|
||||
# Extracted info may not be a video result (i.e.
|
||||
# info.get('_type', 'video') != video) but rather an url or
|
||||
# url_transparent. In such cases outer metadata (from ie_result)
|
||||
# should be propagated to inner one (info). For this to happen
|
||||
# _type of info should be overridden with url_transparent. This
|
||||
# fixes issue from https://github.com/rg3/youtube-dl/pull/11163.
|
||||
if new_result.get('_type') == 'url':
|
||||
new_result['_type'] = 'url_transparent'
|
||||
|
||||
return self.process_ie_result(
|
||||
new_result, download=download, extra_info=extra_info)
|
||||
elif result_type == 'playlist' or result_type == 'multi_video':
|
||||
elif result_type in ('playlist', 'multi_video'):
|
||||
# We process each entry in the playlist
|
||||
playlist = ie_result.get('title') or ie_result.get('id')
|
||||
self.to_screen('[download] Downloading playlist: %s' % playlist)
|
||||
@ -1872,6 +1884,7 @@ class YoutubeDL(object):
|
||||
"""Download a given list of URLs."""
|
||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||
if (len(url_list) > 1 and
|
||||
outtmpl != '-' and
|
||||
'%' not in outtmpl and
|
||||
self.params.get('max_downloads') != 1):
|
||||
raise SameFileError(outtmpl)
|
||||
@ -2169,7 +2182,7 @@ class YoutubeDL(object):
|
||||
if opts_cookiefile is None:
|
||||
self.cookiejar = compat_cookiejar.CookieJar()
|
||||
else:
|
||||
opts_cookiefile = compat_expanduser(opts_cookiefile)
|
||||
opts_cookiefile = expand_path(opts_cookiefile)
|
||||
self.cookiejar = compat_cookiejar.MozillaCookieJar(
|
||||
opts_cookiefile)
|
||||
if os.access(opts_cookiefile, os.R_OK):
|
||||
|
@ -16,7 +16,6 @@ from .options import (
|
||||
parseOpts,
|
||||
)
|
||||
from .compat import (
|
||||
compat_expanduser,
|
||||
compat_getpass,
|
||||
compat_shlex_split,
|
||||
workaround_optparse_bug9161,
|
||||
@ -26,6 +25,7 @@ from .utils import (
|
||||
decodeOption,
|
||||
DEFAULT_OUTTMPL,
|
||||
DownloadError,
|
||||
expand_path,
|
||||
match_filter_func,
|
||||
MaxDownloadsReached,
|
||||
preferredencoding,
|
||||
@ -88,7 +88,7 @@ def _real_main(argv=None):
|
||||
batchfd = sys.stdin
|
||||
else:
|
||||
batchfd = io.open(
|
||||
compat_expanduser(opts.batchfile),
|
||||
expand_path(opts.batchfile),
|
||||
'r', encoding='utf-8', errors='ignore')
|
||||
batch_urls = read_batch_urls(batchfd)
|
||||
if opts.verbose:
|
||||
@ -196,7 +196,7 @@ def _real_main(argv=None):
|
||||
if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
|
||||
raise ValueError('Playlist end must be greater than playlist start')
|
||||
if opts.extractaudio:
|
||||
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
|
||||
if opts.audioformat not in ['best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
|
||||
parser.error('invalid audio format specified')
|
||||
if opts.audioquality:
|
||||
opts.audioquality = opts.audioquality.strip('k').strip('K')
|
||||
@ -238,7 +238,7 @@ def _real_main(argv=None):
|
||||
|
||||
any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
|
||||
any_printing = opts.print_json
|
||||
download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
|
||||
download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive
|
||||
|
||||
# PostProcessors
|
||||
postprocessors = []
|
||||
@ -449,7 +449,7 @@ def _real_main(argv=None):
|
||||
|
||||
try:
|
||||
if opts.load_info_filename is not None:
|
||||
retcode = ydl.download_with_info_file(compat_expanduser(opts.load_info_filename))
|
||||
retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename))
|
||||
else:
|
||||
retcode = ydl.download(all_urls)
|
||||
except MaxDownloadsReached:
|
||||
|
@ -8,8 +8,11 @@ import re
|
||||
import shutil
|
||||
import traceback
|
||||
|
||||
from .compat import compat_expanduser, compat_getenv
|
||||
from .utils import write_json_file
|
||||
from .compat import compat_getenv
|
||||
from .utils import (
|
||||
expand_path,
|
||||
write_json_file,
|
||||
)
|
||||
|
||||
|
||||
class Cache(object):
|
||||
@ -21,7 +24,7 @@ class Cache(object):
|
||||
if res is None:
|
||||
cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
|
||||
res = os.path.join(cache_root, 'youtube-dl')
|
||||
return compat_expanduser(res)
|
||||
return expand_path(res)
|
||||
|
||||
def _get_cache_fn(self, section, key, dtype):
|
||||
assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
|
||||
|
@ -2692,7 +2692,7 @@ else:
|
||||
userhome = pwent.pw_dir
|
||||
userhome = userhome.rstrip('/')
|
||||
return (userhome + path[i:]) or '/'
|
||||
elif compat_os_name == 'nt' or compat_os_name == 'ce':
|
||||
elif compat_os_name in ('nt', 'ce'):
|
||||
def compat_expanduser(path):
|
||||
"""Expand ~ and ~user constructs.
|
||||
|
||||
|
@ -43,6 +43,9 @@ def get_suitable_downloader(info_dict, params={}):
|
||||
if ed.can_download(info_dict):
|
||||
return ed
|
||||
|
||||
if protocol.startswith('m3u8') and info_dict.get('is_live'):
|
||||
return FFmpegFD
|
||||
|
||||
if protocol == 'm3u8' and params.get('hls_prefer_native') is True:
|
||||
return HlsFD
|
||||
|
||||
|
@ -34,7 +34,7 @@ class HlsFD(FragmentFD):
|
||||
def can_download(manifest, info_dict):
|
||||
UNSUPPORTED_FEATURES = (
|
||||
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
|
||||
r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
|
||||
# r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
|
||||
|
||||
# Live streams heuristic does not always work (e.g. geo restricted to Germany
|
||||
# http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
|
||||
@ -52,7 +52,9 @@ class HlsFD(FragmentFD):
|
||||
# 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
|
||||
)
|
||||
check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
|
||||
check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest)
|
||||
is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
|
||||
check_results.append(can_decrypt_frag or not is_aes128_enc)
|
||||
check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest))
|
||||
check_results.append(not info_dict.get('is_live'))
|
||||
return all(check_results)
|
||||
|
||||
@ -100,6 +102,7 @@ class HlsFD(FragmentFD):
|
||||
i = 0
|
||||
media_sequence = 0
|
||||
decrypt_info = {'METHOD': 'NONE'}
|
||||
byte_range = {}
|
||||
frags_filenames = []
|
||||
for line in s.splitlines():
|
||||
line = line.strip()
|
||||
@ -114,11 +117,14 @@ class HlsFD(FragmentFD):
|
||||
if extra_query:
|
||||
frag_url = update_url_query(frag_url, extra_query)
|
||||
count = 0
|
||||
headers = info_dict.get('http_headers', {})
|
||||
if byte_range:
|
||||
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'])
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success = ctx['dl'].download(frag_filename, {
|
||||
'url': frag_url,
|
||||
'http_headers': info_dict.get('http_headers'),
|
||||
'http_headers': headers,
|
||||
})
|
||||
if not success:
|
||||
return False
|
||||
@ -167,6 +173,13 @@ class HlsFD(FragmentFD):
|
||||
decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
|
||||
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
|
||||
media_sequence = int(line[22:])
|
||||
elif line.startswith('#EXT-X-BYTERANGE'):
|
||||
splitted_byte_range = line[17:].split('@')
|
||||
sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
|
||||
byte_range = {
|
||||
'start': sub_range_start,
|
||||
'end': sub_range_start + int(splitted_byte_range[0]),
|
||||
}
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
|
@ -169,7 +169,7 @@ class RtmpFD(FileDownloader):
|
||||
self.report_error('[rtmpdump] Could not connect to RTMP server.')
|
||||
return False
|
||||
|
||||
while (retval == RD_INCOMPLETE or retval == RD_FAILED) and not test and not live:
|
||||
while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
|
||||
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('[rtmpdump] %s bytes' % prevsize)
|
||||
time.sleep(5.0) # This seems to be needed
|
||||
|
136
youtube_dl/extractor/adn.py
Normal file
136
youtube_dl/extractor/adn.py
Normal file
@ -0,0 +1,136 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt
|
||||
from ..compat import compat_ord
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
intlist_to_bytes,
|
||||
srt_subtitles_timecode,
|
||||
strip_or_none,
|
||||
)
|
||||
|
||||
|
||||
class ADNIE(InfoExtractor):
    """Information extractor for animedigitalnetwork.fr video pages."""

    IE_DESC = 'Anime Digital Network'
    _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
    _TEST = {
        'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
        'md5': 'e497370d847fd79d9d4c74be55575c7a',
        'info_dict': {
            'id': '7778',
            'ext': 'mp4',
            'title': 'Blue Exorcist - Kyôto Saga - Épisode 1',
            'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
        }
    }

    def _get_subtitles(self, sub_path, video_id):
        """Download, decrypt and convert the subtitles found at sub_path.

        Returns a dict mapping a language code to a list holding two
        entries per track (the raw cue list serialized as JSON and an SRT
        rendering of it), or None when sub_path is empty, the download
        fails or the decrypted payload cannot be parsed as JSON.
        """
        if not sub_path:
            return None

        encrypted = self._download_webpage(
            'http://animedigitalnetwork.fr/' + sub_path,
            video_id, fatal=False)
        if not encrypted:
            return None

        # Decryption scheme taken from
        # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
        # Everything after the first 24 characters is the base64 encoded
        # ciphertext; the first 24 characters are base64 decoded separately
        # and handed to aes_cbc_decrypt as its last argument.
        cipher_ints = bytes_to_intlist(base64.b64decode(encrypted[24:]))
        key_ints = bytes_to_intlist(
            b'\nd\xaf\xd2J\xd0\xfc\xe1\xfc\xdf\xb61\xe8\xe1\xf0\xcc')
        iv_ints = bytes_to_intlist(base64.b64decode(encrypted[:24]))
        decrypted = intlist_to_bytes(
            aes_cbc_decrypt(cipher_ints, key_ints, iv_ints))

        # The value of the last byte tells how many trailing bytes to drop
        # before the remainder is parsed as JSON.
        trailing = compat_ord(decrypted[-1])
        tracks = self._parse_json(decrypted[:-trailing], None, fatal=False)
        if not tracks:
            return None

        subtitles = {}
        for track_lang, cues in tracks.items():
            srt_chunks = []
            for index, cue in enumerate(cues):
                start = float_or_none(cue.get('startTime'))
                end = float_or_none(cue.get('endTime'))
                text = cue.get('text')
                # Cues lacking any of the three fields are left out of the
                # SRT rendering (they remain in the JSON variant).
                if any(value is None for value in (start, end, text)):
                    continue
                timing = '%s --> %s' % (
                    srt_subtitles_timecode(start),
                    srt_subtitles_timecode(end))
                srt_chunks.append(os.linesep.join(
                    ('%d' % index, timing, text, os.linesep)))
            srt = ''.join(srt_chunks)

            # Tracks labelled 'vostf' are exposed under the 'fr' key.
            lang = 'fr' if track_lang == 'vostf' else track_lang
            subtitles.setdefault(lang, []).extend([{
                'ext': 'json',
                'data': json.dumps(cues),
            }, {
                'ext': 'srt',
                'data': srt,
            }])
        return subtitles

    def _real_extract(self, url):
        """Build the info dict for the video page at url.

        Reads the playerConfig (mandatory) and videoInfo (optional)
        JavaScript objects embedded in the page, resolves each entry of
        playerConfig['links'] to an m3u8 manifest and collects its formats.
        Raises ExtractorError with the site supplied message when no format
        could be extracted and the player options carry an error.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        player_config_str = self._search_regex(
            r'playerConfig\s*=\s*({.+});', webpage, 'player config')
        player_config = self._parse_json(player_config_str, video_id)

        # videoInfo is only a fallback source of metadata, hence non fatal.
        video_info_str = self._search_regex(
            r'videoInfo\s*=\s*({.+});', webpage,
            'video info', fatal=False)
        video_info = (
            self._parse_json(video_info_str, video_id, fatal=False)
            if video_info_str else None) or {}

        options = player_config.get('options') or {}
        metas = options.get('metas') or {}
        title = metas.get('title') or video_info['title']
        links = player_config.get('links') or {}

        formats = []
        for format_id, qualities in links.items():
            mark_french = format_id == 'vf'
            for load_balancer_url in qualities.values():
                # Each link answers with JSON whose 'location' field holds
                # the m3u8 manifest URL.
                balancer_response = self._download_json(
                    load_balancer_url, video_id, fatal=False) or {}
                m3u8_url = balancer_response.get('location')
                if m3u8_url:
                    variant_formats = self._extract_m3u8_formats(
                        m3u8_url, video_id, 'mp4', 'm3u8_native',
                        m3u8_id=format_id, fatal=False)
                    if mark_french:
                        for variant in variant_formats:
                            variant['language'] = 'fr'
                    formats.extend(variant_formats)

        error = options.get('error')
        if error and not formats:
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, error), expected=True)
        self._sort_formats(formats)

        description = strip_or_none(
            metas.get('summary') or video_info.get('resume'))
        subtitles = self.extract_subtitles(
            player_config.get('subtitles'), video_id)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': video_info.get('image'),
            'formats': formats,
            'subtitles': subtitles,
            'episode': metas.get('subtitle') or video_info.get('videoTitle'),
            'series': video_info.get('playlistTitle'),
        }
|
@ -41,6 +41,11 @@ MSO_INFO = {
|
||||
'username_field': 'IDToken1',
|
||||
'password_field': 'IDToken2',
|
||||
},
|
||||
'Verizon': {
|
||||
'name': 'Verizon FiOS',
|
||||
'username_field': 'IDToken1',
|
||||
'password_field': 'IDToken2',
|
||||
},
|
||||
'thr030': {
|
||||
'name': '3 Rivers Communications'
|
||||
},
|
||||
@ -1384,40 +1389,72 @@ class AdobePassIE(InfoExtractor):
|
||||
# Comcast page flow varies by video site and whether you
|
||||
# are on Comcast's network.
|
||||
provider_redirect_page, urlh = provider_redirect_page_res
|
||||
# Check for Comcast auto login
|
||||
if 'automatically signing you in' in provider_redirect_page:
|
||||
oauth_redirect_url = self._html_search_regex(
|
||||
r'window\.location\s*=\s*[\'"]([^\'"]+)',
|
||||
provider_redirect_page, 'oauth redirect')
|
||||
# Just need to process the request. No useful data comes back
|
||||
self._download_webpage(
|
||||
oauth_redirect_url, video_id, 'Confirming auto login')
|
||||
else:
|
||||
if '<form name="signin"' in provider_redirect_page:
|
||||
# already have the form, just fill it
|
||||
provider_login_page_res = provider_redirect_page_res
|
||||
elif 'http-equiv="refresh"' in provider_redirect_page:
|
||||
# redirects to the login page
|
||||
oauth_redirect_url = self._html_search_regex(
|
||||
r'content="0;\s*url=([^\'"]+)',
|
||||
provider_redirect_page, 'meta refresh redirect')
|
||||
provider_login_page_res = self._download_webpage_handle(
|
||||
oauth_redirect_url,
|
||||
video_id, 'Downloading Provider Login Page')
|
||||
oauth_redirect_url, video_id,
|
||||
'Downloading Provider Login Page')
|
||||
else:
|
||||
provider_login_page_res = post_form(
|
||||
provider_redirect_page_res, 'Downloading Provider Login Page')
|
||||
provider_redirect_page_res,
|
||||
'Downloading Provider Login Page')
|
||||
|
||||
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
|
||||
mso_info.get('username_field', 'username'): username,
|
||||
mso_info.get('password_field', 'password'): password,
|
||||
})
|
||||
mvpd_confirm_page_res = post_form(
|
||||
provider_login_page_res, 'Logging in', {
|
||||
mso_info['username_field']: username,
|
||||
mso_info['password_field']: password,
|
||||
})
|
||||
mvpd_confirm_page, urlh = mvpd_confirm_page_res
|
||||
if '<button class="submit" value="Resume">Resume</button>' in mvpd_confirm_page:
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
|
||||
elif mso_id == 'Verizon':
|
||||
# In general, if you're connecting from a Verizon-assigned IP,
|
||||
# you will not actually pass your credentials.
|
||||
provider_redirect_page, urlh = provider_redirect_page_res
|
||||
if 'Please wait ...' in provider_redirect_page:
|
||||
saml_redirect_url = self._html_search_regex(
|
||||
r'self\.parent\.location=(["\'])(?P<url>.+?)\1',
|
||||
provider_redirect_page,
|
||||
'SAML Redirect URL', group='url')
|
||||
saml_login_page = self._download_webpage(
|
||||
saml_redirect_url, video_id,
|
||||
'Downloading SAML Login Page')
|
||||
else:
|
||||
saml_login_page_res = post_form(
|
||||
provider_redirect_page_res, 'Logging in', {
|
||||
mso_info['username_field']: username,
|
||||
mso_info['password_field']: password,
|
||||
})
|
||||
saml_login_page, urlh = saml_login_page_res
|
||||
if 'Please try again.' in saml_login_page:
|
||||
raise ExtractorError(
|
||||
'We\'re sorry, but either the User ID or Password entered is not correct.')
|
||||
saml_login_url = self._search_regex(
|
||||
r'xmlHttp\.open\("POST"\s*,\s*(["\'])(?P<url>.+?)\1',
|
||||
saml_login_page, 'SAML Login URL', group='url')
|
||||
saml_response_json = self._download_json(
|
||||
saml_login_url, video_id, 'Downloading SAML Response',
|
||||
headers={'Content-Type': 'text/xml'})
|
||||
self._download_webpage(
|
||||
saml_response_json['targetValue'], video_id,
|
||||
'Confirming Login', data=urlencode_postdata({
|
||||
'SAMLResponse': saml_response_json['SAMLResponse'],
|
||||
'RelayState': saml_response_json['RelayState']
|
||||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
})
|
||||
else:
|
||||
# Normal, non-Comcast flow
|
||||
provider_login_page_res = post_form(
|
||||
provider_redirect_page_res, 'Downloading Provider Login Page')
|
||||
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
|
||||
@ -1458,6 +1495,8 @@ class AdobePassIE(InfoExtractor):
|
||||
self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
|
||||
count += 1
|
||||
continue
|
||||
if '<error' in authorize:
|
||||
raise ExtractorError(xml_text(authorize, 'details'), expected=True)
|
||||
authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
|
||||
requestor_info[guid] = authz_token
|
||||
self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
|
||||
|
@ -23,7 +23,19 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
class AENetworksIE(AENetworksBaseIE):
|
||||
IE_NAME = 'aenetworks'
|
||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime|lifetimemovieclub)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?P<domain>
|
||||
(?:history|aetv|mylifetime|lifetimemovieclub)\.com|
|
||||
fyi\.tv
|
||||
)/
|
||||
(?:
|
||||
shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
|
||||
movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
|
||||
specials/(?P<special_display_id>[^/]+)/full-special
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
||||
'md5': 'a97a65f7e823ae10e9244bc5433d5fe6',
|
||||
@ -65,6 +77,9 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.lifetimemovieclub.com/movies/a-killer-among-us',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
|
||||
'only_matching': True
|
||||
}]
|
||||
_DOMAIN_TO_REQUESTOR_ID = {
|
||||
'history.com': 'HISTORY',
|
||||
@ -75,8 +90,8 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, show_path, movie_display_id = re.match(self._VALID_URL, url).groups()
|
||||
display_id = show_path or movie_display_id
|
||||
domain, show_path, movie_display_id, special_display_id = re.match(self._VALID_URL, url).groups()
|
||||
display_id = show_path or movie_display_id or special_display_id
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
if show_path:
|
||||
url_parts = show_path.split('/')
|
||||
@ -107,7 +122,10 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
}
|
||||
video_id = self._html_search_meta('aetn:VideoID', webpage)
|
||||
media_url = self._search_regex(
|
||||
r"media_url\s*=\s*'([^']+)'", webpage, 'video url')
|
||||
[r"media_url\s*=\s*'(?P<url>[^']+)'",
|
||||
r'data-media-url=(?P<url>(?:https?:)?//[^\s>]+)',
|
||||
r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'],
|
||||
webpage, 'video url', group='url')
|
||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
|
@ -4,15 +4,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..compat import compat_xpath
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
update_url_query,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
@ -43,7 +39,8 @@ class AfreecaTVIE(InfoExtractor):
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'upload_date': '20160503',
|
||||
}
|
||||
},
|
||||
'skip': 'Video is gone',
|
||||
}, {
|
||||
'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
|
||||
'info_dict': {
|
||||
@ -71,6 +68,76 @@ class AfreecaTVIE(InfoExtractor):
|
||||
'upload_date': '20160502',
|
||||
},
|
||||
}],
|
||||
'skip': 'Video is gone',
|
||||
}, {
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793',
|
||||
'info_dict': {
|
||||
'id': '18650793',
|
||||
'ext': 'mp4',
|
||||
'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': '윈아디',
|
||||
'uploader_id': 'badkids',
|
||||
'duration': 107,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/10481652',
|
||||
'info_dict': {
|
||||
'id': '10481652',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'duration': 6492,
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'md5': 'd8b7c174568da61d774ef0203159bf97',
|
||||
'info_dict': {
|
||||
'id': '20160502_c4c62b9d_174361386_1',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 1)",
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'upload_date': '20160502',
|
||||
'duration': 3601,
|
||||
},
|
||||
}, {
|
||||
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
|
||||
'info_dict': {
|
||||
'id': '20160502_39e739bb_174361386_2',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 2)",
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'upload_date': '20160502',
|
||||
'duration': 2891,
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# non standard key
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
|
||||
'info_dict': {
|
||||
'id': '20170411_BE689A0E_190960999_1_2_h',
|
||||
'ext': 'mp4',
|
||||
'title': '혼자사는여자집',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': '♥이슬이',
|
||||
'uploader_id': 'dasl8121',
|
||||
'upload_date': '20170411',
|
||||
'duration': 213,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
|
||||
'only_matching': True,
|
||||
@ -85,42 +152,77 @@ class AfreecaTVIE(InfoExtractor):
|
||||
m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
|
||||
if m:
|
||||
video_key['upload_date'] = m.group('upload_date')
|
||||
video_key['part'] = m.group('part')
|
||||
video_key['part'] = int(m.group('part'))
|
||||
return video_key
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
parsed_url = compat_urllib_parse_urlparse(url)
|
||||
info_url = compat_urlparse.urlunparse(parsed_url._replace(
|
||||
netloc='afbbs.afreecatv.com:8080',
|
||||
path='/api/video/get_video_info.php'))
|
||||
|
||||
video_xml = self._download_xml(
|
||||
update_url_query(info_url, {'nTitleNo': video_id}), video_id)
|
||||
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
|
||||
video_id, query={'nTitleNo': video_id})
|
||||
|
||||
if xpath_element(video_xml, './track/video/file') is None:
|
||||
video_element = video_xml.findall(compat_xpath('./track/video'))[1]
|
||||
if video_element is None or video_element.text is None:
|
||||
raise ExtractorError('Specified AfreecaTV video does not exist',
|
||||
expected=True)
|
||||
|
||||
title = xpath_text(video_xml, './track/title', 'title')
|
||||
video_url = video_element.text.strip()
|
||||
|
||||
title = xpath_text(video_xml, './track/title', 'title', fatal=True)
|
||||
|
||||
uploader = xpath_text(video_xml, './track/nickname', 'uploader')
|
||||
uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
|
||||
duration = int_or_none(xpath_text(video_xml, './track/duration',
|
||||
'duration'))
|
||||
duration = int_or_none(xpath_text(
|
||||
video_xml, './track/duration', 'duration'))
|
||||
thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
|
||||
|
||||
entries = []
|
||||
for i, video_file in enumerate(video_xml.findall('./track/video/file')):
|
||||
video_key = self.parse_video_key(video_file.get('key', ''))
|
||||
if not video_key:
|
||||
continue
|
||||
entries.append({
|
||||
'id': '%s_%s' % (video_id, video_key.get('part', i + 1)),
|
||||
'title': title,
|
||||
'upload_date': video_key.get('upload_date'),
|
||||
'duration': int_or_none(video_file.get('duration')),
|
||||
'url': video_file.text,
|
||||
common_entry = {
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
info = common_entry.copy()
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
})
|
||||
|
||||
if not video_url:
|
||||
entries = []
|
||||
file_elements = video_element.findall(compat_xpath('./file'))
|
||||
one = len(file_elements) == 1
|
||||
for file_num, file_element in enumerate(file_elements, start=1):
|
||||
file_url = file_element.text
|
||||
if not file_url:
|
||||
continue
|
||||
key = file_element.get('key', '')
|
||||
upload_date = self._search_regex(
|
||||
r'^(\d{8})_', key, 'upload date', default=None)
|
||||
file_duration = int_or_none(file_element.get('duration'))
|
||||
format_id = key if key else '%s_%s' % (video_id, file_num)
|
||||
formats = self._extract_m3u8_formats(
|
||||
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls',
|
||||
note='Downloading part %d m3u8 information' % file_num)
|
||||
title = title if one else '%s (part %d)' % (title, file_num)
|
||||
file_info = common_entry.copy()
|
||||
file_info.update({
|
||||
'id': format_id,
|
||||
'title': title,
|
||||
'upload_date': upload_date,
|
||||
'duration': file_duration,
|
||||
'formats': formats,
|
||||
})
|
||||
entries.append(file_info)
|
||||
entries_info = info.copy()
|
||||
entries_info.update({
|
||||
'_type': 'multi_video',
|
||||
'entries': entries,
|
||||
})
|
||||
return entries_info
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
@ -131,17 +233,18 @@ class AfreecaTVIE(InfoExtractor):
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
if len(entries) > 1:
|
||||
info['_type'] = 'multi_video'
|
||||
info['entries'] = entries
|
||||
elif len(entries) == 1:
|
||||
info['url'] = entries[0]['url']
|
||||
info['upload_date'] = entries[0].get('upload_date')
|
||||
if determine_ext(video_url) == 'm3u8':
|
||||
info['formats'] = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'No files found for the specified AfreecaTV video, either'
|
||||
' the URL is incorrect or the video has been made private.',
|
||||
expected=True)
|
||||
app, playpath = video_url.split('mp4:')
|
||||
info.update({
|
||||
'url': app,
|
||||
'ext': 'flv',
|
||||
'play_path': 'mp4:' + playpath,
|
||||
'rtmp_live': True, # downloading won't end without this
|
||||
})
|
||||
|
||||
return info
|
||||
|
||||
|
@ -15,12 +15,12 @@ class AirMozillaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
|
||||
_TEST = {
|
||||
'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
|
||||
'md5': '2e3e7486ba5d180e829d453875b9b8bf',
|
||||
'md5': '8d02f53ee39cf006009180e21df1f3ba',
|
||||
'info_dict': {
|
||||
'id': '6x4q2w',
|
||||
'ext': 'mp4',
|
||||
'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
|
||||
'thumbnail': r're:https?://vid\.ly/(?P<id>[0-9a-z-]+)/poster',
|
||||
'thumbnail': r're:https?://.*/poster\.jpg',
|
||||
'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
|
||||
'timestamp': 1422487800,
|
||||
'upload_date': '20150128',
|
||||
@ -34,21 +34,13 @@ class AirMozillaIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._html_search_regex(r'//vid.ly/(.*?)/embed', webpage, 'id')
|
||||
video_id = self._html_search_regex(r'//vid\.ly/(.*?)/embed', webpage, 'id')
|
||||
|
||||
embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
|
||||
jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata')
|
||||
metadata = self._parse_json(jwconfig, video_id)
|
||||
|
||||
formats = [{
|
||||
'url': source['file'],
|
||||
'ext': source['type'],
|
||||
'format_id': self._search_regex(r'&format=(.*)$', source['file'], 'video format'),
|
||||
'format': source['label'],
|
||||
'height': int(source['label'].rstrip('p')),
|
||||
} for source in metadata['playlist'][0]['sources']]
|
||||
self._sort_formats(formats)
|
||||
jwconfig = self._parse_json(self._search_regex(
|
||||
r'initCallback\((.*)\);', embed_script, 'metadata'), video_id)['config']
|
||||
|
||||
info_dict = self._parse_jwplayer_data(jwconfig, video_id)
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
r'Views since archived: ([0-9]+)',
|
||||
webpage, 'view count', fatal=False))
|
||||
@ -58,17 +50,17 @@ class AirMozillaIE(InfoExtractor):
|
||||
r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
return {
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'formats': formats,
|
||||
'url': self._og_search_url(webpage),
|
||||
'display_id': display_id,
|
||||
'thumbnail': metadata['playlist'][0].get('image'),
|
||||
'description': self._og_search_description(webpage),
|
||||
'timestamp': timestamp,
|
||||
'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
|
||||
}
|
||||
})
|
||||
|
||||
return info_dict
|
||||
|
@ -2,9 +2,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
remove_end,
|
||||
int_or_none,
|
||||
qualities,
|
||||
remove_end,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
@ -22,6 +26,10 @@ class AllocineIE(InfoExtractor):
|
||||
'title': 'Astérix - Le Domaine des Dieux Teaser VF',
|
||||
'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
'duration': 39,
|
||||
'timestamp': 1404273600,
|
||||
'upload_date': '20140702',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html',
|
||||
@ -33,6 +41,10 @@ class AllocineIE(InfoExtractor):
|
||||
'title': 'Planes 2 Bande-annonce VF',
|
||||
'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
'duration': 69,
|
||||
'timestamp': 1385659800,
|
||||
'upload_date': '20131128',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html',
|
||||
@ -44,6 +56,10 @@ class AllocineIE(InfoExtractor):
|
||||
'title': 'Dragons 2 - Bande annonce finale VF',
|
||||
'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
'duration': 144,
|
||||
'timestamp': 1397589900,
|
||||
'upload_date': '20140415',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.allocine.fr/video/video-19550147/',
|
||||
@ -69,34 +85,37 @@ class AllocineIE(InfoExtractor):
|
||||
r'data-model="([^"]+)"', webpage, 'data model', default=None)
|
||||
if model:
|
||||
model_data = self._parse_json(model, display_id)
|
||||
|
||||
for video_url in model_data['sources'].values():
|
||||
video = model_data['videos'][0]
|
||||
title = video['title']
|
||||
for video_url in video['sources'].values():
|
||||
video_id, format_id = url_basename(video_url).split('_')[:2]
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id),
|
||||
'url': video_url,
|
||||
})
|
||||
|
||||
title = model_data['title']
|
||||
duration = int_or_none(video.get('duration'))
|
||||
view_count = int_or_none(video.get('view_count'))
|
||||
timestamp = unified_timestamp(try_get(
|
||||
video, lambda x: x['added_at']['date'], compat_str))
|
||||
else:
|
||||
video_id = display_id
|
||||
media_data = self._download_json(
|
||||
'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
|
||||
title = remove_end(
|
||||
self._html_search_regex(
|
||||
r'(?s)<title>(.+?)</title>', webpage, 'title').strip(),
|
||||
' - AlloCiné')
|
||||
for key, value in media_data['video'].items():
|
||||
if not key.endswith('Path'):
|
||||
continue
|
||||
|
||||
format_id = key[:-len('Path')]
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id),
|
||||
'url': value,
|
||||
})
|
||||
|
||||
title = remove_end(self._html_search_regex(
|
||||
r'(?s)<title>(.+?)</title>', webpage, 'title'
|
||||
).strip(), ' - AlloCiné')
|
||||
duration, view_count, timestamp = [None] * 3
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@ -104,7 +123,10 @@ class AllocineIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -93,8 +93,7 @@ class ArkenaIE(InfoExtractor):
|
||||
exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None))
|
||||
if kind == 'm3u8' or 'm3u8' in exts:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
f_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8' if is_live else 'm3u8_native',
|
||||
f_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=kind, fatal=False, live=is_live))
|
||||
elif kind == 'flash' or 'f4m' in exts:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
|
@ -90,7 +90,8 @@ class AtresPlayerIE(InfoExtractor):
|
||||
request, None, 'Logging in as %s' % username)
|
||||
|
||||
error = self._html_search_regex(
|
||||
r'(?s)<ul class="list_error">(.+?)</ul>', response, 'error', default=None)
|
||||
r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>',
|
||||
response, 'error', default=None)
|
||||
if error:
|
||||
raise ExtractorError(
|
||||
'Unable to login: %s' % error, expected=True)
|
||||
@ -155,13 +156,17 @@ class AtresPlayerIE(InfoExtractor):
|
||||
if format_id == 'token' or not video_url.startswith('http'):
|
||||
continue
|
||||
if 'geodeswowsmpra3player' in video_url:
|
||||
f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
|
||||
f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
|
||||
# f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
|
||||
# f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
|
||||
# this videos are protected by DRM, the f4m downloader doesn't support them
|
||||
continue
|
||||
else:
|
||||
f4m_url = video_url[:-9] + '/manifest.f4m'
|
||||
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
|
||||
video_url_hd = video_url.replace('free_es', 'es')
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url_hd[:-9] + '/manifest.f4m', video_id, f4m_id='hds',
|
||||
fatal=False))
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
video_url_hd[:-9] + '/manifest.mpd', video_id, mpd_id='dash',
|
||||
fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
path_data = player.get('pathData')
|
||||
|
73
youtube_dl/extractor/atvat.py
Normal file
73
youtube_dl/extractor/atvat.py
Normal file
@ -0,0 +1,73 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class ATVAtIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?atv\.at/(?:[^/]+/){2}(?P<id>[dv]\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://atv.at/aktuell/di-210317-2005-uhr/v1698449/',
|
||||
'md5': 'c3b6b975fb3150fc628572939df205f2',
|
||||
'info_dict': {
|
||||
'id': '1698447',
|
||||
'ext': 'mp4',
|
||||
'title': 'DI, 21.03.17 | 20:05 Uhr 1/1',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://atv.at/aktuell/meinrad-knapp/d8416/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_data = self._parse_json(unescapeHTML(self._search_regex(
|
||||
r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"',
|
||||
webpage, 'player data')), display_id)['config']['initial_video']
|
||||
|
||||
video_id = video_data['id']
|
||||
video_title = video_data['title']
|
||||
|
||||
parts = []
|
||||
for part in video_data.get('parts', []):
|
||||
part_id = part['id']
|
||||
part_title = part['title']
|
||||
|
||||
formats = []
|
||||
for source in part.get('sources', []):
|
||||
source_url = source.get('src')
|
||||
if not source_url:
|
||||
continue
|
||||
ext = determine_ext(source_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, part_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': source.get('delivery'),
|
||||
'url': source_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
parts.append({
|
||||
'id': part_id,
|
||||
'title': part_title,
|
||||
'thumbnail': part.get('preview_image_url'),
|
||||
'duration': int_or_none(part.get('duration')),
|
||||
'is_live': part.get('is_livestream'),
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
return {
|
||||
'_type': 'multi_video',
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'entries': parts,
|
||||
}
|
@ -361,7 +361,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
fmt.update({
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vbr': bitrate,
|
||||
'tbr': bitrate,
|
||||
'vcodec': encoding,
|
||||
})
|
||||
else:
|
||||
@ -370,7 +370,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'acodec': encoding,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
if protocol == 'http':
|
||||
if protocol in ('http', 'https'):
|
||||
# Direct link
|
||||
fmt.update({
|
||||
'url': href,
|
||||
@ -389,6 +389,8 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'rtmp_live': False,
|
||||
'ext': 'flv',
|
||||
})
|
||||
else:
|
||||
continue
|
||||
formats.append(fmt)
|
||||
elif kind == 'captions':
|
||||
subtitles = self.extract_subtitles(media, programme_id)
|
||||
@ -407,7 +409,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
description = smp_config['summary']
|
||||
for item in smp_config['items']:
|
||||
kind = item['kind']
|
||||
if kind != 'programme' and kind != 'radioProgramme':
|
||||
if kind not in ('programme', 'radioProgramme'):
|
||||
continue
|
||||
programme_id = item.get('vpid')
|
||||
duration = int_or_none(item.get('duration'))
|
||||
@ -448,7 +450,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
|
||||
for item in self._extract_items(playlist):
|
||||
kind = item.get('kind')
|
||||
if kind != 'programme' and kind != 'radioProgramme':
|
||||
if kind not in ('programme', 'radioProgramme'):
|
||||
continue
|
||||
title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text
|
||||
description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS)
|
||||
|
@ -21,10 +21,11 @@ class BellMediaIE(InfoExtractor):
|
||||
animalplanet|
|
||||
bravo|
|
||||
mtv|
|
||||
space
|
||||
space|
|
||||
etalk
|
||||
)\.ca|
|
||||
much\.com
|
||||
)/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
|
||||
)/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ctv.ca/video/player?vid=706966',
|
||||
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
|
||||
@ -58,6 +59,9 @@ class BellMediaIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.etalk.ca/video?videoid=663455',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_DOMAINS = {
|
||||
'thecomedynetwork': 'comedy',
|
||||
@ -65,6 +69,7 @@ class BellMediaIE(InfoExtractor):
|
||||
'sciencechannel': 'discsci',
|
||||
'investigationdiscovery': 'invdisc',
|
||||
'animalplanet': 'aniplan',
|
||||
'etalk': 'ctv',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
72
youtube_dl/extractor/bostonglobe.py
Normal file
72
youtube_dl/extractor/bostonglobe.py
Normal file
@ -0,0 +1,72 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
)
|
||||
|
||||
|
||||
class BostonGlobeIE(InfoExtractor):
|
||||
_VALID_URL = r'(?i)https?://(?:www\.)?bostonglobe\.com/.*/(?P<id>[^/]+)/\w+(?:\.html)?'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.bostonglobe.com/metro/2017/02/11/tree-finally-succumbs-disease-leaving-hole-neighborhood/h1b4lviqzMTIn9sVy8F3gP/story.html',
|
||||
'md5': '0a62181079c85c2d2b618c9a738aedaf',
|
||||
'info_dict': {
|
||||
'title': 'A tree finally succumbs to disease, leaving a hole in a neighborhood',
|
||||
'id': '5320421710001',
|
||||
'ext': 'mp4',
|
||||
'description': 'It arrived as a sapling when the Back Bay was in its infancy, a spindly American elm tamped down into a square of dirt cut into the brick sidewalk of 1880s Marlborough Street, no higher than the first bay window of the new brownstone behind it.',
|
||||
'timestamp': 1486877593,
|
||||
'upload_date': '20170212',
|
||||
'uploader_id': '245991542',
|
||||
},
|
||||
},
|
||||
{
|
||||
# Embedded youtube video; we hand it off to the Generic extractor.
|
||||
'url': 'https://www.bostonglobe.com/lifestyle/names/2017/02/17/does-ben-affleck-play-matt-damon-favorite-version-batman/ruqkc9VxKBYmh5txn1XhSI/story.html',
|
||||
'md5': '582b40327089d5c0c949b3c54b13c24b',
|
||||
'info_dict': {
|
||||
'title': "Who Is Matt Damon's Favorite Batman?",
|
||||
'id': 'ZW1QCnlA6Qc',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20170217',
|
||||
'description': 'md5:3b3dccb9375867e0b4d527ed87d307cb',
|
||||
'uploader': 'The Late Late Show with James Corden',
|
||||
'uploader_id': 'TheLateLateShow',
|
||||
},
|
||||
'expected_warnings': ['404'],
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
|
||||
page_title = self._og_search_title(webpage, default=None)
|
||||
|
||||
# <video data-brightcove-video-id="5320421710001" data-account="245991542" data-player="SJWAiyYWg" data-embed="default" class="video-js" controls itemscope itemtype="http://schema.org/VideoObject">
|
||||
entries = []
|
||||
for video in re.findall(r'(?i)(<video[^>]+>)', webpage):
|
||||
attrs = extract_attributes(video)
|
||||
|
||||
video_id = attrs.get('data-brightcove-video-id')
|
||||
account_id = attrs.get('data-account')
|
||||
player_id = attrs.get('data-player')
|
||||
embed = attrs.get('data-embed')
|
||||
|
||||
if video_id and account_id and player_id and embed:
|
||||
entries.append(
|
||||
'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
|
||||
% (account_id, player_id, embed, video_id))
|
||||
|
||||
if len(entries) == 0:
|
||||
return self.url_result(url, 'Generic')
|
||||
elif len(entries) == 1:
|
||||
return self.url_result(entries[0], 'BrightcoveNew')
|
||||
else:
|
||||
return self.playlist_from_matches(entries, page_id, page_title, ie='BrightcoveNew')
|
@ -17,6 +17,7 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
@ -109,6 +110,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'upload_date': '20140827',
|
||||
'uploader_id': '710858724001',
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
},
|
||||
{
|
||||
# playlist with 'videoList'
|
||||
@ -487,12 +489,13 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
return urls[0] if urls else None
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
def _extract_urls(ie, webpage):
|
||||
# Reference:
|
||||
# 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
|
||||
# 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
|
||||
# 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/embed-in-page.html
|
||||
# 4. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player
|
||||
# 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#tag
|
||||
# 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
|
||||
# 4. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/in-page-embed-player-implementation.html
|
||||
# 5. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player
|
||||
|
||||
entries = []
|
||||
|
||||
@ -501,22 +504,48 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
|
||||
entries.append(url if url.startswith('http') else 'http:' + url)
|
||||
|
||||
# Look for embed_in_page embeds [2]
|
||||
for video_id, account_id, player_id, embed in re.findall(
|
||||
# According to examples from [3] it's unclear whether video id
|
||||
# may be optional and what to do when it is
|
||||
# According to [4] data-video-id may be prefixed with ref:
|
||||
r'''(?sx)
|
||||
<video[^>]+
|
||||
data-video-id=["\'](\d+|ref:[^"\']+)["\'][^>]*>.*?
|
||||
</video>.*?
|
||||
<script[^>]+
|
||||
src=["\'](?:https?:)?//players\.brightcove\.net/
|
||||
(\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js
|
||||
# Look for <video> tags [2] and embed_in_page embeds [3]
|
||||
# [2] looks like:
|
||||
for video, script_tag, account_id, player_id, embed in re.findall(
|
||||
r'''(?isx)
|
||||
(<video\s+[^>]+>)
|
||||
(?:.*?
|
||||
(<script[^>]+
|
||||
src=["\'](?:https?:)?//players\.brightcove\.net/
|
||||
(\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js
|
||||
)
|
||||
)?
|
||||
''', webpage):
|
||||
entries.append(
|
||||
'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
|
||||
% (account_id, player_id, embed, video_id))
|
||||
attrs = extract_attributes(video)
|
||||
|
||||
# According to examples from [4] it's unclear whether video id
|
||||
# may be optional and what to do when it is
|
||||
video_id = attrs.get('data-video-id')
|
||||
if not video_id:
|
||||
continue
|
||||
|
||||
account_id = account_id or attrs.get('data-account')
|
||||
if not account_id:
|
||||
continue
|
||||
|
||||
player_id = player_id or attrs.get('data-player') or 'default'
|
||||
embed = embed or attrs.get('data-embed') or 'default'
|
||||
|
||||
bc_url = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % (
|
||||
account_id, player_id, embed, video_id)
|
||||
|
||||
# Some brightcove videos may be embedded with video tag only and
|
||||
# without script tag or any mentioning of brightcove at all. Such
|
||||
# embeds are considered ambiguous since they are matched based only
|
||||
# on data-video-id and data-account attributes and in the wild may
|
||||
# not be brightcove embeds at all. Let's check reconstructed
|
||||
# brightcove URLs in case of such embeds and only process valid
|
||||
# ones. By this we ensure there is indeed a brightcove embed.
|
||||
if not script_tag and not ie._is_valid_url(
|
||||
bc_url, video_id, 'possible brightcove video'):
|
||||
continue
|
||||
|
||||
entries.append(bc_url)
|
||||
|
||||
return entries
|
||||
|
||||
|
@ -7,8 +7,8 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
# ExtractorError,
|
||||
# HEADRequest,
|
||||
int_or_none,
|
||||
qualities,
|
||||
remove_end,
|
||||
@ -45,6 +45,9 @@ class CanalplusIE(InfoExtractor):
|
||||
'itele': 'itele',
|
||||
}
|
||||
|
||||
# Only works for direct mp4 URLs
|
||||
_GEO_COUNTRIES = ['FR']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',
|
||||
'info_dict': {
|
||||
@ -56,6 +59,7 @@ class CanalplusIE(InfoExtractor):
|
||||
'upload_date': '20160702',
|
||||
},
|
||||
}, {
|
||||
# geo restricted, bypassed
|
||||
'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
|
||||
'info_dict': {
|
||||
'id': '1108190',
|
||||
@ -65,19 +69,20 @@ class CanalplusIE(InfoExtractor):
|
||||
'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff',
|
||||
'upload_date': '20140724',
|
||||
},
|
||||
'skip': 'Only works from France',
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
}, {
|
||||
'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html',
|
||||
'md5': '4b47b12b4ee43002626b97fad8fb1de5',
|
||||
# geo restricted, bypassed
|
||||
'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html?vid=1443684',
|
||||
'md5': 'bb6f9f343296ab7ebd88c97b660ecf8d',
|
||||
'info_dict': {
|
||||
'id': '1420213',
|
||||
'id': '1443684',
|
||||
'display_id': 'pid6318-videos-integrales',
|
||||
'ext': 'mp4',
|
||||
'title': 'TPMP ! Même le matin - Les 35H de Baba - 14/10/2016',
|
||||
'description': 'md5:f96736c1b0ffaa96fd5b9e60ad871799',
|
||||
'upload_date': '20161014',
|
||||
'title': 'Guess my iep ! - TPMP - 07/04/2017',
|
||||
'description': 'md5:6f005933f6e06760a9236d9b3b5f17fa',
|
||||
'upload_date': '20170407',
|
||||
},
|
||||
'skip': 'Only works from France',
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
}, {
|
||||
'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
|
||||
'info_dict': {
|
||||
@ -134,15 +139,15 @@ class CanalplusIE(InfoExtractor):
|
||||
|
||||
preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD'])
|
||||
|
||||
fmt_url = next(iter(media.get('VIDEOS')))
|
||||
if '/geo' in fmt_url.lower():
|
||||
response = self._request_webpage(
|
||||
HEADRequest(fmt_url), video_id,
|
||||
'Checking if the video is georestricted')
|
||||
if '/blocage' in response.geturl():
|
||||
raise ExtractorError(
|
||||
'The video is not available in your country',
|
||||
expected=True)
|
||||
# _, fmt_url = next(iter(media['VIDEOS'].items()))
|
||||
# if '/geo' in fmt_url.lower():
|
||||
# response = self._request_webpage(
|
||||
# HEADRequest(fmt_url), video_id,
|
||||
# 'Checking if the video is georestricted')
|
||||
# if '/blocage' in response.geturl():
|
||||
# raise ExtractorError(
|
||||
# 'The video is not available in your country',
|
||||
# expected=True)
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in media['VIDEOS'].items():
|
||||
|
@ -7,6 +7,7 @@ from ..utils import float_or_none
|
||||
|
||||
|
||||
class CanvasIE(InfoExtractor):
|
||||
IE_DESC = 'canvas.be and een.be'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
|
||||
|
@ -12,13 +12,14 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
sanitized_Request,
|
||||
unescapeHTML,
|
||||
urlencode_postdata,
|
||||
USER_AGENTS,
|
||||
)
|
||||
|
||||
|
||||
class CeskaTelevizeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
|
||||
'info_dict': {
|
||||
@ -62,40 +63,12 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
},
|
||||
'skip': 'Georestricted to Czech Republic',
|
||||
}, {
|
||||
# video with 18+ caution trailer
|
||||
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
|
||||
'info_dict': {
|
||||
'id': '215562210900007-bogotart',
|
||||
'title': 'Queer: Bogotart',
|
||||
'description': 'Alternativní průvodce současným queer světem',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '61924494876844842',
|
||||
'ext': 'mp4',
|
||||
'title': 'Queer: Bogotart (Varování 18+)',
|
||||
'duration': 10.2,
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '61924494877068022',
|
||||
'ext': 'mp4',
|
||||
'title': 'Queer: Bogotart (Queer)',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 1558.3,
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
@ -103,13 +76,28 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
|
||||
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
||||
|
||||
typ = self._html_search_regex(
|
||||
r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
|
||||
episode_id = self._html_search_regex(
|
||||
r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')
|
||||
type_ = None
|
||||
episode_id = None
|
||||
|
||||
playlist = self._parse_json(
|
||||
self._search_regex(
|
||||
r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
|
||||
default='{}'), playlist_id)
|
||||
if playlist:
|
||||
type_ = playlist.get('type')
|
||||
episode_id = playlist.get('id')
|
||||
|
||||
if not type_:
|
||||
type_ = self._html_search_regex(
|
||||
r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
|
||||
webpage, 'type')
|
||||
if not episode_id:
|
||||
episode_id = self._html_search_regex(
|
||||
r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
|
||||
webpage, 'episode_id')
|
||||
|
||||
data = {
|
||||
'playlist[0][type]': typ,
|
||||
'playlist[0][type]': type_,
|
||||
'playlist[0][id]': episode_id,
|
||||
'requestUrl': compat_urllib_parse_urlparse(url).path,
|
||||
'requestSource': 'iVysilani',
|
||||
@ -160,8 +148,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
for format_id, stream_url in item.get('streamUrls', {}).items():
|
||||
if 'playerType=flash' in stream_url:
|
||||
stream_formats = self._extract_m3u8_formats(
|
||||
stream_url, playlist_id, 'mp4',
|
||||
entry_protocol='m3u8' if is_live else 'm3u8_native',
|
||||
stream_url, playlist_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls-%s' % format_id, fatal=False)
|
||||
else:
|
||||
stream_formats = self._extract_mpd_formats(
|
||||
@ -246,3 +233,47 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
yield line
|
||||
|
||||
return '\r\n'.join(_fix_subtitle(subtitles))
|
||||
|
||||
|
||||
class CeskaTelevizePoradyIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
|
||||
_TESTS = [{
|
||||
# video with 18+ caution trailer
|
||||
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
|
||||
'info_dict': {
|
||||
'id': '215562210900007-bogotart',
|
||||
'title': 'Queer: Bogotart',
|
||||
'description': 'Alternativní průvodce současným queer světem',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '61924494876844842',
|
||||
'ext': 'mp4',
|
||||
'title': 'Queer: Bogotart (Varování 18+)',
|
||||
'duration': 10.2,
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '61924494877068022',
|
||||
'ext': 'mp4',
|
||||
'title': 'Queer: Bogotart (Queer)',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 1558.3,
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
data_url = unescapeHTML(self._search_regex(
|
||||
r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'iframe player url', group='url'))
|
||||
|
||||
return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())
|
||||
|
@ -4,62 +4,62 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
parse_filesize,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class Channel9IE(InfoExtractor):
|
||||
'''
|
||||
Common extractor for channel9.msdn.com.
|
||||
|
||||
The type of provided URL (video or playlist) is determined according to
|
||||
meta Search.PageType from web page HTML rather than URL itself, as it is
|
||||
not always possible to do.
|
||||
'''
|
||||
IE_DESC = 'Channel 9'
|
||||
IE_NAME = 'channel9'
|
||||
_VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
|
||||
'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
|
||||
'md5': '32083d4eaf1946db6d454313f44510ca',
|
||||
'info_dict': {
|
||||
'id': 'Events/TechEd/Australia/2013/KOS002',
|
||||
'ext': 'mp4',
|
||||
'id': '6c413323-383a-49dc-88f9-a22800cab024',
|
||||
'ext': 'wmv',
|
||||
'title': 'Developer Kick-Off Session: Stuff We Love',
|
||||
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
|
||||
'description': 'md5:b80bf9355a503c193aff7ec6cd5a7731',
|
||||
'duration': 4576,
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'timestamp': 1377717420,
|
||||
'upload_date': '20130828',
|
||||
'session_code': 'KOS002',
|
||||
'session_day': 'Day 1',
|
||||
'session_room': 'Arena 1A',
|
||||
'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug',
|
||||
'Mads Kristensen'],
|
||||
'session_speakers': ['Andrew Coates', 'Brady Gaster', 'Mads Kristensen', 'Ed Blankenship', 'Patrick Klug'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||
'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
|
||||
'md5': 'dcf983ee6acd2088e7188c3cf79b46bc',
|
||||
'info_dict': {
|
||||
'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||
'ext': 'mp4',
|
||||
'id': 'fe8e435f-bb93-4e01-8e97-a28c01887024',
|
||||
'ext': 'wmv',
|
||||
'title': 'Self-service BI with Power BI - nuclear testing',
|
||||
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
|
||||
'description': 'md5:2d17fec927fc91e9e17783b3ecc88f54',
|
||||
'duration': 1540,
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'timestamp': 1386381991,
|
||||
'upload_date': '20131207',
|
||||
'authors': ['Mike Wilmot'],
|
||||
},
|
||||
}, {
|
||||
# low quality mp4 is best
|
||||
'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
|
||||
'info_dict': {
|
||||
'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
|
||||
'id': '33ad69d2-6a4e-4172-83a1-a523013dec76',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ranges for the Standard Library',
|
||||
'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d',
|
||||
'description': 'md5:9895e0a9fd80822d2f01c454b8f4a372',
|
||||
'duration': 5646,
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'upload_date': '20150930',
|
||||
'timestamp': 1443640735,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -70,7 +70,7 @@ class Channel9IE(InfoExtractor):
|
||||
'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b',
|
||||
'title': 'Channel 9',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist_mincount': 100,
|
||||
}, {
|
||||
'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
|
||||
'only_matching': True,
|
||||
@ -81,189 +81,6 @@ class Channel9IE(InfoExtractor):
|
||||
|
||||
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
|
||||
|
||||
def _formats_from_html(self, html):
|
||||
FORMAT_REGEX = r'''
|
||||
(?x)
|
||||
<a\s+href="(?P<url>[^"]+)">(?P<quality>[^<]+)</a>\s*
|
||||
<span\s+class="usage">\((?P<note>[^\)]+)\)</span>\s*
|
||||
(?:<div\s+class="popup\s+rounded">\s*
|
||||
<h3>File\s+size</h3>\s*(?P<filesize>.*?)\s*
|
||||
</div>)? # File size part may be missing
|
||||
'''
|
||||
quality = qualities((
|
||||
'MP3', 'MP4',
|
||||
'Low Quality WMV', 'Low Quality MP4',
|
||||
'Mid Quality WMV', 'Mid Quality MP4',
|
||||
'High Quality WMV', 'High Quality MP4'))
|
||||
formats = [{
|
||||
'url': x.group('url'),
|
||||
'format_id': x.group('quality'),
|
||||
'format_note': x.group('note'),
|
||||
'format': '%s (%s)' % (x.group('quality'), x.group('note')),
|
||||
'filesize_approx': parse_filesize(x.group('filesize')),
|
||||
'quality': quality(x.group('quality')),
|
||||
'vcodec': 'none' if x.group('note') == 'Audio only' else None,
|
||||
} for x in list(re.finditer(FORMAT_REGEX, html))]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return formats
|
||||
|
||||
def _extract_title(self, html):
|
||||
title = self._html_search_meta('title', html, 'title')
|
||||
if title is None:
|
||||
title = self._og_search_title(html)
|
||||
TITLE_SUFFIX = ' (Channel 9)'
|
||||
if title is not None and title.endswith(TITLE_SUFFIX):
|
||||
title = title[:-len(TITLE_SUFFIX)]
|
||||
return title
|
||||
|
||||
def _extract_description(self, html):
|
||||
DESCRIPTION_REGEX = r'''(?sx)
|
||||
<div\s+class="entry-content">\s*
|
||||
<div\s+id="entry-body">\s*
|
||||
(?P<description>.+?)\s*
|
||||
</div>\s*
|
||||
</div>
|
||||
'''
|
||||
m = re.search(DESCRIPTION_REGEX, html)
|
||||
if m is not None:
|
||||
return m.group('description')
|
||||
return self._html_search_meta('description', html, 'description')
|
||||
|
||||
def _extract_duration(self, html):
    """Return the ``"length": "HH:MM:SS"`` value found in ``html`` in seconds.

    Returns None when the page carries no such field.
    """
    m = re.search(r'"length": *"(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html)
    if m is None:
        return None
    hours = int(m.group('hours'))
    minutes = int(m.group('minutes'))
    seconds = int(m.group('seconds'))
    return hours * 60 * 60 + minutes * 60 + seconds
|
||||
|
||||
def _extract_slides(self, html):
    """Return the href of the ``Slides`` link in ``html``, or None if absent."""
    m = re.search(r'<a href="(?P<slidesurl>[^"]+)" class="slides">Slides</a>', html)
    if m is None:
        return None
    return m.group('slidesurl')
|
||||
|
||||
def _extract_zip(self, html):
    """Return the href of the ``Zip`` link in ``html``, or None if absent."""
    m = re.search(r'<a href="(?P<zipurl>[^"]+)" class="zip">Zip</a>', html)
    if m is None:
        return None
    return m.group('zipurl')
|
||||
|
||||
def _extract_avg_rating(self, html):
    """Return the ``Avg Rating`` value shown in ``html`` as a float.

    Returns 0 when the page has no ``avg-rating`` paragraph.
    """
    m = re.search(r'<p class="avg-rating">Avg Rating: <span>(?P<avgrating>[^<]+)</span></p>', html)
    if m is None:
        return 0
    return float(m.group('avgrating'))
|
||||
|
||||
def _extract_rating_count(self, html):
    """Return the number shown in the ``rating-count`` div of ``html``.

    The captured text is normalised with ``self._fix_count``.  Returns 0
    when the div is missing.
    """
    m = re.search(r'<div class="rating-count">\((?P<ratingcount>[^<]+)\)</div>', html)
    if m is None:
        return 0
    return int(self._fix_count(m.group('ratingcount')))
|
||||
|
||||
def _extract_view_count(self, html):
    """Return the number shown in the ``views`` list item of ``html``.

    The captured text is normalised with ``self._fix_count``.  Returns 0
    when the list item is missing.
    """
    m = re.search(r'<li class="views">\s*<span class="count">(?P<viewcount>[^<]+)</span> Views\s*</li>', html)
    if m is None:
        return 0
    return int(self._fix_count(m.group('viewcount')))
|
||||
|
||||
def _extract_comment_count(self, html):
    """Return the number shown in the ``comments`` list item of ``html``.

    The captured text is normalised with ``self._fix_count``.  Returns 0
    when the list item is missing.
    """
    m = re.search(r'<li class="comments">\s*<a href="#comments">\s*<span class="count">(?P<commentcount>[^<]+)</span> Comments\s*</a>\s*</li>', html)
    if m is None:
        return 0
    return int(self._fix_count(m.group('commentcount')))
|
||||
|
||||
def _fix_count(self, count):
    """Convert a count such as ``'1,234'`` to an int; None is passed through."""
    if count is None:
        return None
    # Drop the thousands separators before converting.
    return int(str(count).replace(',', ''))
|
||||
|
||||
def _extract_authors(self, html):
    """Return the names linked from the first ``author`` list item of ``html``.

    Only anchors pointing below ``/Niners/`` are collected.  Returns None
    when the page has no ``author`` list item at all.
    """
    m = re.search(r'(?s)<li class="author">(.*?)</li>', html)
    if m is None:
        return None
    author_markup = m.group(1)
    return re.findall(r'<a href="/Niners/[^"]+">([^<]+)</a>', author_markup)
|
||||
|
||||
def _extract_session_code(self, html):
    """Return the text of the ``code`` list item in ``html``, or None if absent."""
    m = re.search(r'<li class="code">\s*(?P<code>.+?)\s*</li>', html)
    if m is None:
        return None
    return m.group('code')
|
||||
|
||||
def _extract_session_day(self, html):
    """Return the stripped link text of the ``day`` list item, or None if absent."""
    m = re.search(r'<li class="day">\s*<a href="/Events/[^"]+">(?P<day>[^<]+)</a>\s*</li>', html)
    if m is None:
        return None
    return m.group('day').strip()
|
||||
|
||||
def _extract_session_room(self, html):
    """Return the text of the ``room`` list item in ``html``, or None if absent."""
    m = re.search(r'<li class="room">\s*(?P<room>.+?)\s*</li>', html)
    if m is None:
        return None
    return m.group('room')
|
||||
|
||||
def _extract_session_speakers(self, html):
    """Return the link texts of all ``/Events/Speakers/`` anchors in ``html``."""
    speaker_link = r'<a href="/Events/Speakers/[^"]+">([^<]+)</a>'
    return re.findall(speaker_link, html)
|
||||
|
||||
def _extract_content(self, html, content_path):
    """Build the info dicts for everything downloadable on a content page.

    Returns a list holding, in this order, an entry for the slides, an
    entry for the zip and an entry for the recording formats - each only
    when the page offers it.  When the page offers none of them a warning
    is reported through the downloader and None is returned.
    """
    # Look for downloadable content
    formats = self._formats_from_html(html)
    slides = self._extract_slides(html)
    zip_ = self._extract_zip(html)

    # Nothing to download
    if not formats and slides is None and zip_ is None:
        self._downloader.report_warning('None of recording, slides or zip are available for %s' % content_path)
        return

    # Extract meta
    title = self._extract_title(html)
    # Fields shared by every entry produced for this page.
    common = {
        '_type': 'video',
        'id': content_path,
        'description': self._extract_description(html),
        'thumbnail': self._og_search_thumbnail(html),
        'duration': self._extract_duration(html),
        'avg_rating': self._extract_avg_rating(html),
        'rating_count': self._extract_rating_count(html),
        'view_count': self._extract_view_count(html),
        'comment_count': self._extract_comment_count(html),
    }

    result = []

    def add_entry(extra):
        # Each entry gets its own copy so the shared fields are never aliased.
        entry = common.copy()
        entry.update(extra)
        result.append(entry)

    if slides is not None:
        add_entry({'title': title + '-Slides', 'url': slides})

    if zip_ is not None:
        add_entry({'title': title + '-Zip', 'url': zip_})

    if formats:
        add_entry({'title': title, 'formats': formats})

    return result
|
||||
|
||||
def _extract_entry_item(self, html, content_path):
    """Return the single info dict of an entry page, with its authors added.

    Returns None when ``_extract_content`` found nothing downloadable and
    raises ExtractorError when it produced more than one entry.
    """
    contents = self._extract_content(html, content_path)
    if contents is None:
        return None

    if len(contents) > 1:
        raise ExtractorError('Got more than one entry')

    entry = contents[0]
    entry['authors'] = self._extract_authors(html)
    return entry
|
||||
|
||||
def _extract_session(self, html, content_path):
    """Return a playlist of a session page's entries, tagged with session data.

    Every entry produced by ``_extract_content`` receives the session
    code, day, room and speakers read from ``html``.  Returns None when
    ``_extract_content`` found nothing downloadable.
    """
    contents = self._extract_content(html, content_path)
    if contents is None:
        return None

    session_meta = {
        'session_code': self._extract_session_code(html),
        'session_day': self._extract_session_day(html),
        'session_room': self._extract_session_room(html),
        'session_speakers': self._extract_session_speakers(html),
    }

    for entry in contents:
        entry.update(session_meta)

    return self.playlist_result(contents)
|
||||
|
||||
def _extract_list(self, video_id, rss_url=None):
|
||||
if not rss_url:
|
||||
rss_url = self._RSS_URL % video_id
|
||||
@ -274,9 +91,7 @@ class Channel9IE(InfoExtractor):
|
||||
return self.playlist_result(entries, video_id, title_text)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
content_path = mobj.group('contentpath')
|
||||
rss = mobj.group('rss')
|
||||
content_path, rss = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
if rss:
|
||||
return self._extract_list(content_path, url)
|
||||
@ -284,17 +99,158 @@ class Channel9IE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
url, content_path, 'Downloading web page')
|
||||
|
||||
page_type = self._search_regex(
|
||||
r'<meta[^>]+name=(["\'])WT\.entryid\1[^>]+content=(["\'])(?P<pagetype>[^:]+).+?\2',
|
||||
webpage, 'page type', default=None, group='pagetype')
|
||||
if page_type:
|
||||
if page_type == 'Entry': # Any 'item'-like page, may contain downloadable content
|
||||
return self._extract_entry_item(webpage, content_path)
|
||||
elif page_type == 'Session': # Event session page, may contain downloadable content
|
||||
return self._extract_session(webpage, content_path)
|
||||
elif page_type == 'Event':
|
||||
return self._extract_list(content_path)
|
||||
episode_data = self._search_regex(
|
||||
r"data-episode='([^']+)'", webpage, 'episode data', default=None)
|
||||
if episode_data:
|
||||
episode_data = self._parse_json(unescapeHTML(
|
||||
episode_data), content_path)
|
||||
content_id = episode_data['contentId']
|
||||
is_session = '/Sessions(' in episode_data['api']
|
||||
content_url = 'https://channel9.msdn.com/odata' + episode_data['api']
|
||||
if is_session:
|
||||
content_url += '?$expand=Speakers'
|
||||
else:
|
||||
raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True)
|
||||
else: # Assuming list
|
||||
content_url += '?$expand=Authors'
|
||||
content_data = self._download_json(content_url, content_id)
|
||||
title = content_data['Title']
|
||||
|
||||
QUALITIES = (
|
||||
'mp3',
|
||||
'wmv', 'mp4',
|
||||
'wmv-low', 'mp4-low',
|
||||
'wmv-mid', 'mp4-mid',
|
||||
'wmv-high', 'mp4-high',
|
||||
)
|
||||
|
||||
quality_key = qualities(QUALITIES)
|
||||
|
||||
def quality(quality_id, format_url):
|
||||
return (len(QUALITIES) if '_Source.' in format_url
|
||||
else quality_key(quality_id))
|
||||
|
||||
formats = []
|
||||
urls = set()
|
||||
|
||||
SITE_QUALITIES = {
|
||||
'MP3': 'mp3',
|
||||
'MP4': 'mp4',
|
||||
'Low Quality WMV': 'wmv-low',
|
||||
'Low Quality MP4': 'mp4-low',
|
||||
'Mid Quality WMV': 'wmv-mid',
|
||||
'Mid Quality MP4': 'mp4-mid',
|
||||
'High Quality WMV': 'wmv-high',
|
||||
'High Quality MP4': 'mp4-high',
|
||||
}
|
||||
|
||||
formats_select = self._search_regex(
|
||||
r'(?s)<select[^>]+name=["\']format[^>]+>(.+?)</select', webpage,
|
||||
'formats select', default=None)
|
||||
if formats_select:
|
||||
for mobj in re.finditer(
|
||||
r'<option\b[^>]+\bvalue=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*(?P<format>[^<]+?)\s*<',
|
||||
formats_select):
|
||||
format_url = mobj.group('url')
|
||||
if format_url in urls:
|
||||
continue
|
||||
urls.add(format_url)
|
||||
format_id = mobj.group('format')
|
||||
quality_id = SITE_QUALITIES.get(format_id, format_id)
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': quality_id,
|
||||
'quality': quality(quality_id, format_url),
|
||||
'vcodec': 'none' if quality_id == 'mp3' else None,
|
||||
})
|
||||
|
||||
API_QUALITIES = {
|
||||
'VideoMP4Low': 'mp4-low',
|
||||
'VideoWMV': 'wmv-mid',
|
||||
'VideoMP4Medium': 'mp4-mid',
|
||||
'VideoMP4High': 'mp4-high',
|
||||
'VideoWMVHQ': 'wmv-hq',
|
||||
}
|
||||
|
||||
for format_id, q in API_QUALITIES.items():
|
||||
q_url = content_data.get(format_id)
|
||||
if not q_url or q_url in urls:
|
||||
continue
|
||||
urls.add(q_url)
|
||||
formats.append({
|
||||
'url': q_url,
|
||||
'format_id': q,
|
||||
'quality': quality(q, q_url),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
slides = content_data.get('Slides')
|
||||
zip_file = content_data.get('ZipFile')
|
||||
|
||||
if not formats and not slides and not zip_file:
|
||||
raise ExtractorError(
|
||||
'None of recording, slides or zip are available for %s' % content_path)
|
||||
|
||||
subtitles = {}
|
||||
for caption in content_data.get('Captions', []):
|
||||
caption_url = caption.get('Url')
|
||||
if not caption_url:
|
||||
continue
|
||||
subtitles.setdefault(caption.get('Language', 'en'), []).append({
|
||||
'url': caption_url,
|
||||
'ext': 'vtt',
|
||||
})
|
||||
|
||||
common = {
|
||||
'id': content_id,
|
||||
'title': title,
|
||||
'description': clean_html(content_data.get('Description') or content_data.get('Body')),
|
||||
'thumbnail': content_data.get('Thumbnail') or content_data.get('VideoPlayerPreviewImage'),
|
||||
'duration': int_or_none(content_data.get('MediaLengthInSeconds')),
|
||||
'timestamp': parse_iso8601(content_data.get('PublishedDate')),
|
||||
'avg_rating': int_or_none(content_data.get('Rating')),
|
||||
'rating_count': int_or_none(content_data.get('RatingCount')),
|
||||
'view_count': int_or_none(content_data.get('Views')),
|
||||
'comment_count': int_or_none(content_data.get('CommentCount')),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
if is_session:
|
||||
speakers = []
|
||||
for s in content_data.get('Speakers', []):
|
||||
speaker_name = s.get('FullName')
|
||||
if not speaker_name:
|
||||
continue
|
||||
speakers.append(speaker_name)
|
||||
|
||||
common.update({
|
||||
'session_code': content_data.get('Code'),
|
||||
'session_room': content_data.get('Room'),
|
||||
'session_speakers': speakers,
|
||||
})
|
||||
else:
|
||||
authors = []
|
||||
for a in content_data.get('Authors', []):
|
||||
author_name = a.get('DisplayName')
|
||||
if not author_name:
|
||||
continue
|
||||
authors.append(author_name)
|
||||
common['authors'] = authors
|
||||
|
||||
contents = []
|
||||
|
||||
if slides:
|
||||
d = common.copy()
|
||||
d.update({'title': title + '-Slides', 'url': slides})
|
||||
contents.append(d)
|
||||
|
||||
if zip_file:
|
||||
d = common.copy()
|
||||
d.update({'title': title + '-Zip', 'url': zip_file})
|
||||
contents.append(d)
|
||||
|
||||
if formats:
|
||||
d = common.copy()
|
||||
d.update({'title': title, 'formats': formats})
|
||||
contents.append(d)
|
||||
return self.playlist_result(contents)
|
||||
else:
|
||||
return self._extract_list(content_path)
|
||||
|
@ -33,10 +33,17 @@ class ChaturbateIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
m3u8_formats = [(m.group('id').lower(), m.group('url')) for m in re.finditer(
|
||||
r'hlsSource(?P<id>.+?)\s*=\s*(?P<q>["\'])(?P<url>http.+?)(?P=q)', webpage)]
|
||||
m3u8_urls = []
|
||||
|
||||
if not m3u8_formats:
|
||||
for m in re.finditer(
|
||||
r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage):
|
||||
m3u8_fast_url, m3u8_no_fast_url = m.group('url'), m.group(
|
||||
'url').replace('_fast', '')
|
||||
for m3u8_url in (m3u8_fast_url, m3u8_no_fast_url):
|
||||
if m3u8_url not in m3u8_urls:
|
||||
m3u8_urls.append(m3u8_url)
|
||||
|
||||
if not m3u8_urls:
|
||||
error = self._search_regex(
|
||||
[r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>',
|
||||
r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'],
|
||||
@ -50,7 +57,8 @@ class ChaturbateIE(InfoExtractor):
|
||||
raise ExtractorError('Unable to find stream URL')
|
||||
|
||||
formats = []
|
||||
for m3u8_id, m3u8_url in m3u8_formats:
|
||||
for m3u8_url in m3u8_urls:
|
||||
m3u8_id = 'fast' if '_fast' in m3u8_url else 'slow'
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4',
|
||||
# ffmpeg skips segments for fast m3u8
|
||||
|
@ -1,97 +1,56 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
remove_end,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class CloudyIE(InfoExtractor):
|
||||
_IE_DESC = 'cloudy.ec'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?cloudy\.ec/
|
||||
(?:v/|embed\.php\?id=)
|
||||
(?P<id>[A-Za-z0-9]+)
|
||||
'''
|
||||
_EMBED_URL = 'http://www.cloudy.ec/embed.php?id=%s'
|
||||
_API_URL = 'http://www.cloudy.ec/api/player.api.php'
|
||||
_MAX_TRIES = 2
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cloudy.ec/v/af511e2527aac',
|
||||
'md5': '5cb253ace826a42f35b4740539bedf07',
|
||||
'md5': '29832b05028ead1b58be86bf319397ca',
|
||||
'info_dict': {
|
||||
'id': 'af511e2527aac',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Funny Cats and Animals Compilation june 2013',
|
||||
'upload_date': '20130913',
|
||||
'view_count': int,
|
||||
}
|
||||
}
|
||||
|
||||
def _extract_video(self, video_id, file_key, error_url=None, try_num=0):
|
||||
|
||||
if try_num > self._MAX_TRIES - 1:
|
||||
raise ExtractorError('Unable to extract video URL', expected=True)
|
||||
|
||||
form = {
|
||||
'file': video_id,
|
||||
'key': file_key,
|
||||
}
|
||||
|
||||
if error_url:
|
||||
form.update({
|
||||
'numOfErrors': try_num,
|
||||
'errorCode': '404',
|
||||
'errorUrl': error_url,
|
||||
})
|
||||
|
||||
player_data = self._download_webpage(
|
||||
self._API_URL, video_id, 'Downloading player data', query=form)
|
||||
data = compat_parse_qs(player_data)
|
||||
|
||||
try_num += 1
|
||||
|
||||
if 'error' in data:
|
||||
raise ExtractorError(
|
||||
'%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])),
|
||||
expected=True)
|
||||
|
||||
title = data.get('title', [None])[0]
|
||||
if title:
|
||||
title = remove_end(title, '&asdasdas').strip()
|
||||
|
||||
video_url = data.get('url', [None])[0]
|
||||
|
||||
if video_url:
|
||||
try:
|
||||
self._request_webpage(HEADRequest(video_url), video_id, 'Checking video URL')
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]:
|
||||
self.report_warning('Invalid video URL, requesting another', video_id)
|
||||
return self._extract_video(video_id, file_key, video_url, try_num)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.cloudy.ec/embed.php?autoplay=1&id=af511e2527aac',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
url = self._EMBED_URL % video_id
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(
|
||||
'http://www.cloudy.ec/embed.php?id=%s' % video_id, video_id)
|
||||
|
||||
file_key = self._search_regex(
|
||||
[r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
|
||||
webpage, 'file_key')
|
||||
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||
|
||||
return self._extract_video(video_id, file_key)
|
||||
webpage = self._download_webpage(
|
||||
'https://www.cloudy.ec/v/%s' % video_id, video_id, fatal=False)
|
||||
|
||||
if webpage:
|
||||
info.update({
|
||||
'title': self._search_regex(
|
||||
r'<h\d[^>]*>([^<]+)<', webpage, 'title'),
|
||||
'upload_date': unified_strdate(self._search_regex(
|
||||
r'>Published at (\d{4}-\d{1,2}-\d{1,2})', webpage,
|
||||
'upload date', fatal=False)),
|
||||
'view_count': str_to_int(self._search_regex(
|
||||
r'([\d,.]+) views<', webpage, 'view count', fatal=False)),
|
||||
})
|
||||
|
||||
if not info.get('title'):
|
||||
info['title'] = video_id
|
||||
|
||||
info['id'] = video_id
|
||||
|
||||
return info
|
||||
|
@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
@ -36,34 +37,35 @@ from ..utils import (
|
||||
clean_html,
|
||||
compiled_regex_type,
|
||||
determine_ext,
|
||||
determine_protocol,
|
||||
error_to_compat_str,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
GeoRestrictedError,
|
||||
GeoUtils,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_codecs,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_m3u8_attributes,
|
||||
RegexNotFoundError,
|
||||
sanitize_filename,
|
||||
sanitized_Request,
|
||||
sanitize_filename,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
update_Request,
|
||||
update_url_query,
|
||||
urljoin,
|
||||
url_basename,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
determine_protocol,
|
||||
parse_duration,
|
||||
mimetype2ext,
|
||||
update_Request,
|
||||
update_url_query,
|
||||
parse_m3u8_attributes,
|
||||
extract_attributes,
|
||||
parse_codecs,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@ -546,6 +548,34 @@ class InfoExtractor(object):
|
||||
|
||||
return encoding
|
||||
|
||||
def __check_blocked(self, content):
    """Raise ExtractorError when ``content`` is a known "access blocked" page.

    Three pages are recognised by their markers: the Websense filter page,
    the "URL you requested has been blocked" notice and the TTK notice that
    refers to blocklist.rkn.gov.ru.  Any other content passes through and
    the method returns None.
    """
    # Some markers are only looked for at the very beginning of the page.
    first_block = content[:512]

    if ('<title>Access to this site is blocked</title>' in content and
            'Websense' in first_block):
        msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
        blocked_iframe = self._html_search_regex(
            r'<iframe src="([^"]+)"', content,
            'Websense information URL', default=None)
        if blocked_iframe:
            msg += ' Visit %s for more details' % blocked_iframe
        raise ExtractorError(msg, expected=True)

    if '<title>The URL you requested has been blocked</title>' in first_block:
        msg = (
            'Access to this webpage has been blocked by Indian censorship. '
            'Use a VPN or proxy server (with --proxy) to route around it.')
        block_msg = self._html_search_regex(
            r'</h1><p>(.*?)</p>',
            content, 'block message', default=None)
        if block_msg:
            msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
        raise ExtractorError(msg, expected=True)

    if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content and
            'blocklist.rkn.gov.ru' in content):
        raise ExtractorError(
            'Access to this webpage has been blocked by decision of the Russian government. '
            'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
            expected=True)
|
||||
|
||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
|
||||
content_type = urlh.headers.get('Content-Type', '')
|
||||
webpage_bytes = urlh.read()
|
||||
@ -587,25 +617,7 @@ class InfoExtractor(object):
|
||||
except LookupError:
|
||||
content = webpage_bytes.decode('utf-8', 'replace')
|
||||
|
||||
if ('<title>Access to this site is blocked</title>' in content and
|
||||
'Websense' in content[:512]):
|
||||
msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
|
||||
blocked_iframe = self._html_search_regex(
|
||||
r'<iframe src="([^"]+)"', content,
|
||||
'Websense information URL', default=None)
|
||||
if blocked_iframe:
|
||||
msg += ' Visit %s for more details' % blocked_iframe
|
||||
raise ExtractorError(msg, expected=True)
|
||||
if '<title>The URL you requested has been blocked</title>' in content[:512]:
|
||||
msg = (
|
||||
'Access to this webpage has been blocked by Indian censorship. '
|
||||
'Use a VPN or proxy server (with --proxy) to route around it.')
|
||||
block_msg = self._html_search_regex(
|
||||
r'</h1><p>(.*?)</p>',
|
||||
content, 'block message', default=None)
|
||||
if block_msg:
|
||||
msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
|
||||
raise ExtractorError(msg, expected=True)
|
||||
self.__check_blocked(content)
|
||||
|
||||
return content
|
||||
|
||||
@ -714,6 +726,13 @@ class InfoExtractor(object):
|
||||
video_info['title'] = video_title
|
||||
return video_info
|
||||
|
||||
def playlist_from_matches(self, matches, video_id, video_title, getter=None, ie=None):
    """Return a playlist whose entries are URL results built from ``matches``.

    ``getter``, when given, is called on each match to obtain its URL;
    otherwise the match itself is used.  Each URL goes through
    ``self._proto_relative_url`` and ``self.url_result`` (with ``ie``), the
    results are passed through ``orderedSet`` and the playlist is created
    with ``video_id`` as its id and ``video_title`` as its title.
    """
    def to_url_result(match):
        url = getter(match) if getter else match
        return self.url_result(self._proto_relative_url(url), ie)

    urlrs = orderedSet(to_url_result(m) for m in matches)
    return self.playlist_result(
        urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||
|
||||
@staticmethod
|
||||
def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
|
||||
"""Returns a playlist"""
|
||||
@ -1760,7 +1779,7 @@ class InfoExtractor(object):
|
||||
if content_type == 'text':
|
||||
# TODO implement WebVTT downloading
|
||||
pass
|
||||
elif content_type == 'video' or content_type == 'audio':
|
||||
elif content_type in ('video', 'audio'):
|
||||
base_url = ''
|
||||
for element in (representation, adaptation_set, period, mpd_doc):
|
||||
base_url_e = element.find(_add_ns('BaseURL'))
|
||||
@ -2161,18 +2180,24 @@ class InfoExtractor(object):
|
||||
})
|
||||
return formats
|
||||
|
||||
@staticmethod
|
||||
def _find_jwplayer_data(webpage):
|
||||
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
|
||||
mobj = re.search(
|
||||
r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
|
||||
r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\).*?\.setup\s*\((?P<options>[^)]+)\)',
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('options')
|
||||
try:
|
||||
jwplayer_data = self._parse_json(mobj.group('options'),
|
||||
video_id=video_id,
|
||||
transform_source=transform_source)
|
||||
except ExtractorError:
|
||||
pass
|
||||
else:
|
||||
if isinstance(jwplayer_data, dict):
|
||||
return jwplayer_data
|
||||
|
||||
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
|
||||
jwplayer_data = self._parse_json(
|
||||
self._find_jwplayer_data(webpage), video_id,
|
||||
transform_source=js_to_json)
|
||||
jwplayer_data = self._find_jwplayer_data(
|
||||
webpage, video_id, transform_source=js_to_json)
|
||||
return self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, *args, **kwargs)
|
||||
|
||||
@ -2233,11 +2258,17 @@ class InfoExtractor(object):
|
||||
|
||||
def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
|
||||
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
|
||||
urls = []
|
||||
formats = []
|
||||
for source in jwplayer_sources_data:
|
||||
source_url = self._proto_relative_url(source['file'])
|
||||
source_url = self._proto_relative_url(source.get('file'))
|
||||
if not source_url:
|
||||
continue
|
||||
if base_url:
|
||||
source_url = compat_urlparse.urljoin(base_url, source_url)
|
||||
if source_url in urls:
|
||||
continue
|
||||
urls.append(source_url)
|
||||
source_type = source.get('type') or ''
|
||||
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
||||
if source_type == 'hls' or ext == 'm3u8':
|
||||
@ -2247,6 +2278,9 @@ class InfoExtractor(object):
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
source_url, video_id, mpd_id=mpd_id, fatal=False))
|
||||
elif ext == 'smil':
|
||||
formats.extend(self._extract_smil_formats(
|
||||
source_url, video_id, fatal=False))
|
||||
# https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
|
||||
elif source_type.startswith('audio') or ext in (
|
||||
'oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
|
||||
|
@ -9,13 +9,14 @@ from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
orderedSet,
|
||||
remove_end,
|
||||
extract_attributes,
|
||||
mimetype2ext,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
@ -66,6 +67,16 @@ class CondeNastIE(InfoExtractor):
|
||||
'upload_date': '20130314',
|
||||
'timestamp': 1363219200,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://video.gq.com/watch/the-closer-with-keith-olbermann-the-only-true-surprise-trump-s-an-idiot?c=series',
|
||||
'info_dict': {
|
||||
'id': '58d1865bfd2e6126e2000015',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Only True Surprise? Trump’s an Idiot',
|
||||
'uploader': 'gq',
|
||||
'upload_date': '20170321',
|
||||
'timestamp': 1490126427,
|
||||
},
|
||||
}, {
|
||||
# JS embed
|
||||
'url': 'http://player.cnevids.com/embedjs/55f9cf8b61646d1acf00000c/5511d76261646d5566020000.js',
|
||||
@ -114,26 +125,33 @@ class CondeNastIE(InfoExtractor):
|
||||
})
|
||||
video_id = query['videoId']
|
||||
video_info = None
|
||||
info_page = self._download_webpage(
|
||||
info_page = self._download_json(
|
||||
'http://player.cnevids.com/player/video.js',
|
||||
video_id, 'Downloading video info', query=query, fatal=False)
|
||||
video_id, 'Downloading video info', fatal=False, query=query)
|
||||
if info_page:
|
||||
video_info = self._parse_json(self._search_regex(
|
||||
r'loadCallback\(({.+})\)', info_page, 'video info'), video_id)['video']
|
||||
else:
|
||||
video_info = info_page.get('video')
|
||||
if not video_info:
|
||||
info_page = self._download_webpage(
|
||||
'http://player.cnevids.com/player/loader.js',
|
||||
video_id, 'Downloading loader info', query=query)
|
||||
video_info = self._parse_json(self._search_regex(
|
||||
r'var\s+video\s*=\s*({.+?});', info_page, 'video info'), video_id)
|
||||
video_info = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)var\s+config\s*=\s*({.+?});', info_page, 'config'),
|
||||
video_id, transform_source=js_to_json)['video']
|
||||
|
||||
title = video_info['title']
|
||||
|
||||
formats = []
|
||||
for fdata in video_info.get('sources', [{}])[0]:
|
||||
for fdata in video_info['sources']:
|
||||
src = fdata.get('src')
|
||||
if not src:
|
||||
continue
|
||||
ext = mimetype2ext(fdata.get('type')) or determine_ext(src)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
quality = fdata.get('quality')
|
||||
formats.append({
|
||||
'format_id': ext + ('-%s' % quality if quality else ''),
|
||||
@ -169,7 +187,6 @@ class CondeNastIE(InfoExtractor):
|
||||
path=remove_end(parsed_url.path, '.js').replace('/embedjs/', '/embed/')))
|
||||
url_type = 'embed'
|
||||
|
||||
self.to_screen('Extracting from %s with the Condé Nast extractor' % self._SITES[site])
|
||||
webpage = self._download_webpage(url, item_id)
|
||||
|
||||
if url_type == 'series':
|
||||
|
@ -390,7 +390,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
else:
|
||||
webpage_url = 'http://www.' + mobj.group('url')
|
||||
|
||||
webpage = self._download_webpage(self._add_skip_wall(webpage_url), video_id, 'Downloading webpage')
|
||||
webpage = self._download_webpage(
|
||||
self._add_skip_wall(webpage_url), video_id,
|
||||
headers=self.geo_verification_headers())
|
||||
note_m = self._html_search_regex(
|
||||
r'<div class="showmedia-trailer-notice">(.+?)</div>',
|
||||
webpage, 'trailer-notice', default='')
|
||||
@ -565,7 +567,9 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(self._add_skip_wall(url), show_id)
|
||||
webpage = self._download_webpage(
|
||||
self._add_skip_wall(url), show_id,
|
||||
headers=self.geo_verification_headers())
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
|
||||
webpage, 'title')
|
||||
|
@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
@ -46,9 +48,50 @@ class CuriosityStreamBaseIE(InfoExtractor):
|
||||
|
||||
def _extract_media_info(self, media):
|
||||
video_id = compat_str(media['id'])
|
||||
limelight_media_id = media['limelight_media_id']
|
||||
title = media['title']
|
||||
|
||||
formats = []
|
||||
for encoding in media.get('encodings', []):
|
||||
m3u8_url = encoding.get('master_playlist_url')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
encoding_url = encoding.get('url')
|
||||
file_url = encoding.get('file_url')
|
||||
if not encoding_url and not file_url:
|
||||
continue
|
||||
f = {
|
||||
'width': int_or_none(encoding.get('width')),
|
||||
'height': int_or_none(encoding.get('height')),
|
||||
'vbr': int_or_none(encoding.get('video_bitrate')),
|
||||
'abr': int_or_none(encoding.get('audio_bitrate')),
|
||||
'filesize': int_or_none(encoding.get('size_in_bytes')),
|
||||
'vcodec': encoding.get('video_codec'),
|
||||
'acodec': encoding.get('audio_codec'),
|
||||
'container': encoding.get('container_type'),
|
||||
}
|
||||
for f_url in (encoding_url, file_url):
|
||||
if not f_url:
|
||||
continue
|
||||
fmt = f.copy()
|
||||
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
|
||||
if rtmp:
|
||||
fmt.update({
|
||||
'url': rtmp.group('url'),
|
||||
'play_path': rtmp.group('playpath'),
|
||||
'app': rtmp.group('app'),
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp',
|
||||
})
|
||||
else:
|
||||
fmt.update({
|
||||
'url': f_url,
|
||||
'format_id': 'http',
|
||||
})
|
||||
formats.append(fmt)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for closed_caption in media.get('closed_captions', []):
|
||||
sub_url = closed_caption.get('file')
|
||||
@ -60,16 +103,14 @@ class CuriosityStreamBaseIE(InfoExtractor):
|
||||
})
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': 'limelight:media:' + limelight_media_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': media.get('description'),
|
||||
'thumbnail': media.get('image_large') or media.get('image_medium') or media.get('image_small'),
|
||||
'duration': int_or_none(media.get('duration')),
|
||||
'tags': media.get('tags'),
|
||||
'subtitles': subtitles,
|
||||
'ie_key': 'LimelightMedia',
|
||||
}
|
||||
|
||||
|
||||
@ -78,14 +119,12 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
|
||||
_VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://app.curiositystream.com/video/2',
|
||||
'md5': 'a0074c190e6cddaf86900b28d3e9ee7a',
|
||||
'md5': '262bb2f257ff301115f1973540de8983',
|
||||
'info_dict': {
|
||||
'id': '2',
|
||||
'ext': 'mp4',
|
||||
'title': 'How Did You Develop The Internet?',
|
||||
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
|
||||
'timestamp': 1448388615,
|
||||
'upload_date': '20151124',
|
||||
}
|
||||
}
|
||||
|
||||
@ -105,7 +144,7 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
||||
'title': 'Curious Minds: The Internet',
|
||||
'description': 'How is the internet shaping our lives in the 21st Century?',
|
||||
},
|
||||
'playlist_mincount': 17,
|
||||
'playlist_mincount': 12,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -82,6 +82,11 @@ class CWTVIE(InfoExtractor):
|
||||
'url': quality_url,
|
||||
'tbr': tbr,
|
||||
})
|
||||
video_metadata = video_data['assetFields']
|
||||
ism_url = video_metadata.get('smoothStreamingUrl')
|
||||
if ism_url:
|
||||
formats.extend(self._extract_ism_formats(
|
||||
ism_url, video_id, ism_id='mss', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = [{
|
||||
@ -90,8 +95,6 @@ class CWTVIE(InfoExtractor):
|
||||
'height': image.get('height'),
|
||||
} for image_id, image in video_data['images'].items() if image.get('uri')] if video_data.get('images') else None
|
||||
|
||||
video_metadata = video_data['assetFields']
|
||||
|
||||
subtitles = {
|
||||
'en': [{
|
||||
'url': video_metadata['UnicornCcUrl'],
|
||||
|
@ -9,13 +9,13 @@ from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import smuggle_url
|
||||
|
||||
|
||||
class TlcDeIE(InfoExtractor):
|
||||
IE_NAME = 'tlc.de'
|
||||
_VALID_URL = r'https?://(?:www\.)?tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?'
|
||||
class DiscoveryNetworksDeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:discovery|tlc|animalplanet|dmax)\.de/(?:.*#(?P<id>\d+)|(?:[^/]+/)*videos/(?P<title>[^/?#]+))'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
|
||||
'info_dict': {
|
||||
'id': '3235167922001',
|
||||
@ -29,7 +29,13 @@ class TlcDeIE(InfoExtractor):
|
||||
'upload_date': '20140404',
|
||||
'uploader_id': '1659832546',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.dmax.de/programme/storage-hunters-uk/videos/storage-hunters-uk-episode-6/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.discovery.de/#5332316765001',
|
||||
'only_matching': True,
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1659832546/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -39,5 +45,8 @@ class TlcDeIE(InfoExtractor):
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
|
||||
brightcove_id = compat_parse_qs(compat_urlparse.urlparse(brightcove_legacy_url).query)['@videoPlayer'][0]
|
||||
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
||||
brightcove_id = compat_parse_qs(compat_urlparse.urlparse(
|
||||
brightcove_legacy_url).query)['@videoPlayer'][0]
|
||||
return self.url_result(smuggle_url(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['DE']}),
|
||||
'BrightcoveNew', brightcove_id)
|
59
youtube_dl/extractor/discoveryvr.py
Normal file
59
youtube_dl/extractor/discoveryvr.py
Normal file
@ -0,0 +1,59 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_duration
|
||||
|
||||
|
||||
class DiscoveryVRIE(InfoExtractor):
    """Extractor for discoveryvr.com ``/watch/<slug>`` pages."""

    _VALID_URL = r'https?://(?:www\.)?discoveryvr\.com/watch/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.discoveryvr.com/watch/discovery-vr-an-introduction',
        'md5': '32b1929798c464a54356378b7912eca4',
        'info_dict': {
            'id': 'discovery-vr-an-introduction',
            'ext': 'mp4',
            'title': 'Discovery VR - An Introduction',
            'description': 'md5:80d418a10efb8899d9403e61d8790f06',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video whose slug appears in *url*.

        The watch page embeds a ``root.DVR.bootstrapData`` assignment whose
        value is a quoted, escaped JSON string.  That JSON in turn carries a
        ``videos`` entry which is itself a JSON string holding ``allVideos``.
        The entry whose ``slug`` equals the URL slug is used; ``next()`` is
        called without a default, so ``StopIteration`` propagates when no
        entry matches.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The payload sits inside a double-quoted string, so the escaping
        # has to be undone before it can be parsed as JSON.
        escaped_bootstrap = self._search_regex(
            r'root\.DVR\.bootstrapData\s+=\s+"({.+?})";',
            webpage, 'bootstrap data')
        unescaped_bootstrap = escaped_bootstrap.encode('utf-8').decode('unicode_escape')
        bootstrap = self._parse_json(unescaped_bootstrap, display_id)

        # 'videos' is a JSON document serialized as a string: parse it again.
        catalogue = self._parse_json(bootstrap['videos'], display_id)
        video_data = next(
            candidate for candidate in catalogue['allVideos']
            if candidate.get('slug') == display_id)

        # The episode name falls back to the show title; the show title is
        # only prepended when it differs from the episode name.
        series = video_data.get('showTitle')
        episode = video_data.get('title') or series
        if series and series != episode:
            title = '%s - %s' % (series, episode)
        else:
            title = episode

        # (key in video_data, format_id) pairs, kept in the original order.
        format_sources = (
            ('cdnUriM3U8', 'mobi'),
            ('webVideoUrlSd', 'sd'),
            ('webVideoUrlHd', 'hd'),
        )
        formats = [{
            'format_id': format_id,
            'url': video_data[source_key],
        } for source_key, format_id in format_sources
            if video_data.get(source_key)]

        return {
            'id': display_id,
            'display_id': display_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnail'),
            'duration': parse_duration(video_data.get('runTime')),
            'formats': formats,
            'episode': episode,
            'series': series,
        }
|
@ -19,6 +19,7 @@ from .acast import (
|
||||
ACastChannelIE,
|
||||
)
|
||||
from .addanime import AddAnimeIE
|
||||
from .adn import ADNIE
|
||||
from .adobetv import (
|
||||
AdobeTVIE,
|
||||
AdobeTVShowIE,
|
||||
@ -71,6 +72,7 @@ from .arte import (
|
||||
)
|
||||
from .atresplayer import AtresPlayerIE
|
||||
from .atttechchannel import ATTTechChannelIE
|
||||
from .atvat import ATVAtIE
|
||||
from .audimedia import AudiMediaIE
|
||||
from .audioboom import AudioBoomIE
|
||||
from .audiomack import AudiomackIE, AudiomackAlbumIE
|
||||
@ -117,6 +119,7 @@ from .bleacherreport import (
|
||||
from .blinkx import BlinkxIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bostonglobe import BostonGlobeIE
|
||||
from .bpb import BpbIE
|
||||
from .br import BRIE
|
||||
from .bravotv import BravoTVIE
|
||||
@ -162,7 +165,10 @@ from .ccc import CCCIE
|
||||
from .ccma import CCMAIE
|
||||
from .cctv import CCTVIE
|
||||
from .cda import CDAIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
from .ceskatelevize import (
|
||||
CeskaTelevizeIE,
|
||||
CeskaTelevizePoradyIE,
|
||||
)
|
||||
from .channel9 import Channel9IE
|
||||
from .charlierose import CharlieRoseIE
|
||||
from .chaturbate import ChaturbateIE
|
||||
@ -269,6 +275,8 @@ from .discoverygo import (
|
||||
DiscoveryGoIE,
|
||||
DiscoveryGoPlaylistIE,
|
||||
)
|
||||
from .discoverynetworks import DiscoveryNetworksDeIE
|
||||
from .discoveryvr import DiscoveryVRIE
|
||||
from .disney import DisneyIE
|
||||
from .dispeak import DigitallySpeakingIE
|
||||
from .dropbox import DropboxIE
|
||||
@ -376,6 +384,7 @@ from .globo import (
|
||||
GloboArticleIE,
|
||||
)
|
||||
from .go import GoIE
|
||||
from .go90 import Go90IE
|
||||
from .godtube import GodTubeIE
|
||||
from .godtv import GodTVIE
|
||||
from .golem import GolemIE
|
||||
@ -533,6 +542,7 @@ from .mangomolo import (
|
||||
)
|
||||
from .matchtv import MatchTVIE
|
||||
from .mdr import MDRIE
|
||||
from .medici import MediciIE
|
||||
from .meipai import MeipaiIE
|
||||
from .melonvod import MelonVODIE
|
||||
from .meta import METAIE
|
||||
@ -725,6 +735,10 @@ from .orf import (
|
||||
ORFFM4IE,
|
||||
ORFIPTVIE,
|
||||
)
|
||||
from .packtpub import (
|
||||
PacktPubIE,
|
||||
PacktPubCourseIE,
|
||||
)
|
||||
from .pandatv import PandaTVIE
|
||||
from .pandoratv import PandoraTVIE
|
||||
from .parliamentliveuk import ParliamentLiveUKIE
|
||||
@ -794,7 +808,7 @@ from .radiojavan import RadioJavanIE
|
||||
from .radiobremen import RadioBremenIE
|
||||
from .radiofrance import RadioFranceIE
|
||||
from .rai import (
|
||||
RaiTVIE,
|
||||
RaiPlayIE,
|
||||
RaiIE,
|
||||
)
|
||||
from .rbmaradio import RBMARadioIE
|
||||
@ -825,7 +839,11 @@ from .rozhlas import RozhlasIE
|
||||
from .rtbf import RTBFIE
|
||||
from .rte import RteIE, RteRadioIE
|
||||
from .rtlnl import RtlNlIE
|
||||
from .rtl2 import RTL2IE
|
||||
from .rtl2 import (
|
||||
RTL2IE,
|
||||
RTL2YouIE,
|
||||
RTL2YouSeriesIE,
|
||||
)
|
||||
from .rtp import RTPIE
|
||||
from .rts import RTSIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
|
||||
@ -921,6 +939,7 @@ from .srmediathek import SRMediathekIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .steam import SteamIE
|
||||
from .streamable import StreamableIE
|
||||
from .streamango import StreamangoIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
from .streamcz import StreamCZIE
|
||||
from .streetvoice import StreetVoiceIE
|
||||
@ -967,13 +986,13 @@ from .theplatform import (
|
||||
from .thescene import TheSceneIE
|
||||
from .thesixtyone import TheSixtyOneIE
|
||||
from .thestar import TheStarIE
|
||||
from .thesun import TheSunIE
|
||||
from .theweatherchannel import TheWeatherChannelIE
|
||||
from .thisamericanlife import ThisAmericanLifeIE
|
||||
from .thisav import ThisAVIE
|
||||
from .thisoldhouse import ThisOldHouseIE
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
from .tinypic import TinyPicIE
|
||||
from .tlc import TlcDeIE
|
||||
from .tmz import (
|
||||
TMZIE,
|
||||
TMZArticleIE,
|
||||
@ -986,6 +1005,7 @@ from .tnaflix import (
|
||||
)
|
||||
from .toggle import ToggleIE
|
||||
from .tonline import TOnlineIE
|
||||
from .toongoggles import ToonGogglesIE
|
||||
from .toutv import TouTvIE
|
||||
from .toypics import ToypicsUserIE, ToypicsIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
@ -1013,8 +1033,10 @@ from .tv2 import (
|
||||
TV2IE,
|
||||
TV2ArticleIE,
|
||||
)
|
||||
from .tv2hu import TV2HuIE
|
||||
from .tv3 import TV3IE
|
||||
from .tv4 import TV4IE
|
||||
from .tv5mondeplus import TV5MondePlusIE
|
||||
from .tva import TVAIE
|
||||
from .tvanouvelles import (
|
||||
TVANouvellesIE,
|
||||
@ -1174,6 +1196,12 @@ from .voxmedia import VoxMediaIE
|
||||
from .vporn import VpornIE
|
||||
from .vrt import VRTIE
|
||||
from .vrak import VrakIE
|
||||
from .vrv import (
|
||||
VRVIE,
|
||||
VRVSeriesIE,
|
||||
)
|
||||
from .vshare import VShareIE
|
||||
from .medialaan import MedialaanIE
|
||||
from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
from .vvvvid import VVVVIDIE
|
||||
@ -1206,7 +1234,10 @@ from .wrzuta import (
|
||||
WrzutaIE,
|
||||
WrzutaPlaylistIE,
|
||||
)
|
||||
from .wsj import WSJIE
|
||||
from .wsj import (
|
||||
WSJIE,
|
||||
WSJArticleIE,
|
||||
)
|
||||
from .xbef import XBefIE
|
||||
from .xboxclips import XboxClipsIE
|
||||
from .xfileshare import XFileShareIE
|
||||
|
@ -54,7 +54,7 @@ class EyedoTVIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8' if is_live else 'm3u8_native'),
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native'),
|
||||
'description': xpath_text(video_data, _add_ns('Description')),
|
||||
'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))),
|
||||
'uploader': xpath_text(video_data, _add_ns('Createur')),
|
||||
|
@ -196,6 +196,10 @@ class FacebookIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# no title
|
||||
'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@ -353,15 +357,15 @@ class FacebookIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage, 'title',
|
||||
default=None)
|
||||
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage,
|
||||
'title', default=None)
|
||||
if not video_title:
|
||||
video_title = self._html_search_regex(
|
||||
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
|
||||
webpage, 'alternative title', default=None)
|
||||
if not video_title:
|
||||
video_title = self._html_search_meta(
|
||||
'description', webpage, 'title')
|
||||
'description', webpage, 'title', default=None)
|
||||
if video_title:
|
||||
video_title = limit_length(video_title, 80)
|
||||
else:
|
||||
|
@ -47,9 +47,12 @@ class FOXIE(AdobePassIE):
|
||||
resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating)
|
||||
query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource)
|
||||
|
||||
return {
|
||||
info = self._search_json_ld(webpage, video_id, fatal=False)
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
|
||||
'id': video_id,
|
||||
}
|
||||
})
|
||||
|
||||
return info
|
||||
|
@ -4,7 +4,8 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
unified_strdate,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -19,6 +20,7 @@ class FranceCultureIE(InfoExtractor):
|
||||
'title': 'Rendez-vous au pays des geeks',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20140301',
|
||||
'timestamp': 1393642916,
|
||||
'vcodec': 'none',
|
||||
}
|
||||
}
|
||||
@ -28,30 +30,34 @@ class FranceCultureIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'(?s)<div[^>]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?<button[^>]+data-asset-source="([^"]+)"',
|
||||
webpage, 'video path')
|
||||
video_data = extract_attributes(self._search_regex(
|
||||
r'(?s)<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>.*?(<button[^>]+data-asset-source="[^"]+"[^>]+>)',
|
||||
webpage, 'video data'))
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
video_url = video_data['data-asset-source']
|
||||
title = video_data.get('data-asset-title') or self._og_search_title(webpage)
|
||||
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
'(?s)<div[^>]+class="date"[^>]*>.*?<span[^>]+class="inner"[^>]*>([^<]+)<',
|
||||
webpage, 'upload date', fatal=False))
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
|
||||
webpage, 'description', default=None)
|
||||
thumbnail = self._search_regex(
|
||||
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+data-dejavu-src="([^"]+)"',
|
||||
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
uploader = self._html_search_regex(
|
||||
r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
|
||||
r'(?s)<span class="author">(.*?)</span>',
|
||||
webpage, 'uploader', default=None)
|
||||
vcodec = 'none' if determine_ext(video_url.lower()) == 'mp3' else None
|
||||
ext = determine_ext(video_url.lower())
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'vcodec': vcodec,
|
||||
'ext': ext,
|
||||
'vcodec': 'none' if ext == 'mp3' else None,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'timestamp': int_or_none(video_data.get('data-asset-created-date')),
|
||||
'duration': int_or_none(video_data.get('data-duration')),
|
||||
}
|
||||
|
@ -56,9 +56,8 @@ class FreshLiveIE(InfoExtractor):
|
||||
is_live = info.get('liveStreamUrl') is not None
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
stream_url, video_id, ext='mp4',
|
||||
entry_protocol='m3u8' if is_live else 'm3u8_native',
|
||||
m3u8_id='hls')
|
||||
stream_url, video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls')
|
||||
|
||||
if is_live:
|
||||
title = self._live_title(title)
|
||||
|
@ -7,9 +7,9 @@ from ..compat import (
|
||||
compat_urllib_parse_unquote_plus,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
sanitized_Request,
|
||||
ExtractorError,
|
||||
urlencode_postdata
|
||||
@ -17,34 +17,26 @@ from ..utils import (
|
||||
|
||||
|
||||
class FunimationIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?funimation\.com/shows/[^/]+/videos/(?:official|promotional)/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P<id>[^/?#&]+)'
|
||||
|
||||
_NETRC_MACHINE = 'funimation'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.funimation.com/shows/air/videos/official/breeze',
|
||||
'url': 'https://www.funimation.com/shows/hacksign/role-play/',
|
||||
'info_dict': {
|
||||
'id': '658',
|
||||
'display_id': 'breeze',
|
||||
'ext': 'mp4',
|
||||
'title': 'Air - 1 - Breeze',
|
||||
'description': 'md5:1769f43cd5fc130ace8fd87232207892',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
},
|
||||
'skip': 'Access without user interaction is forbidden by CloudFlare, and video removed',
|
||||
}, {
|
||||
'url': 'http://www.funimation.com/shows/hacksign/videos/official/role-play',
|
||||
'info_dict': {
|
||||
'id': '31128',
|
||||
'id': '91144',
|
||||
'display_id': 'role-play',
|
||||
'ext': 'mp4',
|
||||
'title': '.hack//SIGN - 1 - Role Play',
|
||||
'title': '.hack//SIGN - Role Play',
|
||||
'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
},
|
||||
'skip': 'Access without user interaction is forbidden by CloudFlare',
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.funimation.com/shows/attack-on-titan-junior-high/videos/promotional/broadcast-dub-preview',
|
||||
'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/',
|
||||
'info_dict': {
|
||||
'id': '9635',
|
||||
'display_id': 'broadcast-dub-preview',
|
||||
@ -54,25 +46,13 @@ class FunimationIE(InfoExtractor):
|
||||
'thumbnail': r're:https?://.*\.(?:jpg|png)',
|
||||
},
|
||||
'skip': 'Access without user interaction is forbidden by CloudFlare',
|
||||
}, {
|
||||
'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_LOGIN_URL = 'http://www.funimation.com/login'
|
||||
|
||||
def _download_webpage(self, *args, **kwargs):
|
||||
try:
|
||||
return super(FunimationIE, self)._download_webpage(*args, **kwargs)
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
|
||||
response = ee.cause.read()
|
||||
if b'>Please complete the security check to access<' in response:
|
||||
raise ExtractorError(
|
||||
'Access to funimation.com is blocked by CloudFlare. '
|
||||
'Please browse to http://www.funimation.com/, solve '
|
||||
'the reCAPTCHA, export browser cookies to a text file,'
|
||||
' and then try again with --cookies YOUR_COOKIE_FILE.',
|
||||
expected=True)
|
||||
raise
|
||||
|
||||
def _extract_cloudflare_session_ua(self, url):
|
||||
ci_session_cookie = self._get_cookies(url).get('ci_session')
|
||||
if ci_session_cookie:
|
||||
@ -114,119 +94,74 @@ class FunimationIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
def _search_kane(name):
|
||||
return self._search_regex(
|
||||
r"KANE_customdimensions\.%s\s*=\s*'([^']+)';" % name,
|
||||
webpage, name, default=None)
|
||||
|
||||
title_data = self._parse_json(self._search_regex(
|
||||
r'TITLE_DATA\s*=\s*({[^}]+})',
|
||||
webpage, 'title data', default=''),
|
||||
display_id, js_to_json, fatal=False) or {}
|
||||
|
||||
video_id = title_data.get('id') or self._search_regex([
|
||||
r"KANE_customdimensions.videoID\s*=\s*'(\d+)';",
|
||||
r'<iframe[^>]+src="/player/(\d+)"',
|
||||
], webpage, 'video_id', default=None)
|
||||
if not video_id:
|
||||
player_url = self._html_search_meta([
|
||||
'al:web:url',
|
||||
'og:video:url',
|
||||
'og:video:secure_url',
|
||||
], webpage, fatal=True)
|
||||
video_id = self._search_regex(r'/player/(\d+)', player_url, 'video id')
|
||||
|
||||
title = episode = title_data.get('title') or _search_kane('videoTitle') or self._og_search_title(webpage)
|
||||
series = _search_kane('showName')
|
||||
if series:
|
||||
title = '%s - %s' % (series, title)
|
||||
description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True)
|
||||
|
||||
try:
|
||||
sources = self._download_json(
|
||||
'https://prod-api-funimationnow.dadcdigital.com/api/source/catalog/video/%s/signed/' % video_id,
|
||||
video_id)['items']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
error = self._parse_json(e.cause.read(), video_id)['errors'][0]
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, error.get('detail') or error.get('title')), expected=True)
|
||||
raise
|
||||
|
||||
errors = []
|
||||
formats = []
|
||||
|
||||
ERRORS_MAP = {
|
||||
'ERROR_MATURE_CONTENT_LOGGED_IN': 'matureContentLoggedIn',
|
||||
'ERROR_MATURE_CONTENT_LOGGED_OUT': 'matureContentLoggedOut',
|
||||
'ERROR_SUBSCRIPTION_LOGGED_OUT': 'subscriptionLoggedOut',
|
||||
'ERROR_VIDEO_EXPIRED': 'videoExpired',
|
||||
'ERROR_TERRITORY_UNAVAILABLE': 'territoryUnavailable',
|
||||
'SVODBASIC_SUBSCRIPTION_IN_PLAYER': 'basicSubscription',
|
||||
'SVODNON_SUBSCRIPTION_IN_PLAYER': 'nonSubscription',
|
||||
'ERROR_PLAYER_NOT_RESPONDING': 'playerNotResponding',
|
||||
'ERROR_UNABLE_TO_CONNECT_TO_CDN': 'unableToConnectToCDN',
|
||||
'ERROR_STREAM_NOT_FOUND': 'streamNotFound',
|
||||
}
|
||||
|
||||
USER_AGENTS = (
|
||||
# PC UA is served with m3u8 that provides some bonus lower quality formats
|
||||
('pc', 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'),
|
||||
# Mobile UA allows to extract direct links and also does not fail when
|
||||
# PC UA fails with hulu error (e.g.
|
||||
# http://www.funimation.com/shows/hacksign/videos/official/role-play)
|
||||
('mobile', 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'),
|
||||
)
|
||||
|
||||
user_agent = self._extract_cloudflare_session_ua(url)
|
||||
if user_agent:
|
||||
USER_AGENTS = ((None, user_agent),)
|
||||
|
||||
for kind, user_agent in USER_AGENTS:
|
||||
request = sanitized_Request(url)
|
||||
request.add_header('User-Agent', user_agent)
|
||||
webpage = self._download_webpage(
|
||||
request, display_id,
|
||||
'Downloading %s webpage' % kind if kind else 'Downloading webpage')
|
||||
|
||||
playlist = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+playersData\s*=\s*(\[.+?\]);\n',
|
||||
webpage, 'players data'),
|
||||
display_id)[0]['playlist']
|
||||
|
||||
items = next(item['items'] for item in playlist if item.get('items'))
|
||||
item = next(item for item in items if item.get('itemAK') == display_id)
|
||||
|
||||
error_messages = {}
|
||||
video_error_messages = self._search_regex(
|
||||
r'var\s+videoErrorMessages\s*=\s*({.+?});\n',
|
||||
webpage, 'error messages', default=None)
|
||||
if video_error_messages:
|
||||
error_messages_json = self._parse_json(video_error_messages, display_id, fatal=False)
|
||||
if error_messages_json:
|
||||
for _, error in error_messages_json.items():
|
||||
type_ = error.get('type')
|
||||
description = error.get('description')
|
||||
content = error.get('content')
|
||||
if type_ == 'text' and description and content:
|
||||
error_message = ERRORS_MAP.get(description)
|
||||
if error_message:
|
||||
error_messages[error_message] = content
|
||||
|
||||
for video in item.get('videoSet', []):
|
||||
auth_token = video.get('authToken')
|
||||
if not auth_token:
|
||||
continue
|
||||
funimation_id = video.get('FUNImationID') or video.get('videoId')
|
||||
preference = 1 if video.get('languageMode') == 'dub' else 0
|
||||
if not auth_token.startswith('?'):
|
||||
auth_token = '?%s' % auth_token
|
||||
for quality, height in (('sd', 480), ('hd', 720), ('hd1080', 1080)):
|
||||
format_url = video.get('%sUrl' % quality)
|
||||
if not format_url:
|
||||
continue
|
||||
if not format_url.startswith(('http', '//')):
|
||||
errors.append(format_url)
|
||||
continue
|
||||
if determine_ext(format_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url + auth_token, display_id, 'mp4', entry_protocol='m3u8_native',
|
||||
preference=preference, m3u8_id='%s-hls' % funimation_id, fatal=False))
|
||||
else:
|
||||
tbr = int_or_none(self._search_regex(
|
||||
r'-(\d+)[Kk]', format_url, 'tbr', default=None))
|
||||
formats.append({
|
||||
'url': format_url + auth_token,
|
||||
'format_id': '%s-http-%dp' % (funimation_id, height),
|
||||
'height': height,
|
||||
'tbr': tbr,
|
||||
'preference': preference,
|
||||
})
|
||||
|
||||
if not formats and errors:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s'
|
||||
% (self.IE_NAME, clean_html(error_messages.get(errors[0], errors[0]))),
|
||||
expected=True)
|
||||
|
||||
for source in sources:
|
||||
source_url = source.get('src')
|
||||
if not source_url:
|
||||
continue
|
||||
source_type = source.get('videoType') or determine_ext(source_url)
|
||||
if source_type == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': source_type,
|
||||
'url': source_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = item['title']
|
||||
artist = item.get('artist')
|
||||
if artist:
|
||||
title = '%s - %s' % (artist, title)
|
||||
description = self._og_search_description(webpage) or item.get('description')
|
||||
thumbnail = self._og_search_thumbnail(webpage) or item.get('posterUrl')
|
||||
video_id = item.get('itemId') or display_id
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'series': series,
|
||||
'season_number': int_or_none(title_data.get('seasonNum') or _search_kane('season')),
|
||||
'episode_number': int_or_none(title_data.get('episodeNum')),
|
||||
'episode': episode,
|
||||
'season_id': title_data.get('seriesId'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -85,6 +85,7 @@ from .ustream import UstreamIE
|
||||
from .openload import OpenloadIE
|
||||
from .videopress import VideoPressIE
|
||||
from .rutube import RutubeIE
|
||||
from .limelight import LimelightBaseIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@ -449,6 +450,59 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
}],
|
||||
},
|
||||
{
|
||||
# Brightcove with UUID in videoPlayer
|
||||
'url': 'http://www8.hp.com/cn/zh/home.html',
|
||||
'info_dict': {
|
||||
'id': '5255815316001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprocket Video - China',
|
||||
'description': 'Sprocket Video - China',
|
||||
'uploader': 'HP-Video Gallery',
|
||||
'timestamp': 1482263210,
|
||||
'upload_date': '20161220',
|
||||
'uploader_id': '1107601872001',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
},
|
||||
'skip': 'video rotates...weekly?',
|
||||
},
|
||||
{
|
||||
# Brightcove:new type [2].
|
||||
'url': 'http://www.delawaresportszone.com/video-st-thomas-more-earns-first-trip-to-basketball-semis',
|
||||
'md5': '2b35148fcf48da41c9fb4591650784f3',
|
||||
'info_dict': {
|
||||
'id': '5348741021001',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20170306',
|
||||
'uploader_id': '4191638492001',
|
||||
'timestamp': 1488769918,
|
||||
'title': 'VIDEO: St. Thomas More earns first trip to basketball semis',
|
||||
|
||||
},
|
||||
},
|
||||
{
|
||||
# Alternative brightcove <video> attributes
|
||||
'url': 'http://www.programme-tv.net/videos/extraits/81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche/',
|
||||
'info_dict': {
|
||||
'id': '81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche',
|
||||
'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche, Extraits : toutes les vidéos avec Télé-Loisirs",
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '732d22ba3d33f2f3fc253c39f8f36523',
|
||||
'info_dict': {
|
||||
'id': '5311302538001',
|
||||
'ext': 'mp4',
|
||||
'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche",
|
||||
'description': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche (France 2, 5 février 2017)",
|
||||
'timestamp': 1486321708,
|
||||
'upload_date': '20170205',
|
||||
'uploader_id': '800000640001',
|
||||
},
|
||||
'only_matching': True,
|
||||
}],
|
||||
},
|
||||
{
|
||||
# Brightcove with UUID in videoPlayer
|
||||
'url': 'http://www8.hp.com/cn/zh/home.html',
|
||||
@ -730,6 +784,21 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# YouTube <object> embed
|
||||
{
|
||||
'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
|
||||
'md5': '516718101ec834f74318df76259fb3cc',
|
||||
'info_dict': {
|
||||
'id': 'msN87y-iEx0',
|
||||
'ext': 'webm',
|
||||
'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
|
||||
'upload_date': '20080526',
|
||||
'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
|
||||
'uploader': 'Christopher Sykes',
|
||||
'uploader_id': 'ChristopherJSykes',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
},
|
||||
# Camtasia studio
|
||||
{
|
||||
'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
|
||||
@ -902,12 +971,13 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
# LazyYT
|
||||
{
|
||||
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
|
||||
'url': 'https://skiplagged.com/',
|
||||
'info_dict': {
|
||||
'id': '1986',
|
||||
'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
|
||||
'id': 'skiplagged',
|
||||
'title': 'Skiplagged: The smart way to find cheap flights',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
'playlist_mincount': 1,
|
||||
'add_ie': ['Youtube'],
|
||||
},
|
||||
# Cinchcast embed
|
||||
{
|
||||
@ -990,6 +1060,20 @@ class GenericIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
{
|
||||
# JWPlayer config passed as variable
|
||||
'url': 'http://www.txxx.com/videos/3326530/ariele/',
|
||||
'info_dict': {
|
||||
'id': '3326530_hq',
|
||||
'ext': 'mp4',
|
||||
'title': 'ARIELE | Tube Cup',
|
||||
'uploader': 'www.txxx.com',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# rtl.nl embed
|
||||
{
|
||||
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
|
||||
@ -1065,6 +1149,21 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': ['Kaltura'],
|
||||
},
|
||||
{
|
||||
# Kaltura iframe embed
|
||||
'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
|
||||
'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
|
||||
'info_dict': {
|
||||
'id': '0_f2cfbpwy',
|
||||
'ext': 'mp4',
|
||||
'title': 'I. M. Pei: A Centennial Celebration',
|
||||
'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
|
||||
'upload_date': '20170403',
|
||||
'uploader_id': 'batchUser',
|
||||
'timestamp': 1491232186,
|
||||
},
|
||||
'add_ie': ['Kaltura'],
|
||||
},
|
||||
# Eagle.Platform embed (generic URL)
|
||||
{
|
||||
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
||||
@ -1542,6 +1641,26 @@ class GenericIE(InfoExtractor):
|
||||
'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# Senate ISVP iframe https
|
||||
'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security',
|
||||
'md5': 'fb8c70b0b515e5037981a2492099aab8',
|
||||
'info_dict': {
|
||||
'id': 'govtaff020316',
|
||||
'ext': 'mp4',
|
||||
'title': 'Integrated Senate Video Player',
|
||||
},
|
||||
'add_ie': [SenateISVPIE.ie_key()],
|
||||
},
|
||||
{
|
||||
# Limelight embeds (1 channel embed + 4 media embeds)
|
||||
'url': 'http://www.sedona.com/FacilitatorTraining2017',
|
||||
'info_dict': {
|
||||
'id': 'FacilitatorTraining2017',
|
||||
'title': 'Facilitator Training 2017',
|
||||
},
|
||||
'playlist_mincount': 5,
|
||||
},
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
@ -1584,7 +1703,7 @@ class GenericIE(InfoExtractor):
|
||||
continue
|
||||
|
||||
entries.append({
|
||||
'_type': 'url',
|
||||
'_type': 'url_transparent',
|
||||
'url': next_url,
|
||||
'title': it.find('title').text,
|
||||
})
|
||||
@ -1841,18 +1960,9 @@ class GenericIE(InfoExtractor):
|
||||
video_description = self._og_search_description(webpage, default=None)
|
||||
video_thumbnail = self._og_search_thumbnail(webpage, default=None)
|
||||
|
||||
# Helper method
|
||||
def _playlist_from_matches(matches, getter=None, ie=None):
|
||||
urlrs = orderedSet(
|
||||
self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
|
||||
for m in matches)
|
||||
return self.playlist_result(
|
||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||
|
||||
# Look for Brightcove Legacy Studio embeds
|
||||
bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
|
||||
if bc_urls:
|
||||
self.to_screen('Brightcove video detected.')
|
||||
entries = [{
|
||||
'_type': 'url',
|
||||
'url': smuggle_url(bc_url, {'Referer': url}),
|
||||
@ -1867,30 +1977,30 @@ class GenericIE(InfoExtractor):
|
||||
}
|
||||
|
||||
# Look for Brightcove New Studio embeds
|
||||
bc_urls = BrightcoveNewIE._extract_urls(webpage)
|
||||
bc_urls = BrightcoveNewIE._extract_urls(self, webpage)
|
||||
if bc_urls:
|
||||
return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
|
||||
return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew')
|
||||
|
||||
# Look for ThePlatform embeds
|
||||
tp_urls = ThePlatformIE._extract_urls(webpage)
|
||||
if tp_urls:
|
||||
return _playlist_from_matches(tp_urls, ie='ThePlatform')
|
||||
return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
|
||||
|
||||
# Look for Vessel embeds
|
||||
vessel_urls = VesselIE._extract_urls(webpage)
|
||||
if vessel_urls:
|
||||
return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key())
|
||||
return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key())
|
||||
|
||||
# Look for embedded rtl.nl player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
|
||||
webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(matches, ie='RtlNl')
|
||||
return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
|
||||
|
||||
vimeo_urls = VimeoIE._extract_urls(url, webpage)
|
||||
if vimeo_urls:
|
||||
return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key())
|
||||
return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
|
||||
|
||||
vid_me_embed_url = self._search_regex(
|
||||
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
|
||||
@ -1905,6 +2015,7 @@ class GenericIE(InfoExtractor):
|
||||
data-video-url=|
|
||||
<embed[^>]+?src=|
|
||||
embedSWF\(?:\s*|
|
||||
<object[^>]+data=|
|
||||
new\s+SWFObject\(
|
||||
)
|
||||
(["\'])
|
||||
@ -1912,25 +2023,25 @@ class GenericIE(InfoExtractor):
|
||||
(?:embed|v|p)/.+?)
|
||||
\1''', webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(
|
||||
matches, lambda m: unescapeHTML(m[1]))
|
||||
return self.playlist_from_matches(
|
||||
matches, video_id, video_title, lambda m: unescapeHTML(m[1]))
|
||||
|
||||
# Look for lazyYT YouTube embed
|
||||
matches = re.findall(
|
||||
r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
|
||||
return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m))
|
||||
|
||||
# Look for Wordpress "YouTube Video Importer" plugin
|
||||
matches = re.findall(r'''(?x)<div[^>]+
|
||||
class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
|
||||
data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(matches, lambda m: m[-1])
|
||||
return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1])
|
||||
|
||||
matches = DailymotionIE._extract_urls(webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(matches)
|
||||
return self.playlist_from_matches(matches, video_id, video_title)
|
||||
|
||||
# Look for embedded Dailymotion playlist player (#3822)
|
||||
m = re.search(
|
||||
@ -1939,8 +2050,8 @@ class GenericIE(InfoExtractor):
|
||||
playlists = re.findall(
|
||||
r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
|
||||
if playlists:
|
||||
return _playlist_from_matches(
|
||||
playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
|
||||
return self.playlist_from_matches(
|
||||
playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
|
||||
|
||||
# Look for embedded Wistia player
|
||||
match = re.search(
|
||||
@ -2047,8 +2158,9 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
|
||||
if embeds:
|
||||
return _playlist_from_matches(
|
||||
embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
|
||||
return self.playlist_from_matches(
|
||||
embeds, video_id, video_title,
|
||||
getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
|
||||
|
||||
# Look for Aparat videos
|
||||
mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
|
||||
@ -2110,13 +2222,13 @@ class GenericIE(InfoExtractor):
|
||||
# Look for funnyordie embed
|
||||
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(
|
||||
matches, getter=unescapeHTML, ie='FunnyOrDie')
|
||||
return self.playlist_from_matches(
|
||||
matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
|
||||
|
||||
# Look for BBC iPlayer embed
|
||||
matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(matches, ie='BBCCoUk')
|
||||
return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk')
|
||||
|
||||
# Look for embedded RUTV player
|
||||
rutv_url = RUTVIE._extract_url(webpage)
|
||||
@ -2131,32 +2243,32 @@ class GenericIE(InfoExtractor):
|
||||
# Look for embedded SportBox player
|
||||
sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
|
||||
if sportbox_urls:
|
||||
return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
|
||||
return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
|
||||
|
||||
# Look for embedded XHamster player
|
||||
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
|
||||
if xhamster_urls:
|
||||
return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
|
||||
return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed')
|
||||
|
||||
# Look for embedded TNAFlixNetwork player
|
||||
tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
|
||||
if tnaflix_urls:
|
||||
return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
|
||||
return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key())
|
||||
|
||||
# Look for embedded PornHub player
|
||||
pornhub_urls = PornHubIE._extract_urls(webpage)
|
||||
if pornhub_urls:
|
||||
return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key())
|
||||
return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key())
|
||||
|
||||
# Look for embedded DrTuber player
|
||||
drtuber_urls = DrTuberIE._extract_urls(webpage)
|
||||
if drtuber_urls:
|
||||
return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key())
|
||||
return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key())
|
||||
|
||||
# Look for embedded RedTube player
|
||||
redtube_urls = RedTubeIE._extract_urls(webpage)
|
||||
if redtube_urls:
|
||||
return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key())
|
||||
return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
|
||||
|
||||
# Look for embedded Tvigle player
|
||||
mobj = re.search(
|
||||
@ -2202,12 +2314,12 @@ class GenericIE(InfoExtractor):
|
||||
# Look for embedded soundcloud player
|
||||
soundcloud_urls = SoundcloudIE._extract_urls(webpage)
|
||||
if soundcloud_urls:
|
||||
return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
|
||||
return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
|
||||
|
||||
# Look for tunein player
|
||||
tunein_urls = TuneInBaseIE._extract_urls(webpage)
|
||||
if tunein_urls:
|
||||
return _playlist_from_matches(tunein_urls)
|
||||
return self.playlist_from_matches(tunein_urls, video_id, video_title)
|
||||
|
||||
# Look for embedded mtvservices player
|
||||
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
|
||||
@ -2381,6 +2493,11 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(piksel_url, PikselIE.ie_key())
|
||||
|
||||
# Look for Limelight embeds
|
||||
limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
|
||||
if limelight_urls:
|
||||
return self.playlist_result(
|
||||
limelight_urls, video_id, video_title, video_description)
|
||||
|
||||
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
|
||||
if mobj:
|
||||
lm = {
|
||||
@ -2490,35 +2607,35 @@ class GenericIE(InfoExtractor):
|
||||
# Look for DBTV embeds
|
||||
dbtv_urls = DBTVIE._extract_urls(webpage)
|
||||
if dbtv_urls:
|
||||
return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key())
|
||||
return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key())
|
||||
|
||||
# Look for Videa embeds
|
||||
videa_urls = VideaIE._extract_urls(webpage)
|
||||
if videa_urls:
|
||||
return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key())
|
||||
return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key())
|
||||
|
||||
# Look for 20 minuten embeds
|
||||
twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
|
||||
if twentymin_urls:
|
||||
return _playlist_from_matches(
|
||||
twentymin_urls, ie=TwentyMinutenIE.ie_key())
|
||||
return self.playlist_from_matches(
|
||||
twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
|
||||
|
||||
# Look for Openload embeds
|
||||
openload_urls = OpenloadIE._extract_urls(webpage)
|
||||
if openload_urls:
|
||||
return _playlist_from_matches(
|
||||
openload_urls, ie=OpenloadIE.ie_key())
|
||||
return self.playlist_from_matches(
|
||||
openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
|
||||
|
||||
# Look for VideoPress embeds
|
||||
videopress_urls = VideoPressIE._extract_urls(webpage)
|
||||
if videopress_urls:
|
||||
return _playlist_from_matches(
|
||||
videopress_urls, ie=VideoPressIE.ie_key())
|
||||
return self.playlist_from_matches(
|
||||
videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
|
||||
|
||||
# Look for Rutube embeds
|
||||
rutube_urls = RutubeIE._extract_urls(webpage)
|
||||
if rutube_urls:
|
||||
return _playlist_from_matches(
|
||||
return self.playlist_from_matches(
|
||||
rutube_urls, ie=RutubeIE.ie_key())
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
@ -2545,17 +2662,14 @@ class GenericIE(InfoExtractor):
|
||||
self._sort_formats(entry['formats'])
|
||||
return self.playlist_result(entries)
|
||||
|
||||
jwplayer_data_str = self._find_jwplayer_data(webpage)
|
||||
if jwplayer_data_str:
|
||||
try:
|
||||
jwplayer_data = self._parse_json(
|
||||
jwplayer_data_str, video_id, transform_source=js_to_json)
|
||||
info = self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, require_title=False)
|
||||
if not info.get('title'):
|
||||
info['title'] = video_title
|
||||
except ExtractorError:
|
||||
pass
|
||||
jwplayer_data = self._find_jwplayer_data(
|
||||
webpage, video_id, transform_source=js_to_json)
|
||||
if jwplayer_data:
|
||||
info = self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, require_title=False, base_url=url)
|
||||
if not info.get('title'):
|
||||
info['title'] = video_title
|
||||
return info
|
||||
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
@ -2564,7 +2678,7 @@ class GenericIE(InfoExtractor):
|
||||
return True
|
||||
vpath = compat_urlparse.urlparse(vurl).path
|
||||
vext = determine_ext(vpath)
|
||||
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
|
||||
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
|
||||
|
||||
def filter_video(urls):
|
||||
return list(filter(check_video, urls))
|
||||
@ -2630,11 +2744,14 @@ class GenericIE(InfoExtractor):
|
||||
found = re.search(REDIRECT_REGEX, refresh_header)
|
||||
if found:
|
||||
new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
|
||||
self.report_following_redirect(new_url)
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': new_url,
|
||||
}
|
||||
if new_url != url:
|
||||
self.report_following_redirect(new_url)
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': new_url,
|
||||
}
|
||||
else:
|
||||
found = None
|
||||
|
||||
if not found:
|
||||
# twitter:player is a https URL to iframe player that may or may not
|
||||
|
126
youtube_dl/extractor/go90.py
Normal file
126
youtube_dl/extractor/go90.py
Normal file
@ -0,0 +1,126 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class Go90IE(InfoExtractor):
    """Extractor for single videos hosted on go90.com."""

    _VALID_URL = r'https?://(?:www\.)?go90\.com/videos/(?P<id>[0-9a-zA-Z]+)'
    _TEST = {
        'url': 'https://www.go90.com/videos/84BUqjLpf9D',
        'md5': 'efa7670dbbbf21a7b07b360652b24a32',
        'info_dict': {
            'id': '84BUqjLpf9D',
            'ext': 'mp4',
            'title': 'Daily VICE - Inside The Utah Coalition Against Pornography Convention',
            'description': 'VICE\'s Karley Sciortino meets with activists who discuss the state\'s strong anti-porn stance. Then, VICE Sports explains NFL contracts.',
            'timestamp': 1491868800,
            'upload_date': '20170411',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the go90 video addressed by url.

        Item metadata is requested from the go90 view API; formats and
        subtitles come from the asset marked as the main video asset and
        thumbnails from every asset of type 'image'.
        """
        video_id = self._match_id(url)
        video_data = self._download_json(
            'https://www.go90.com/api/view/items/' + video_id,
            video_id, headers={
                'Content-Type': 'application/json; charset=utf-8',
            }, data=b'{"client":"web","device_type":"pc"}')
        main_video_asset = video_data['main_video_asset']

        episode_number = int_or_none(video_data.get('episode_number'))
        series = None
        season = None
        season_id = None
        season_number = None
        # Show and season details are taken from the related items listed
        # under '__children'.
        for metadata in video_data.get('__children', {}).get('Item', {}).values():
            if metadata.get('type') == 'show':
                series = metadata.get('title')
            elif metadata.get('type') == 'season':
                season = metadata.get('title')
                season_id = metadata.get('id')
                season_number = int_or_none(metadata.get('season_number'))

        # Prefix the episode title with the series name unless the series
        # name is already the whole title.
        title = episode = video_data.get('title') or series
        if series and series != title:
            title = '%s - %s' % (series, title)

        thumbnails = []
        formats = []
        subtitles = {}
        # 'assets' may be missing or null in the response; treat that as
        # "no assets" instead of failing with a TypeError on iteration.
        for asset in video_data.get('assets') or []:
            if asset.get('id') == main_video_asset:
                for source in asset.get('sources', []):
                    source_location = source.get('location')
                    if not source_location:
                        continue
                    source_type = source.get('type')
                    if source_type == 'hls':
                        m3u8_formats = self._extract_m3u8_formats(
                            source_location, video_id, 'mp4',
                            'm3u8_native', m3u8_id='hls', fatal=False)
                        for f in m3u8_formats:
                            # Fill in dimensions/bitrate the manifest did
                            # not provide from the variant URL
                            # (".../hls-<height>-<tbr>K...").
                            mobj = re.search(r'/hls-(\d+)-(\d+)K', f['url'])
                            if mobj:
                                height, tbr = mobj.groups()
                                height = int_or_none(height)
                                f.update({
                                    'height': f.get('height') or height,
                                    # width is derived from height using a
                                    # 16:9 ratio
                                    'width': f.get('width') or int_or_none(height / 9.0 * 16.0 if height else None),
                                    'tbr': f.get('tbr') or int_or_none(tbr),
                                })
                        formats.extend(m3u8_formats)
                    elif source_type == 'dash':
                        formats.extend(self._extract_mpd_formats(
                            source_location, video_id, mpd_id='dash', fatal=False))
                    else:
                        formats.append({
                            'format_id': source.get('name'),
                            'url': source_location,
                            'width': int_or_none(source.get('width')),
                            'height': int_or_none(source.get('height')),
                            'tbr': int_or_none(source.get('bitrate')),
                        })

                for caption in asset.get('caption_metadata', []):
                    caption_url = caption.get('source_url')
                    if not caption_url:
                        continue
                    # Captions without a language are filed under 'en'.
                    subtitles.setdefault(caption.get('language', 'en'), []).append({
                        'url': caption_url,
                        'ext': determine_ext(caption_url, 'vtt'),
                    })
            elif asset.get('type') == 'image':
                asset_location = asset.get('location')
                if not asset_location:
                    continue
                thumbnails.append({
                    'url': asset_location,
                    'width': int_or_none(asset.get('width')),
                    'height': int_or_none(asset.get('height')),
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': video_data.get('short_description'),
            'like_count': int_or_none(video_data.get('like_count')),
            'timestamp': parse_iso8601(video_data.get('released_at')),
            'series': series,
            'episode': episode,
            'season': season,
            'season_id': season_id,
            'season_number': season_number,
            'episode_number': episode_number,
            'subtitles': subtitles,
        }
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
xpath_element,
|
||||
@ -14,14 +15,26 @@ from ..utils import (
|
||||
|
||||
class HBOBaseIE(InfoExtractor):
|
||||
_FORMATS_INFO = {
|
||||
'pro7': {
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
},
|
||||
'1920': {
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
},
|
||||
'pro6': {
|
||||
'width': 768,
|
||||
'height': 432,
|
||||
},
|
||||
'640': {
|
||||
'width': 768,
|
||||
'height': 432,
|
||||
},
|
||||
'pro5': {
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
},
|
||||
'highwifi': {
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
@ -78,6 +91,19 @@ class HBOBaseIE(InfoExtractor):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url.replace('.tar', '/base_index_w8.m3u8'),
|
||||
video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
elif source.tag == 'hls':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
video_url.replace('.tar', '/base_index.m3u8'),
|
||||
video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
for f in m3u8_formats:
|
||||
if f.get('vcodec') == 'none' and not f.get('tbr'):
|
||||
f['tbr'] = int_or_none(self._search_regex(
|
||||
r'-(\d+)k/', f['url'], 'tbr', default=None))
|
||||
formats.extend(m3u8_formats)
|
||||
elif source.tag == 'dash':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
video_url.replace('.tar', '/manifest.mpd'),
|
||||
video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
format_info = self._FORMATS_INFO.get(source.tag, {})
|
||||
formats.append({
|
||||
@ -86,7 +112,7 @@ class HBOBaseIE(InfoExtractor):
|
||||
'width': format_info.get('width'),
|
||||
'height': format_info.get('height'),
|
||||
})
|
||||
self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
card_sizes = xpath_element(video_data, 'titleCardSizes')
|
||||
@ -112,10 +138,11 @@ class HBOBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class HBOIE(HBOBaseIE):
|
||||
IE_NAME = 'hbo'
|
||||
_VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
|
||||
'md5': '1c33253f0c7782142c993c0ba62a8753',
|
||||
'md5': '2c6a6bc1222c7e91cb3334dad1746e5a',
|
||||
'info_dict': {
|
||||
'id': '1437839',
|
||||
'ext': 'mp4',
|
||||
@ -131,11 +158,12 @@ class HBOIE(HBOBaseIE):
|
||||
|
||||
|
||||
class HBOEpisodeIE(HBOBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?hbo\.com/(?!video)([^/]+/)+video/(?P<id>[0-9a-z-]+)\.html'
|
||||
IE_NAME = 'hbo:episode'
|
||||
_VALID_URL = r'https?://(?:www\.)?hbo\.com/(?P<path>(?!video)(?:(?:[^/]+/)+video|watch-free-episodes)/(?P<id>[0-9a-z-]+))(?:\.html)?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true',
|
||||
'md5': '689132b253cc0ab7434237fc3a293210',
|
||||
'md5': '61ead79b9c0dfa8d3d4b07ef4ac556fb',
|
||||
'info_dict': {
|
||||
'id': '1439518',
|
||||
'display_id': 'ep-52-inside-the-episode',
|
||||
@ -147,16 +175,19 @@ class HBOEpisodeIE(HBOBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.hbo.com/watch-free-episodes/last-week-tonight-with-john-oliver',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
path, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
content = self._download_json(
|
||||
'http://www.hbo.com/api/content/' + path, display_id)['content']
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'(?P<q1>[\'"])videoId(?P=q1)\s*:\s*(?P<q2>[\'"])(?P<video_id>\d+)(?P=q2)',
|
||||
webpage, 'video ID', group='video_id')
|
||||
video_id = compat_str((content.get('parsed', {}).get(
|
||||
'common:FullBleedVideo', {}) or content['selectedEpisode'])['videoId'])
|
||||
|
||||
info_dict = self._extract_from_id(video_id)
|
||||
info_dict['display_id'] = display_id
|
||||
|
@ -116,13 +116,25 @@ class ITVIE(InfoExtractor):
|
||||
if not play_path:
|
||||
continue
|
||||
tbr = int_or_none(media_file.get('bitrate'), 1000)
|
||||
formats.append({
|
||||
f = {
|
||||
'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
# Providing this swfVfy allows to avoid truncated downloads
|
||||
'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
|
||||
'page_url': url,
|
||||
'tbr': tbr,
|
||||
'ext': 'flv',
|
||||
})
|
||||
}
|
||||
app = self._search_regex(
|
||||
'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
|
||||
if app:
|
||||
f.update({
|
||||
'url': rtmp_url.split('?', 1)[0],
|
||||
'app': app,
|
||||
})
|
||||
else:
|
||||
f['url'] = rtmp_url
|
||||
formats.append(f)
|
||||
|
||||
ios_playlist_url = params.get('data-video-playlist')
|
||||
hmac = params.get('data-video-hmac')
|
||||
@ -172,7 +184,9 @@ class ITVIE(InfoExtractor):
|
||||
href = ios_base_url + href
|
||||
ext = determine_ext(href)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(href, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': href,
|
||||
@ -189,7 +203,8 @@ class ITVIE(InfoExtractor):
|
||||
'ext': 'ttml' if ext == 'xml' else ext,
|
||||
})
|
||||
|
||||
return {
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
@ -198,4 +213,5 @@ class ITVIE(InfoExtractor):
|
||||
'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
|
||||
'series': xpath_text(playlist, 'ProgrammeTitle'),
|
||||
'duartion': parse_duration(xpath_text(playlist, 'Duration')),
|
||||
}
|
||||
})
|
||||
return info
|
||||
|
@ -91,6 +91,7 @@ class KalturaIE(InfoExtractor):
|
||||
}],
|
||||
},
|
||||
},
|
||||
'skip': 'Gone. Maybe https://www.safaribooksonline.com/library/tutorials/introduction-to-python-anon/3469/',
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
@ -107,27 +108,37 @@ class KalturaIE(InfoExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
|
||||
mobj = (
|
||||
re.search(
|
||||
r"""(?xs)
|
||||
kWidget\.(?:thumb)?[Ee]mbed\(
|
||||
\{.*?
|
||||
(?P<q1>['\"])wid(?P=q1)\s*:\s*
|
||||
(?P<q2>['\"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
|
||||
(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*
|
||||
(?P<q4>['\"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
|
||||
(?P<q1>['"])wid(?P=q1)\s*:\s*
|
||||
(?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
|
||||
(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
|
||||
(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
|
||||
""", webpage) or
|
||||
re.search(
|
||||
r'''(?xs)
|
||||
(?P<q1>["\'])
|
||||
(?P<q1>["'])
|
||||
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
|
||||
(?P=q1).*?
|
||||
(?:
|
||||
entry_?[Ii]d|
|
||||
(?P<q2>["\'])entry_?[Ii]d(?P=q2)
|
||||
(?P<q2>["'])entry_?[Ii]d(?P=q2)
|
||||
)\s*:\s*
|
||||
(?P<q3>["\'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
|
||||
''', webpage))
|
||||
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
|
||||
''', webpage) or
|
||||
re.search(
|
||||
r'''(?xs)
|
||||
<iframe[^>]+src=(?P<q1>["'])
|
||||
(?:https?:)?//(?:www\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
|
||||
(?:(?!(?P=q1)).)*
|
||||
[?&]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
|
||||
(?P=q1)
|
||||
''', webpage)
|
||||
)
|
||||
if mobj:
|
||||
embed_info = mobj.groupdict()
|
||||
url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
|
||||
|
@ -9,6 +9,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
ExtractorError,
|
||||
)
|
||||
@ -18,6 +19,42 @@ class LimelightBaseIE(InfoExtractor):
|
||||
_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
|
||||
_API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
|
||||
|
||||
@classmethod
def _extract_urls(cls, webpage, source_url):
    """Return url results for every Limelight embed found in webpage.

    Two embed styles are recognised: JavaScript
    ``LimelightPlayer.doLoad{Media,Channel,ChannelList}(...)`` calls and
    ``<object class="...LimelightEmbeddedPlayerFlash...">`` elements whose
    flashVars carry a ``mediaId``/``channelId``/``channelListId``.

    Each result points to ``limelight:<kind>:<id>`` with source_url
    smuggled into it, and names the ``Limelight<Kind>`` extractor.
    """
    # Maps the kind as spelled in doLoad* calls (and in the extractor key)
    # to the kind used in the internal limelight: URL.
    lm = {
        'Media': 'media',
        'Channel': 'channel',
        'ChannelList': 'channel_list',
    }

    def make_entry(kind, video_id):
        # kind must be one of the keys of lm
        return cls.url_result(
            smuggle_url(
                'limelight:%s:%s' % (lm[kind], video_id),
                {'source_url': source_url}),
            'Limelight%s' % kind, video_id)

    entries = []
    for kind, video_id in re.findall(
            r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
            webpage):
        entries.append(make_entry(kind, video_id))
    for mobj in re.finditer(
            # As per [1] class attribute should be exactly equal to
            # LimelightEmbeddedPlayerFlash but numerous examples seen
            # that don't exactly match it (e.g. [2]).
            # 1. http://support.3playmedia.com/hc/en-us/articles/227732408-Limelight-Embedding-the-Captions-Plugin-with-the-Limelight-Player-on-Your-Webpage
            # 2. http://www.sedona.com/FacilitatorTraining2017
            r'''(?sx)
                <object[^>]+class=(["\'])(?:(?!\1).)*\bLimelightEmbeddedPlayerFlash\b(?:(?!\1).)*\1[^>]*>.*?
                    <param[^>]+
                        name=(["\'])flashVars\2[^>]+
                        value=(["\'])(?:(?!\3).)*(?P<kind>media|channel(?:List)?)Id=(?P<id>[a-z0-9]{32})
            ''', webpage):
        kind, video_id = mobj.group('kind'), mobj.group('id')
        # flashVars spell the kind with a lower-case first letter
        # ('media', 'channel', 'channelList').  Only upper-case that first
        # letter: str.capitalize() would also lower-case the rest and turn
        # 'channelList' into 'Channellist', and the raw kind would give
        # 'limelight:channelList:...' rather than the 'channel_list' form
        # produced for doLoadChannelList above.
        entries.append(make_entry(kind[0].upper() + kind[1:], video_id))
    return entries
|
||||
|
||||
def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
|
||||
headers = {}
|
||||
if referer:
|
||||
@ -62,13 +99,21 @@ class LimelightBaseIE(InfoExtractor):
|
||||
fmt = {
|
||||
'url': stream_url,
|
||||
'abr': float_or_none(stream.get('audioBitRate')),
|
||||
'vbr': float_or_none(stream.get('videoBitRate')),
|
||||
'fps': float_or_none(stream.get('videoFrameRate')),
|
||||
'width': int_or_none(stream.get('videoWidthInPixels')),
|
||||
'height': int_or_none(stream.get('videoHeightInPixels')),
|
||||
'ext': ext,
|
||||
}
|
||||
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp4:.+)$', stream_url)
|
||||
width = int_or_none(stream.get('videoWidthInPixels'))
|
||||
height = int_or_none(stream.get('videoHeightInPixels'))
|
||||
vbr = float_or_none(stream.get('videoBitRate'))
|
||||
if width or height or vbr:
|
||||
fmt.update({
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vbr': vbr,
|
||||
})
|
||||
else:
|
||||
fmt['vcodec'] = 'none'
|
||||
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', stream_url)
|
||||
if rtmp:
|
||||
format_id = 'rtmp'
|
||||
if stream.get('videoBitRate'):
|
||||
|
@ -119,7 +119,8 @@ class LivestreamIE(InfoExtractor):
|
||||
m3u8_url = video_data.get('m3u8_url')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
f4m_url = video_data.get('f4m_url')
|
||||
if f4m_url:
|
||||
@ -158,11 +159,11 @@ class LivestreamIE(InfoExtractor):
|
||||
if smil_url:
|
||||
formats.extend(self._extract_smil_formats(smil_url, broadcast_id))
|
||||
|
||||
entry_protocol = 'm3u8' if is_live else 'm3u8_native'
|
||||
m3u8_url = stream_info.get('m3u8_url')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, broadcast_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=False))
|
||||
m3u8_url, broadcast_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
rtsp_url = stream_info.get('rtsp_url')
|
||||
if rtsp_url:
|
||||
@ -276,7 +277,7 @@ class LivestreamOriginalIE(InfoExtractor):
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
||||
def _extract_video_formats(self, video_data, video_id, entry_protocol):
|
||||
def _extract_video_formats(self, video_data, video_id):
|
||||
formats = []
|
||||
|
||||
progressive_url = video_data.get('progressiveUrl')
|
||||
@ -289,7 +290,8 @@ class LivestreamOriginalIE(InfoExtractor):
|
||||
m3u8_url = video_data.get('httpUrl')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=False))
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
rtsp_url = video_data.get('rtspUrl')
|
||||
if rtsp_url:
|
||||
@ -340,11 +342,10 @@ class LivestreamOriginalIE(InfoExtractor):
|
||||
}
|
||||
video_data = self._download_json(stream_url, content_id)
|
||||
is_live = video_data.get('isLive')
|
||||
entry_protocol = 'm3u8' if is_live else 'm3u8_native'
|
||||
info.update({
|
||||
'id': content_id,
|
||||
'title': self._live_title(info['title']) if is_live else info['title'],
|
||||
'formats': self._extract_video_formats(video_data, content_id, entry_protocol),
|
||||
'formats': self._extract_video_formats(video_data, content_id),
|
||||
'is_live': is_live,
|
||||
})
|
||||
return info
|
||||
|
259
youtube_dl/extractor/medialaan.py
Normal file
259
youtube_dl/extractor/medialaan.py
Normal file
@ -0,0 +1,259 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class MedialaanIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
(?P<site_id>vtm|q2|vtmkzoom)\.be/
|
||||
(?:
|
||||
video(?:/[^/]+/id/|/?\?.*?\baid=)|
|
||||
(?:[^/]+/)*
|
||||
)
|
||||
)
|
||||
(?P<id>[^/?#&]+)
|
||||
'''
|
||||
_NETRC_MACHINE = 'medialaan'
|
||||
_APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-'
|
||||
_SITE_TO_APP_ID = {
|
||||
'vtm': 'vtm_watch',
|
||||
'q2': 'q2',
|
||||
'vtmkzoom': 'vtmkzoom',
|
||||
}
|
||||
_TESTS = [{
|
||||
# vod
|
||||
'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch',
|
||||
'info_dict': {
|
||||
'id': 'vtm_20170219_VM0678361_vtmwatch',
|
||||
'ext': 'mp4',
|
||||
'title': 'Allemaal Chris afl. 6',
|
||||
'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2',
|
||||
'timestamp': 1487533280,
|
||||
'upload_date': '20170219',
|
||||
'duration': 2562,
|
||||
'series': 'Allemaal Chris',
|
||||
'season': 'Allemaal Chris',
|
||||
'season_number': 1,
|
||||
'season_id': '256936078124527',
|
||||
'episode': 'Allemaal Chris afl. 6',
|
||||
'episode_number': 6,
|
||||
'episode_id': '256936078591527',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Requires account credentials',
|
||||
}, {
|
||||
# clip
|
||||
'url': 'http://vtm.be/video?aid=168332',
|
||||
'info_dict': {
|
||||
'id': '168332',
|
||||
'ext': 'mp4',
|
||||
'title': '"Veronique liegt!"',
|
||||
'description': 'md5:1385e2b743923afe54ba4adc38476155',
|
||||
'timestamp': 1489002029,
|
||||
'upload_date': '20170308',
|
||||
'duration': 96,
|
||||
},
|
||||
}, {
|
||||
# vod
|
||||
'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# vod
|
||||
'url': 'http://vtm.be/video?aid=163157',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# vod
|
||||
'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# clip
|
||||
'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
self._logged_in = False
|
||||
|
||||
def _login(self):
    """Log in through the Gigya accounts endpoint and cache the result.

    Credentials come from `_get_login_info`; when no username is available
    `raise_login_required` is invoked. On success the `UID`,
    `UIDSignature` and `signatureTimestamp` fields of the JSON reply are
    stored on the instance and `_logged_in` is switched on.

    Raises:
        ExtractorError: if the reply contains `errorDetails` or
            `errorMessage`.
    """
    username, password = self._get_login_info()
    if username is None:
        self.raise_login_required()

    post_body = urlencode_postdata({
        'APIKey': self._APIKEY,
        'sdk': 'js_6.1',
        'format': 'json',
        'loginID': username,
        'password': password,
    })

    reply = self._download_json(
        'https://accounts.eu1.gigya.com/accounts.login', None,
        note='Logging in', errnote='Unable to log in',
        data=post_body)

    # Prefer the detailed error text, fall back to the short message.
    failure = reply.get('errorDetails')
    if not failure:
        failure = reply.get('errorMessage')
    if failure:
        raise ExtractorError(
            'Unable to login: %s' % failure, expected=True)

    # These three values are sent with every later VOD API request.
    self._uid = reply['UID']
    self._uid_signature = reply['UIDSignature']
    self._signature_timestamp = reply['signatureTimestamp']

    self._logged_in = True
|
||||
|
||||
def _real_extract(self, url):
    """Extract a clip (no login) or a VOD item (login required) from a page.

    The page is treated as a VOD when a `vodId` can be found either in the
    embedded `videoJSConfig` JSON or via the fallback regexes; otherwise it
    is handled as a clip.

    Returns:
        An info dict. For clips it carries either a direct `url` or the
        first HTML5 media entry; for VOD it carries HLS `formats` plus
        optional series/season/episode metadata.

    Raises:
        ExtractorError: propagated from `_login` for VOD pages.
    """
    mobj = re.match(self._VALID_URL, url)
    video_id, site_id = mobj.group('id', 'site_id')

    webpage = self._download_webpage(url, video_id)

    # The config is a JSON document embedded as a single-quoted JS string
    # passed to JSON.parse(); transform_source undoes the string escaping
    # (doubled backslashes, \" and \') before the JSON is parsed.
    config = self._parse_json(
        self._search_regex(
            r'videoJSConfig\s*=\s*JSON\.parse\(\'({.+?})\'\);',
            webpage, 'config', default='{}'), video_id,
        transform_source=lambda s: s.replace(
            '\\\\', '\\').replace(r'\"', '"').replace(r"\'", "'"))

    # Fall back to scanning the raw page when the parsed config has no vodId.
    vod_id = config.get('vodId') or self._search_regex(
        (r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
         r'<[^>]+id=["\']vod-(\d+)'),
        webpage, 'video_id', default=None)

    # clip, no authentication required
    if not vod_id:
        # The vmmaplayer(...) arguments are wrapped in [] so they parse as
        # a JSON array; the last element is used as the video description.
        player = self._parse_json(
            self._search_regex(
                r'vmmaplayer\(({.+?})\);', webpage, 'vmma player',
                default=''),
            video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
        if player:
            video = player[-1]
            info = {
                'id': video_id,
                'url': video['videoUrl'],
                'title': video['title'],
                'thumbnail': video.get('imageUrl'),
                'timestamp': int_or_none(video.get('createdDate')),
                'duration': int_or_none(video.get('duration')),
            }
        else:
            # No player call found: use the first HTML5 media entry.
            # NOTE(review): indexing [0] assumes at least one entry exists.
            info = self._parse_html5_media_entries(
                url, webpage, video_id, m3u8_id='hls')[0]
            info.update({
                'id': video_id,
                'title': self._html_search_meta('description', webpage),
                'duration': parse_duration(self._html_search_meta('duration', webpage)),
            })
    # vod, authentication required
    else:
        if not self._logged_in:
            self._login()

        settings = self._parse_json(
            self._search_regex(
                r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
                webpage, 'drupal settings', default='{}'),
            video_id)

        # Look a string up in the Drupal settings, falling back to a
        # plain '"item": "value"' regex over the whole page.
        def get(container, item):
            return try_get(
                settings, lambda x: x[container][item],
                compat_str) or self._search_regex(
                r'"%s"\s*:\s*"([^"]+)' % item, webpage, item,
                default=None)

        app_id = get('vod', 'app_id') or self._SITE_TO_APP_ID.get(site_id, 'vtm_watch')
        sso = get('vod', 'gigyaDatabase') or 'vtm-sso'

        # The credentials cached by _login authorise the stream request.
        data = self._download_json(
            'http://vod.medialaan.io/api/1.0/item/%s/video' % vod_id,
            video_id, query={
                'app_id': app_id,
                'user_network': sso,
                'UID': self._uid,
                'UIDSignature': self._uid_signature,
                'signatureTimestamp': self._signature_timestamp,
            })

        formats = self._extract_m3u8_formats(
            data['response']['uri'], video_id, entry_protocol='m3u8_native',
            ext='mp4', m3u8_id='hls')

        self._sort_formats(formats)

        info = {
            'id': vod_id,
            'formats': formats,
        }

        # Both lookups must happen here: `get` is redefined further down.
        api_key = get('vod', 'apiKey')
        channel = get('medialaanGigya', 'channel')

        if api_key:
            # Optional metadata request; failure is tolerated (fatal=False).
            videos = self._download_json(
                'http://vod.medialaan.io/vod/v2/videos', video_id, fatal=False,
                query={
                    'channels': channel,
                    'ids': vod_id,
                    'limit': 1,
                    'apikey': api_key,
                })
            if videos:
                video = try_get(
                    videos, lambda x: x['response']['videos'][0], dict)
                if video:
                    # Shadows the settings-based `get` above; from here on
                    # lookups read from the API `video` dict instead.
                    def get(container, item, expected_type=None):
                        return try_get(
                            video, lambda x: x[container][item], expected_type)

                    def get_string(container, item):
                        return get(container, item, compat_str)

                    info.update({
                        'series': get_string('program', 'title'),
                        'season': get_string('season', 'title'),
                        'season_number': int_or_none(get('season', 'number')),
                        'season_id': get_string('season', 'id'),
                        'episode': get_string('episode', 'title'),
                        'episode_number': int_or_none(get('episode', 'number')),
                        'episode_id': get_string('episode', 'id'),
                        # Prefer `duration`; otherwise scale the
                        # millisecond field down by 1000.
                        'duration': int_or_none(
                            video.get('duration')) or int_or_none(
                            video.get('durationMillis'), scale=1000),
                        'title': get_string('episode', 'title'),
                        'description': get_string('episode', 'text'),
                        'timestamp': unified_timestamp(get_string(
                            'publication', 'begin')),
                    })

        # NOTE(review): these two fallbacks are placed inside the VOD
        # branch here; confirm the nesting against the upstream file.
        if not info.get('title'):
            info['title'] = try_get(
                config, lambda x: x['videoConfig']['title'],
                compat_str) or self._html_search_regex(
                r'\\"title\\"\s*:\s*\\"(.+?)\\"', webpage, 'title',
                default=None) or self._og_search_title(webpage)

        if not info.get('description'):
            info['description'] = self._html_search_regex(
                r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>',
                webpage, 'description', default=None)

    return info
|
70
youtube_dl/extractor/medici.py
Normal file
70
youtube_dl/extractor/medici.py
Normal file
@ -0,0 +1,70 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class MediciIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?medici\.tv/#!/(?P<id>[^?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.medici.tv/#!/daniel-harding-frans-helmerson-verbier-festival-music-camp',
|
||||
'md5': '004c21bb0a57248085b6ff3fec72719d',
|
||||
'info_dict': {
|
||||
'id': '3059',
|
||||
'ext': 'flv',
|
||||
'title': 'Daniel Harding conducts the Verbier Festival Music Camp \u2013 With Frans Helmerson',
|
||||
'description': 'md5:322a1e952bafb725174fd8c1a8212f58',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20170408',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
    """Fetch the page JSON for a medici.tv item and build its info dict.

    The HTML page is requested first only for its side effect: the
    `csrftoken` cookie it sets is echoed back in the `X-CSRFToken` header
    of the JSON POST request. Formats come from the HDS (f4m) manifest
    referenced by `url_akamai`.
    """
    MEDICI_URL = 'http://www.medici.tv/'
    display_id = self._match_id(url)

    # Sets csrftoken cookie
    self._download_webpage(url, display_id)

    form_body = urlencode_postdata({
        'json': 'true',
        'page': '/%s' % display_id,
        'timezone_offset': -420,
    })
    request_headers = {
        'X-CSRFToken': self._get_cookies(url)['csrftoken'].value,
        'X-Requested-With': 'XMLHttpRequest',
        'Referer': MEDICI_URL,
        'Content-Type': 'application/x-www-form-urlencoded',
    }
    data = self._download_json(
        MEDICI_URL, display_id, data=form_body, headers=request_headers)

    video_section = data['video']
    video = video_section['videos']['video1']

    title = video.get('nom') or data['title']

    # Prefer the id from the JSON; keep the URL slug otherwise.
    video_id = video.get('id') or display_id

    # NOTE(review): 'plugin=aasp' is sent as one query *key*; presumably
    # a plugin=aasp-<version> pair was intended -- confirm.
    manifest_url = update_url_query(video['url_akamai'], {
        'hdcore': '3.1.0',
        'plugin=aasp': '3.1.0.43.124',
    })
    # NOTE(review): formats are returned without self._sort_formats();
    # confirm the manifest order is already worst-to-best.
    formats = self._extract_f4m_formats(
        manifest_url, video_id, f4m_id='hds')

    return {
        'id': video_id,
        'title': title,
        'description': data.get('meta_description'),
        'thumbnail': video.get('url_thumbnail') or data.get('main_image'),
        'upload_date': unified_strdate(video_section.get('date')),
        'formats': formats,
    }
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
@ -24,6 +25,9 @@ class MiTeleBaseIE(InfoExtractor):
|
||||
r'(?s)(<ms-video-player.+?</ms-video-player>)',
|
||||
webpage, 'ms video player'))
|
||||
video_id = player_data['data-media-id']
|
||||
if player_data.get('data-cms-id') == 'ooyala':
|
||||
return self.url_result(
|
||||
'ooyala:%s' % video_id, ie=OoyalaIE.ie_key(), video_id=video_id)
|
||||
config_url = compat_urlparse.urljoin(url, player_data['data-config'])
|
||||
config = self._download_json(
|
||||
config_url, video_id, 'Downloading config JSON')
|
||||
|
@ -97,7 +97,7 @@ class MixcloudIE(InfoExtractor):
|
||||
view_count = str_to_int(self._search_regex(
|
||||
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
||||
r'/listeners/?">([0-9,.]+)</a>',
|
||||
r'm-tooltip=["\']([\d,.]+) plays'],
|
||||
r'(?:m|data)-tooltip=["\']([\d,.]+) plays'],
|
||||
webpage, 'play count', default=None))
|
||||
|
||||
return {
|
||||
@ -138,12 +138,12 @@ class MixcloudPlaylistBaseIE(InfoExtractor):
|
||||
|
||||
def _get_user_description(self, page_content):
|
||||
return self._html_search_regex(
|
||||
r'<div[^>]+class="description-text"[^>]*>(.+?)</div>',
|
||||
r'<div[^>]+class="profile-bio"[^>]*>(.+?)</div>',
|
||||
page_content, 'user description', fatal=False)
|
||||
|
||||
|
||||
class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
|
||||
IE_NAME = 'mixcloud:user'
|
||||
|
||||
_TESTS = [{
|
||||
@ -151,7 +151,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
||||
'info_dict': {
|
||||
'id': 'dholbach_uploads',
|
||||
'title': 'Daniel Holbach (uploads)',
|
||||
'description': 'md5:327af72d1efeb404a8216c27240d1370',
|
||||
'description': 'md5:def36060ac8747b3aabca54924897e47',
|
||||
},
|
||||
'playlist_mincount': 11,
|
||||
}, {
|
||||
@ -159,7 +159,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
||||
'info_dict': {
|
||||
'id': 'dholbach_uploads',
|
||||
'title': 'Daniel Holbach (uploads)',
|
||||
'description': 'md5:327af72d1efeb404a8216c27240d1370',
|
||||
'description': 'md5:def36060ac8747b3aabca54924897e47',
|
||||
},
|
||||
'playlist_mincount': 11,
|
||||
}, {
|
||||
@ -167,7 +167,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
||||
'info_dict': {
|
||||
'id': 'dholbach_favorites',
|
||||
'title': 'Daniel Holbach (favorites)',
|
||||
'description': 'md5:327af72d1efeb404a8216c27240d1370',
|
||||
'description': 'md5:def36060ac8747b3aabca54924897e47',
|
||||
},
|
||||
'params': {
|
||||
'playlist_items': '1-100',
|
||||
@ -178,7 +178,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
||||
'info_dict': {
|
||||
'id': 'dholbach_listens',
|
||||
'title': 'Daniel Holbach (listens)',
|
||||
'description': 'md5:327af72d1efeb404a8216c27240d1370',
|
||||
'description': 'md5:def36060ac8747b3aabca54924897e47',
|
||||
},
|
||||
'params': {
|
||||
'playlist_items': '1-100',
|
||||
@ -216,7 +216,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
||||
|
||||
|
||||
class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
|
||||
IE_NAME = 'mixcloud:playlist'
|
||||
|
||||
_TESTS = [{
|
||||
@ -229,12 +229,7 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
|
||||
'playlist_mincount': 16,
|
||||
}, {
|
||||
'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
|
||||
'info_dict': {
|
||||
'id': 'maxvibes_jazzcat-on-ness-radio',
|
||||
'title': 'Jazzcat on Ness Radio',
|
||||
'description': 'md5:7bbbf0d6359a0b8cda85224be0f8f263',
|
||||
},
|
||||
'playlist_mincount': 23
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -243,15 +238,16 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
|
||||
playlist_id = mobj.group('playlist')
|
||||
video_id = '%s_%s' % (user_id, playlist_id)
|
||||
|
||||
profile = self._download_webpage(
|
||||
webpage = self._download_webpage(
|
||||
url, user_id,
|
||||
note='Downloading playlist page',
|
||||
errnote='Unable to download playlist page')
|
||||
|
||||
description = self._get_user_description(profile)
|
||||
playlist_title = self._html_search_regex(
|
||||
r'<span[^>]+class="[^"]*list-playlist-title[^"]*"[^>]*>(.*?)</span>',
|
||||
profile, 'playlist title')
|
||||
title = self._html_search_regex(
|
||||
r'<a[^>]+class="parent active"[^>]*><b>\d+</b><span[^>]*>([^<]+)',
|
||||
webpage, 'playlist title',
|
||||
default=None) or self._og_search_title(webpage, fatal=False)
|
||||
description = self._get_user_description(webpage)
|
||||
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(
|
||||
@ -259,11 +255,11 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
|
||||
'%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist'),
|
||||
self._PAGE_SIZE)
|
||||
|
||||
return self.playlist_result(entries, video_id, playlist_title, description)
|
||||
return self.playlist_result(entries, video_id, title, description)
|
||||
|
||||
|
||||
class MixcloudStreamIE(MixcloudPlaylistBaseIE):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$'
|
||||
IE_NAME = 'mixcloud:stream'
|
||||
|
||||
_TEST = {
|
||||
|
@ -34,12 +34,6 @@ class NineCNineMediaStackIE(NineCNineMediaBaseIE):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stack_base_url + 'f4m', stack_id,
|
||||
f4m_id='hds', fatal=False))
|
||||
mp4_url = self._download_webpage(stack_base_url + 'pd', stack_id, fatal=False)
|
||||
if mp4_url:
|
||||
formats.append({
|
||||
'url': mp4_url,
|
||||
'format_id': 'mp4',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@ -313,9 +313,9 @@ class NPOIE(NPOBaseIE):
|
||||
|
||||
class NPOLiveIE(NPOBaseIE):
|
||||
IE_NAME = 'npo.nl:live'
|
||||
_VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?npo\.nl/live(?:/(?P<id>[^/?#&]+))?'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.npo.nl/live/npo-1',
|
||||
'info_dict': {
|
||||
'id': 'LI_NL1_4188102',
|
||||
@ -327,10 +327,13 @@ class NPOLiveIE(NPOBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.npo.nl/live',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
display_id = self._match_id(url) or 'npo-1'
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
|
@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
@ -176,14 +177,32 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
})
|
||||
return info
|
||||
|
||||
quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd'))
|
||||
quality = qualities(('4', '0', '1', '2', '3', '5'))
|
||||
|
||||
formats = [{
|
||||
'url': f['url'],
|
||||
'ext': 'mp4',
|
||||
'format_id': f['name'],
|
||||
'quality': quality(f['name']),
|
||||
} for f in metadata['videos']]
|
||||
|
||||
m3u8_url = metadata.get('hlsManifestUrl')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
dash_manifest = metadata.get('metadataEmbedded')
|
||||
if dash_manifest:
|
||||
formats.extend(self._parse_mpd_formats(
|
||||
compat_etree_fromstring(dash_manifest), 'mpd'))
|
||||
|
||||
for fmt in formats:
|
||||
fmt_type = self._search_regex(
|
||||
r'\btype[/=](\d)', fmt['url'],
|
||||
'format type', default=None)
|
||||
if fmt_type:
|
||||
fmt['quality'] = quality(fmt_type)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
info['formats'] = formats
|
||||
|
@ -75,38 +75,38 @@ class OpenloadIE(InfoExtractor):
|
||||
'<span[^>]+id="[^"]+"[^>]*>([0-9A-Za-z]+)</span>',
|
||||
webpage, 'openload ID')
|
||||
|
||||
video_url_chars = []
|
||||
|
||||
first_char = ord(ol_id[0])
|
||||
key = first_char - 50
|
||||
maxKey = max(2, key)
|
||||
key = min(maxKey, len(ol_id) - 22)
|
||||
t = ol_id[key:key + 20]
|
||||
|
||||
hashMap = {}
|
||||
v = ol_id.replace(t, "")
|
||||
h = 0
|
||||
|
||||
while h < len(t):
|
||||
f = t[h:h + 2]
|
||||
i = int(f, 16)
|
||||
hashMap[h / 2] = i
|
||||
h += 2
|
||||
|
||||
h = 0
|
||||
|
||||
while h < len(v):
|
||||
B = v[h:h + 2]
|
||||
i = int(B, 16)
|
||||
index = (h / 2) % 10
|
||||
A = hashMap[index]
|
||||
i = i ^ 137
|
||||
i = i ^ A
|
||||
video_url_chars.append(compat_chr(i))
|
||||
h += 2
|
||||
decoded = ''
|
||||
a = ol_id[0:24]
|
||||
b = []
|
||||
for i in range(0, len(a), 8):
|
||||
b.append(int(a[i:i + 8] or '0', 16))
|
||||
ol_id = ol_id[24:]
|
||||
j = 0
|
||||
k = 0
|
||||
while j < len(ol_id):
|
||||
c = 128
|
||||
d = 0
|
||||
e = 0
|
||||
f = 0
|
||||
_more = True
|
||||
while _more:
|
||||
if j + 1 >= len(ol_id):
|
||||
c = 143
|
||||
f = int(ol_id[j:j + 2] or '0', 16)
|
||||
j += 2
|
||||
d += (f & 127) << e
|
||||
e += 7
|
||||
_more = f >= c
|
||||
g = d ^ b[k % 3]
|
||||
for i in range(4):
|
||||
char_dec = (g >> 8 * i) & (c + 127)
|
||||
char = compat_chr(char_dec)
|
||||
if char != '#':
|
||||
decoded += char
|
||||
k += 1
|
||||
|
||||
video_url = 'https://openload.co/stream/%s?mime=true'
|
||||
video_url = video_url % (''.join(video_url_chars))
|
||||
video_url = video_url % decoded
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
|
||||
|
138
youtube_dl/extractor/packtpub.py
Normal file
138
youtube_dl/extractor/packtpub.py
Normal file
@ -0,0 +1,138 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
remove_end,
|
||||
strip_or_none,
|
||||
unified_timestamp,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class PacktPubBaseIE(InfoExtractor):
    # Site root; also used as the base for relative thumbnail paths.
    _PACKT_BASE = 'https://www.packtpub.com'
    # Root of the REST API queried by the Packt extractors.
    _MAPT_REST = _PACKT_BASE + '/mapt-rest'
|
||||
|
||||
|
||||
class PacktPubIE(PacktPubBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro',
|
||||
'md5': '1e74bd6cfd45d7d07666f4684ef58f70',
|
||||
'info_dict': {
|
||||
'id': '20530',
|
||||
'ext': 'mp4',
|
||||
'title': 'Project Intro',
|
||||
'thumbnail': r're:(?i)^https?://.*\.jpg',
|
||||
'timestamp': 1490918400,
|
||||
'upload_date': '20170331',
|
||||
},
|
||||
}
|
||||
|
||||
def _handle_error(self, response):
|
||||
if response.get('status') != 'success':
|
||||
raise ExtractorError(
|
||||
'% said: %s' % (self.IE_NAME, response['message']),
|
||||
expected=True)
|
||||
|
||||
def _download_json(self, *args, **kwargs):
    """Download JSON via the parent class and validate its status.

    All arguments are forwarded unchanged; the decoded reply is passed to
    `_handle_error` before being returned to the caller.
    """
    payload = super(PacktPubIE, self)._download_json(*args, **kwargs)
    self._handle_error(payload)
    return payload
|
||||
|
||||
def _real_extract(self, url):
    """Resolve one course section into a directly playable video.

    Two API calls are made: the per-user section resource, which holds the
    media file URL, and the section metadata, which holds the title,
    publication date and thumbnail path.

    Raises:
        ExtractorError: (expected) when the section has no `content`.
    """
    match = re.match(self._VALID_URL, url)
    course_id = match.group('course_id')
    chapter_id = match.group('chapter_id')
    video_id = match.group('id')

    # Both endpoints are filled in with the same four values.
    path_args = (self._MAPT_REST, course_id, chapter_id, video_id)

    section = self._download_json(
        '%s/users/me/products/%s/chapters/%s/sections/%s' % path_args,
        video_id, 'Downloading JSON video')['data']

    content = section.get('content')
    if not content:
        raise ExtractorError('This video is locked', expected=True)

    video_url = content['file']

    metadata = self._download_json(
        '%s/products/%s/chapters/%s/sections/%s/metadata' % path_args,
        video_id)['data']

    title = metadata['pageTitle']
    course_title = metadata.get('title')
    if course_title:
        # Drop the ' - <course title>' suffix from the page title.
        title = remove_end(title, ' - %s' % course_title)

    return {
        'id': video_id,
        'url': video_url,
        'title': title,
        'thumbnail': urljoin(self._PACKT_BASE, metadata.get('filepath')),
        'timestamp': unified_timestamp(metadata.get('publicationDate')),
    }
|
||||
|
||||
|
||||
class PacktPubCourseIE(PacktPubBaseIE):
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<id>\d+))'
|
||||
_TEST = {
|
||||
'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215',
|
||||
'info_dict': {
|
||||
'id': '9781787122215',
|
||||
'title': 'Learn Nodejs by building 12 projects [Video]',
|
||||
},
|
||||
'playlist_count': 90,
|
||||
}
|
||||
|
||||
@classmethod
def suitable(cls, url):
    """Accept course URLs only when they are not single-video URLs.

    The course pattern is a prefix of the video pattern, so anything
    `PacktPubIE` accepts is rejected here first.
    """
    if PacktPubIE.suitable(url):
        return False
    return super(PacktPubCourseIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
    """Build a playlist with one entry per section of a course.

    Walks `tableOfContents` from the course metadata. Items that are not
    of type `chapter`, or whose `children` is not a list, are skipped;
    within a chapter only `section` children with a string `seoUrl`
    become `url_transparent` entries handled by `PacktPubIE`.
    """
    match = re.match(self._VALID_URL, url)
    url, course_id = match.group('url'), match.group('id')

    course = self._download_json(
        '%s/products/%s/metadata' % (self._MAPT_REST, course_id),
        course_id)['data']

    entries = []
    # The counter advances for every table-of-contents item, including
    # the ones skipped below.
    for position, chapter in enumerate(course['tableOfContents'], 1):
        if chapter.get('type') != 'chapter':
            continue
        sections = chapter.get('children')
        if not isinstance(sections, list):
            continue
        chapter_meta = {
            'chapter': chapter.get('title'),
            'chapter_number': position,
            'chapter_id': chapter.get('id'),
        }
        for section in sections:
            if section.get('type') != 'section':
                continue
            relative_url = section.get('seoUrl')
            if isinstance(relative_url, compat_str):
                entry = {
                    '_type': 'url_transparent',
                    'url': urljoin(url + '/', relative_url),
                    'title': strip_or_none(section.get('title')),
                    'description': clean_html(section.get('summary')),
                    'ie_key': PacktPubIE.ie_key(),
                }
                entry.update(chapter_meta)
                entries.append(entry)

    return self.playlist_result(entries, course_id, course.get('title'))
|
@ -20,7 +20,7 @@ class PeriscopeBaseIE(InfoExtractor):
|
||||
class PeriscopeIE(PeriscopeBaseIE):
|
||||
IE_DESC = 'Periscope'
|
||||
IE_NAME = 'periscope'
|
||||
_VALID_URL = r'https?://(?:www\.)?periscope\.tv/[^/]+/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)'
|
||||
# Alive example URLs can be found here http://onperiscope.com/
|
||||
_TESTS = [{
|
||||
'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
|
||||
@ -41,6 +41,9 @@ class PeriscopeIE(PeriscopeBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.periscope.tv/bastaakanoggano/1OdKrlkZZjOJX',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@ -103,7 +106,7 @@ class PeriscopeIE(PeriscopeBaseIE):
|
||||
|
||||
|
||||
class PeriscopeUserIE(PeriscopeBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?periscope\.tv/(?P<id>[^/]+)/?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/(?P<id>[^/]+)/?$'
|
||||
IE_DESC = 'Periscope user videos'
|
||||
IE_NAME = 'periscope:user'
|
||||
|
||||
|
@ -40,7 +40,7 @@ class PluralsightIE(PluralsightBaseIE):
|
||||
'info_dict': {
|
||||
'id': 'hosting-sql-server-windows-azure-iaas-m7-mgmt-04',
|
||||
'ext': 'mp4',
|
||||
'title': 'Management of SQL Server - Demo Monitoring',
|
||||
'title': 'Demo Monitoring',
|
||||
'duration': 338,
|
||||
},
|
||||
'skip': 'Requires pluralsight account credentials',
|
||||
@ -169,11 +169,10 @@ class PluralsightIE(PluralsightBaseIE):
|
||||
|
||||
collection = course['modules']
|
||||
|
||||
module, clip = None, None
|
||||
clip = None
|
||||
|
||||
for module_ in collection:
|
||||
if name in (module_.get('moduleName'), module_.get('name')):
|
||||
module = module_
|
||||
for clip_ in module_.get('clips', []):
|
||||
clip_index = clip_.get('clipIndex')
|
||||
if clip_index is None:
|
||||
@ -187,7 +186,7 @@ class PluralsightIE(PluralsightBaseIE):
|
||||
if not clip:
|
||||
raise ExtractorError('Unable to resolve clip')
|
||||
|
||||
title = '%s - %s' % (module['title'], clip['title'])
|
||||
title = clip['title']
|
||||
|
||||
QUALITIES = {
|
||||
'low': {'width': 640, 'height': 480},
|
||||
|
@ -1,7 +1,9 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import functools
|
||||
import itertools
|
||||
import operator
|
||||
# import os
|
||||
import re
|
||||
|
||||
@ -18,6 +20,7 @@ from ..utils import (
|
||||
js_to_json,
|
||||
orderedSet,
|
||||
# sanitized_Request,
|
||||
remove_quotes,
|
||||
str_to_int,
|
||||
)
|
||||
# from ..aes import (
|
||||
@ -129,9 +132,32 @@ class PornHubIE(InfoExtractor):
|
||||
|
||||
tv_webpage = dl_webpage('tv')
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'<video[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//.+?)\1', tv_webpage,
|
||||
'video url', group='url')
|
||||
assignments = self._search_regex(
|
||||
r'(var.+?mediastring.+?)</script>', tv_webpage,
|
||||
'encoded url').split(';')
|
||||
|
||||
js_vars = {}
|
||||
|
||||
def parse_js_value(inp):
|
||||
inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
|
||||
if '+' in inp:
|
||||
inps = inp.split('+')
|
||||
return functools.reduce(
|
||||
operator.concat, map(parse_js_value, inps))
|
||||
inp = inp.strip()
|
||||
if inp in js_vars:
|
||||
return js_vars[inp]
|
||||
return remove_quotes(inp)
|
||||
|
||||
for assn in assignments:
|
||||
assn = assn.strip()
|
||||
if not assn:
|
||||
continue
|
||||
assn = re.sub(r'var\s+', '', assn)
|
||||
vname, value = assn.split('=', 1)
|
||||
js_vars[vname] = parse_js_value(value)
|
||||
|
||||
video_url = js_vars['mediastring']
|
||||
|
||||
title = self._search_regex(
|
||||
r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None)
|
||||
|
@ -1,23 +1,40 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
GeoRestrictedError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
urljoin,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class RaiBaseIE(InfoExtractor):
|
||||
def _extract_relinker_formats(self, relinker_url, video_id):
|
||||
_UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
|
||||
_GEO_COUNTRIES = ['IT']
|
||||
_GEO_BYPASS = False
|
||||
|
||||
def _extract_relinker_info(self, relinker_url, video_id):
|
||||
formats = []
|
||||
geoprotection = None
|
||||
is_live = None
|
||||
duration = None
|
||||
|
||||
for platform in ('mon', 'flash', 'native'):
|
||||
relinker = self._download_xml(
|
||||
@ -27,9 +44,27 @@ class RaiBaseIE(InfoExtractor):
|
||||
query={'output': 45, 'pl': platform},
|
||||
headers=self.geo_verification_headers())
|
||||
|
||||
media_url = find_xpath_attr(relinker, './url', 'type', 'content').text
|
||||
if not geoprotection:
|
||||
geoprotection = xpath_text(
|
||||
relinker, './geoprotection', default=None) == 'Y'
|
||||
|
||||
if not is_live:
|
||||
is_live = xpath_text(
|
||||
relinker, './is_live', default=None) == 'Y'
|
||||
if not duration:
|
||||
duration = parse_duration(xpath_text(
|
||||
relinker, './duration', default=None))
|
||||
|
||||
url_elem = find_xpath_attr(relinker, './url', 'type', 'content')
|
||||
if url_elem is None:
|
||||
continue
|
||||
|
||||
media_url = url_elem.text
|
||||
|
||||
# This does not imply geo restriction (e.g.
|
||||
# http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
|
||||
if media_url == 'http://download.rai.it/video_no_available.mp4':
|
||||
self.raise_geo_restricted()
|
||||
continue
|
||||
|
||||
ext = determine_ext(media_url)
|
||||
if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'):
|
||||
@ -53,215 +88,333 @@ class RaiBaseIE(InfoExtractor):
|
||||
'format_id': 'http-%d' % bitrate if bitrate > 0 else 'http',
|
||||
})
|
||||
|
||||
return formats
|
||||
if not formats and geoprotection is True:
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
|
||||
def _extract_from_content_id(self, content_id, base_url):
|
||||
return dict((k, v) for k, v in {
|
||||
'is_live': is_live,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}.items() if v is not None)
|
||||
|
||||
@staticmethod
def _extract_subtitles(url, subtitle_url):
    """Build the subtitles dict for an optional subtitle URL.

    Returns an empty dict unless ``subtitle_url`` is a non-empty
    ``compat_str``.  Otherwise the URL is resolved against ``url`` and
    listed under the ``'it'`` key as an ``stl`` track; when the resolved
    URL ends with ``.stl``, a second ``srt`` track pointing at the same
    URL with a ``.srt`` suffix is listed as well.
    """
    if not subtitle_url or not isinstance(subtitle_url, compat_str):
        return {}

    stl_suffix, srt_suffix = '.stl', '.srt'
    resolved_url = urljoin(url, subtitle_url)

    tracks = [{
        'ext': 'stl',
        'url': resolved_url,
    }]
    if resolved_url.endswith(stl_suffix):
        # Same location, only the extension is swapped.
        tracks.append({
            'ext': 'srt',
            'url': resolved_url[:-len(stl_suffix)] + srt_suffix,
        })
    return {'it': tracks}
|
||||
|
||||
|
||||
class RaiPlayIE(RaiBaseIE):
    """Extractor for raiplay.it video pages.

    The page URL with ``?json`` appended is downloaded as JSON metadata;
    formats come from the relinker URL found in that metadata.
    """
    _VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.html)' % RaiBaseIE._UUID_RE
    _TESTS = [{
        'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter',
        'md5': '340aa3b7afb54bfd14a8c11786450d76',
        'info_dict': {
            'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66',
            'ext': 'mp4',
            'title': 'La Casa Bianca',
            'alt_title': 'S2016 - Puntata del 23/10/2016',
            'description': 'md5:a09d45890850458077d1f68bb036e0a5',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Rai 3',
            'creator': 'Rai 3',
            'duration': 3278,
            'timestamp': 1477764300,
            'upload_date': '20161029',
            'series': 'La Casa Bianca',
            'season': '2016',
        },
    }, {
        'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
        'md5': '8970abf8caf8aef4696e7b1f2adfc696',
        'info_dict': {
            'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
            'ext': 'mp4',
            'title': 'Report del 07/04/2014',
            'alt_title': 'S2013/14 - Puntata del 07/04/2014',
            'description': 'md5:f27c544694cacb46a078db84ec35d2d9',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Rai 5',
            'creator': 'Rai 5',
            'duration': 6160,
            'series': 'Report',
            'season_number': 5,
            'season': '2013/14',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The "url" group ends at ".html", so anything after it (query
        # string, fragment) is dropped before '?json' is appended.
        url, video_id = re.match(self._VALID_URL, url).group('url', 'id')

        media = self._download_json(
            '%s?json' % url, video_id, 'Downloading video JSON')

        # Mandatory fields: a missing key aborts the extraction.
        title = media['name']
        video = media['video']

        relinker_info = self._extract_relinker_info(
            video['contentUrl'], video_id)
        self._sort_formats(relinker_info['formats'])

        # Image URLs carry a "[RESOLUTION]" placeholder; empty entries
        # are skipped.
        thumbnails = [
            {'url': image_url.replace('[RESOLUTION]', '600x400')}
            for image_url in media.get('images', {}).values()
            if image_url
        ]

        start = try_get(
            media, lambda x: x['availabilities'][0]['start'], compat_str)

        info = {
            'id': video_id,
            'title': title,
            'alt_title': media.get('subtitle'),
            'description': media.get('description'),
            'uploader': media.get('channel'),
            'creator': media.get('editor'),
            'duration': parse_duration(video.get('duration')),
            'timestamp': unified_timestamp(start),
            'thumbnails': thumbnails,
            'series': try_get(
                media, lambda x: x['isPartOf']['name'], compat_str),
            'season_number': int_or_none(try_get(
                media, lambda x: x['isPartOf']['numeroStagioni'])),
            'season': media.get('stagione') or None,
            'subtitles': self._extract_subtitles(url, video.get('subtitles')),
        }

        # Whatever the relinker reported overrides the JSON metadata
        # for the keys it returns.
        info.update(relinker_info)
        return info
|
||||
|
||||
|
||||
class RaiIE(RaiBaseIE):
|
||||
_VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
|
||||
_TESTS = [{
|
||||
# var uniquename = "ContentItem-..."
|
||||
# data-id="ContentItem-..."
|
||||
'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
|
||||
'info_dict': {
|
||||
'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
|
||||
'ext': 'mp4',
|
||||
'title': 'TG PRIMO TEMPO',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1758,
|
||||
'upload_date': '20140612',
|
||||
}
|
||||
}, {
|
||||
# with ContentItem in many metas
|
||||
'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html',
|
||||
'info_dict': {
|
||||
'id': '1632c009-c843-4836-bb65-80c33084a64b',
|
||||
'ext': 'mp4',
|
||||
'title': 'Weekend al cinema, da Hollywood arriva il thriller di Tate Taylor "La ragazza del treno"',
|
||||
'description': 'I film in uscita questa settimana.',
|
||||
'thumbnail': r're:^https?://.*\.png$',
|
||||
'duration': 833,
|
||||
'upload_date': '20161103',
|
||||
}
|
||||
}, {
|
||||
# with ContentItem in og:url
|
||||
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
|
||||
'md5': '11959b4e44fa74de47011b5799490adf',
|
||||
'info_dict': {
|
||||
'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
|
||||
'ext': 'mp4',
|
||||
'title': 'TG1 ore 20:00 del 03/11/2016',
|
||||
'description': 'TG1 edizione integrale ore 20:00 del giorno 03/11/2016',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 2214,
|
||||
'upload_date': '20161103',
|
||||
}
|
||||
}, {
|
||||
# drawMediaRaiTV(...)
|
||||
'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
|
||||
'md5': '2dd727e61114e1ee9c47f0da6914e178',
|
||||
'info_dict': {
|
||||
'id': '59d69d28-6bb6-409d-a4b5-ed44096560af',
|
||||
'ext': 'mp4',
|
||||
'title': 'Il pacco',
|
||||
'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20141221',
|
||||
},
|
||||
}, {
|
||||
# initEdizione('ContentItem-...'
|
||||
'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
|
||||
'info_dict': {
|
||||
'id': 'c2187016-8484-4e3a-8ac8-35e475b07303',
|
||||
'ext': 'mp4',
|
||||
'title': r're:TG1 ore \d{2}:\d{2} del \d{2}/\d{2}/\d{4}',
|
||||
'duration': 2274,
|
||||
'upload_date': '20170401',
|
||||
},
|
||||
'skip': 'Changes daily',
|
||||
}, {
|
||||
# HDS live stream with only relinker URL
|
||||
'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews',
|
||||
'info_dict': {
|
||||
'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc',
|
||||
'ext': 'flv',
|
||||
'title': 'EuroNews',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# HLS live stream with ContentItem in og:url
|
||||
'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
|
||||
'info_dict': {
|
||||
'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9',
|
||||
'ext': 'mp4',
|
||||
'title': 'La diretta di Rainews24',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_from_content_id(self, content_id, url):
|
||||
media = self._download_json(
|
||||
'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json' % content_id,
|
||||
content_id, 'Downloading video JSON')
|
||||
|
||||
title = media['name'].strip()
|
||||
|
||||
media_type = media['type']
|
||||
if 'Audio' in media_type:
|
||||
relinker_info = {
|
||||
'formats': {
|
||||
'format_id': media.get('formatoAudio'),
|
||||
'url': media['audioUrl'],
|
||||
'ext': media.get('formatoAudio'),
|
||||
}
|
||||
}
|
||||
elif 'Video' in media_type:
|
||||
relinker_info = self._extract_relinker_info(media['mediaUri'], content_id)
|
||||
else:
|
||||
raise ExtractorError('not a media file')
|
||||
|
||||
self._sort_formats(relinker_info['formats'])
|
||||
|
||||
thumbnails = []
|
||||
for image_type in ('image', 'image_medium', 'image_300'):
|
||||
thumbnail_url = media.get(image_type)
|
||||
if thumbnail_url:
|
||||
thumbnails.append({
|
||||
'url': compat_urlparse.urljoin(base_url, thumbnail_url),
|
||||
'url': compat_urlparse.urljoin(url, thumbnail_url),
|
||||
})
|
||||
|
||||
formats = []
|
||||
media_type = media['type']
|
||||
if 'Audio' in media_type:
|
||||
formats.append({
|
||||
'format_id': media.get('formatoAudio'),
|
||||
'url': media['audioUrl'],
|
||||
'ext': media.get('formatoAudio'),
|
||||
})
|
||||
elif 'Video' in media_type:
|
||||
formats.extend(self._extract_relinker_formats(media['mediaUri'], content_id))
|
||||
self._sort_formats(formats)
|
||||
else:
|
||||
raise ExtractorError('not a media file')
|
||||
subtitles = self._extract_subtitles(url, media.get('subtitlesUrl'))
|
||||
|
||||
subtitles = {}
|
||||
captions = media.get('subtitlesUrl')
|
||||
if captions:
|
||||
STL_EXT = '.stl'
|
||||
SRT_EXT = '.srt'
|
||||
if captions.endswith(STL_EXT):
|
||||
captions = captions[:-len(STL_EXT)] + SRT_EXT
|
||||
subtitles['it'] = [{
|
||||
'ext': 'srt',
|
||||
'url': captions,
|
||||
}]
|
||||
|
||||
return {
|
||||
info = {
|
||||
'id': content_id,
|
||||
'title': media['name'],
|
||||
'description': media.get('desc'),
|
||||
'title': title,
|
||||
'description': strip_or_none(media.get('desc')),
|
||||
'thumbnails': thumbnails,
|
||||
'uploader': media.get('author'),
|
||||
'upload_date': unified_strdate(media.get('date')),
|
||||
'duration': parse_duration(media.get('length')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
info.update(relinker_info)
|
||||
|
||||
class RaiTVIE(RaiBaseIE):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/(?:[^/]+/)+(?:media|ondemand)/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
|
||||
'md5': '8970abf8caf8aef4696e7b1f2adfc696',
|
||||
'info_dict': {
|
||||
'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
|
||||
'ext': 'mp4',
|
||||
'title': 'Report del 07/04/2014',
|
||||
'description': 'md5:f27c544694cacb46a078db84ec35d2d9',
|
||||
'upload_date': '20140407',
|
||||
'duration': 6160,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
}
|
||||
},
|
||||
{
|
||||
# no m3u8 stream
|
||||
'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
|
||||
# HDS download, MD5 is unstable
|
||||
'info_dict': {
|
||||
'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
|
||||
'ext': 'flv',
|
||||
'title': 'TG PRIMO TEMPO',
|
||||
'upload_date': '20140612',
|
||||
'duration': 1758,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'skip': 'Geo-restricted to Italy',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html',
|
||||
'md5': '35cf7c229f22eeef43e48b5cf923bef0',
|
||||
'info_dict': {
|
||||
'id': '7aafdea9-0e5d-49d5-88a6-7e65da67ae13',
|
||||
'ext': 'mp4',
|
||||
'title': 'State of the Net, Antonella La Carpia: regole virali',
|
||||
'description': 'md5:b0ba04a324126903e3da7763272ae63c',
|
||||
'upload_date': '20140613',
|
||||
},
|
||||
'skip': 'Error 404',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-b4a49761-e0cc-4b14-8736-2729f6f73132-tg2.html',
|
||||
'info_dict': {
|
||||
'id': 'b4a49761-e0cc-4b14-8736-2729f6f73132',
|
||||
'ext': 'mp4',
|
||||
'title': 'Alluvione in Sardegna e dissesto idrogeologico',
|
||||
'description': 'Edizione delle ore 20:30 ',
|
||||
},
|
||||
'skip': 'invalid urls',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html',
|
||||
'md5': 'e57493e1cb8bc7c564663f363b171847',
|
||||
'info_dict': {
|
||||
'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6',
|
||||
'ext': 'mp4',
|
||||
'title': 'Il Candidato - Primo episodio: "Le Primarie"',
|
||||
'description': 'md5:364b604f7db50594678f483353164fb8',
|
||||
'upload_date': '20140923',
|
||||
'duration': 386,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
}
|
||||
},
|
||||
]
|
||||
return info
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
return self._extract_from_content_id(video_id, url)
|
||||
|
||||
|
||||
class RaiIE(RaiBaseIE):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
|
||||
'md5': '2dd727e61114e1ee9c47f0da6914e178',
|
||||
'info_dict': {
|
||||
'id': '59d69d28-6bb6-409d-a4b5-ed44096560af',
|
||||
'ext': 'mp4',
|
||||
'title': 'Il pacco',
|
||||
'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
|
||||
'upload_date': '20141221',
|
||||
},
|
||||
},
|
||||
{
|
||||
# Direct relinker URL
|
||||
'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews',
|
||||
# HDS live stream, MD5 is unstable
|
||||
'info_dict': {
|
||||
'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc',
|
||||
'ext': 'flv',
|
||||
'title': 'EuroNews',
|
||||
},
|
||||
'skip': 'Geo-restricted to Italy',
|
||||
},
|
||||
{
|
||||
# Embedded content item ID
|
||||
'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
|
||||
'md5': '84c1135ce960e8822ae63cec34441d63',
|
||||
'info_dict': {
|
||||
'id': '0960e765-62c8-474a-ac4b-7eb3e2be39c8',
|
||||
'ext': 'mp4',
|
||||
'title': 'TG1 ore 20:00 del 02/07/2016',
|
||||
'upload_date': '20160702',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
|
||||
# HDS live stream, MD5 is unstable
|
||||
'info_dict': {
|
||||
'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9',
|
||||
'ext': 'flv',
|
||||
'title': 'La diretta di Rainews24',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if RaiTVIE.suitable(url) else super(RaiIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
iframe_url = self._search_regex(
|
||||
[r'<iframe[^>]+src="([^"]*/dl/[^"]+\?iframe\b[^"]*)"',
|
||||
r'drawMediaRaiTV\(["\'](.+?)["\']'],
|
||||
webpage, 'iframe', default=None)
|
||||
if iframe_url:
|
||||
if not iframe_url.startswith('http'):
|
||||
iframe_url = compat_urlparse.urljoin(url, iframe_url)
|
||||
return self.url_result(iframe_url)
|
||||
content_item_id = None
|
||||
|
||||
content_item_id = self._search_regex(
|
||||
r'initEdizione\((?P<q1>[\'"])ContentItem-(?P<content_id>[^\'"]+)(?P=q1)',
|
||||
webpage, 'content item ID', group='content_id', default=None)
|
||||
content_item_url = self._html_search_meta(
|
||||
('og:url', 'og:video', 'og:video:secure_url', 'twitter:url',
|
||||
'twitter:player', 'jsonlink'), webpage, default=None)
|
||||
if content_item_url:
|
||||
content_item_id = self._search_regex(
|
||||
r'ContentItem-(%s)' % self._UUID_RE, content_item_url,
|
||||
'content item id', default=None)
|
||||
|
||||
if not content_item_id:
|
||||
content_item_id = self._search_regex(
|
||||
r'''(?x)
|
||||
(?:
|
||||
(?:initEdizione|drawMediaRaiTV)\(|
|
||||
<(?:[^>]+\bdata-id|var\s+uniquename)=
|
||||
)
|
||||
(["\'])
|
||||
(?:(?!\1).)*\bContentItem-(?P<id>%s)
|
||||
''' % self._UUID_RE,
|
||||
webpage, 'content item id', default=None, group='id')
|
||||
|
||||
content_item_ids = set()
|
||||
if content_item_id:
|
||||
return self._extract_from_content_id(content_item_id, url)
|
||||
content_item_ids.add(content_item_id)
|
||||
if video_id not in content_item_ids:
|
||||
content_item_ids.add(video_id)
|
||||
|
||||
relinker_url = compat_urlparse.urljoin(url, self._search_regex(
|
||||
r'(?:var\s+videoURL|mediaInfo\.mediaUri)\s*=\s*(?P<q1>[\'"])(?P<url>(https?:)?//mediapolis\.rai\.it/relinker/relinkerServlet\.htm\?cont=\d+)(?P=q1)',
|
||||
webpage, 'relinker URL', group='url'))
|
||||
formats = self._extract_relinker_formats(relinker_url, video_id)
|
||||
self._sort_formats(formats)
|
||||
for content_item_id in content_item_ids:
|
||||
try:
|
||||
return self._extract_from_content_id(content_item_id, url)
|
||||
except GeoRestrictedError:
|
||||
raise
|
||||
except ExtractorError:
|
||||
pass
|
||||
|
||||
relinker_url = self._search_regex(
|
||||
r'''(?x)
|
||||
(?:
|
||||
var\s+videoURL|
|
||||
mediaInfo\.mediaUri
|
||||
)\s*=\s*
|
||||
([\'"])
|
||||
(?P<url>
|
||||
(?:https?:)?
|
||||
//mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
|
||||
(?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
|
||||
''',
|
||||
webpage, 'relinker URL', group='url')
|
||||
|
||||
relinker_info = self._extract_relinker_info(
|
||||
urljoin(url, relinker_url), video_id)
|
||||
self._sort_formats(relinker_info['formats'])
|
||||
|
||||
title = self._search_regex(
|
||||
r'var\s+videoTitolo\s*=\s*([\'"])(?P<title>[^\'"]+)\1',
|
||||
webpage, 'title', group='title', default=None) or self._og_search_title(webpage)
|
||||
webpage, 'title', group='title',
|
||||
default=None) or self._og_search_title(webpage)
|
||||
|
||||
return {
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
info.update(relinker_info)
|
||||
|
||||
return info
|
||||
|
@ -13,15 +13,15 @@ from ..utils import (
|
||||
|
||||
|
||||
class RBMARadioIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<show_id>[^/]+)/episodes/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:rbmaradio|redbullradio)\.com/shows/(?P<show_id>[^/]+)/episodes/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.rbmaradio.com/shows/main-stage/episodes/ford-lopatin-live-at-primavera-sound-2011',
|
||||
'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
|
||||
'info_dict': {
|
||||
'id': 'ford-lopatin-live-at-primavera-sound-2011',
|
||||
'ext': 'mp3',
|
||||
'title': 'Main Stage - Ford & Lopatin',
|
||||
'description': 'md5:4f340fb48426423530af5a9d87bd7b91',
|
||||
'title': 'Main Stage - Ford & Lopatin at Primavera Sound',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 2452,
|
||||
'timestamp': 1307103164,
|
||||
|
@ -2,11 +2,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
# unified_timestamp,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@ -15,15 +17,15 @@ class RedBullTVIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
# film
|
||||
'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc',
|
||||
'md5': '78e860f631d7a846e712fab8c5fe2c38',
|
||||
'md5': 'fb0445b98aa4394e504b413d98031d1f',
|
||||
'info_dict': {
|
||||
'id': 'AP-1Q756YYX51W11',
|
||||
'ext': 'mp4',
|
||||
'title': 'ABC of...WRC',
|
||||
'description': 'md5:5c7ed8f4015c8492ecf64b6ab31e7d31',
|
||||
'duration': 1582.04,
|
||||
'timestamp': 1488405786,
|
||||
'upload_date': '20170301',
|
||||
# 'timestamp': 1488405786,
|
||||
# 'upload_date': '20170301',
|
||||
},
|
||||
}, {
|
||||
# episode
|
||||
@ -34,8 +36,8 @@ class RedBullTVIE(InfoExtractor):
|
||||
'title': 'Grime - Hashtags S2 E4',
|
||||
'description': 'md5:334b741c8c1ce65be057eab6773c1cf5',
|
||||
'duration': 904.6,
|
||||
'timestamp': 1487290093,
|
||||
'upload_date': '20170217',
|
||||
# 'timestamp': 1487290093,
|
||||
# 'upload_date': '20170217',
|
||||
'series': 'Hashtags',
|
||||
'season_number': 2,
|
||||
'episode_number': 4,
|
||||
@ -48,29 +50,40 @@ class RedBullTVIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
access_token = self._download_json(
|
||||
'https://api-v2.redbull.tv/start', video_id,
|
||||
session = self._download_json(
|
||||
'https://api-v2.redbull.tv/session', video_id,
|
||||
note='Downloading access token', query={
|
||||
'build': '4.0.9',
|
||||
'category': 'smartphone',
|
||||
'os_version': 23,
|
||||
'os_family': 'android',
|
||||
})['auth']['access_token']
|
||||
'build': '4.370.0',
|
||||
'category': 'personal_computer',
|
||||
'os_version': '1.0',
|
||||
'os_family': 'http',
|
||||
})
|
||||
if session.get('code') == 'error':
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, session['message']))
|
||||
auth = '%s %s' % (session.get('token_type', 'Bearer'), session['access_token'])
|
||||
|
||||
info = self._download_json(
|
||||
'https://api-v2.redbull.tv/views/%s' % video_id,
|
||||
video_id, note='Downloading video information',
|
||||
headers={'Authorization': 'Bearer ' + access_token}
|
||||
)['blocks'][0]['top'][0]
|
||||
try:
|
||||
info = self._download_json(
|
||||
'https://api-v2.redbull.tv/content/%s' % video_id,
|
||||
video_id, note='Downloading video information',
|
||||
headers={'Authorization': auth}
|
||||
)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||
error_message = self._parse_json(
|
||||
e.cause.read().decode(), video_id)['message']
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, error_message), expected=True)
|
||||
raise
|
||||
|
||||
video = info['video_product']
|
||||
|
||||
title = info['title'].strip()
|
||||
m3u8_url = video['url']
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
video['url'], video_id, 'mp4', 'm3u8_native')
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for _, captions in (try_get(
|
||||
@ -82,9 +95,12 @@ class RedBullTVIE(InfoExtractor):
|
||||
caption_url = caption.get('url')
|
||||
if not caption_url:
|
||||
continue
|
||||
ext = caption.get('format')
|
||||
if ext == 'xml':
|
||||
ext = 'ttml'
|
||||
subtitles.setdefault(caption.get('lang') or 'en', []).append({
|
||||
'url': caption_url,
|
||||
'ext': caption.get('format'),
|
||||
'ext': ext,
|
||||
})
|
||||
|
||||
subheading = info.get('subheading')
|
||||
@ -97,7 +113,7 @@ class RedBullTVIE(InfoExtractor):
|
||||
'description': info.get('long_description') or info.get(
|
||||
'short_description'),
|
||||
'duration': float_or_none(video.get('duration'), scale=1000),
|
||||
'timestamp': unified_timestamp(info.get('published')),
|
||||
# 'timestamp': unified_timestamp(info.get('published')),
|
||||
'series': info.get('show_title'),
|
||||
'season_number': int_or_none(info.get('season_number')),
|
||||
'episode_number': int_or_none(info.get('episode_number')),
|
||||
|
@ -1,13 +1,26 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..aes import aes_cbc_decrypt
|
||||
from ..compat import (
|
||||
compat_ord,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
ExtractorError,
|
||||
intlist_to_bytes,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
)
|
||||
|
||||
|
||||
class RTL2IE(InfoExtractor):
|
||||
IE_NAME = 'rtl2'
|
||||
_VALID_URL = r'http?://(?:www\.)?rtl2\.de/[^?#]*?/(?P<id>[^?#/]*?)(?:$|/(?:$|[?#]))'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
|
||||
@ -98,3 +111,98 @@ class RTL2IE(InfoExtractor):
|
||||
'duration': int_or_none(video_info.get('duration')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class RTL2YouBaseIE(InfoExtractor):
    # Root of the backend API; subclasses append their endpoint paths to it.
    _BACKWERK_BASE_URL = 'https://p-you-backwerk.rtl2apps.de/'


class RTL2YouIE(RTL2YouBaseIE):
    """Extractor for single videos on you.rtl2.de.

    Stream information and video metadata are fetched from the backend
    API; the HLS manifest URL is delivered AES-CBC encrypted and is
    decrypted with ``_AES_KEY``.
    """
    IE_NAME = 'rtl2:you'
    # 'https?' accepts both http:// and https://.  The earlier 'http?'
    # made the "p" optional instead, so https URLs never matched.
    _VALID_URL = r'https?://you\.rtl2\.de/(?:video/\d+/|youplayer/index\.html\?.*?\bvid=)(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://you.rtl2.de/video/3002/15740/MJUNIK%20%E2%80%93%20Home%20of%20YOU/307-hirn-wo-bist-du',
        'info_dict': {
            'id': '15740',
            'ext': 'mp4',
            'title': 'MJUNIK – Home of YOU - #307 Hirn, wo bist du?!',
            'description': 'md5:ddaa95c61b372b12b66e115b2772fe01',
            'age_limit': 12,
        },
    }, {
        'url': 'http://you.rtl2.de/youplayer/index.html?vid=15712',
        'only_matching': True,
    }]
    _AES_KEY = b'\xe9W\xe4.<*\xb8\x1a\xd2\xb6\x92\xf3C\xd3\xefL\x1b\x03*\xbbbH\xc0\x03\xffo\xc2\xf2(\xaa\xaa!'
    _GEO_COUNTRIES = ['DE']

    def _real_extract(self, url):
        video_id = self._match_id(url)

        stream_data = self._download_json(
            self._BACKWERK_BASE_URL + 'stream/video/' + video_id, video_id)

        # 'streamUrl' decodes (base64) to "<base64 ciphertext>:<base64 IV>".
        data, iv = base64.b64decode(stream_data['streamUrl']).decode().split(':')
        stream_url = intlist_to_bytes(aes_cbc_decrypt(
            bytes_to_intlist(base64.b64decode(data)),
            bytes_to_intlist(self._AES_KEY),
            bytes_to_intlist(base64.b64decode(iv))
        ))
        # The backend signals a missing video inside the decrypted payload.
        if b'rtl2_you_video_not_found' in stream_url:
            raise ExtractorError('video not found', expected=True)

        # The last decrypted byte gives the number of trailing padding
        # bytes to strip (PKCS#7-style padding, as far as this code shows).
        formats = self._extract_m3u8_formats(
            stream_url[:-compat_ord(stream_url[-1])].decode(),
            video_id, 'mp4', 'm3u8_native')
        self._sort_formats(formats)

        video_data = self._download_json(
            self._BACKWERK_BASE_URL + 'video/' + video_id, video_id)

        # Prefix the episode title with the series name unless they coincide.
        series = video_data.get('formatTitle')
        title = episode = video_data.get('title') or series
        if series and series != title:
            title = '%s - %s' % (series, title)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': strip_or_none(video_data.get('description')),
            'thumbnail': video_data.get('image'),
            # NOTE(review): 1000 is presumably a ms -> s scale factor; confirm
            # against int_or_none's signature.
            'duration': int_or_none(stream_data.get('duration') or video_data.get('duration'), 1000),
            'series': series,
            'episode': episode,
            'age_limit': int_or_none(video_data.get('minimumAge')),
        }
|
||||
|
||||
|
||||
class RTL2YouSeriesIE(RTL2YouBaseIE):
    """Playlist extractor for a whole series ("format") on you.rtl2.de.

    Every video listed by the backend for the series becomes a
    ``url_result`` entry handled by the ``RTL2You`` extractor.
    """
    IE_NAME = 'rtl2:you:series'
    # 'https?' accepts both http:// and https://.  The earlier 'http?'
    # made the "p" optional instead, so https URLs never matched.
    _VALID_URL = r'https?://you\.rtl2\.de/videos/(?P<id>\d+)'
    _TEST = {
        'url': 'http://you.rtl2.de/videos/115/dragon-ball',
        'info_dict': {
            'id': '115',
        },
        'playlist_mincount': 5,
    }

    def _real_extract(self, url):
        series_id = self._match_id(url)
        # A huge 'limit' asks the backend for the complete list at once.
        stream_data = self._download_json(
            self._BACKWERK_BASE_URL + 'videos',
            series_id, query={
                'formatId': series_id,
                'limit': 1000000000,
            })

        entries = []
        for video in stream_data.get('videos', []):
            # Check the raw value before stringifying it: compat_str(None)
            # would yield the truthy 'None', and a missing key would abort
            # the whole playlist instead of skipping one entry.
            raw_id = video.get('videoId')
            if raw_id is None:
                continue
            video_id = compat_str(raw_id)
            if not video_id:
                continue
            entries.append(self.url_result(
                'http://you.rtl2.de/video/%s/%s' % (series_id, video_id),
                'RTL2You', video_id))
        return self.playlist_result(entries, series_id)
|
||||
|
@ -26,7 +26,7 @@ class RudoIE(InfoExtractor):
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _extract_url(self, webpage):
|
||||
def _extract_url(cls, webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
|
||||
webpage)
|
||||
|
@ -89,7 +89,7 @@ class SenateISVPIE(InfoExtractor):
|
||||
@staticmethod
|
||||
def _search_iframe_url(webpage):
|
||||
mobj = re.search(
|
||||
r"<iframe[^>]+src=['\"](?P<url>http://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]",
|
||||
r"<iframe[^>]+src=['\"](?P<url>https?://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]",
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
@ -65,7 +65,7 @@ class StreamableIE(InfoExtractor):
|
||||
# to return video info like the title properly sometimes, and doesn't
|
||||
# include info like the video duration
|
||||
video = self._download_json(
|
||||
'https://streamable.com/ajax/videos/%s' % video_id, video_id)
|
||||
'https://ajax.streamable.com/videos/%s' % video_id, video_id)
|
||||
|
||||
# Format IDs:
|
||||
# 0 The video is being uploaded
|
||||
|
64
youtube_dl/extractor/streamango.py
Normal file
64
youtube_dl/extractor/streamango.py
Normal file
@ -0,0 +1,64 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
)
|
||||
|
||||
|
||||
class StreamangoIE(InfoExtractor):
    """Extractor for streamango.com file ("/f/") and embed pages.

    Formats are read from the JavaScript object literals in the page
    that contain a ``src`` property.
    """
    _VALID_URL = r'https?://(?:www\.)?streamango\.com/(?:f|embed)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4',
        'md5': 'e992787515a182f55e38fc97588d802a',
        'info_dict': {
            'id': 'clapasobsptpkdfe',
            'ext': 'mp4',
            'title': '20170315_150006.mp4',
        }
    }, {
        'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)

        formats = []
        for source_js in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
            # Each match is a JS object literal; unparsable ones are
            # skipped (fatal=False) rather than aborting the extraction.
            source = self._parse_json(
                source_js, video_id, transform_source=js_to_json, fatal=False)
            src = source.get('src') if source else None
            if not src:
                continue

            ext = determine_ext(src, default_ext=None)
            is_dash = source.get('type') == 'application/dash+xml' or ext == 'mpd'
            if is_dash:
                formats.extend(self._extract_mpd_formats(
                    src, video_id, mpd_id='dash', fatal=False))
                continue

            formats.append({
                'url': src,
                'ext': ext or 'mp4',
                'width': int_or_none(source.get('width')),
                'height': int_or_none(source.get('height')),
                'tbr': int_or_none(source.get('bitrate')),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            # NOTE(review): this is the page URL, not a media URL;
            # presumably it is superseded by 'formats' downstream - confirm.
            'url': url,
            'title': title,
            'formats': formats,
        }
|
@ -44,6 +44,10 @@ class TelecincoIE(MiTeleBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# ooyala video
|
||||
'url': 'http://www.cuatro.com/chesterinlove/a-carta/chester-chester_in_love-chester_edu_2_2331030022.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
32
youtube_dl/extractor/thesun.py
Normal file
32
youtube_dl/extractor/thesun.py
Normal file
@ -0,0 +1,32 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
|
||||
|
||||
class TheSunIE(InfoExtractor):
    """Extractor for thesun.co.uk articles; yields the embedded Ooyala players as a playlist."""

    _VALID_URL = r'https://(?:www\.)?thesun\.co\.uk/[^/]+/(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.thesun.co.uk/tvandshowbiz/2261604/orlando-bloom-and-katy-perry-post-adorable-instagram-video-together-celebrating-thanksgiving-after-split-rumours/',
        'info_dict': {
            'id': '2261604',
            'title': 'md5:cba22f48bad9218b64d5bbe0e16afddf',
        },
        'playlist_count': 2,
    }

    def _real_extract(self, url):
        """Return a playlist with one Ooyala entry per player id found in the article."""
        article_id = self._match_id(url)
        webpage = self._download_webpage(url, article_id)

        # Player ids appear either as an id="thesun-ooyala-player-<ID>"
        # attribute or as a data-content-id="<ID>" attribute.
        ooyala_ids = re.findall(
            r'<[^>]+\b(?:id\s*=\s*"thesun-ooyala-player-|data-content-id\s*=\s*")([^"]+)',
            webpage)
        entries = [
            OoyalaIE._build_url_result(ooyala_id) for ooyala_id in ooyala_ids]

        # The title is optional: a missing og:title must not abort extraction.
        playlist_title = self._og_search_title(webpage, fatal=False)
        return self.playlist_result(entries, article_id, playlist_title)
|
81
youtube_dl/extractor/toongoggles.py
Normal file
81
youtube_dl/extractor/toongoggles.py
Normal file
@ -0,0 +1,81 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class ToonGogglesIE(InfoExtractor):
    """Extractor for toongoggles.com single episodes and whole shows."""

    _VALID_URL = r'https?://(?:www\.)?toongoggles\.com/shows/(?P<show_id>\d+)(?:/[^/]+/episodes/(?P<episode_id>\d+))?'
    _TESTS = [{
        'url': 'http://www.toongoggles.com/shows/217143/bernard-season-2/episodes/217147/football',
        'md5': '18289fc2b951eff6b953a9d8f01e6831',
        'info_dict': {
            'id': '217147',
            'ext': 'mp4',
            'title': 'Football',
            'uploader_id': '1',
            'description': 'Bernard decides to play football in order to be better than Lloyd and tries to beat him no matter how, he even cheats.',
            'upload_date': '20160718',
            'timestamp': 1468879330,
        }
    }, {
        'url': 'http://www.toongoggles.com/shows/227759/om-nom-stories-around-the-world',
        'info_dict': {
            'id': '227759',
            'title': 'Om Nom Stories Around The World',
        },
        'playlist_mincount': 11,
    }]

    def _call_api(self, action, page_id, query):
        """Query ``api.toongoggles.com/<action>`` and return the decoded JSON.

        ``query`` holds the action-specific parameters; the fixed client
        parameters below are merged on top of it.  The caller's dict is
        left untouched.
        """
        # Copy first so the fixed parameters do not leak into the dict
        # owned by the caller.
        params = dict(query)
        params.update({
            'for_ng': 1,
            'for_web': 1,
            'show_meta': 1,
            'version': 7.0,
        })
        return self._download_json('http://api.toongoggles.com/' + action, page_id, query=params)

    def _parse_episode_data(self, episode_data):
        """Build a ``url_transparent`` result delegating playback to the Kaltura extractor."""
        # episode_name, episode_id and entry_id are mandatory (KeyError if
        # absent); everything else is optional metadata.
        title = episode_data['episode_name']

        return {
            '_type': 'url_transparent',
            'id': episode_data['episode_id'],
            'title': title,
            'url': 'kaltura:513551:' + episode_data['entry_id'],
            'thumbnail': episode_data.get('thumbnail_url'),
            'description': episode_data.get('description'),
            'duration': parse_duration(episode_data.get('hms')),
            'series': episode_data.get('show_name'),
            'season_number': int_or_none(episode_data.get('season_num')),
            'episode_id': episode_data.get('episode_id'),
            'episode': title,
            'episode_number': int_or_none(episode_data.get('episode_num')),
            'categories': episode_data.get('categories'),
            'ie_key': 'Kaltura',
        }

    def _real_extract(self, url):
        """Return a single episode when the URL names one, otherwise the show's playlist."""
        show_id, episode_id = re.match(self._VALID_URL, url).groups()
        if episode_id:
            episode_data = self._call_api('search', episode_id, {
                'filter': 'episode',
                'id': episode_id,
            })['objects'][0]
            return self._parse_episode_data(episode_data)

        # No episode part in the URL: list every episode of the show.
        show_data = self._call_api('getepisodesbyshow', show_id, {
            'max': 1000000000,
            'showid': show_id,
        })
        entries = [
            self._parse_episode_data(episode_data)
            for episode_data in show_data.get('objects', [])]
        return self.playlist_result(entries, show_id, show_data.get('show_name'))
|
62
youtube_dl/extractor/tv2hu.py
Normal file
62
youtube_dl/extractor/tv2hu.py
Normal file
@ -0,0 +1,62 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class TV2HuIE(InfoExtractor):
    """Extractor for video pages on tv2.hu."""

    IE_NAME = 'tv2.hu'
    _VALID_URL = r'https?://(?:www\.)?tv2\.hu/(?:[^/]+/)+(?P<id>\d+)_[^/?#]+?\.html'
    _TESTS = [{
        'url': 'http://tv2.hu/ezek_megorultek/217679_ezek-megorultek---1.-adas-1.-resz.html',
        'md5': '585e58e2e090f34603804bb2c48e98d8',
        'info_dict': {
            'id': '217679',
            'ext': 'mp4',
            'title': 'Ezek megőrültek! - 1. adás 1. rész',
            'upload_date': '20160826',
            'thumbnail': r're:^https?://.*\.jpg$'
        }
    }, {
        'url': 'http://tv2.hu/ezek_megorultek/teljes_adasok/217677_ezek-megorultek---1.-adas-2.-resz.html',
        'only_matching': True
    }, {
        'url': 'http://tv2.hu/musoraink/aktiv/aktiv_teljes_adas/217963_aktiv-teljes-adas---2016.08.30..html',
        'only_matching': True
    }]

    def _real_extract(self, url):
        """Fetch the page, follow its ``jsonUrl`` and build formats from both bitrate sections."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        json_url = self._search_regex(
            r'jsonUrl\s*=\s*"([^"]+)"', webpage, 'json url')
        json_data = self._download_json(json_url, video_id)

        formats = []
        for b in ('bitrates', 'backupBitrates'):
            # Either section may be missing or an explicit null in the
            # JSON; ``or {}`` covers both, where a ``.get`` default would
            # only cover the missing key.
            bitrates = json_data.get(b) or {}
            m3u8_url = bitrates.get('hls')
            if m3u8_url:
                formats.extend(self._extract_wowza_formats(
                    m3u8_url, video_id, skip_protocols=['rtmp', 'rtsp']))

            for mp4_url in bitrates.get('mp4') or []:
                # The vertical resolution is taken from a ".<N>p.mp4"
                # suffix in the file name when present.
                height = int_or_none(self._search_regex(
                    r'\.(\d+)p\.mp4', mp4_url, 'height', default=None))
                formats.append({
                    'format_id': 'http' + ('-%d' % height if height else ''),
                    'url': mp4_url,
                    'height': height,
                    # Width is derived from the height as a 16:9 frame.
                    'width': int_or_none(height / 9.0 * 16.0 if height else None),
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._og_search_title(webpage).strip(),
            'thumbnail': self._og_search_thumbnail(webpage),
            # The date is read from a /vod/YYYYMMDD/ segment of the JSON URL.
            'upload_date': self._search_regex(
                r'/vod/(\d{8})/', json_url, 'upload_date', default=None),
            'formats': formats,
        }
|
79
youtube_dl/extractor/tv5mondeplus.py
Normal file
79
youtube_dl/extractor/tv5mondeplus.py
Normal file
@ -0,0 +1,79 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class TV5MondePlusIE(InfoExtractor):
    """Extractor for tv5mondeplus.com video pages."""

    IE_DESC = 'TV5MONDE+'
    _VALID_URL = r'https?://(?:www\.)?tv5mondeplus\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.tv5mondeplus.com/toutes-les-videos/documentaire/tdah-mon-amour-tele-quebec-tdah-mon-amour-ep001-enfants',
        'md5': '12130fc199f020673138a83466542ec6',
        'info_dict': {
            'id': 'tdah-mon-amour-tele-quebec-tdah-mon-amour-ep001-enfants',
            'ext': 'mp4',
            'title': 'Tdah, mon amour - Enfants',
            'description': 'md5:230e3aca23115afcf8006d1bece6df74',
            'upload_date': '20170401',
            'timestamp': 1491022860,
        }
    }
    _GEO_BYPASS = False

    def _real_extract(self, url):
        """Read the player loader element of the page and build formats from its file list."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The site serves a French notice instead of the player when the
        # programme is blocked for the visitor's region.
        geo_notice = ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<"
        if geo_notice in webpage:
            self.raise_geo_restricted(countries=['FR'])

        series = get_element_by_class('video-detail__title', webpage)
        episode = get_element_by_class(
            'video-detail__subtitle', webpage) or series
        # Prefix the series name only when it adds information.
        title = episode
        if series and series != episode:
            title = '%s - %s' % (series, episode)

        loader_tag = self._search_regex(
            r'(<[^>]+class="video_player_loader"[^>]+>)',
            webpage, 'video player loader')
        vpl_data = extract_attributes(loader_tag)

        broadcast = self._parse_json(vpl_data['data-broadcast'], display_id)
        formats = []
        for video_file in broadcast.get('files', []):
            v_url = video_file.get('url')
            if not v_url:
                continue
            video_format = video_file.get('format') or determine_ext(v_url)
            if video_format != 'm3u8':
                formats.append({
                    'url': v_url,
                    'format_id': video_format,
                })
                continue
            formats.extend(self._extract_m3u8_formats(
                v_url, display_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))
        self._sort_formats(formats)

        description = clean_html(get_element_by_class('video-detail__description', webpage))
        thumbnail = vpl_data.get('data-image')
        # Prefer the loader attribute; consult the page meta tag only
        # when that yields nothing usable.
        duration = int_or_none(vpl_data.get('data-duration')) or parse_duration(self._html_search_meta('duration', webpage))
        timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage))

        return {
            'id': display_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
            'episode': episode,
            'series': series,
        }
|
@ -31,9 +31,8 @@ class TVNoeIE(InfoExtractor):
|
||||
r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe URL')
|
||||
|
||||
ifs_page = self._download_webpage(iframe_url, video_id)
|
||||
jwplayer_data = self._parse_json(
|
||||
self._find_jwplayer_data(ifs_page),
|
||||
video_id, transform_source=js_to_json)
|
||||
jwplayer_data = self._find_jwplayer_data(
|
||||
ifs_page, video_id, transform_source=js_to_json)
|
||||
info_dict = self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, require_title=False, base_url=iframe_url)
|
||||
|
||||
|
@ -225,7 +225,11 @@ class TVPlayIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
geo_country = self._search_regex(
|
||||
r'https?://[^/]+\.([a-z]{2})', url,
|
||||
'geo country', default=None)
|
||||
if geo_country:
|
||||
self._initialize_geo_bypass([geo_country.upper()])
|
||||
video = self._download_json(
|
||||
'http://playapi.mtgx.tv/v3/videos/%s' % video_id, video_id, 'Downloading video JSON')
|
||||
|
||||
|
@ -212,12 +212,15 @@ class UdemyIE(InfoExtractor):
|
||||
thumbnail = asset.get('thumbnail_url') or asset.get('thumbnailUrl')
|
||||
duration = float_or_none(asset.get('data', {}).get('duration'))
|
||||
|
||||
subtitles = {}
|
||||
automatic_captions = {}
|
||||
|
||||
formats = []
|
||||
|
||||
def extract_output_format(src):
|
||||
def extract_output_format(src, f_id):
|
||||
return {
|
||||
'url': src['url'],
|
||||
'format_id': '%sp' % (src.get('height') or format_id),
|
||||
'format_id': '%sp' % (src.get('height') or f_id),
|
||||
'width': int_or_none(src.get('width')),
|
||||
'height': int_or_none(src.get('height')),
|
||||
'vbr': int_or_none(src.get('video_bitrate_in_kbps')),
|
||||
@ -237,30 +240,33 @@ class UdemyIE(InfoExtractor):
|
||||
def add_output_format_meta(f, key):
|
||||
output = outputs.get(key)
|
||||
if isinstance(output, dict):
|
||||
output_format = extract_output_format(output)
|
||||
output_format = extract_output_format(output, key)
|
||||
output_format.update(f)
|
||||
return output_format
|
||||
return f
|
||||
|
||||
def extract_formats(source_list):
|
||||
if not isinstance(source_list, list):
|
||||
return
|
||||
for source in source_list:
|
||||
video_url = source.get('file') or source.get('src')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
continue
|
||||
format_id = source.get('label')
|
||||
f = {
|
||||
'url': video_url,
|
||||
'format_id': '%sp' % format_id,
|
||||
'height': int_or_none(format_id),
|
||||
}
|
||||
if format_id:
|
||||
# Some videos contain additional metadata (e.g.
|
||||
# https://www.udemy.com/ios9-swift/learn/#/lecture/3383208)
|
||||
f = add_output_format_meta(f, format_id)
|
||||
formats.append(f)
|
||||
|
||||
download_urls = asset.get('download_urls')
|
||||
if isinstance(download_urls, dict):
|
||||
video = download_urls.get('Video')
|
||||
if isinstance(video, list):
|
||||
for format_ in video:
|
||||
video_url = format_.get('file')
|
||||
if not video_url:
|
||||
continue
|
||||
format_id = format_.get('label')
|
||||
f = {
|
||||
'url': format_['file'],
|
||||
'format_id': '%sp' % format_id,
|
||||
'height': int_or_none(format_id),
|
||||
}
|
||||
if format_id:
|
||||
# Some videos contain additional metadata (e.g.
|
||||
# https://www.udemy.com/ios9-swift/learn/#/lecture/3383208)
|
||||
f = add_output_format_meta(f, format_id)
|
||||
formats.append(f)
|
||||
extract_formats(download_urls.get('Video'))
|
||||
|
||||
view_html = lecture.get('view_html')
|
||||
if view_html:
|
||||
@ -294,6 +300,35 @@ class UdemyIE(InfoExtractor):
|
||||
'height': height,
|
||||
}, res))
|
||||
|
||||
# react rendition since 2017.04.15 (see
|
||||
# https://github.com/rg3/youtube-dl/issues/12744)
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'videojs-setup-data=(["\'])(?P<data>{.+?})\1', view_html,
|
||||
'setup data', default='{}', group='data'), video_id,
|
||||
transform_source=unescapeHTML, fatal=False)
|
||||
if data and isinstance(data, dict):
|
||||
extract_formats(data.get('sources'))
|
||||
if not duration:
|
||||
duration = int_or_none(data.get('duration'))
|
||||
tracks = data.get('tracks')
|
||||
if isinstance(tracks, list):
|
||||
for track in tracks:
|
||||
if not isinstance(track, dict):
|
||||
continue
|
||||
if track.get('kind') != 'captions':
|
||||
continue
|
||||
src = track.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
continue
|
||||
lang = track.get('language') or track.get(
|
||||
'srclang') or track.get('label')
|
||||
sub_dict = automatic_captions if track.get(
|
||||
'autogenerated') is True else subtitles
|
||||
sub_dict.setdefault(lang, []).append({
|
||||
'url': src,
|
||||
})
|
||||
|
||||
self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
|
||||
|
||||
return {
|
||||
@ -302,7 +337,9 @@ class UdemyIE(InfoExtractor):
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'automatic_captions': automatic_captions,
|
||||
}
|
||||
|
||||
|
||||
|
@ -9,6 +9,7 @@ from .common import InfoExtractor
|
||||
|
||||
class VierIE(InfoExtractor):
|
||||
IE_NAME = 'vier'
|
||||
IE_DESC = 'vier.be and vijf.be'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
|
||||
|
@ -68,7 +68,7 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
|
||||
type_ = source.get('type')
|
||||
ext = determine_ext(file_)
|
||||
format_id = source.get('label') or ext
|
||||
if all(v == 'm3u8' or v == 'hls' for v in (type_, ext)):
|
||||
if all(v in ('m3u8', 'hls') for v in (type_, ext)):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
file_, video_id, 'mp4', m3u8_id='hls'))
|
||||
else:
|
||||
|
@ -44,7 +44,7 @@ class ViuBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class ViuIE(ViuBaseIE):
|
||||
_VALID_URL = r'(?:viu:|https?://www\.viu\.com/[a-z]{2}/media/)(?P<id>\d+)'
|
||||
_VALID_URL = r'(?:viu:|https?://[^/]+\.viu\.com/[a-z]{2}/media/)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.viu.com/en/media/1116705532?containerId=playlist-22168059',
|
||||
'info_dict': {
|
||||
@ -69,6 +69,9 @@ class ViuIE(ViuBaseIE):
|
||||
'skip_download': 'm3u8 download',
|
||||
},
|
||||
'skip': 'Geo-restricted to Indonesia',
|
||||
}, {
|
||||
'url': 'https://india.viu.com/en/media/1126286865',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -432,8 +432,7 @@ class VKIE(VKBaseIE):
|
||||
})
|
||||
elif format_id == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8' if is_live else 'm3u8_native',
|
||||
format_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=format_id, fatal=False, live=is_live))
|
||||
elif format_id == 'rtmp':
|
||||
formats.append({
|
||||
|
@ -70,9 +70,9 @@ class VLiveIE(InfoExtractor):
|
||||
status, long_video_id, key = params[2], params[5], params[6]
|
||||
status = remove_start(status, 'PRODUCT_')
|
||||
|
||||
if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR':
|
||||
if status in ('LIVE_ON_AIR', 'BIG_EVENT_ON_AIR'):
|
||||
return self._live(video_id, webpage)
|
||||
elif status == 'VOD_ON_AIR' or status == 'BIG_EVENT_INTRO':
|
||||
elif status in ('VOD_ON_AIR', 'BIG_EVENT_INTRO'):
|
||||
if long_video_id and key:
|
||||
return self._replay(video_id, webpage, long_video_id, key)
|
||||
else:
|
||||
|
@ -10,6 +10,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class VRTIE(InfoExtractor):
|
||||
IE_DESC = 'deredactie.be, sporza.be, cobra.be and cobra.canvas.be'
|
||||
_VALID_URL = r'https?://(?:deredactie|sporza|cobra(?:\.canvas)?)\.be/cm/(?:[^/]+/)+(?P<id>[^/]+)/*'
|
||||
_TESTS = [
|
||||
# deredactie.be
|
||||
|
191
youtube_dl/extractor/vrv.py
Normal file
191
youtube_dl/extractor/vrv.py
Normal file
@ -0,0 +1,191 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
import json
import hashlib
import hmac
import random
import string
import time

from .common import InfoExtractor
from ..compat import (
    compat_urllib_parse,
    compat_urllib_parse_urlencode,
    compat_urlparse,
)
from ..utils import (
    float_or_none,
    int_or_none,
)
|
||||
|
||||
|
||||
class VRVBaseIE(InfoExtractor):
    """Shared helpers for the VRV extractors: signed core API calls and CMS lookups."""

    # Filled in lazily by _set_api_params() / _call_cms().
    _API_DOMAIN = None
    _API_PARAMS = {}
    _CMS_SIGNING = {}

    def _call_api(self, path, video_id, note, data=None):
        """Call ``<api domain>/core/<path>`` with an OAuth 1.0 HMAC-SHA1 signature.

        The request is a JSON POST when ``data`` is given and a GET
        otherwise.  Returns the decoded JSON response.  Requires
        _set_api_params() to have been called first.
        """
        base_url = self._API_DOMAIN + '/core/' + path
        encoded_query = compat_urllib_parse_urlencode({
            'oauth_consumer_key': self._API_PARAMS['oAuthKey'],
            'oauth_nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
            'oauth_signature_method': 'HMAC-SHA1',
            'oauth_timestamp': int(time.time()),
            'oauth_version': '1.0',
        })
        headers = self.geo_verification_headers()
        if data:
            data = json.dumps(data).encode()
            headers['Content-Type'] = 'application/json'
        method = 'POST' if data else 'GET'
        # quote() lives in urllib on Python 2 and urllib.parse on Python 3;
        # compat_urllib_parse resolves to the right module on both, whereas
        # the Python 2 urlparse module has no quote().
        base_string = '&'.join([
            method,
            compat_urllib_parse.quote(base_url, ''),
            compat_urllib_parse.quote(encoded_query, '')])
        # The signing key is the consumer secret followed by '&'.
        oauth_signature = base64.b64encode(hmac.new(
            (self._API_PARAMS['oAuthSecret'] + '&').encode('ascii'),
            base_string.encode(), hashlib.sha1).digest()).decode()
        encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '')
        return self._download_json(
            '?'.join([base_url, encoded_query]), video_id,
            note='Downloading %s JSON metadata' % note, headers=headers, data=data)

    def _call_cms(self, path, video_id, note):
        """Fetch a CMS resource at ``path``, passing the cached CMS signing parameters as query."""
        # The signing parameters are requested once and then reused.
        if not self._CMS_SIGNING:
            self._CMS_SIGNING = self._call_api('index', video_id, 'CMS Signing')['cms_signing']
        return self._download_json(
            self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING,
            note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers())

    def _set_api_params(self, webpage, video_id):
        """Read ``cxApiParams`` from the page's ``window.__APP_CONFIG__`` (only on first use)."""
        if not self._API_PARAMS:
            self._API_PARAMS = self._parse_json(self._search_regex(
                r'window\.__APP_CONFIG__\s*=\s*({.+?})</script>',
                webpage, 'api config'), video_id)['cxApiParams']
            self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co')

    def _get_cms_resource(self, resource_key, video_id):
        """Resolve a ``cms:/...`` resource key to the CMS path to request it from."""
        return self._call_api(
            'cms_resource', video_id, 'resource path', data={
                'resource_key': resource_key,
            })['__links__']['cms_resource']['href']
|
||||
|
||||
|
||||
class VRVIE(VRVBaseIE):
    """Extractor for single vrv.co ``/watch/<ID>`` pages."""

    IE_NAME = 'vrv'
    _VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P<id>[A-Z0-9]+)'
    _TEST = {
        'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT',
        'info_dict': {
            'id': 'GR9PNZ396',
            'ext': 'mp4',
            'title': 'BOSTON: WHERE THE PAST IS THE PRESENT',
            'description': 'md5:4ec8844ac262ca2df9e67c0983c6b83f',
            'uploader_id': 'seeso',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Use the data embedded in the page when present, otherwise query the API."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            url, video_id,
            headers=self.geo_verification_headers())
        media_resource = self._parse_json(self._search_regex(
            r'window\.__INITIAL_STATE__\s*=\s*({.+?})</script>',
            webpage, 'initial state'), video_id).get('watch', {}).get('mediaResource') or {}

        # Episode metadata: fall back to the CMS when the page state
        # does not carry it.
        video_data = media_resource.get('json')
        if not video_data:
            self._set_api_params(webpage, video_id)
            episode_path = self._get_cms_resource(
                'cms:/episodes/' + video_id, video_id)
            video_data = self._call_cms(episode_path, video_id, 'video')
        title = video_data['title']

        # Stream list: same embedded-first, CMS-second strategy.
        streams_json = media_resource.get('streams', {}).get('json', {})
        if not streams_json:
            self._set_api_params(webpage, video_id)
            streams_path = video_data['__links__']['streams']['href']
            streams_json = self._call_cms(streams_path, video_id, 'streams')

        audio_locale = streams_json.get('audio_locale')
        formats = []
        for stream_id, stream in streams_json.get('streams', {}).get('adaptive_hls', {}).items():
            stream_url = stream.get('url')
            if not stream_url:
                continue
            # An empty stream key is labelled with the audio locale instead.
            stream_id = stream_id or audio_locale
            m3u8_formats = self._extract_m3u8_formats(
                stream_url, video_id, 'mp4', m3u8_id=stream_id,
                note='Downloading %s m3u8 information' % stream_id,
                fatal=False)
            if audio_locale:
                for f in m3u8_formats:
                    f['language'] = audio_locale
            formats.extend(m3u8_formats)
        self._sort_formats(formats)

        thumbnails = []
        for thumbnail in video_data.get('images', {}).get('thumbnails', []):
            thumbnail_url = thumbnail.get('source')
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(thumbnail.get('width')),
                'height': int_or_none(thumbnail.get('height')),
            })

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': video_data.get('description'),
            # duration_ms is scaled down by 1000.
            'duration': float_or_none(video_data.get('duration_ms'), 1000),
            'uploader_id': video_data.get('channel_id'),
            'series': video_data.get('series_title'),
            'season': video_data.get('season_title'),
            'season_number': int_or_none(video_data.get('season_number')),
            'season_id': video_data.get('season_id'),
            'episode': title,
            'episode_number': int_or_none(video_data.get('episode_number')),
            'episode_id': video_data.get('production_episode_id'),
        }
|
||||
|
||||
|
||||
class VRVSeriesIE(VRVBaseIE):
    """Extractor for vrv.co ``/series/<ID>`` pages; yields every episode as a playlist entry."""

    IE_NAME = 'vrv:series'
    _VALID_URL = r'https?://(?:www\.)?vrv\.co/series/(?P<id>[A-Z0-9]+)'
    _TEST = {
        'url': 'https://vrv.co/series/G68VXG3G6/The-Perfect-Insider',
        'info_dict': {
            'id': 'G68VXG3G6',
        },
        'playlist_mincount': 11,
    }

    def _real_extract(self, url):
        """Walk seasons, then each season's episodes, collecting ``/watch/`` URLs."""
        series_id = self._match_id(url)
        webpage = self._download_webpage(
            url, series_id,
            headers=self.geo_verification_headers())

        self._set_api_params(webpage, series_id)
        seasons_path = self._get_cms_resource(
            'cms:/seasons?series_id=' + series_id, series_id)
        seasons_data = self._call_cms(seasons_path, series_id, 'seasons')

        entries = []
        for season in seasons_data.get('items', []):
            # Each season links to its own episode listing.
            episodes_path = season['__links__']['season/episodes']['href']
            episodes = self._call_cms(episodes_path, series_id, 'episodes')
            entries.extend(
                self.url_result(
                    'https://vrv.co/watch/' + episode['id'],
                    'VRV', episode['id'], episode.get('title'))
                for episode in episodes.get('items', []))

        return self.playlist_result(entries, series_id)
|
38
youtube_dl/extractor/vshare.py
Normal file
38
youtube_dl/extractor/vshare.py
Normal file
@ -0,0 +1,38 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class VShareIE(InfoExtractor):
    """Extractor for vshare.io download (``/d/``) and viewer (``/v/``) URLs."""

    _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://vshare.io/d/0f64ce6',
        'md5': '16d7b8fef58846db47419199ff1ab3e7',
        'info_dict': {
            'id': '0f64ce6',
            'title': 'vl14062007715967',
            'ext': 'mp4',
        }
    }, {
        'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always load the ``/d/<id>`` page and read title and file link from it."""
        video_id = self._match_id(url)

        # Both URL flavours are normalised to the /d/ page.
        download_page_url = 'https://vshare.io/d/%s' % video_id
        webpage = self._download_webpage(download_page_url, video_id)

        title = self._html_search_regex(
            r'(?s)<div id="root-container">(.+?)<br/>', webpage, 'title')
        # The media URL is the href of the anchor whose text starts with
        # "Click here" / "click here".
        video_url = self._search_regex(
            r'<a[^>]+href=(["\'])(?P<url>(?:https?:)?//.+?)\1[^>]*>[Cc]lick\s+here',
            webpage, 'video url', group='url')

        info = {
            'id': video_id,
            'title': title,
            'url': video_url,
        }
        return info
|
@ -1,12 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class WorldStarHipHopIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?v=(?P<id>.*)'
|
||||
_VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?.*?\bv=(?P<id>[^&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO',
|
||||
'md5': '9d04de741161603bf7071bbf4e883186',
|
||||
@ -17,48 +15,26 @@ class WorldStarHipHopIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'http://m.worldstarhiphop.com/android/video.php?v=wshh6a7q1ny0G34ZwuIO',
|
||||
'md5': 'dc1c76c83ecc4190bb1eb143899b87d3',
|
||||
'info_dict': {
|
||||
'id': 'wshh6a7q1ny0G34ZwuIO',
|
||||
'ext': 'mp4',
|
||||
'title': 'KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!'
|
||||
}
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
m_vevo_id = re.search(r'videoId=(.*?)&?', webpage)
|
||||
if m_vevo_id is not None:
|
||||
return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
[r'so\.addVariable\("file","(.*?)"\)',
|
||||
r'<div class="artlist">\s*<a[^>]+href="([^"]+)">'],
|
||||
webpage, 'video URL')
|
||||
if not entries:
|
||||
return self.url_result(url, 'Generic')
|
||||
|
||||
if 'youtube' in video_url:
|
||||
return self.url_result(video_url, ie='Youtube')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
title = self._html_search_regex(
|
||||
[r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',
|
||||
r'<span[^>]+class="tc-sp-pinned-title">(.*)</span>'],
|
||||
webpage, 'title')
|
||||
|
||||
# Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
|
||||
thumbnail = self._html_search_regex(
|
||||
r'rel="image_src" href="(.*)" />', webpage, 'thumbnail',
|
||||
default=None)
|
||||
if not thumbnail:
|
||||
_title = r'candytitles.*>(.*)</span>'
|
||||
mobj = re.search(_title, webpage)
|
||||
if mobj is not None:
|
||||
video_title = mobj.group(1)
|
||||
|
||||
return {
|
||||
info = entries[0]
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': video_title,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
'title': title,
|
||||
})
|
||||
return info
|
||||
|
@ -10,12 +10,14 @@ from ..utils import (
|
||||
|
||||
|
||||
class WSJIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:
|
||||
video-api\.wsj\.com/api-video/player/iframe\.html\?guid=|
|
||||
(?:www\.)?wsj\.com/video/[^/]+/
|
||||
)
|
||||
(?P<id>[a-zA-Z0-9-]+)'''
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
https?://video-api\.wsj\.com/api-video/player/iframe\.html\?.*?\bguid=|
|
||||
https?://(?:www\.)?wsj\.com/video/[^/]+/|
|
||||
wsj:
|
||||
)
|
||||
(?P<id>[a-fA-F0-9-]{36})
|
||||
'''
|
||||
IE_DESC = 'Wall Street Journal'
|
||||
_TESTS = [{
|
||||
'url': 'http://video-api.wsj.com/api-video/player/iframe.html?guid=1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
|
||||
@ -38,12 +40,17 @@ class WSJIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
api_url = (
|
||||
'http://video-api.wsj.com/api-video/find_all_videos.asp?'
|
||||
'type=guid&count=1&query=%s&fields=type,hls,videoMP4List,'
|
||||
'thumbnailList,author,description,name,duration,videoURL,'
|
||||
'titletag,formattedCreationDate,keywords,editor' % video_id)
|
||||
info = self._download_json(api_url, video_id)['items'][0]
|
||||
info = self._download_json(
|
||||
'http://video-api.wsj.com/api-video/find_all_videos.asp', video_id,
|
||||
query={
|
||||
'type': 'guid',
|
||||
'count': 1,
|
||||
'query': video_id,
|
||||
'fields': ','.join((
|
||||
'type', 'hls', 'videoMP4List', 'thumbnailList', 'author',
|
||||
'description', 'name', 'duration', 'videoURL', 'titletag',
|
||||
'formattedCreationDate', 'keywords', 'editor')),
|
||||
})['items'][0]
|
||||
title = info.get('name', info.get('titletag'))
|
||||
|
||||
formats = []
|
||||
@ -87,3 +94,24 @@ class WSJIE(InfoExtractor):
|
||||
'title': title,
|
||||
'categories': info.get('keywords'),
|
||||
}
|
||||
|
||||
|
||||
class WSJArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'(?i)https?://(?:www\.)?wsj\.com/articles/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.wsj.com/articles/dont-like-china-no-pandas-for-you-1490366939?',
|
||||
'info_dict': {
|
||||
'id': '4B13FA62-1D8C-45DB-8EA1-4105CB20B362',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20170221',
|
||||
'uploader_id': 'ralcaraz',
|
||||
'title': 'Bao Bao the Panda Leaves for China',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
article_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, article_id)
|
||||
video_id = self._search_regex(
|
||||
r'data-src=["\']([a-fA-F0-9-]{36})', webpage, 'video id')
|
||||
return self.url_result('wsj:%s' % video_id, WSJIE.ie_key(), video_id)
|
||||
|
@ -6,6 +6,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
decode_packed_codes,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
NO_DEFAULT,
|
||||
@ -26,6 +27,9 @@ class XFileShareIE(InfoExtractor):
|
||||
('vidto.me', 'Vidto'),
|
||||
('streamin.to', 'Streamin.To'),
|
||||
('xvidstage.com', 'XVIDSTAGE'),
|
||||
('vidabc.com', 'Vid ABC'),
|
||||
('vidbom.com', 'VidBom'),
|
||||
('vidlo.us', 'vidlo'),
|
||||
)
|
||||
|
||||
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
|
||||
@ -95,6 +99,16 @@ class XFileShareIE(InfoExtractor):
|
||||
# removed by administrator
|
||||
'url': 'http://xvidstage.com/amfy7atlkx25',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://vidabc.com/i8ybqscrphfv',
|
||||
'info_dict': {
|
||||
'id': 'i8ybqscrphfv',
|
||||
'ext': 'mp4',
|
||||
'title': 're:Beauty and the Beast 2017',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -133,31 +147,45 @@ class XFileShareIE(InfoExtractor):
|
||||
webpage, 'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or video_id).strip()
|
||||
|
||||
def extract_video_url(default=NO_DEFAULT):
|
||||
return self._search_regex(
|
||||
(r'file\s*:\s*(["\'])(?P<url>http.+?)\1,',
|
||||
r'file_link\s*=\s*(["\'])(?P<url>http.+?)\1',
|
||||
r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http.+?)\2\)',
|
||||
r'<embed[^>]+src=(["\'])(?P<url>http.+?)\1'),
|
||||
webpage, 'file url', default=default, group='url')
|
||||
def extract_formats(default=NO_DEFAULT):
|
||||
urls = []
|
||||
for regex in (
|
||||
r'file\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
|
||||
r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
|
||||
r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
|
||||
r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):
|
||||
for mobj in re.finditer(regex, webpage):
|
||||
video_url = mobj.group('url')
|
||||
if video_url not in urls:
|
||||
urls.append(video_url)
|
||||
formats = []
|
||||
for video_url in urls:
|
||||
if determine_ext(video_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||
fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': 'sd',
|
||||
})
|
||||
if not formats and default is not NO_DEFAULT:
|
||||
return default
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
video_url = extract_video_url(default=None)
|
||||
formats = extract_formats(default=None)
|
||||
|
||||
if not video_url:
|
||||
if not formats:
|
||||
webpage = decode_packed_codes(self._search_regex(
|
||||
r"(}\('(.+)',(\d+),(\d+),'[^']*\b(?:file|embed)\b[^']*'\.split\('\|'\))",
|
||||
webpage, 'packed code'))
|
||||
video_url = extract_video_url()
|
||||
formats = extract_formats()
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None)
|
||||
|
||||
formats = [{
|
||||
'format_id': 'sd',
|
||||
'url': video_url,
|
||||
'quality': 1,
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
|
@ -10,12 +10,14 @@ import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_ord,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
get_element_by_attribute,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@ -105,7 +107,9 @@ class YoukuIE(InfoExtractor):
|
||||
if stream.get('channel_type') == 'tail':
|
||||
continue
|
||||
format = stream.get('stream_type')
|
||||
fileid = stream['stream_fileid']
|
||||
fileid = try_get(
|
||||
stream, lambda x: x['segs'][0]['fileid'],
|
||||
compat_str) or stream['stream_fileid']
|
||||
fileid_dict[format] = fileid
|
||||
|
||||
def get_fileid(format, n):
|
||||
|
@ -59,6 +59,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
# If True it will raise an error if no login info is provided
|
||||
_LOGIN_REQUIRED = False
|
||||
|
||||
_PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}'
|
||||
|
||||
def _set_language(self):
|
||||
self._set_cookie(
|
||||
'.youtube.com', 'PREF', 'f1=50000000&hl=en',
|
||||
@ -265,9 +267,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
)
|
||||
)? # all until now is optional -> you can pass the naked ID
|
||||
([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
|
||||
(?!.*?\blist=) # combined list/video URLs are handled by the playlist IE
|
||||
(?!.*?\blist=
|
||||
(?:
|
||||
%(playlist_id)s| # combined list/video URLs are handled by the playlist IE
|
||||
WL # WL are handled by the watch later IE
|
||||
)
|
||||
)
|
||||
(?(1).+)? # if we found the ID, everything can follow
|
||||
$"""
|
||||
$""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
|
||||
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
|
||||
_formats = {
|
||||
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
||||
@ -310,60 +317,60 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
|
||||
|
||||
# DASH mp4 video
|
||||
'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||
'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||
'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||
'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||
'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
|
||||
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||
'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||
'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
|
||||
'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
|
||||
'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||
'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
||||
'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
||||
'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
||||
'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
||||
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
||||
'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
|
||||
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
||||
'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
||||
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
||||
'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
|
||||
'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
|
||||
'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
||||
|
||||
# Dash mp4 audio
|
||||
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
|
||||
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
|
||||
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
|
||||
'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
|
||||
'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
|
||||
'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'preference': -50, 'container': 'm4a_dash'},
|
||||
'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'preference': -50, 'container': 'm4a_dash'},
|
||||
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
|
||||
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
|
||||
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
|
||||
'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
|
||||
'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
|
||||
'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
|
||||
'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
|
||||
|
||||
# Dash webm
|
||||
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40},
|
||||
'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||
'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||
'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||
'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||
'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||
'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
||||
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
||||
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
||||
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
||||
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
||||
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
||||
'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
|
||||
'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
||||
'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
||||
'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
||||
'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
||||
'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
||||
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
||||
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
||||
'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
||||
# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
|
||||
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
|
||||
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
|
||||
'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
|
||||
'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||
'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
|
||||
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
||||
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
|
||||
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
|
||||
'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
|
||||
'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
||||
'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
|
||||
|
||||
# Dash webm audio
|
||||
'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
|
||||
'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
|
||||
'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
|
||||
'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
|
||||
|
||||
# Dash webm audio with opus inside
|
||||
'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
|
||||
'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
|
||||
'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
|
||||
'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
|
||||
'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
|
||||
'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
|
||||
|
||||
# RTMP (unnamed)
|
||||
'_rtmp': {'protocol': 'rtmp'},
|
||||
@ -924,6 +931,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'url': 'sJL6WA-aGkQ',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
@ -1242,21 +1253,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
video_id = mobj.group(2)
|
||||
return video_id
|
||||
|
||||
def _extract_from_m3u8(self, manifest_url, video_id):
|
||||
url_map = {}
|
||||
|
||||
def _get_urls(_manifest):
|
||||
lines = _manifest.split('\n')
|
||||
urls = filter(lambda l: l and not l.startswith('#'),
|
||||
lines)
|
||||
return urls
|
||||
manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
|
||||
formats_urls = _get_urls(manifest)
|
||||
for format_url in formats_urls:
|
||||
itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
|
||||
url_map[itag] = format_url
|
||||
return url_map
|
||||
|
||||
def _extract_annotations(self, video_id):
|
||||
url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
|
||||
return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
|
||||
@ -1562,19 +1558,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if self._downloader.params.get('writeannotations', False):
|
||||
video_annotations = self._extract_annotations(video_id)
|
||||
|
||||
def _map_to_format_list(urlmap):
|
||||
formats = []
|
||||
for itag, video_real_url in urlmap.items():
|
||||
dct = {
|
||||
'format_id': itag,
|
||||
'url': video_real_url,
|
||||
'player_url': player_url,
|
||||
}
|
||||
if itag in self._formats:
|
||||
dct.update(self._formats[itag])
|
||||
formats.append(dct)
|
||||
return formats
|
||||
|
||||
if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
|
||||
self.report_rtmp_download()
|
||||
formats = [{
|
||||
@ -1707,11 +1690,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
formats.append(dct)
|
||||
elif video_info.get('hlsvp'):
|
||||
manifest_url = video_info['hlsvp'][0]
|
||||
url_map = self._extract_from_m3u8(manifest_url, video_id)
|
||||
formats = _map_to_format_list(url_map)
|
||||
# Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
|
||||
for a_format in formats:
|
||||
formats = []
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
manifest_url, video_id, 'mp4', fatal=False)
|
||||
for a_format in m3u8_formats:
|
||||
itag = self._search_regex(
|
||||
r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
|
||||
if itag:
|
||||
a_format['format_id'] = itag
|
||||
if itag in self._formats:
|
||||
dct = self._formats[itag].copy()
|
||||
dct.update(a_format)
|
||||
a_format = dct
|
||||
a_format['player_url'] = player_url
|
||||
# Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
|
||||
a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
|
||||
formats.append(a_format)
|
||||
else:
|
||||
unavailable_message = self._html_search_regex(
|
||||
r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
|
||||
@ -1864,8 +1858,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
||||
)
|
||||
.*
|
||||
|
|
||||
((?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,})
|
||||
)"""
|
||||
(%(playlist_id)s)
|
||||
)""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
|
||||
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
|
||||
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
|
||||
IE_NAME = 'youtube:playlist'
|
||||
|
@ -459,11 +459,11 @@ def parseOpts(overrideArguments=None):
|
||||
downloader.add_option(
|
||||
'--fragment-retries',
|
||||
dest='fragment_retries', metavar='RETRIES', default=10,
|
||||
help='Number of retries for a fragment (default is %default), or "infinite" (DASH and hlsnative only)')
|
||||
help='Number of retries for a fragment (default is %default), or "infinite" (DASH, hlsnative and ISM)')
|
||||
downloader.add_option(
|
||||
'--skip-unavailable-fragments',
|
||||
action='store_true', dest='skip_unavailable_fragments', default=True,
|
||||
help='Skip unavailable fragments (DASH and hlsnative only)')
|
||||
help='Skip unavailable fragments (DASH, hlsnative and ISM)')
|
||||
downloader.add_option(
|
||||
'--abort-on-unavailable-fragment',
|
||||
action='store_false', dest='skip_unavailable_fragments',
|
||||
@ -773,7 +773,7 @@ def parseOpts(overrideArguments=None):
|
||||
help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
|
||||
postproc.add_option(
|
||||
'--audio-format', metavar='FORMAT', dest='audioformat', default='best',
|
||||
help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default; No effect without -x')
|
||||
help='Specify audio format: "best", "aac", "flac", "mp3", "m4a", "opus", "vorbis", or "wav"; "%default" by default; No effect without -x')
|
||||
postproc.add_option(
|
||||
'--audio-quality', metavar='QUALITY',
|
||||
dest='audioquality', default='5',
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user