mirror of
https://github.com/l1ving/youtube-dl
synced 2025-03-11 10:57:22 +08:00
Merge branch 'master' of git://github.com/rg3/youtube-dl into steam
This commit is contained in:
commit
06d55f1004
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.02*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.24*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.02**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.24**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2017.09.02
|
[debug] youtube-dl version 2017.09.24
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
1
.gitignore
vendored
1
.gitignore
vendored
@ -22,6 +22,7 @@ cover/
|
|||||||
updates_key.pem
|
updates_key.pem
|
||||||
*.egg-info
|
*.egg-info
|
||||||
*.srt
|
*.srt
|
||||||
|
*.ttml
|
||||||
*.sbv
|
*.sbv
|
||||||
*.vtt
|
*.vtt
|
||||||
*.flv
|
*.flv
|
||||||
|
7
AUTHORS
7
AUTHORS
@ -224,3 +224,10 @@ Giuseppe Fabiano
|
|||||||
Örn Guðjónsson
|
Örn Guðjónsson
|
||||||
Parmjit Virk
|
Parmjit Virk
|
||||||
Genki Sky
|
Genki Sky
|
||||||
|
Ľuboš Katrinec
|
||||||
|
Corey Nicholson
|
||||||
|
Ashutosh Chaudhary
|
||||||
|
John Dong
|
||||||
|
Tatsuyuki Ishi
|
||||||
|
Daniel Weber
|
||||||
|
Kay Bouché
|
||||||
|
@ -82,6 +82,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file
|
|||||||
python test/test_download.py
|
python test/test_download.py
|
||||||
nosetests
|
nosetests
|
||||||
|
|
||||||
|
See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
|
||||||
|
|
||||||
If you want to create a build of youtube-dl yourself, you'll need
|
If you want to create a build of youtube-dl yourself, you'll need
|
||||||
|
|
||||||
* python
|
* python
|
||||||
@ -149,7 +151,7 @@ After you have ensured this site is distributing its content legally, you can fo
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
||||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
||||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||||
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
||||||
|
81
ChangeLog
81
ChangeLog
@ -1,3 +1,84 @@
|
|||||||
|
version 2017.09.24
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [options] Accept lrc as a subtitle conversion target format (#14292)
|
||||||
|
* [utils] Fix handling raw TTML subtitles (#14191)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [24video] Fix timestamp extraction and make non fatal (#14295)
|
||||||
|
+ [24video] Add support for 24video.adult (#14295)
|
||||||
|
+ [kakao] Add support for tv.kakao.com (#12298, #14007)
|
||||||
|
+ [twitter] Add support for URLs without user id (#14270)
|
||||||
|
+ [americastestkitchen] Add support for americastestkitchen.com (#10764,
|
||||||
|
#13996)
|
||||||
|
* [generic] Fix support for multiple HTML5 videos on one page (#14080)
|
||||||
|
* [mixcloud] Fix extraction (#14088, #14132)
|
||||||
|
+ [lynda] Add support for educourse.ga (#14286)
|
||||||
|
* [beeg] Fix extraction (#14275)
|
||||||
|
* [nbcsports:vplayer] Correct theplatform URL (#13873)
|
||||||
|
* [twitter] Fix duration extraction (#14141)
|
||||||
|
* [tvplay] Bypass geo restriction
|
||||||
|
+ [heise] Add support for YouTube embeds (#14109)
|
||||||
|
+ [popcorntv] Add support for popcorntv.it (#5914, #14211)
|
||||||
|
* [viki] Update app data (#14181)
|
||||||
|
* [morningstar] Relax URL regular expression (#14222)
|
||||||
|
* [openload] Fix extraction (#14225, #14257)
|
||||||
|
* [noovo] Fix extraction (#14214)
|
||||||
|
* [dailymotion:playlist] Relax URL regular expression (#14219)
|
||||||
|
+ [twitch] Add support for go.twitch.tv URLs (#14215)
|
||||||
|
* [vgtv] Relax URL regular expression (#14223)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.09.15
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [downloader/fragment] Restart inconsistent incomplete fragment downloads
|
||||||
|
(#13731)
|
||||||
|
* [YoutubeDL] Download raw subtitles files (#12909, #14191)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [condenast] Fix extraction (#14196, #14207)
|
||||||
|
+ [orf] Add support for f4m stories
|
||||||
|
* [tv4] Relax URL regular expression (#14206)
|
||||||
|
* [animeondemand] Bypass geo restriction
|
||||||
|
+ [animeondemand] Add support for flash videos (#9944)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.09.11
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [rutube:playlist] Fix suitable (#14166)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.09.10
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [utils] Introduce bool_or_none
|
||||||
|
* [YoutubeDL] Ensure dir existence for each requested format (#14116)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [fox] Fix extraction (#14147)
|
||||||
|
* [rutube] Use bool_or_none
|
||||||
|
* [rutube] Rework and generalize playlist extractors (#13565)
|
||||||
|
+ [rutube:playlist] Add support for playlists (#13534, #13565)
|
||||||
|
+ [radiocanada] Add fallback for title extraction (#14145)
|
||||||
|
* [vk] Use dedicated YouTube embeds extraction routine
|
||||||
|
* [vice] Use dedicated YouTube embeds extraction routine
|
||||||
|
* [cracked] Use dedicated YouTube embeds extraction routine
|
||||||
|
* [chilloutzone] Use dedicated YouTube embeds extraction routine
|
||||||
|
* [abcnews] Use dedicated YouTube embeds extraction routine
|
||||||
|
* [youtube] Separate methods for embeds extraction
|
||||||
|
* [redtube] Fix formats extraction (#14122)
|
||||||
|
* [arte] Relax unavailability check (#14112)
|
||||||
|
+ [manyvids] Add support for preview videos from manyvids.com (#14053, #14059)
|
||||||
|
* [vidme:user] Relax URL regular expression (#14054)
|
||||||
|
* [bpb] Fix extraction (#14043, #14086)
|
||||||
|
* [soundcloud] Fix download URL with private tracks (#14093)
|
||||||
|
* [aliexpress:live] Add support for live.aliexpress.com (#13698, #13707)
|
||||||
|
* [viidea] Capture and output lecture error message (#14099)
|
||||||
|
* [radiocanada] Skip unsupported platforms (#14100)
|
||||||
|
|
||||||
|
|
||||||
version 2017.09.02
|
version 2017.09.02
|
||||||
|
|
||||||
Extractors
|
Extractors
|
||||||
|
@ -427,7 +427,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
|||||||
syntax. Example: --exec 'adb push {}
|
syntax. Example: --exec 'adb push {}
|
||||||
/sdcard/Music/ && rm {}'
|
/sdcard/Music/ && rm {}'
|
||||||
--convert-subs FORMAT Convert the subtitles to other format
|
--convert-subs FORMAT Convert the subtitles to other format
|
||||||
(currently supported: srt|ass|vtt)
|
(currently supported: srt|ass|vtt|lrc)
|
||||||
|
|
||||||
# CONFIGURATION
|
# CONFIGURATION
|
||||||
|
|
||||||
|
@ -14,7 +14,7 @@ import os
|
|||||||
import sys
|
import sys
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import get_testcases
|
from test.helper import gettestcases
|
||||||
from youtube_dl.utils import compat_urllib_parse_urlparse
|
from youtube_dl.utils import compat_urllib_parse_urlparse
|
||||||
from youtube_dl.utils import compat_urllib_request
|
from youtube_dl.utils import compat_urllib_request
|
||||||
|
|
||||||
@ -24,7 +24,7 @@ if len(sys.argv) > 1:
|
|||||||
else:
|
else:
|
||||||
METHOD = 'EURISTIC'
|
METHOD = 'EURISTIC'
|
||||||
|
|
||||||
for test in get_testcases():
|
for test in gettestcases():
|
||||||
if METHOD == 'EURISTIC':
|
if METHOD == 'EURISTIC':
|
||||||
try:
|
try:
|
||||||
webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
|
webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
|
||||||
|
@ -38,10 +38,12 @@
|
|||||||
- **afreecatv**: afreecatv.com
|
- **afreecatv**: afreecatv.com
|
||||||
- **afreecatv:global**: afreecatv.com
|
- **afreecatv:global**: afreecatv.com
|
||||||
- **AirMozilla**
|
- **AirMozilla**
|
||||||
|
- **AliExpressLive**
|
||||||
- **AlJazeera**
|
- **AlJazeera**
|
||||||
- **Allocine**
|
- **Allocine**
|
||||||
- **AlphaPorno**
|
- **AlphaPorno**
|
||||||
- **AMCNetworks**
|
- **AMCNetworks**
|
||||||
|
- **AmericasTestKitchen**
|
||||||
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||||
- **AnimeOnDemand**
|
- **AnimeOnDemand**
|
||||||
- **anitube.se**
|
- **anitube.se**
|
||||||
@ -377,6 +379,7 @@
|
|||||||
- **Jove**
|
- **Jove**
|
||||||
- **jpopsuki.tv**
|
- **jpopsuki.tv**
|
||||||
- **JWPlatform**
|
- **JWPlatform**
|
||||||
|
- **Kakao**
|
||||||
- **Kaltura**
|
- **Kaltura**
|
||||||
- **Kamcord**
|
- **Kamcord**
|
||||||
- **KanalPlay**: Kanal 5/9/11 Play
|
- **KanalPlay**: Kanal 5/9/11 Play
|
||||||
@ -437,6 +440,7 @@
|
|||||||
- **MakerTV**
|
- **MakerTV**
|
||||||
- **mangomolo:live**
|
- **mangomolo:live**
|
||||||
- **mangomolo:video**
|
- **mangomolo:video**
|
||||||
|
- **ManyVids**
|
||||||
- **MatchTV**
|
- **MatchTV**
|
||||||
- **MDR**: MDR.DE and KiKA
|
- **MDR**: MDR.DE and KiKA
|
||||||
- **media.ccc.de**
|
- **media.ccc.de**
|
||||||
@ -591,6 +595,7 @@
|
|||||||
- **Openload**
|
- **Openload**
|
||||||
- **OraTV**
|
- **OraTV**
|
||||||
- **orf:fm4**: radio FM4
|
- **orf:fm4**: radio FM4
|
||||||
|
- **orf:fm4:story**: fm4.orf.at stories
|
||||||
- **orf:iptv**: iptv.ORF.at
|
- **orf:iptv**: iptv.ORF.at
|
||||||
- **orf:oe1**: Radio Österreich 1
|
- **orf:oe1**: Radio Österreich 1
|
||||||
- **orf:tvthek**: ORF TVthek
|
- **orf:tvthek**: ORF TVthek
|
||||||
@ -624,6 +629,7 @@
|
|||||||
- **Pokemon**
|
- **Pokemon**
|
||||||
- **PolskieRadio**
|
- **PolskieRadio**
|
||||||
- **PolskieRadioCategory**
|
- **PolskieRadioCategory**
|
||||||
|
- **PopcornTV**
|
||||||
- **PornCom**
|
- **PornCom**
|
||||||
- **PornerBros**
|
- **PornerBros**
|
||||||
- **PornFlip**
|
- **PornFlip**
|
||||||
@ -701,6 +707,7 @@
|
|||||||
- **rutube:embed**: Rutube embedded videos
|
- **rutube:embed**: Rutube embedded videos
|
||||||
- **rutube:movie**: Rutube movies
|
- **rutube:movie**: Rutube movies
|
||||||
- **rutube:person**: Rutube person videos
|
- **rutube:person**: Rutube person videos
|
||||||
|
- **rutube:playlist**: Rutube playlists
|
||||||
- **RUTV**: RUTV.RU
|
- **RUTV**: RUTV.RU
|
||||||
- **Ruutu**
|
- **Ruutu**
|
||||||
- **Ruv**
|
- **Ruv**
|
||||||
|
@ -1064,7 +1064,7 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
|
|||||||
<p begin="3" dur="-1">Ignored, three</p>
|
<p begin="3" dur="-1">Ignored, three</p>
|
||||||
</div>
|
</div>
|
||||||
</body>
|
</body>
|
||||||
</tt>'''
|
</tt>'''.encode('utf-8')
|
||||||
srt_data = '''1
|
srt_data = '''1
|
||||||
00:00:00,000 --> 00:00:01,000
|
00:00:00,000 --> 00:00:01,000
|
||||||
The following line contains Chinese characters and special symbols
|
The following line contains Chinese characters and special symbols
|
||||||
@ -1089,7 +1089,7 @@ Line
|
|||||||
<p begin="0" end="1">The first line</p>
|
<p begin="0" end="1">The first line</p>
|
||||||
</div>
|
</div>
|
||||||
</body>
|
</body>
|
||||||
</tt>'''
|
</tt>'''.encode('utf-8')
|
||||||
srt_data = '''1
|
srt_data = '''1
|
||||||
00:00:00,000 --> 00:00:01,000
|
00:00:00,000 --> 00:00:01,000
|
||||||
The first line
|
The first line
|
||||||
@ -1115,7 +1115,7 @@ The first line
|
|||||||
<p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p>
|
<p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p>
|
||||||
</div>
|
</div>
|
||||||
</body>
|
</body>
|
||||||
</tt>'''
|
</tt>'''.encode('utf-8')
|
||||||
srt_data = '''1
|
srt_data = '''1
|
||||||
00:00:02,080 --> 00:00:05,839
|
00:00:02,080 --> 00:00:05,839
|
||||||
<font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font>
|
<font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font>
|
||||||
@ -1138,6 +1138,26 @@ part 3</font></u>
|
|||||||
'''
|
'''
|
||||||
self.assertEqual(dfxp2srt(dfxp_data_with_style), srt_data)
|
self.assertEqual(dfxp2srt(dfxp_data_with_style), srt_data)
|
||||||
|
|
||||||
|
dfxp_data_non_utf8 = '''<?xml version="1.0" encoding="UTF-16"?>
|
||||||
|
<tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
|
||||||
|
<body>
|
||||||
|
<div xml:lang="en">
|
||||||
|
<p begin="0" end="1">Line 1</p>
|
||||||
|
<p begin="1" end="2">第二行</p>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</tt>'''.encode('utf-16')
|
||||||
|
srt_data = '''1
|
||||||
|
00:00:00,000 --> 00:00:01,000
|
||||||
|
Line 1
|
||||||
|
|
||||||
|
2
|
||||||
|
00:00:01,000 --> 00:00:02,000
|
||||||
|
第二行
|
||||||
|
|
||||||
|
'''
|
||||||
|
self.assertEqual(dfxp2srt(dfxp_data_non_utf8), srt_data)
|
||||||
|
|
||||||
def test_cli_option(self):
|
def test_cli_option(self):
|
||||||
self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
|
self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
|
||||||
self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
|
self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
|
||||||
|
@ -92,6 +92,7 @@ from .utils import (
|
|||||||
)
|
)
|
||||||
from .cache import Cache
|
from .cache import Cache
|
||||||
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
|
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
|
||||||
|
from .extractor.openload import PhantomJSwrapper
|
||||||
from .downloader import get_suitable_downloader
|
from .downloader import get_suitable_downloader
|
||||||
from .downloader.rtmp import rtmpdump_version
|
from .downloader.rtmp import rtmpdump_version
|
||||||
from .postprocessor import (
|
from .postprocessor import (
|
||||||
@ -1763,29 +1764,30 @@ class YoutubeDL(object):
|
|||||||
ie = self.get_info_extractor(info_dict['extractor_key'])
|
ie = self.get_info_extractor(info_dict['extractor_key'])
|
||||||
for sub_lang, sub_info in subtitles.items():
|
for sub_lang, sub_info in subtitles.items():
|
||||||
sub_format = sub_info['ext']
|
sub_format = sub_info['ext']
|
||||||
if sub_info.get('data') is not None:
|
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
|
||||||
sub_data = sub_info['data']
|
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
|
||||||
|
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
|
||||||
else:
|
else:
|
||||||
try:
|
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
|
||||||
sub_data = ie._download_webpage(
|
if sub_info.get('data') is not None:
|
||||||
sub_info['url'], info_dict['id'], note=False)
|
try:
|
||||||
except ExtractorError as err:
|
# Use newline='' to prevent conversion of newline characters
|
||||||
self.report_warning('Unable to download subtitle for "%s": %s' %
|
# See https://github.com/rg3/youtube-dl/issues/10268
|
||||||
(sub_lang, error_to_compat_str(err.cause)))
|
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
|
||||||
continue
|
subfile.write(sub_info['data'])
|
||||||
try:
|
except (OSError, IOError):
|
||||||
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
|
self.report_error('Cannot write subtitles file ' + sub_filename)
|
||||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
|
return
|
||||||
self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
|
|
||||||
else:
|
else:
|
||||||
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
|
try:
|
||||||
# Use newline='' to prevent conversion of newline characters
|
sub_data = ie._request_webpage(
|
||||||
# See https://github.com/rg3/youtube-dl/issues/10268
|
sub_info['url'], info_dict['id'], note=False).read()
|
||||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
|
with io.open(encodeFilename(sub_filename), 'wb') as subfile:
|
||||||
subfile.write(sub_data)
|
subfile.write(sub_data)
|
||||||
except (OSError, IOError):
|
except (ExtractorError, IOError, OSError, ValueError) as err:
|
||||||
self.report_error('Cannot write subtitles file ' + sub_filename)
|
self.report_warning('Unable to download subtitle for "%s": %s' %
|
||||||
return
|
(sub_lang, error_to_compat_str(err)))
|
||||||
|
continue
|
||||||
|
|
||||||
if self.params.get('writeinfojson', False):
|
if self.params.get('writeinfojson', False):
|
||||||
infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
|
infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
|
||||||
@ -2216,6 +2218,7 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
exe_versions = FFmpegPostProcessor.get_versions(self)
|
exe_versions = FFmpegPostProcessor.get_versions(self)
|
||||||
exe_versions['rtmpdump'] = rtmpdump_version()
|
exe_versions['rtmpdump'] = rtmpdump_version()
|
||||||
|
exe_versions['phantomjs'] = PhantomJSwrapper._version()
|
||||||
exe_str = ', '.join(
|
exe_str = ', '.join(
|
||||||
'%s %s' % (exe, v)
|
'%s %s' % (exe, v)
|
||||||
for exe, v in sorted(exe_versions.items())
|
for exe, v in sorted(exe_versions.items())
|
||||||
|
@ -206,7 +206,7 @@ def _real_main(argv=None):
|
|||||||
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
|
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
|
||||||
parser.error('invalid video recode format specified')
|
parser.error('invalid video recode format specified')
|
||||||
if opts.convertsubtitles is not None:
|
if opts.convertsubtitles is not None:
|
||||||
if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
|
if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']:
|
||||||
parser.error('invalid subtitle format specified')
|
parser.error('invalid subtitle format specified')
|
||||||
|
|
||||||
if opts.date is not None:
|
if opts.date is not None:
|
||||||
|
@ -6,6 +6,7 @@ import collections
|
|||||||
import email
|
import email
|
||||||
import getpass
|
import getpass
|
||||||
import io
|
import io
|
||||||
|
import itertools
|
||||||
import optparse
|
import optparse
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
@ -15,7 +16,6 @@ import socket
|
|||||||
import struct
|
import struct
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import itertools
|
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
|
||||||
@ -2898,6 +2898,13 @@ else:
|
|||||||
compat_struct_pack = struct.pack
|
compat_struct_pack = struct.pack
|
||||||
compat_struct_unpack = struct.unpack
|
compat_struct_unpack = struct.unpack
|
||||||
|
|
||||||
|
try:
|
||||||
|
from future_builtins import zip as compat_zip
|
||||||
|
except ImportError: # not 2.6+ or is 3.x
|
||||||
|
try:
|
||||||
|
from itertools import izip as compat_zip # < 2.5 or 3.x
|
||||||
|
except ImportError:
|
||||||
|
compat_zip = zip
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'compat_HTMLParseError',
|
'compat_HTMLParseError',
|
||||||
@ -2948,5 +2955,6 @@ __all__ = [
|
|||||||
'compat_urlretrieve',
|
'compat_urlretrieve',
|
||||||
'compat_xml_parse_error',
|
'compat_xml_parse_error',
|
||||||
'compat_xpath',
|
'compat_xpath',
|
||||||
|
'compat_zip',
|
||||||
'workaround_optparse_bug9161',
|
'workaround_optparse_bug9161',
|
||||||
]
|
]
|
||||||
|
@ -151,10 +151,15 @@ class FragmentFD(FileDownloader):
|
|||||||
if self.__do_ytdl_file(ctx):
|
if self.__do_ytdl_file(ctx):
|
||||||
if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
|
if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
|
||||||
self._read_ytdl_file(ctx)
|
self._read_ytdl_file(ctx)
|
||||||
|
if ctx['fragment_index'] > 0 and resume_len == 0:
|
||||||
|
self.report_error(
|
||||||
|
'Inconsistent state of incomplete fragment download. '
|
||||||
|
'Restarting from the beginning...')
|
||||||
|
ctx['fragment_index'] = resume_len = 0
|
||||||
|
self._write_ytdl_file(ctx)
|
||||||
else:
|
else:
|
||||||
self._write_ytdl_file(ctx)
|
self._write_ytdl_file(ctx)
|
||||||
if ctx['fragment_index'] > 0:
|
assert ctx['fragment_index'] == 0
|
||||||
assert resume_len > 0
|
|
||||||
|
|
||||||
dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)
|
dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)
|
||||||
|
|
||||||
|
85
youtube_dl/extractor/americastestkitchen.py
Executable file
85
youtube_dl/extractor/americastestkitchen.py
Executable file
@ -0,0 +1,85 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AmericasTestKitchenIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.americastestkitchen.com/episode/548-summer-dinner-party',
|
||||||
|
'md5': 'b861c3e365ac38ad319cfd509c30577f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1_5g5zua6e',
|
||||||
|
'title': 'Summer Dinner Party',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'description': 'md5:858d986e73a4826979b6a5d9f8f6a1ec',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
|
'timestamp': 1497285541,
|
||||||
|
'upload_date': '20170612',
|
||||||
|
'uploader_id': 'roger.metcalf@americastestkitchen.com',
|
||||||
|
'release_date': '20170617',
|
||||||
|
'series': "America's Test Kitchen",
|
||||||
|
'season_number': 17,
|
||||||
|
'episode': 'Summer Dinner Party',
|
||||||
|
'episode_number': 24,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
partner_id = self._search_regex(
|
||||||
|
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
||||||
|
webpage, 'kaltura partner id')
|
||||||
|
|
||||||
|
video_data = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
|
||||||
|
webpage, 'initial context'),
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
ep_data = try_get(
|
||||||
|
video_data,
|
||||||
|
(lambda x: x['episodeDetail']['content']['data'],
|
||||||
|
lambda x: x['videoDetail']['content']['data']), dict)
|
||||||
|
ep_meta = ep_data.get('full_video', {})
|
||||||
|
external_id = ep_data.get('external_id') or ep_meta['external_id']
|
||||||
|
|
||||||
|
title = ep_data.get('title') or ep_meta.get('title')
|
||||||
|
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
|
||||||
|
'description') or ep_meta.get('description'))
|
||||||
|
thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url'])
|
||||||
|
release_date = unified_strdate(ep_data.get('aired_at'))
|
||||||
|
|
||||||
|
season_number = int_or_none(ep_meta.get('season_number'))
|
||||||
|
episode = ep_meta.get('title')
|
||||||
|
episode_number = int_or_none(ep_meta.get('episode_number'))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': 'kaltura:%s:%s' % (partner_id, external_id),
|
||||||
|
'ie_key': 'Kaltura',
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'release_date': release_date,
|
||||||
|
'series': "America's Test Kitchen",
|
||||||
|
'season_number': season_number,
|
||||||
|
'episode': episode,
|
||||||
|
'episode_number': episode_number,
|
||||||
|
}
|
@ -3,16 +3,13 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import compat_str
|
||||||
compat_urlparse,
|
|
||||||
compat_str,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
sanitized_Request,
|
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -21,6 +18,8 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
_LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
|
_LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
|
||||||
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
|
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
|
||||||
_NETRC_MACHINE = 'animeondemand'
|
_NETRC_MACHINE = 'animeondemand'
|
||||||
|
# German-speaking countries of Europe
|
||||||
|
_GEO_COUNTRIES = ['AT', 'CH', 'DE', 'LI', 'LU']
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# jap, OmU
|
# jap, OmU
|
||||||
'url': 'https://www.anime-on-demand.de/anime/161',
|
'url': 'https://www.anime-on-demand.de/anime/161',
|
||||||
@ -46,6 +45,10 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
# Full length film, non-series, ger/jap, Dub/OmU, account required
|
# Full length film, non-series, ger/jap, Dub/OmU, account required
|
||||||
'url': 'https://www.anime-on-demand.de/anime/185',
|
'url': 'https://www.anime-on-demand.de/anime/185',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# Flash videos
|
||||||
|
'url': 'https://www.anime-on-demand.de/anime/12',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
@ -72,14 +75,13 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
'post url', default=self._LOGIN_URL, group='url')
|
'post url', default=self._LOGIN_URL, group='url')
|
||||||
|
|
||||||
if not post_url.startswith('http'):
|
if not post_url.startswith('http'):
|
||||||
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
|
post_url = urljoin(self._LOGIN_URL, post_url)
|
||||||
|
|
||||||
request = sanitized_Request(
|
|
||||||
post_url, urlencode_postdata(login_form))
|
|
||||||
request.add_header('Referer', self._LOGIN_URL)
|
|
||||||
|
|
||||||
response = self._download_webpage(
|
response = self._download_webpage(
|
||||||
request, None, 'Logging in as %s' % username)
|
post_url, None, 'Logging in as %s' % username,
|
||||||
|
data=urlencode_postdata(login_form), headers={
|
||||||
|
'Referer': self._LOGIN_URL,
|
||||||
|
})
|
||||||
|
|
||||||
if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
|
if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
|
||||||
error = self._search_regex(
|
error = self._search_regex(
|
||||||
@ -120,10 +122,11 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
for input_ in re.findall(
|
for input_ in re.findall(
|
||||||
r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', html):
|
r'<input[^>]+class=["\'].*?streamstarter[^>]+>', html):
|
||||||
attributes = extract_attributes(input_)
|
attributes = extract_attributes(input_)
|
||||||
|
title = attributes.get('data-dialog-header')
|
||||||
playlist_urls = []
|
playlist_urls = []
|
||||||
for playlist_key in ('data-playlist', 'data-otherplaylist'):
|
for playlist_key in ('data-playlist', 'data-otherplaylist', 'data-stream'):
|
||||||
playlist_url = attributes.get(playlist_key)
|
playlist_url = attributes.get(playlist_key)
|
||||||
if isinstance(playlist_url, compat_str) and re.match(
|
if isinstance(playlist_url, compat_str) and re.match(
|
||||||
r'/?[\da-zA-Z]+', playlist_url):
|
r'/?[\da-zA-Z]+', playlist_url):
|
||||||
@ -147,19 +150,38 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
format_id_list.append(compat_str(num))
|
format_id_list.append(compat_str(num))
|
||||||
format_id = '-'.join(format_id_list)
|
format_id = '-'.join(format_id_list)
|
||||||
format_note = ', '.join(filter(None, (kind, lang_note)))
|
format_note = ', '.join(filter(None, (kind, lang_note)))
|
||||||
request = sanitized_Request(
|
item_id_list = []
|
||||||
compat_urlparse.urljoin(url, playlist_url),
|
if format_id:
|
||||||
|
item_id_list.append(format_id)
|
||||||
|
item_id_list.append('videomaterial')
|
||||||
|
playlist = self._download_json(
|
||||||
|
urljoin(url, playlist_url), video_id,
|
||||||
|
'Downloading %s JSON' % ' '.join(item_id_list),
|
||||||
headers={
|
headers={
|
||||||
'X-Requested-With': 'XMLHttpRequest',
|
'X-Requested-With': 'XMLHttpRequest',
|
||||||
'X-CSRF-Token': csrf_token,
|
'X-CSRF-Token': csrf_token,
|
||||||
'Referer': url,
|
'Referer': url,
|
||||||
'Accept': 'application/json, text/javascript, */*; q=0.01',
|
'Accept': 'application/json, text/javascript, */*; q=0.01',
|
||||||
})
|
}, fatal=False)
|
||||||
playlist = self._download_json(
|
|
||||||
request, video_id, 'Downloading %s playlist JSON' % format_id,
|
|
||||||
fatal=False)
|
|
||||||
if not playlist:
|
if not playlist:
|
||||||
continue
|
continue
|
||||||
|
stream_url = playlist.get('streamurl')
|
||||||
|
if stream_url:
|
||||||
|
rtmp = re.search(
|
||||||
|
r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
|
||||||
|
stream_url)
|
||||||
|
if rtmp:
|
||||||
|
formats.append({
|
||||||
|
'url': rtmp.group('url'),
|
||||||
|
'app': rtmp.group('app'),
|
||||||
|
'play_path': rtmp.group('playpath'),
|
||||||
|
'page_url': url,
|
||||||
|
'player_url': 'https://www.anime-on-demand.de/assets/jwplayer.flash-55abfb34080700304d49125ce9ffb4a6.swf',
|
||||||
|
'rtmp_real_time': True,
|
||||||
|
'format_id': 'rtmp',
|
||||||
|
'ext': 'flv',
|
||||||
|
})
|
||||||
|
continue
|
||||||
start_video = playlist.get('startvideo', 0)
|
start_video = playlist.get('startvideo', 0)
|
||||||
playlist = playlist.get('playlist')
|
playlist = playlist.get('playlist')
|
||||||
if not playlist or not isinstance(playlist, list):
|
if not playlist or not isinstance(playlist, list):
|
||||||
@ -222,7 +244,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
f.update({
|
f.update({
|
||||||
'id': '%s-%s' % (f['id'], m.group('kind').lower()),
|
'id': '%s-%s' % (f['id'], m.group('kind').lower()),
|
||||||
'title': m.group('title'),
|
'title': m.group('title'),
|
||||||
'url': compat_urlparse.urljoin(url, m.group('href')),
|
'url': urljoin(url, m.group('href')),
|
||||||
})
|
})
|
||||||
entries.append(f)
|
entries.append(f)
|
||||||
|
|
||||||
|
@ -9,6 +9,7 @@ from ..compat import (
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -36,9 +37,11 @@ class BeegIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
cpl_url = self._search_regex(
|
cpl_url = self._search_regex(
|
||||||
r'<script[^>]+src=(["\'])(?P<url>(?:https?:)?//static\.beeg\.com/cpl/\d+\.js.*?)\1',
|
r'<script[^>]+src=(["\'])(?P<url>(?:/static|(?:https?:)?//static\.beeg\.com)/cpl/\d+\.js.*?)\1',
|
||||||
webpage, 'cpl', default=None, group='url')
|
webpage, 'cpl', default=None, group='url')
|
||||||
|
|
||||||
|
cpl_url = urljoin(url, cpl_url)
|
||||||
|
|
||||||
beeg_version, beeg_salt = [None] * 2
|
beeg_version, beeg_salt = [None] * 2
|
||||||
|
|
||||||
if cpl_url:
|
if cpl_url:
|
||||||
@ -54,7 +57,7 @@ class BeegIE(InfoExtractor):
|
|||||||
r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
|
r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
|
||||||
default=None, group='beeg_salt')
|
default=None, group='beeg_salt')
|
||||||
|
|
||||||
beeg_version = beeg_version or '2000'
|
beeg_version = beeg_version or '2185'
|
||||||
beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
|
beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
|
||||||
|
|
||||||
video = self._download_json(
|
video = self._download_json(
|
||||||
|
@ -2449,10 +2449,12 @@ class InfoExtractor(object):
|
|||||||
self._downloader.report_warning(msg)
|
self._downloader.report_warning(msg)
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def _set_cookie(self, domain, name, value, expire_time=None):
|
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
|
||||||
|
path='/', secure=False, discard=False, rest={}, **kwargs):
|
||||||
cookie = compat_cookiejar.Cookie(
|
cookie = compat_cookiejar.Cookie(
|
||||||
0, name, value, None, None, domain, None,
|
0, name, value, port, port is not None, domain, True,
|
||||||
None, '/', True, False, expire_time, '', None, None, None)
|
domain.startswith('.'), path, True, secure, expire_time,
|
||||||
|
discard, None, None, rest)
|
||||||
self._downloader.cookiejar.set_cookie(cookie)
|
self._downloader.cookiejar.set_cookie(cookie)
|
||||||
|
|
||||||
def _get_cookies(self, url):
|
def _get_cookies(self, url):
|
||||||
|
@ -116,16 +116,16 @@ class CondeNastIE(InfoExtractor):
|
|||||||
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
|
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
|
||||||
return self.playlist_result(entries, playlist_title=title)
|
return self.playlist_result(entries, playlist_title=title)
|
||||||
|
|
||||||
def _extract_video_params(self, webpage):
|
def _extract_video_params(self, webpage, display_id):
|
||||||
query = {}
|
query = self._parse_json(
|
||||||
params = self._search_regex(
|
self._search_regex(
|
||||||
r'(?s)var params = {(.+?)}[;,]', webpage, 'player params', default=None)
|
r'(?s)var\s+params\s*=\s*({.+?})[;,]', webpage, 'player params',
|
||||||
if params:
|
default='{}'),
|
||||||
query.update({
|
display_id, transform_source=js_to_json, fatal=False)
|
||||||
'videoId': self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id'),
|
if query:
|
||||||
'playerId': self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id'),
|
query['videoId'] = self._search_regex(
|
||||||
'target': self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target'),
|
r'(?:data-video-id=|currentVideoId\s*=\s*)["\']([\da-f]+)',
|
||||||
})
|
webpage, 'video id', default=None)
|
||||||
else:
|
else:
|
||||||
params = extract_attributes(self._search_regex(
|
params = extract_attributes(self._search_regex(
|
||||||
r'(<[^>]+data-js="video-player"[^>]+>)',
|
r'(<[^>]+data-js="video-player"[^>]+>)',
|
||||||
@ -141,17 +141,27 @@ class CondeNastIE(InfoExtractor):
|
|||||||
video_id = params['videoId']
|
video_id = params['videoId']
|
||||||
|
|
||||||
video_info = None
|
video_info = None
|
||||||
if params.get('playerId'):
|
|
||||||
info_page = self._download_json(
|
# New API path
|
||||||
'http://player.cnevids.com/player/video.js',
|
query = params.copy()
|
||||||
video_id, 'Downloading video info', fatal=False, query=params)
|
query['embedType'] = 'inline'
|
||||||
if info_page:
|
info_page = self._download_json(
|
||||||
video_info = info_page.get('video')
|
'http://player.cnevids.com/embed-api.json', video_id,
|
||||||
if not video_info:
|
'Downloading embed info', fatal=False, query=query)
|
||||||
info_page = self._download_webpage(
|
|
||||||
'http://player.cnevids.com/player/loader.js',
|
# Old fallbacks
|
||||||
video_id, 'Downloading loader info', query=params)
|
if not info_page:
|
||||||
else:
|
if params.get('playerId'):
|
||||||
|
info_page = self._download_json(
|
||||||
|
'http://player.cnevids.com/player/video.js', video_id,
|
||||||
|
'Downloading video info', fatal=False, query=params)
|
||||||
|
if info_page:
|
||||||
|
video_info = info_page.get('video')
|
||||||
|
if not video_info:
|
||||||
|
info_page = self._download_webpage(
|
||||||
|
'http://player.cnevids.com/player/loader.js',
|
||||||
|
video_id, 'Downloading loader info', query=params)
|
||||||
|
if not video_info:
|
||||||
info_page = self._download_webpage(
|
info_page = self._download_webpage(
|
||||||
'https://player.cnevids.com/inline/video/%s.js' % video_id,
|
'https://player.cnevids.com/inline/video/%s.js' % video_id,
|
||||||
video_id, 'Downloading inline info', query={
|
video_id, 'Downloading inline info', query={
|
||||||
@ -215,7 +225,7 @@ class CondeNastIE(InfoExtractor):
|
|||||||
if url_type == 'series':
|
if url_type == 'series':
|
||||||
return self._extract_series(url, webpage)
|
return self._extract_series(url, webpage)
|
||||||
else:
|
else:
|
||||||
params = self._extract_video_params(webpage)
|
params = self._extract_video_params(webpage, display_id)
|
||||||
info = self._search_json_ld(
|
info = self._search_json_ld(
|
||||||
webpage, display_id, fatal=False)
|
webpage, display_id, fatal=False)
|
||||||
info.update(self._extract_video(params))
|
info.update(self._extract_video(params))
|
||||||
|
@ -325,7 +325,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
|
|
||||||
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||||
IE_NAME = 'dailymotion:playlist'
|
IE_NAME = 'dailymotion:playlist'
|
||||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
|
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>[^/?#&]+)'
|
||||||
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
|
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
|
||||||
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
|
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@ -39,6 +39,7 @@ from .airmozilla import AirMozillaIE
|
|||||||
from .aljazeera import AlJazeeraIE
|
from .aljazeera import AlJazeeraIE
|
||||||
from .alphaporno import AlphaPornoIE
|
from .alphaporno import AlphaPornoIE
|
||||||
from .amcnetworks import AMCNetworksIE
|
from .amcnetworks import AMCNetworksIE
|
||||||
|
from .americastestkitchen import AmericasTestKitchenIE
|
||||||
from .animeondemand import AnimeOnDemandIE
|
from .animeondemand import AnimeOnDemandIE
|
||||||
from .anitube import AnitubeIE
|
from .anitube import AnitubeIE
|
||||||
from .anvato import AnvatoIE
|
from .anvato import AnvatoIE
|
||||||
@ -482,6 +483,7 @@ from .jove import JoveIE
|
|||||||
from .joj import JojIE
|
from .joj import JojIE
|
||||||
from .jwplatform import JWPlatformIE
|
from .jwplatform import JWPlatformIE
|
||||||
from .jpopsukitv import JpopsukiIE
|
from .jpopsukitv import JpopsukiIE
|
||||||
|
from .kakao import KakaoIE
|
||||||
from .kaltura import KalturaIE
|
from .kaltura import KalturaIE
|
||||||
from .kamcord import KamcordIE
|
from .kamcord import KamcordIE
|
||||||
from .kanalplay import KanalPlayIE
|
from .kanalplay import KanalPlayIE
|
||||||
@ -768,6 +770,7 @@ from .ora import OraTVIE
|
|||||||
from .orf import (
|
from .orf import (
|
||||||
ORFTVthekIE,
|
ORFTVthekIE,
|
||||||
ORFFM4IE,
|
ORFFM4IE,
|
||||||
|
ORFFM4StoryIE,
|
||||||
ORFOE1IE,
|
ORFOE1IE,
|
||||||
ORFIPTVIE,
|
ORFIPTVIE,
|
||||||
)
|
)
|
||||||
@ -807,6 +810,7 @@ from .polskieradio import (
|
|||||||
PolskieRadioIE,
|
PolskieRadioIE,
|
||||||
PolskieRadioCategoryIE,
|
PolskieRadioCategoryIE,
|
||||||
)
|
)
|
||||||
|
from .popcorntv import PopcornTVIE
|
||||||
from .porn91 import Porn91IE
|
from .porn91 import Porn91IE
|
||||||
from .porncom import PornComIE
|
from .porncom import PornComIE
|
||||||
from .pornflip import PornFlipIE
|
from .pornflip import PornFlipIE
|
||||||
@ -899,6 +903,7 @@ from .rutube import (
|
|||||||
RutubeEmbedIE,
|
RutubeEmbedIE,
|
||||||
RutubeMovieIE,
|
RutubeMovieIE,
|
||||||
RutubePersonIE,
|
RutubePersonIE,
|
||||||
|
RutubePlaylistIE,
|
||||||
)
|
)
|
||||||
from .rutv import RUTVIE
|
from .rutv import RUTVIE
|
||||||
from .ruutu import RuutuIE
|
from .ruutu import RuutuIE
|
||||||
|
@ -3,56 +3,99 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from .adobepass import AdobePassIE
|
from .adobepass import AdobePassIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
smuggle_url,
|
int_or_none,
|
||||||
update_url_query,
|
parse_age_limit,
|
||||||
|
parse_duration,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class FOXIE(AdobePassIE):
|
class FOXIE(AdobePassIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.fox.com/watch/255180355939/7684182528',
|
# clip
|
||||||
|
'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/',
|
||||||
'md5': 'ebd296fcc41dd4b19f8115d8461a3165',
|
'md5': 'ebd296fcc41dd4b19f8115d8461a3165',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '255180355939',
|
'id': '4b765a60490325103ea69888fb2bd4e8',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Official Trailer: Gotham',
|
'title': 'Aftermath: Bruce Wayne Develops Into The Dark Knight',
|
||||||
'description': 'Tracing the rise of the great DC Comics Super-Villains and vigilantes, Gotham reveals an entirely new chapter that has never been told.',
|
'description': 'md5:549cd9c70d413adb32ce2a779b53b486',
|
||||||
'duration': 129,
|
'duration': 102,
|
||||||
'timestamp': 1400020798,
|
'timestamp': 1504291893,
|
||||||
'upload_date': '20140513',
|
'upload_date': '20170901',
|
||||||
'uploader': 'NEWA-FNG-FOXCOM',
|
'creator': 'FOX',
|
||||||
|
'series': 'Gotham',
|
||||||
},
|
},
|
||||||
'add_ie': ['ThePlatform'],
|
'params': {
|
||||||
}
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# episode, geo-restricted
|
||||||
|
'url': 'https://www.fox.com/watch/087036ca7f33c8eb79b08152b4dd75c1/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# episode, geo-restricted, tv provided required
|
||||||
|
'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
settings = self._parse_json(self._search_regex(
|
video = self._download_json(
|
||||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
'https://api.fox.com/fbc-content/v1_4/video/%s' % video_id,
|
||||||
webpage, 'drupal settings'), video_id)
|
video_id, headers={
|
||||||
fox_pdk_player = settings['fox_pdk_player']
|
'apikey': 'abdcbed02c124d393b39e818a4312055',
|
||||||
release_url = fox_pdk_player['release_url']
|
'Content-Type': 'application/json',
|
||||||
query = {
|
'Referer': url,
|
||||||
'mbr': 'true',
|
})
|
||||||
'switch': 'http'
|
|
||||||
}
|
|
||||||
if fox_pdk_player.get('access') == 'locked':
|
|
||||||
ap_p = settings['foxAdobePassProvider']
|
|
||||||
rating = ap_p.get('videoRating')
|
|
||||||
if rating == 'n/a':
|
|
||||||
rating = None
|
|
||||||
resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating)
|
|
||||||
query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource)
|
|
||||||
|
|
||||||
info = self._search_json_ld(webpage, video_id, fatal=False)
|
title = video['name']
|
||||||
info.update({
|
|
||||||
'_type': 'url_transparent',
|
m3u8_url = self._download_json(
|
||||||
'ie_key': 'ThePlatform',
|
video['videoRelease']['url'], video_id)['playURL']
|
||||||
'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
|
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
m3u8_url, video_id, 'mp4',
|
||||||
|
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
description = video.get('description')
|
||||||
|
duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
|
||||||
|
video.get('duration')) or parse_duration(video.get('duration'))
|
||||||
|
timestamp = unified_timestamp(video.get('datePublished'))
|
||||||
|
age_limit = parse_age_limit(video.get('contentRating'))
|
||||||
|
|
||||||
|
data = try_get(
|
||||||
|
video, lambda x: x['trackingData']['properties'], dict) or {}
|
||||||
|
|
||||||
|
creator = data.get('brand') or data.get('network') or video.get('network')
|
||||||
|
|
||||||
|
series = video.get('seriesName') or data.get(
|
||||||
|
'seriesName') or data.get('show')
|
||||||
|
season_number = int_or_none(video.get('seasonNumber'))
|
||||||
|
episode = video.get('name')
|
||||||
|
episode_number = int_or_none(video.get('episodeNumber'))
|
||||||
|
release_year = int_or_none(video.get('releaseYear'))
|
||||||
|
|
||||||
|
if data.get('authRequired'):
|
||||||
|
# TODO: AP
|
||||||
|
pass
|
||||||
|
|
||||||
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
})
|
'title': title,
|
||||||
|
'description': description,
|
||||||
return info
|
'duration': duration,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'age_limit': age_limit,
|
||||||
|
'creator': creator,
|
||||||
|
'series': series,
|
||||||
|
'season_number': season_number,
|
||||||
|
'episode': episode,
|
||||||
|
'episode_number': episode_number,
|
||||||
|
'release_year': release_year,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
@ -1879,6 +1879,15 @@ class GenericIE(InfoExtractor):
|
|||||||
'title': 'Building A Business Online: Principal Chairs Q & A',
|
'title': 'Building A Business Online: Principal Chairs Q & A',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# multiple HTML5 videos on one page
|
||||||
|
'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'keyscenarios',
|
||||||
|
'title': 'Rescue Kit 14 Free Edition - Getting started',
|
||||||
|
},
|
||||||
|
'playlist_count': 4,
|
||||||
|
}
|
||||||
# {
|
# {
|
||||||
# # TODO: find another test
|
# # TODO: find another test
|
||||||
# # http://schema.org/VideoObject
|
# # http://schema.org/VideoObject
|
||||||
@ -2849,13 +2858,20 @@ class GenericIE(InfoExtractor):
|
|||||||
# Look for HTML5 media
|
# Look for HTML5 media
|
||||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||||
if entries:
|
if entries:
|
||||||
for entry in entries:
|
if len(entries) == 1:
|
||||||
entry.update({
|
entries[0].update({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
})
|
})
|
||||||
|
else:
|
||||||
|
for num, entry in enumerate(entries, start=1):
|
||||||
|
entry.update({
|
||||||
|
'id': '%s-%s' % (video_id, num),
|
||||||
|
'title': '%s (%d)' % (video_title, num),
|
||||||
|
})
|
||||||
|
for entry in entries:
|
||||||
self._sort_formats(entry['formats'])
|
self._sort_formats(entry['formats'])
|
||||||
return self.playlist_result(entries)
|
return self.playlist_result(entries, video_id, video_title)
|
||||||
|
|
||||||
jwplayer_data = self._find_jwplayer_data(
|
jwplayer_data = self._find_jwplayer_data(
|
||||||
webpage, video_id, transform_source=js_to_json)
|
webpage, video_id, transform_source=js_to_json)
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .youtube import YoutubeIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@ -25,6 +26,22 @@ class HeiseIE(InfoExtractor):
|
|||||||
'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20',
|
'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20',
|
||||||
'thumbnail': r're:^https?://.*/gallery/$',
|
'thumbnail': r're:^https?://.*/gallery/$',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# YouTube embed
|
||||||
|
'url': 'http://www.heise.de/newsticker/meldung/Netflix-In-20-Jahren-vom-Videoverleih-zum-TV-Revolutionaer-3814130.html',
|
||||||
|
'md5': 'e403d2b43fea8e405e88e3f8623909f1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6kmWbXleKW4',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'NEU IM SEPTEMBER | Netflix',
|
||||||
|
'description': 'md5:2131f3c7525e540d5fd841de938bd452',
|
||||||
|
'upload_date': '20170830',
|
||||||
|
'uploader': 'Netflix Deutschland, Österreich und Schweiz',
|
||||||
|
'uploader_id': 'netflixdach',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html',
|
'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -40,6 +57,16 @@ class HeiseIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
title = self._html_search_meta('fulltitle', webpage, default=None)
|
||||||
|
if not title or title == "c't":
|
||||||
|
title = self._search_regex(
|
||||||
|
r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
|
||||||
|
webpage, 'title')
|
||||||
|
|
||||||
|
yt_urls = YoutubeIE._extract_urls(webpage)
|
||||||
|
if yt_urls:
|
||||||
|
return self.playlist_from_matches(yt_urls, video_id, title, ie=YoutubeIE.ie_key())
|
||||||
|
|
||||||
container_id = self._search_regex(
|
container_id = self._search_regex(
|
||||||
r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
|
r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
|
||||||
webpage, 'container ID')
|
webpage, 'container ID')
|
||||||
@ -47,12 +74,6 @@ class HeiseIE(InfoExtractor):
|
|||||||
r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"',
|
r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"',
|
||||||
webpage, 'sequenz ID')
|
webpage, 'sequenz ID')
|
||||||
|
|
||||||
title = self._html_search_meta('fulltitle', webpage, default=None)
|
|
||||||
if not title or title == "c't":
|
|
||||||
title = self._search_regex(
|
|
||||||
r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
|
|
||||||
webpage, 'title')
|
|
||||||
|
|
||||||
doc = self._download_xml(
|
doc = self._download_xml(
|
||||||
'http://www.heise.de/videout/feed', video_id, query={
|
'http://www.heise.de/videout/feed', video_id, query={
|
||||||
'container': container_id,
|
'container': container_id,
|
||||||
|
149
youtube_dl/extractor/kakao.py
Normal file
149
youtube_dl/extractor/kakao.py
Normal file
@ -0,0 +1,149 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
unified_timestamp,
|
||||||
|
update_url_query,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class KakaoIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://tv\.kakao\.com/channel/(?P<channel>\d+)/cliplink/(?P<id>\d+)'
|
||||||
|
_API_BASE = 'http://tv.kakao.com/api/v1/ft/cliplinks'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083',
|
||||||
|
'md5': '702b2fbdeb51ad82f5c904e8c0766340',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '301965083',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動!顔高低差GPも!」 『乃木坂工事中』',
|
||||||
|
'uploader_id': 2671005,
|
||||||
|
'uploader': '그랑그랑이',
|
||||||
|
'timestamp': 1488160199,
|
||||||
|
'upload_date': '20170227',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://tv.kakao.com/channel/2653210/cliplink/300103180',
|
||||||
|
'md5': 'a8917742069a4dd442516b86e7d66529',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '300103180',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
|
||||||
|
'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
|
||||||
|
'uploader_id': 2653210,
|
||||||
|
'uploader': '쇼 음악중심',
|
||||||
|
'timestamp': 1485684628,
|
||||||
|
'upload_date': '20170129',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
player_header = {
|
||||||
|
'Referer': update_url_query(
|
||||||
|
'http://tv.kakao.com/embed/player/cliplink/%s' % video_id, {
|
||||||
|
'service': 'kakao_tv',
|
||||||
|
'autoplay': '1',
|
||||||
|
'profile': 'HIGH',
|
||||||
|
'wmode': 'transparent',
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
QUERY_COMMON = {
|
||||||
|
'player': 'monet_html5',
|
||||||
|
'referer': url,
|
||||||
|
'uuid': '',
|
||||||
|
'service': 'kakao_tv',
|
||||||
|
'section': '',
|
||||||
|
'dteType': 'PC',
|
||||||
|
}
|
||||||
|
|
||||||
|
query = QUERY_COMMON.copy()
|
||||||
|
query['fields'] = 'clipLink,clip,channel,hasPlusFriend,-service,-tagList'
|
||||||
|
impress = self._download_json(
|
||||||
|
'%s/%s/impress' % (self._API_BASE, video_id),
|
||||||
|
video_id, 'Downloading video info',
|
||||||
|
query=query, headers=player_header)
|
||||||
|
|
||||||
|
clip_link = impress['clipLink']
|
||||||
|
clip = clip_link['clip']
|
||||||
|
|
||||||
|
title = clip.get('title') or clip_link.get('displayTitle')
|
||||||
|
|
||||||
|
tid = impress.get('tid', '')
|
||||||
|
|
||||||
|
query = QUERY_COMMON.copy()
|
||||||
|
query.update({
|
||||||
|
'tid': tid,
|
||||||
|
'profile': 'HIGH',
|
||||||
|
})
|
||||||
|
raw = self._download_json(
|
||||||
|
'%s/%s/raw' % (self._API_BASE, video_id),
|
||||||
|
video_id, 'Downloading video formats info',
|
||||||
|
query=query, headers=player_header)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for fmt in raw.get('outputList', []):
|
||||||
|
try:
|
||||||
|
profile_name = fmt['profile']
|
||||||
|
fmt_url_json = self._download_json(
|
||||||
|
'%s/%s/raw/videolocation' % (self._API_BASE, video_id),
|
||||||
|
video_id,
|
||||||
|
'Downloading video URL for profile %s' % profile_name,
|
||||||
|
query={
|
||||||
|
'service': 'kakao_tv',
|
||||||
|
'section': '',
|
||||||
|
'tid': tid,
|
||||||
|
'profile': profile_name
|
||||||
|
}, headers=player_header, fatal=False)
|
||||||
|
|
||||||
|
if fmt_url_json is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
fmt_url = fmt_url_json['url']
|
||||||
|
formats.append({
|
||||||
|
'url': fmt_url,
|
||||||
|
'format_id': profile_name,
|
||||||
|
'width': int_or_none(fmt.get('width')),
|
||||||
|
'height': int_or_none(fmt.get('height')),
|
||||||
|
'format_note': fmt.get('label'),
|
||||||
|
'filesize': int_or_none(fmt.get('filesize'))
|
||||||
|
})
|
||||||
|
except KeyError:
|
||||||
|
pass
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
thumbs = []
|
||||||
|
for thumb in clip.get('clipChapterThumbnailList', []):
|
||||||
|
thumbs.append({
|
||||||
|
'url': thumb.get('thumbnailUrl'),
|
||||||
|
'id': compat_str(thumb.get('timeInSec')),
|
||||||
|
'preference': -1 if thumb.get('isDefault') else 0
|
||||||
|
})
|
||||||
|
top_thumbnail = clip.get('thumbnailUrl')
|
||||||
|
if top_thumbnail:
|
||||||
|
thumbs.append({
|
||||||
|
'url': top_thumbnail,
|
||||||
|
'preference': 10,
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': clip.get('description'),
|
||||||
|
'uploader': clip_link.get('channel', {}).get('name'),
|
||||||
|
'uploader_id': clip_link.get('channelId'),
|
||||||
|
'thumbnails': thumbs,
|
||||||
|
'timestamp': unified_timestamp(clip_link.get('createTime')),
|
||||||
|
'duration': int_or_none(clip.get('duration')),
|
||||||
|
'view_count': int_or_none(clip.get('playCount')),
|
||||||
|
'like_count': int_or_none(clip.get('likeCount')),
|
||||||
|
'comment_count': int_or_none(clip.get('commentCount')),
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@ -94,7 +94,7 @@ class LyndaBaseIE(InfoExtractor):
|
|||||||
class LyndaIE(LyndaBaseIE):
|
class LyndaIE(LyndaBaseIE):
|
||||||
IE_NAME = 'lynda'
|
IE_NAME = 'lynda'
|
||||||
IE_DESC = 'lynda.com videos'
|
IE_DESC = 'lynda.com videos'
|
||||||
_VALID_URL = r'https?://(?:www\.)?lynda\.com/(?:[^/]+/[^/]+/(?P<course_id>\d+)|player/embed)/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:lynda\.com|educourse\.ga)/(?:[^/]+/[^/]+/(?P<course_id>\d+)|player/embed)/(?P<id>\d+)'
|
||||||
|
|
||||||
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
|
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
|
||||||
|
|
||||||
@ -110,6 +110,9 @@ class LyndaIE(LyndaBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0',
|
'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://educourse.ga/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _raise_unavailable(self, video_id):
|
def _raise_unavailable(self, video_id):
|
||||||
@ -253,7 +256,7 @@ class LyndaCourseIE(LyndaBaseIE):
|
|||||||
|
|
||||||
# Course link equals to welcome/introduction video link of same course
|
# Course link equals to welcome/introduction video link of same course
|
||||||
# We will recognize it as course link
|
# We will recognize it as course link
|
||||||
_VALID_URL = r'https?://(?:www|m)\.lynda\.com/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html'
|
_VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
@ -12,12 +12,16 @@ from ..compat import (
|
|||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
|
compat_zip
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
OnDemandPagedList,
|
OnDemandPagedList,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
|
try_get,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -54,27 +58,12 @@ class MixcloudIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_keys = [
|
@staticmethod
|
||||||
'return { requestAnimationFrame: function(callback) { callback(); }, innerHeight: 500 };',
|
def _decrypt_xor_cipher(key, ciphertext):
|
||||||
'pleasedontdownloadourmusictheartistswontgetpaid',
|
"""Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR."""
|
||||||
'window.addEventListener = window.addEventListener || function() {};',
|
return ''.join([
|
||||||
'(function() { return new Date().toLocaleDateString(); })()'
|
compat_chr(compat_ord(ch) ^ compat_ord(k))
|
||||||
]
|
for ch, k in compat_zip(ciphertext, itertools.cycle(key))])
|
||||||
_current_key = None
|
|
||||||
|
|
||||||
# See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js
|
|
||||||
def _decrypt_play_info(self, play_info, video_id):
|
|
||||||
play_info = base64.b64decode(play_info.encode('ascii'))
|
|
||||||
for num, key in enumerate(self._keys, start=1):
|
|
||||||
try:
|
|
||||||
return self._parse_json(
|
|
||||||
''.join([
|
|
||||||
compat_chr(compat_ord(ch) ^ compat_ord(key[idx % len(key)]))
|
|
||||||
for idx, ch in enumerate(play_info)]),
|
|
||||||
video_id)
|
|
||||||
except ExtractorError:
|
|
||||||
if num == len(self._keys):
|
|
||||||
raise
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@ -84,54 +73,119 @@ class MixcloudIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, track_id)
|
webpage = self._download_webpage(url, track_id)
|
||||||
|
|
||||||
if not self._current_key:
|
# Legacy path
|
||||||
js_url = self._search_regex(
|
encrypted_play_info = self._search_regex(
|
||||||
r'<script[^>]+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/js2/www_js_4\.[^>]+\.js)',
|
r'm-play-info="([^"]+)"', webpage, 'play info', default=None)
|
||||||
webpage, 'js url', default=None)
|
|
||||||
if js_url:
|
if encrypted_play_info is not None:
|
||||||
js = self._download_webpage(js_url, track_id, fatal=False)
|
# Decode
|
||||||
if js:
|
encrypted_play_info = base64.b64decode(encrypted_play_info)
|
||||||
KEY_RE_TEMPLATE = r'player\s*:\s*{.*?\b%s\s*:\s*(["\'])(?P<key>(?:(?!\1).)+)\1'
|
else:
|
||||||
for key_name in ('value', 'key_value', 'key_value.*?', '.*?value.*?'):
|
# New path
|
||||||
key = self._search_regex(
|
full_info_json = self._parse_json(self._html_search_regex(
|
||||||
KEY_RE_TEMPLATE % key_name, js, 'key',
|
r'<script id="relay-data" type="text/x-mixcloud">([^<]+)</script>',
|
||||||
default=None, group='key')
|
webpage, 'play info'), 'play info')
|
||||||
if key and isinstance(key, compat_str):
|
for item in full_info_json:
|
||||||
self._keys.insert(0, key)
|
item_data = try_get(
|
||||||
self._current_key = key
|
item, lambda x: x['cloudcast']['data']['cloudcastLookup'],
|
||||||
|
dict)
|
||||||
|
if try_get(item_data, lambda x: x['streamInfo']['url']):
|
||||||
|
info_json = item_data
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
raise ExtractorError('Failed to extract matching stream info')
|
||||||
|
|
||||||
message = self._html_search_regex(
|
message = self._html_search_regex(
|
||||||
r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
|
r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
|
||||||
webpage, 'error message', default=None)
|
webpage, 'error message', default=None)
|
||||||
|
|
||||||
encrypted_play_info = self._search_regex(
|
js_url = self._search_regex(
|
||||||
r'm-play-info="([^"]+)"', webpage, 'play info')
|
r'<script[^>]+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/(?:js2/www_js_4|js/www)\.[^>]+\.js)',
|
||||||
|
webpage, 'js url')
|
||||||
|
js = self._download_webpage(js_url, track_id, 'Downloading JS')
|
||||||
|
# Known plaintext attack
|
||||||
|
if encrypted_play_info:
|
||||||
|
kps = ['{"stream_url":']
|
||||||
|
kpa_target = encrypted_play_info
|
||||||
|
else:
|
||||||
|
kps = ['https://', 'http://']
|
||||||
|
kpa_target = base64.b64decode(info_json['streamInfo']['url'])
|
||||||
|
for kp in kps:
|
||||||
|
partial_key = self._decrypt_xor_cipher(kpa_target, kp)
|
||||||
|
for quote in ["'", '"']:
|
||||||
|
key = self._search_regex(
|
||||||
|
r'{0}({1}[^{0}]*){0}'.format(quote, re.escape(partial_key)),
|
||||||
|
js, 'encryption key', default=None)
|
||||||
|
if key is not None:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
raise ExtractorError('Failed to extract encryption key')
|
||||||
|
|
||||||
play_info = self._decrypt_play_info(encrypted_play_info, track_id)
|
if encrypted_play_info is not None:
|
||||||
|
play_info = self._parse_json(self._decrypt_xor_cipher(key, encrypted_play_info), 'play info')
|
||||||
|
if message and 'stream_url' not in play_info:
|
||||||
|
raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
|
||||||
|
song_url = play_info['stream_url']
|
||||||
|
formats = [{
|
||||||
|
'format_id': 'normal',
|
||||||
|
'url': song_url
|
||||||
|
}]
|
||||||
|
|
||||||
if message and 'stream_url' not in play_info:
|
title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title')
|
||||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
|
thumbnail = self._proto_relative_url(self._html_search_regex(
|
||||||
|
r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False))
|
||||||
|
uploader = self._html_search_regex(
|
||||||
|
r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False)
|
||||||
|
uploader_id = self._search_regex(
|
||||||
|
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
|
||||||
|
description = self._og_search_description(webpage)
|
||||||
|
view_count = str_to_int(self._search_regex(
|
||||||
|
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
||||||
|
r'/listeners/?">([0-9,.]+)</a>',
|
||||||
|
r'(?:m|data)-tooltip=["\']([\d,.]+) plays'],
|
||||||
|
webpage, 'play count', default=None))
|
||||||
|
|
||||||
song_url = play_info['stream_url']
|
else:
|
||||||
|
title = info_json['name']
|
||||||
|
thumbnail = urljoin(
|
||||||
|
'https://thumbnailer.mixcloud.com/unsafe/600x600/',
|
||||||
|
try_get(info_json, lambda x: x['picture']['urlRoot'], compat_str))
|
||||||
|
uploader = try_get(info_json, lambda x: x['owner']['displayName'])
|
||||||
|
uploader_id = try_get(info_json, lambda x: x['owner']['username'])
|
||||||
|
description = try_get(info_json, lambda x: x['description'])
|
||||||
|
view_count = int_or_none(try_get(info_json, lambda x: x['plays']))
|
||||||
|
|
||||||
title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title')
|
stream_info = info_json['streamInfo']
|
||||||
thumbnail = self._proto_relative_url(self._html_search_regex(
|
formats = []
|
||||||
r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False))
|
|
||||||
uploader = self._html_search_regex(
|
for url_key in ('url', 'hlsUrl', 'dashUrl'):
|
||||||
r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False)
|
format_url = stream_info.get(url_key)
|
||||||
uploader_id = self._search_regex(
|
if not format_url:
|
||||||
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
|
continue
|
||||||
description = self._og_search_description(webpage)
|
decrypted = self._decrypt_xor_cipher(key, base64.b64decode(format_url))
|
||||||
view_count = str_to_int(self._search_regex(
|
if not decrypted:
|
||||||
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
continue
|
||||||
r'/listeners/?">([0-9,.]+)</a>',
|
if url_key == 'hlsUrl':
|
||||||
r'(?:m|data)-tooltip=["\']([\d,.]+) plays'],
|
formats.extend(self._extract_m3u8_formats(
|
||||||
webpage, 'play count', default=None))
|
decrypted, track_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
elif url_key == 'dashUrl':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
decrypted, track_id, mpd_id='dash', fatal=False))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'http',
|
||||||
|
'url': decrypted,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': track_id,
|
'id': track_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'url': song_url,
|
'formats': formats,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
|
@ -8,8 +8,8 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
class MorningstarIE(InfoExtractor):
|
class MorningstarIE(InfoExtractor):
|
||||||
IE_DESC = 'morningstar.com'
|
IE_DESC = 'morningstar.com'
|
||||||
_VALID_URL = r'https?://(?:www\.)?morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:(?:www|news)\.)morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
|
'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
|
||||||
'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
|
'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -19,7 +19,10 @@ class MorningstarIE(InfoExtractor):
|
|||||||
'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.",
|
'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.",
|
||||||
'thumbnail': r're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$'
|
'thumbnail': r're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$'
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://news.morningstar.com/cover/videocenter.aspx?id=825556',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
@ -109,10 +109,10 @@ class NBCSportsVPlayerIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
|
_VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI',
|
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '9CsDKds0kvHI',
|
'id': '9CsDKds0kvHI',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
|
'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
|
||||||
'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
|
'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
|
||||||
'timestamp': 1426270238,
|
'timestamp': 1426270238,
|
||||||
@ -120,7 +120,7 @@ class NBCSportsVPlayerIE(InfoExtractor):
|
|||||||
'uploader': 'NBCU-SPORTS',
|
'uploader': 'NBCU-SPORTS',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
|
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/_hqLjQ95yx8Z',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@ -134,7 +134,8 @@ class NBCSportsVPlayerIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
theplatform_url = self._og_search_video_url(webpage)
|
theplatform_url = self._og_search_video_url(webpage).replace(
|
||||||
|
'vplayer.nbcsports.com', 'player.theplatform.com')
|
||||||
return self.url_result(theplatform_url, 'ThePlatform')
|
return self.url_result(theplatform_url, 'ThePlatform')
|
||||||
|
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@ from .common import InfoExtractor
|
|||||||
from ..compat import compat_str
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
try_get,
|
try_get,
|
||||||
)
|
)
|
||||||
@ -24,8 +25,6 @@ class NoovoIE(InfoExtractor):
|
|||||||
'timestamp': 1491399228,
|
'timestamp': 1491399228,
|
||||||
'upload_date': '20170405',
|
'upload_date': '20170405',
|
||||||
'uploader_id': '618566855001',
|
'uploader_id': '618566855001',
|
||||||
'creator': 'vtele',
|
|
||||||
'view_count': int,
|
|
||||||
'series': 'RPM+',
|
'series': 'RPM+',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@ -37,13 +36,11 @@ class NoovoIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5395865725001',
|
'id': '5395865725001',
|
||||||
'title': 'Épisode 13 : Les retrouvailles',
|
'title': 'Épisode 13 : Les retrouvailles',
|
||||||
'description': 'md5:336d5ebc5436534e61d16e63ddfca327',
|
'description': 'md5:888c3330f0c1b4476c5bc99a1c040473',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'timestamp': 1492019320,
|
'timestamp': 1492019320,
|
||||||
'upload_date': '20170412',
|
'upload_date': '20170412',
|
||||||
'uploader_id': '618566855001',
|
'uploader_id': '618566855001',
|
||||||
'creator': 'vtele',
|
|
||||||
'view_count': int,
|
|
||||||
'series': "L'amour est dans le pré",
|
'series': "L'amour est dans le pré",
|
||||||
'season_number': 5,
|
'season_number': 5,
|
||||||
'episode': 'Épisode 13',
|
'episode': 'Épisode 13',
|
||||||
@ -58,40 +55,46 @@ class NoovoIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
data = self._download_json(
|
webpage = self._download_webpage(url, video_id)
|
||||||
'http://api.noovo.ca/api/v1/pages/single-episode/%s' % video_id,
|
|
||||||
video_id)['data']
|
|
||||||
|
|
||||||
content = try_get(data, lambda x: x['contents'][0])
|
bc_url = BrightcoveNewIE._extract_url(self, webpage)
|
||||||
|
|
||||||
brightcove_id = data.get('brightcoveId') or content['brightcoveId']
|
data = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'(?s)dataLayer\.push\(\s*({.+?})\s*\);', webpage, 'data',
|
||||||
|
default='{}'),
|
||||||
|
video_id, transform_source=js_to_json, fatal=False)
|
||||||
|
|
||||||
|
title = try_get(
|
||||||
|
data, lambda x: x['video']['nom'],
|
||||||
|
compat_str) or self._html_search_meta(
|
||||||
|
'dcterms.Title', webpage, 'title', fatal=True)
|
||||||
|
|
||||||
|
description = self._html_search_meta(
|
||||||
|
('dcterms.Description', 'description'), webpage, 'description')
|
||||||
|
|
||||||
series = try_get(
|
series = try_get(
|
||||||
data, (
|
data, lambda x: x['emission']['nom']) or self._search_regex(
|
||||||
lambda x: x['show']['title'],
|
r'<div[^>]+class="banner-card__subtitle h4"[^>]*>([^<]+)',
|
||||||
lambda x: x['season']['show']['title']),
|
webpage, 'series', default=None)
|
||||||
compat_str)
|
|
||||||
|
|
||||||
episode = None
|
season_el = try_get(data, lambda x: x['emission']['saison'], dict) or {}
|
||||||
og = data.get('og')
|
season = try_get(season_el, lambda x: x['nom'], compat_str)
|
||||||
if isinstance(og, dict) and og.get('type') == 'video.episode':
|
season_number = int_or_none(try_get(season_el, lambda x: x['numero']))
|
||||||
episode = og.get('title')
|
|
||||||
|
|
||||||
video = content or data
|
episode_el = try_get(season_el, lambda x: x['episode'], dict) or {}
|
||||||
|
episode = try_get(episode_el, lambda x: x['nom'], compat_str)
|
||||||
|
episode_number = int_or_none(try_get(episode_el, lambda x: x['numero']))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'ie_key': BrightcoveNewIE.ie_key(),
|
'ie_key': BrightcoveNewIE.ie_key(),
|
||||||
'url': smuggle_url(
|
'url': smuggle_url(bc_url, {'geo_countries': ['CA']}),
|
||||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
'title': title,
|
||||||
{'geo_countries': ['CA']}),
|
'description': description,
|
||||||
'id': brightcove_id,
|
|
||||||
'title': video.get('title'),
|
|
||||||
'creator': video.get('source'),
|
|
||||||
'view_count': int_or_none(video.get('viewsCount')),
|
|
||||||
'series': series,
|
'series': series,
|
||||||
'season_number': int_or_none(try_get(
|
'season': season,
|
||||||
data, lambda x: x['season']['seasonNumber'])),
|
'season_number': season_number,
|
||||||
'episode': episode,
|
'episode': episode,
|
||||||
'episode_number': int_or_none(data.get('episodeNumber')),
|
'episode_number': episode_number,
|
||||||
}
|
}
|
||||||
|
@ -1,14 +1,244 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_chr
|
from ..compat import (
|
||||||
from ..utils import (
|
compat_urlparse,
|
||||||
determine_ext,
|
compat_kwargs,
|
||||||
ExtractorError,
|
|
||||||
)
|
)
|
||||||
|
from ..utils import (
|
||||||
|
check_executable,
|
||||||
|
determine_ext,
|
||||||
|
encodeArgument,
|
||||||
|
ExtractorError,
|
||||||
|
get_element_by_id,
|
||||||
|
get_exe_version,
|
||||||
|
is_outdated_version,
|
||||||
|
std_headers,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def cookie_to_dict(cookie):
|
||||||
|
cookie_dict = {
|
||||||
|
'name': cookie.name,
|
||||||
|
'value': cookie.value,
|
||||||
|
}
|
||||||
|
if cookie.port_specified:
|
||||||
|
cookie_dict['port'] = cookie.port
|
||||||
|
if cookie.domain_specified:
|
||||||
|
cookie_dict['domain'] = cookie.domain
|
||||||
|
if cookie.path_specified:
|
||||||
|
cookie_dict['path'] = cookie.path
|
||||||
|
if cookie.expires is not None:
|
||||||
|
cookie_dict['expires'] = cookie.expires
|
||||||
|
if cookie.secure is not None:
|
||||||
|
cookie_dict['secure'] = cookie.secure
|
||||||
|
if cookie.discard is not None:
|
||||||
|
cookie_dict['discard'] = cookie.discard
|
||||||
|
try:
|
||||||
|
if (cookie.has_nonstandard_attr('httpOnly') or
|
||||||
|
cookie.has_nonstandard_attr('httponly') or
|
||||||
|
cookie.has_nonstandard_attr('HttpOnly')):
|
||||||
|
cookie_dict['httponly'] = True
|
||||||
|
except TypeError:
|
||||||
|
pass
|
||||||
|
return cookie_dict
|
||||||
|
|
||||||
|
|
||||||
|
def cookie_jar_to_list(cookie_jar):
|
||||||
|
return [cookie_to_dict(cookie) for cookie in cookie_jar]
|
||||||
|
|
||||||
|
|
||||||
|
class PhantomJSwrapper(object):
|
||||||
|
"""PhantomJS wrapper class
|
||||||
|
|
||||||
|
This class is experimental.
|
||||||
|
"""
|
||||||
|
|
||||||
|
_TEMPLATE = r'''
|
||||||
|
phantom.onError = function(msg, trace) {{
|
||||||
|
var msgStack = ['PHANTOM ERROR: ' + msg];
|
||||||
|
if(trace && trace.length) {{
|
||||||
|
msgStack.push('TRACE:');
|
||||||
|
trace.forEach(function(t) {{
|
||||||
|
msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line
|
||||||
|
+ (t.function ? ' (in function ' + t.function +')' : ''));
|
||||||
|
}});
|
||||||
|
}}
|
||||||
|
console.error(msgStack.join('\n'));
|
||||||
|
phantom.exit(1);
|
||||||
|
}};
|
||||||
|
var page = require('webpage').create();
|
||||||
|
var fs = require('fs');
|
||||||
|
var read = {{ mode: 'r', charset: 'utf-8' }};
|
||||||
|
var write = {{ mode: 'w', charset: 'utf-8' }};
|
||||||
|
JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{
|
||||||
|
phantom.addCookie(x);
|
||||||
|
}});
|
||||||
|
page.settings.resourceTimeout = {timeout};
|
||||||
|
page.settings.userAgent = "{ua}";
|
||||||
|
page.onLoadStarted = function() {{
|
||||||
|
page.evaluate(function() {{
|
||||||
|
delete window._phantom;
|
||||||
|
delete window.callPhantom;
|
||||||
|
}});
|
||||||
|
}};
|
||||||
|
var saveAndExit = function() {{
|
||||||
|
fs.write("{html}", page.content, write);
|
||||||
|
fs.write("{cookies}", JSON.stringify(phantom.cookies), write);
|
||||||
|
phantom.exit();
|
||||||
|
}};
|
||||||
|
page.onLoadFinished = function(status) {{
|
||||||
|
if(page.url === "") {{
|
||||||
|
page.setContent(fs.read("{html}", read), "{url}");
|
||||||
|
}}
|
||||||
|
else {{
|
||||||
|
{jscode}
|
||||||
|
}}
|
||||||
|
}};
|
||||||
|
page.open("");
|
||||||
|
'''
|
||||||
|
|
||||||
|
_TMP_FILE_NAMES = ['script', 'html', 'cookies']
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _version():
|
||||||
|
return get_exe_version('phantomjs', version_re=r'([0-9.]+)')
|
||||||
|
|
||||||
|
def __init__(self, extractor, required_version=None, timeout=10000):
|
||||||
|
self.exe = check_executable('phantomjs', ['-v'])
|
||||||
|
if not self.exe:
|
||||||
|
raise ExtractorError('PhantomJS executable not found in PATH, '
|
||||||
|
'download it from http://phantomjs.org',
|
||||||
|
expected=True)
|
||||||
|
|
||||||
|
self.extractor = extractor
|
||||||
|
|
||||||
|
if required_version:
|
||||||
|
version = self._version()
|
||||||
|
if is_outdated_version(version, required_version):
|
||||||
|
self.extractor._downloader.report_warning(
|
||||||
|
'Your copy of PhantomJS is outdated, update it to version '
|
||||||
|
'%s or newer if you encounter any errors.' % required_version)
|
||||||
|
|
||||||
|
self.options = {
|
||||||
|
'timeout': timeout,
|
||||||
|
}
|
||||||
|
self._TMP_FILES = {}
|
||||||
|
for name in self._TMP_FILE_NAMES:
|
||||||
|
tmp = tempfile.NamedTemporaryFile(delete=False)
|
||||||
|
tmp.close()
|
||||||
|
self._TMP_FILES[name] = tmp
|
||||||
|
|
||||||
|
def __del__(self):
|
||||||
|
for name in self._TMP_FILE_NAMES:
|
||||||
|
try:
|
||||||
|
os.remove(self._TMP_FILES[name].name)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def _save_cookies(self, url):
|
||||||
|
cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar)
|
||||||
|
for cookie in cookies:
|
||||||
|
if 'path' not in cookie:
|
||||||
|
cookie['path'] = '/'
|
||||||
|
if 'domain' not in cookie:
|
||||||
|
cookie['domain'] = compat_urlparse.urlparse(url).netloc
|
||||||
|
with open(self._TMP_FILES['cookies'].name, 'wb') as f:
|
||||||
|
f.write(json.dumps(cookies).encode('utf-8'))
|
||||||
|
|
||||||
|
def _load_cookies(self):
|
||||||
|
with open(self._TMP_FILES['cookies'].name, 'rb') as f:
|
||||||
|
cookies = json.loads(f.read().decode('utf-8'))
|
||||||
|
for cookie in cookies:
|
||||||
|
if cookie['httponly'] is True:
|
||||||
|
cookie['rest'] = {'httpOnly': None}
|
||||||
|
if 'expiry' in cookie:
|
||||||
|
cookie['expire_time'] = cookie['expiry']
|
||||||
|
self.extractor._set_cookie(**compat_kwargs(cookie))
|
||||||
|
|
||||||
|
def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'):
|
||||||
|
"""
|
||||||
|
Downloads webpage (if needed) and executes JS
|
||||||
|
|
||||||
|
Params:
|
||||||
|
url: website url
|
||||||
|
html: optional, html code of website
|
||||||
|
video_id: video id
|
||||||
|
note: optional, displayed when downloading webpage
|
||||||
|
note2: optional, displayed when executing JS
|
||||||
|
headers: custom http headers
|
||||||
|
jscode: code to be executed when page is loaded
|
||||||
|
|
||||||
|
Returns tuple with:
|
||||||
|
* downloaded website (after JS execution)
|
||||||
|
* anything you print with `console.log` (but not inside `page.execute`!)
|
||||||
|
|
||||||
|
In most cases you don't need to add any `jscode`.
|
||||||
|
It is executed in `page.onLoadFinished`.
|
||||||
|
`saveAndExit();` is mandatory, use it instead of `phantom.exit()`
|
||||||
|
It is possible to wait for some element on the webpage, for example:
|
||||||
|
var check = function() {
|
||||||
|
var elementFound = page.evaluate(function() {
|
||||||
|
return document.querySelector('#b.done') !== null;
|
||||||
|
});
|
||||||
|
if(elementFound)
|
||||||
|
saveAndExit();
|
||||||
|
else
|
||||||
|
window.setTimeout(check, 500);
|
||||||
|
}
|
||||||
|
|
||||||
|
page.evaluate(function(){
|
||||||
|
document.querySelector('#a').click();
|
||||||
|
});
|
||||||
|
check();
|
||||||
|
"""
|
||||||
|
if 'saveAndExit();' not in jscode:
|
||||||
|
raise ExtractorError('`saveAndExit();` not found in `jscode`')
|
||||||
|
if not html:
|
||||||
|
html = self.extractor._download_webpage(url, video_id, note=note, headers=headers)
|
||||||
|
with open(self._TMP_FILES['html'].name, 'wb') as f:
|
||||||
|
f.write(html.encode('utf-8'))
|
||||||
|
|
||||||
|
self._save_cookies(url)
|
||||||
|
|
||||||
|
replaces = self.options
|
||||||
|
replaces['url'] = url
|
||||||
|
user_agent = headers.get('User-Agent') or std_headers['User-Agent']
|
||||||
|
replaces['ua'] = user_agent.replace('"', '\\"')
|
||||||
|
replaces['jscode'] = jscode
|
||||||
|
|
||||||
|
for x in self._TMP_FILE_NAMES:
|
||||||
|
replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"')
|
||||||
|
|
||||||
|
with open(self._TMP_FILES['script'].name, 'wb') as f:
|
||||||
|
f.write(self._TEMPLATE.format(**replaces).encode('utf-8'))
|
||||||
|
|
||||||
|
if video_id is None:
|
||||||
|
self.extractor.to_screen('%s' % (note2,))
|
||||||
|
else:
|
||||||
|
self.extractor.to_screen('%s: %s' % (video_id, note2))
|
||||||
|
|
||||||
|
p = subprocess.Popen([
|
||||||
|
self.exe, '--ssl-protocol=any',
|
||||||
|
self._TMP_FILES['script'].name
|
||||||
|
], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
|
out, err = p.communicate()
|
||||||
|
if p.returncode != 0:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Executing JS failed\n:' + encodeArgument(err))
|
||||||
|
with open(self._TMP_FILES['html'].name, 'rb') as f:
|
||||||
|
html = f.read().decode('utf-8')
|
||||||
|
|
||||||
|
self._load_cookies()
|
||||||
|
|
||||||
|
return (html, encodeArgument(out))
|
||||||
|
|
||||||
|
|
||||||
class OpenloadIE(InfoExtractor):
|
class OpenloadIE(InfoExtractor):
|
||||||
@ -58,6 +288,8 @@ class OpenloadIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
return re.findall(
|
return re.findall(
|
||||||
@ -66,47 +298,22 @@ class OpenloadIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id)
|
url = 'https://openload.co/embed/%s/' % video_id
|
||||||
|
headers = {
|
||||||
|
'User-Agent': self._USER_AGENT,
|
||||||
|
}
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id, headers=headers)
|
||||||
|
|
||||||
if 'File not found' in webpage or 'deleted by the owner' in webpage:
|
if 'File not found' in webpage or 'deleted by the owner' in webpage:
|
||||||
raise ExtractorError('File not found', expected=True)
|
raise ExtractorError('File not found', expected=True, video_id=video_id)
|
||||||
|
|
||||||
ol_id = self._search_regex(
|
phantom = PhantomJSwrapper(self, required_version='2.0')
|
||||||
'<span[^>]+id="[^"]+"[^>]*>([0-9A-Za-z]+)</span>',
|
webpage, _ = phantom.get(url, html=webpage, video_id=video_id, headers=headers)
|
||||||
webpage, 'openload ID')
|
|
||||||
|
|
||||||
decoded = ''
|
decoded_id = get_element_by_id('streamurl', webpage)
|
||||||
a = ol_id[0:24]
|
|
||||||
b = []
|
|
||||||
for i in range(0, len(a), 8):
|
|
||||||
b.append(int(a[i:i + 8] or '0', 16))
|
|
||||||
ol_id = ol_id[24:]
|
|
||||||
j = 0
|
|
||||||
k = 0
|
|
||||||
while j < len(ol_id):
|
|
||||||
c = 128
|
|
||||||
d = 0
|
|
||||||
e = 0
|
|
||||||
f = 0
|
|
||||||
_more = True
|
|
||||||
while _more:
|
|
||||||
if j + 1 >= len(ol_id):
|
|
||||||
c = 143
|
|
||||||
f = int(ol_id[j:j + 2] or '0', 16)
|
|
||||||
j += 2
|
|
||||||
d += (f & 127) << e
|
|
||||||
e += 7
|
|
||||||
_more = f >= c
|
|
||||||
g = d ^ b[k % 3]
|
|
||||||
for i in range(4):
|
|
||||||
char_dec = (g >> 8 * i) & (c + 127)
|
|
||||||
char = compat_chr(char_dec)
|
|
||||||
if char != '#':
|
|
||||||
decoded += char
|
|
||||||
k += 1
|
|
||||||
|
|
||||||
video_url = 'https://openload.co/stream/%s?mime=true'
|
video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id
|
||||||
video_url = video_url % decoded
|
|
||||||
|
|
||||||
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||||
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
|
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
|
||||||
@ -114,15 +321,17 @@ class OpenloadIE(InfoExtractor):
|
|||||||
'description', webpage, 'title', fatal=True)
|
'description', webpage, 'title', fatal=True)
|
||||||
|
|
||||||
entries = self._parse_html5_media_entries(url, webpage, video_id)
|
entries = self._parse_html5_media_entries(url, webpage, video_id)
|
||||||
subtitles = entries[0]['subtitles'] if entries else None
|
entry = entries[0] if entries else {}
|
||||||
|
subtitles = entry.get('subtitles')
|
||||||
|
|
||||||
info_dict = {
|
info_dict = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
# Seems all videos have extensions in their titles
|
# Seems all videos have extensions in their titles
|
||||||
'ext': determine_ext(title, 'mp4'),
|
'ext': determine_ext(title, 'mp4'),
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
'http_headers': headers,
|
||||||
}
|
}
|
||||||
return info_dict
|
return info_dict
|
||||||
|
@ -6,14 +6,15 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
HEADRequest,
|
|
||||||
unified_strdate,
|
|
||||||
strip_jsonp,
|
|
||||||
int_or_none,
|
|
||||||
float_or_none,
|
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
float_or_none,
|
||||||
|
HEADRequest,
|
||||||
|
int_or_none,
|
||||||
|
orderedSet,
|
||||||
remove_end,
|
remove_end,
|
||||||
|
strip_jsonp,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -307,3 +308,108 @@ class ORFIPTVIE(InfoExtractor):
|
|||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ORFFM4StoryIE(InfoExtractor):
    """Extractor for fm4.orf.at story pages.

    A story page may embed several videos; every distinct embedded video id
    found in the page markup becomes one entry of the returned playlist.
    """
    IE_NAME = 'orf:fm4:story'
    IE_DESC = 'fm4.orf.at stories'
    _VALID_URL = r'https?://fm4\.orf\.at/stories/(?P<id>\d+)'

    _TEST = {
        'url': 'http://fm4.orf.at/stories/2865738/',
        'playlist': [{
            'md5': 'e1c2c706c45c7b34cf478bbf409907ca',
            'info_dict': {
                'id': '547792',
                'ext': 'flv',
                'title': 'Manu Delago und Inner Tongue live',
                'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
                'duration': 1748.52,
                'thumbnail': r're:^https?://.*\.jpg$',
                'upload_date': '20170913',
            },
        }, {
            'md5': 'c6dd2179731f86f4f55a7b49899d515f',
            'info_dict': {
                'id': '547798',
                'ext': 'flv',
                'title': 'Manu Delago und Inner Tongue live (2)',
                'duration': 1504.08,
                'thumbnail': r're:^https?://.*\.jpg$',
                'upload_date': '20170913',
                'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
            },
        }],
    }

    def _real_extract(self, url):
        """Return a playlist with one entry per video embedded in the story."""
        story_id = self._match_id(url)
        webpage = self._download_webpage(url, story_id)

        # Title, description and upload date come from the story page itself
        # and are identical for every embedded video, so extract them once
        # instead of once per entry.
        base_title = remove_end(
            self._og_search_title(webpage), ' - fm4.ORF.at')
        description = self._og_search_description(webpage)
        upload_date = unified_strdate(self._html_search_meta(
            'dc.date', webpage, 'upload date'))

        entries = []
        all_ids = orderedSet(re.findall(r'data-video(?:id)?="(\d+)"', webpage))
        for idx, video_id in enumerate(all_ids):
            data = self._download_json(
                'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
                video_id)[0]

            # Duration is optional metadata (divided by 1000 to get seconds);
            # a missing key must not abort the whole extraction.
            duration = float_or_none(data.get('duration'), 1000)

            video = data['sources']['q8c']
            thumbnail = video.get('preview')

            rendition = self._download_json(
                video['loadBalancerUrl'], video_id,
                transform_source=strip_jsonp)

            # Stream properties that apply to the direct (rtmp) rendition.
            f = {
                'abr': int_or_none(video.get('audioBitrate')),
                'vbr': int_or_none(video.get('bitrate')),
                'fps': int_or_none(video.get('videoFps')),
                'width': int_or_none(video.get('videoWidth')),
                'height': int_or_none(video.get('videoHeight')),
            }

            formats = []
            for format_id, format_url in rendition['redirect'].items():
                if format_id == 'rtmp':
                    ff = f.copy()
                    ff.update({
                        'url': format_url,
                        'format_id': format_id,
                    })
                    formats.append(ff)
                    continue
                ext = determine_ext(format_url)
                if ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        format_url, video_id, f4m_id=format_id))
                elif ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, video_id, 'mp4', m3u8_id=format_id))
                # Any other rendition type is deliberately ignored.
            self._sort_formats(formats)

            title = base_title
            if idx >= 1:
                # Titles are duplicates, make them unique
                title = '%s (%d)' % (base_title, idx + 1)

            entries.append({
                'id': video_id,
                'title': title,
                'description': description,
                'duration': duration,
                'thumbnail': thumbnail,
                'upload_date': upload_date,
                'formats': formats,
            })

        return self.playlist_result(entries)
|
||||||
|
78
youtube_dl/extractor/popcorntv.py
Normal file
78
youtube_dl/extractor/popcorntv.py
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
|
int_or_none,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PopcornTVIE(InfoExtractor):
    """Extractor for videos hosted on the popcorntv.it sub-sites."""
    _VALID_URL = r'https?://[^/]+\.popcorntv\.it/guarda/(?P<display_id>[^/]+)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://animemanga.popcorntv.it/guarda/food-wars-battaglie-culinarie-episodio-01/9183',
        'md5': '47d65a48d147caf692ab8562fe630b45',
        'info_dict': {
            'id': '9183',
            'display_id': 'food-wars-battaglie-culinarie-episodio-01',
            'ext': 'mp4',
            'title': 'Food Wars, Battaglie Culinarie | Episodio 01',
            'description': 'md5:b8bea378faae4651d3b34c6e112463d0',
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1497610857,
            'upload_date': '20170616',
            'duration': 1440,
            'view_count': int,
        },
    }, {
        'url': 'https://cinema.popcorntv.it/guarda/smash-cut/10433',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract HLS formats and page metadata for a single video page."""
        mobj = re.match(self._VALID_URL, url)
        display_id, video_id = mobj.group('display_id', 'id')

        webpage = self._download_webpage(url, display_id)

        # The manifest URL is the href of the <link itemprop="contentUrl">
        # (or "embedUrl") element.
        m3u8_url = extract_attributes(
            self._search_regex(
                r'(<link[^>]+itemprop=["\'](?:content|embed)Url[^>]*>)',
                webpage, 'content'
            ))['href']

        formats = self._extract_m3u8_formats(
            m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')

        title = self._search_regex(
            r'<h1[^>]+itemprop=["\']name[^>]*>([^<]+)', webpage,
            'title', default=None) or self._og_search_title(webpage)

        description = self._html_search_regex(
            r'(?s)<article[^>]+itemprop=["\']description[^>]*>(.+?)</article>',
            webpage, 'description', fatal=False)
        thumbnail = self._og_search_thumbnail(webpage)
        timestamp = unified_timestamp(self._html_search_meta(
            'uploadDate', webpage, 'timestamp'))
        # invscale=60 multiplies the meta value by 60 (the test expects 1440
        # for this page). No debug output is written here: stdout may be
        # carrying the media stream or JSON output.
        duration = int_or_none(self._html_search_meta(
            'duration', webpage), invscale=60)
        view_count = int_or_none(self._html_search_meta(
            'interactionCount', webpage, 'view count'))

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }
|
@ -7,43 +7,84 @@ import itertools
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_str,
|
compat_str,
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
unified_strdate,
|
bool_or_none,
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class RutubeIE(InfoExtractor):
|
class RutubeBaseIE(InfoExtractor):
    """Shared helpers for the Rutube extractors."""

    def _extract_video(self, video, video_id=None, require_title=True):
        """Build the metadata part of an info dict from a Rutube video object.

        video is the decoded JSON object describing a video. video_id is a
        fallback id used when the object carries no 'id'. When require_title
        is true a missing 'title' raises KeyError; otherwise the title may be
        None (used for playlist entries that are resolved later).

        The returned dict contains no 'formats'; callers add them.
        """
        title = video['title'] if require_title else video.get('title')

        # 'is_adult' may be absent; only map it to an age limit when present.
        age_limit = video.get('is_adult')
        if age_limit is not None:
            age_limit = 18 if age_limit is True else 0

        # Some videos don't have the author/category objects at all.
        uploader_id = try_get(video, lambda x: x['author']['id'])
        category = try_get(video, lambda x: x['category']['name'])

        return {
            'id': video.get('id') or video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': video.get('thumbnail_url'),
            'duration': int_or_none(video.get('duration')),
            'uploader': try_get(video, lambda x: x['author']['name']),
            'uploader_id': compat_str(uploader_id) if uploader_id else None,
            'timestamp': unified_timestamp(video.get('created_ts')),
            # The info dict field is the plural, list-valued 'categories';
            # a singular 'category' key would be silently ignored.
            'categories': [category] if category else None,
            'age_limit': age_limit,
            'view_count': int_or_none(video.get('hits')),
            'comment_count': int_or_none(video.get('comments_count')),
            'is_live': bool_or_none(video.get('is_livestream')),
        }
|
||||||
|
|
||||||
|
|
||||||
|
class RutubeIE(RutubeBaseIE):
|
||||||
IE_NAME = 'rutube'
|
IE_NAME = 'rutube'
|
||||||
IE_DESC = 'Rutube videos'
|
IE_DESC = 'Rutube videos'
|
||||||
_VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})'
|
_VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
|
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
|
||||||
|
'md5': '79938ade01294ef7e27574890d0d3769',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3eac3b4561676c17df9132a9a1e62e3e',
|
'id': '3eac3b4561676c17df9132a9a1e62e3e',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
'title': 'Раненный кенгуру забежал в аптеку',
|
'title': 'Раненный кенгуру забежал в аптеку',
|
||||||
'description': 'http://www.ntdtv.ru ',
|
'description': 'http://www.ntdtv.ru ',
|
||||||
'duration': 80,
|
'duration': 80,
|
||||||
'uploader': 'NTDRussian',
|
'uploader': 'NTDRussian',
|
||||||
'uploader_id': '29790',
|
'uploader_id': '29790',
|
||||||
|
'timestamp': 1381943602,
|
||||||
'upload_date': '20131016',
|
'upload_date': '20131016',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
# It requires ffmpeg (m3u8 download)
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
|
'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
|
'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if RutubePlaylistIE.suitable(url) else super(RutubeIE, cls).suitable(url)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
return [mobj.group('url') for mobj in re.finditer(
|
return [mobj.group('url') for mobj in re.finditer(
|
||||||
@ -52,12 +93,12 @@ class RutubeIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
video = self._download_json(
|
video = self._download_json(
|
||||||
'http://rutube.ru/api/video/%s/?format=json' % video_id,
|
'http://rutube.ru/api/video/%s/?format=json' % video_id,
|
||||||
video_id, 'Downloading video JSON')
|
video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
# Some videos don't have the author field
|
info = self._extract_video(video, video_id)
|
||||||
author = video.get('author') or {}
|
|
||||||
|
|
||||||
options = self._download_json(
|
options = self._download_json(
|
||||||
'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
|
'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
|
||||||
@ -79,19 +120,8 @@ class RutubeIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
info['formats'] = formats
|
||||||
'id': video['id'],
|
return info
|
||||||
'title': video['title'],
|
|
||||||
'description': video['description'],
|
|
||||||
'duration': video['duration'],
|
|
||||||
'view_count': video['hits'],
|
|
||||||
'formats': formats,
|
|
||||||
'thumbnail': video['thumbnail_url'],
|
|
||||||
'uploader': author.get('name'),
|
|
||||||
'uploader_id': compat_str(author['id']) if author else None,
|
|
||||||
'upload_date': unified_strdate(video['created_ts']),
|
|
||||||
'age_limit': 18 if video['is_adult'] else 0,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class RutubeEmbedIE(InfoExtractor):
|
class RutubeEmbedIE(InfoExtractor):
|
||||||
@ -103,7 +133,8 @@ class RutubeEmbedIE(InfoExtractor):
|
|||||||
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
|
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'a10e53b86e8f349080f718582ce4c661',
|
'id': 'a10e53b86e8f349080f718582ce4c661',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
|
'timestamp': 1387830582,
|
||||||
'upload_date': '20131223',
|
'upload_date': '20131223',
|
||||||
'uploader_id': '297833',
|
'uploader_id': '297833',
|
||||||
'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
|
'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
|
||||||
@ -111,7 +142,7 @@ class RutubeEmbedIE(InfoExtractor):
|
|||||||
'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
|
'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': 'Requires ffmpeg',
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://rutube.ru/play/embed/8083783',
|
'url': 'http://rutube.ru/play/embed/8083783',
|
||||||
@ -125,10 +156,51 @@ class RutubeEmbedIE(InfoExtractor):
|
|||||||
canonical_url = self._html_search_regex(
|
canonical_url = self._html_search_regex(
|
||||||
r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage,
|
r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage,
|
||||||
'Canonical URL')
|
'Canonical URL')
|
||||||
return self.url_result(canonical_url, 'Rutube')
|
return self.url_result(canonical_url, RutubeIE.ie_key())
|
||||||
|
|
||||||
|
|
||||||
class RutubeChannelIE(InfoExtractor):
|
class RutubePlaylistBaseIE(RutubeBaseIE):
    """Common paging logic for Rutube listings.

    Subclasses provide _PAGE_TEMPLATE (and may override _next_page_url when
    the template needs more than the playlist id and page number).
    """

    def _next_page_url(self, page_num, playlist_id, *args, **kwargs):
        """Return the API URL of page page_num of the given playlist."""
        return self._PAGE_TEMPLATE % (playlist_id, page_num)

    def _entries(self, playlist_id, *args, **kwargs):
        """Yield one url-type entry per video, following the paging links."""
        page_url = None
        for page_no in itertools.count(1):
            # Only the first request has to be built from the template;
            # afterwards the 'next' link from the previous page is used.
            if not page_url:
                page_url = self._next_page_url(
                    page_no, playlist_id, *args, **kwargs)
            page = self._download_json(
                page_url, playlist_id, 'Downloading page %s' % page_no)

            results = page.get('results')
            if not (results and isinstance(results, list)):
                break

            for result in results:
                video_url = result.get('video_url')
                if video_url and isinstance(video_url, compat_str):
                    entry = self._extract_video(result, require_title=False)
                    entry['_type'] = 'url'
                    entry['url'] = video_url
                    entry['ie_key'] = RutubeIE.ie_key()
                    yield entry

            page_url = page.get('next')
            if not (page_url and page.get('has_next')):
                break

    def _extract_playlist(self, playlist_id, *args, **kwargs):
        """Wrap the paged entries into a playlist result.

        The optional playlist_name keyword becomes the playlist title.
        """
        playlist_title = kwargs.get('playlist_name')
        entries = self._entries(playlist_id, *args, **kwargs)
        return self.playlist_result(entries, playlist_id, playlist_title)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        return self._extract_playlist(playlist_id)
|
||||||
|
|
||||||
|
|
||||||
|
class RutubeChannelIE(RutubePlaylistBaseIE):
|
||||||
IE_NAME = 'rutube:channel'
|
IE_NAME = 'rutube:channel'
|
||||||
IE_DESC = 'Rutube channels'
|
IE_DESC = 'Rutube channels'
|
||||||
_VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'
|
_VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'
|
||||||
@ -142,27 +214,8 @@ class RutubeChannelIE(InfoExtractor):
|
|||||||
|
|
||||||
_PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
|
_PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
|
||||||
|
|
||||||
def _extract_videos(self, channel_id, channel_title=None):
|
|
||||||
entries = []
|
|
||||||
for pagenum in itertools.count(1):
|
|
||||||
page = self._download_json(
|
|
||||||
self._PAGE_TEMPLATE % (channel_id, pagenum),
|
|
||||||
channel_id, 'Downloading page %s' % pagenum)
|
|
||||||
results = page['results']
|
|
||||||
if not results:
|
|
||||||
break
|
|
||||||
entries.extend(self.url_result(result['video_url'], 'Rutube') for result in results)
|
|
||||||
if not page['has_next']:
|
|
||||||
break
|
|
||||||
return self.playlist_result(entries, channel_id, channel_title)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
class RutubeMovieIE(RutubePlaylistBaseIE):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
channel_id = mobj.group('id')
|
|
||||||
return self._extract_videos(channel_id)
|
|
||||||
|
|
||||||
|
|
||||||
class RutubeMovieIE(RutubeChannelIE):
|
|
||||||
IE_NAME = 'rutube:movie'
|
IE_NAME = 'rutube:movie'
|
||||||
IE_DESC = 'Rutube movies'
|
IE_DESC = 'Rutube movies'
|
||||||
_VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
|
_VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
|
||||||
@ -176,11 +229,11 @@ class RutubeMovieIE(RutubeChannelIE):
|
|||||||
movie = self._download_json(
|
movie = self._download_json(
|
||||||
self._MOVIE_TEMPLATE % movie_id, movie_id,
|
self._MOVIE_TEMPLATE % movie_id, movie_id,
|
||||||
'Downloading movie JSON')
|
'Downloading movie JSON')
|
||||||
movie_name = movie['name']
|
return self._extract_playlist(
|
||||||
return self._extract_videos(movie_id, movie_name)
|
movie_id, playlist_name=movie.get('name'))
|
||||||
|
|
||||||
|
|
||||||
class RutubePersonIE(RutubeChannelIE):
|
class RutubePersonIE(RutubePlaylistBaseIE):
|
||||||
IE_NAME = 'rutube:person'
|
IE_NAME = 'rutube:person'
|
||||||
IE_DESC = 'Rutube person videos'
|
IE_DESC = 'Rutube person videos'
|
||||||
_VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'
|
_VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'
|
||||||
@ -193,3 +246,37 @@ class RutubePersonIE(RutubeChannelIE):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
_PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
|
_PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
|
||||||
|
|
||||||
|
|
||||||
|
class RutubePlaylistIE(RutubePlaylistBaseIE):
    """Extractor for Rutube playlists addressed via pl_id/pl_type parameters."""
    IE_NAME = 'rutube:playlist'
    IE_DESC = 'Rutube playlists'
    _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.*?\bpl_id=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag',
        'info_dict': {
            'id': '3097',
        },
        'playlist_count': 27,
    }, {
        'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source',
        'only_matching': True,
    }]

    _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json'

    @classmethod
    def suitable(cls, url):
        """Return True only for URLs carrying both pl_type and a numeric pl_id.

        A video URL with just one of the two parameters is left to the plain
        video extractor.
        """
        if not super(RutubePlaylistIE, cls).suitable(url):
            return False
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        pl_type = params.get('pl_type', [None])[0]
        pl_id = int_or_none(params.get('pl_id', [None])[0])
        # Coerce to a real boolean so the result matches the base
        # implementation instead of leaking a string, an int or None.
        return bool(pl_type and pl_id)

    def _next_page_url(self, page_num, playlist_id, item_kind):
        """Return the API URL of a page; item_kind is the pl_type value."""
        return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num)

    def _real_extract(self, url):
        # suitable() guarantees both parameters are present in the query.
        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        playlist_kind = qs['pl_type'][0]
        playlist_id = qs['pl_id'][0]
        return self._extract_playlist(playlist_id, item_kind=playlist_kind)
|
||||||
|
@ -18,7 +18,7 @@ class TV4IE(InfoExtractor):
|
|||||||
tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
|
tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
|
||||||
tv4play\.se/
|
tv4play\.se/
|
||||||
(?:
|
(?:
|
||||||
(?:program|barn)/(?:[^\?]+)\?video_id=|
|
(?:program|barn)/(?:[^/]+/|(?:[^\?]+)\?video_id=)|
|
||||||
iframe/video/|
|
iframe/video/|
|
||||||
film/|
|
film/|
|
||||||
sport/|
|
sport/|
|
||||||
@ -63,6 +63,10 @@ class TV4IE(InfoExtractor):
|
|||||||
'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412',
|
'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.tv4play.se/program/farang/3922081',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -15,7 +15,9 @@ from ..utils import (
|
|||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
qualities,
|
qualities,
|
||||||
|
smuggle_url,
|
||||||
try_get,
|
try_get,
|
||||||
|
unsmuggle_url,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -224,6 +226,9 @@ class TVPlayIE(InfoExtractor):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
|
self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
|
||||||
|
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
geo_country = self._search_regex(
|
geo_country = self._search_regex(
|
||||||
r'https?://[^/]+\.([a-z]{2})', url,
|
r'https?://[^/]+\.([a-z]{2})', url,
|
||||||
@ -426,4 +431,9 @@ class ViafreeIE(InfoExtractor):
|
|||||||
r'currentVideo["\']\s*:\s*.+?["\']id["\']\s*:\s*["\'](\d{6,})',
|
r'currentVideo["\']\s*:\s*.+?["\']id["\']\s*:\s*["\'](\d{6,})',
|
||||||
webpage, 'video id')
|
webpage, 'video id')
|
||||||
|
|
||||||
return self.url_result('mtg:%s' % video_id, TVPlayIE.ie_key())
|
return self.url_result(
|
||||||
|
smuggle_url(
|
||||||
|
'mtg:%s' % video_id,
|
||||||
|
{'geo_countries': [
|
||||||
|
compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]]}),
|
||||||
|
ie=TVPlayIE.ie_key(), video_id=video_id)
|
||||||
|
@ -14,7 +14,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class TwentyFourVideoIE(InfoExtractor):
|
class TwentyFourVideoIE(InfoExtractor):
|
||||||
IE_NAME = '24video'
|
IE_NAME = '24video'
|
||||||
_VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sex|tube))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
|
_VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sex|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.24video.net/video/view/1044982',
|
'url': 'http://www.24video.net/video/view/1044982',
|
||||||
@ -60,8 +60,8 @@ class TwentyFourVideoIE(InfoExtractor):
|
|||||||
duration = int_or_none(self._og_search_property(
|
duration = int_or_none(self._og_search_property(
|
||||||
'duration', webpage, 'duration', fatal=False))
|
'duration', webpage, 'duration', fatal=False))
|
||||||
timestamp = parse_iso8601(self._search_regex(
|
timestamp = parse_iso8601(self._search_regex(
|
||||||
r'<time id="video-timeago" datetime="([^"]+)" itemprop="uploadDate">',
|
r'<time[^>]+\bdatetime="([^"]+)"[^>]+itemprop="uploadDate"',
|
||||||
webpage, 'upload date'))
|
webpage, 'upload date', fatal=False))
|
||||||
|
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r'class="video-uploaded"[^>]*>\s*<a href="/jsecUser/movies/[^"]+"[^>]*>([^<]+)</a>',
|
r'class="video-uploaded"[^>]*>\s*<a href="/jsecUser/movies/[^"]+"[^>]*>([^<]+)</a>',
|
||||||
@ -72,7 +72,7 @@ class TwentyFourVideoIE(InfoExtractor):
|
|||||||
webpage, 'view count', fatal=False))
|
webpage, 'view count', fatal=False))
|
||||||
comment_count = int_or_none(self._html_search_regex(
|
comment_count = int_or_none(self._html_search_regex(
|
||||||
r'<a[^>]+href="#tab-comments"[^>]*>(\d+) комментари',
|
r'<a[^>]+href="#tab-comments"[^>]*>(\d+) комментари',
|
||||||
webpage, 'comment count', fatal=False))
|
webpage, 'comment count', default=None))
|
||||||
|
|
||||||
# Sets some cookies
|
# Sets some cookies
|
||||||
self._download_xml(
|
self._download_xml(
|
||||||
|
@ -28,7 +28,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class TwitchBaseIE(InfoExtractor):
|
class TwitchBaseIE(InfoExtractor):
|
||||||
_VALID_URL_BASE = r'https?://(?:www\.)?twitch\.tv'
|
_VALID_URL_BASE = r'https?://(?:(?:www|go)\.)?twitch\.tv'
|
||||||
|
|
||||||
_API_BASE = 'https://api.twitch.tv'
|
_API_BASE = 'https://api.twitch.tv'
|
||||||
_USHER_BASE = 'https://usher.ttvnw.net'
|
_USHER_BASE = 'https://usher.ttvnw.net'
|
||||||
@ -217,7 +217,7 @@ class TwitchVodIE(TwitchItemBaseIE):
|
|||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:www\.)?twitch\.tv/(?:[^/]+/v|videos)/|
|
(?:(?:www|go)\.)?twitch\.tv/(?:[^/]+/v|videos)/|
|
||||||
player\.twitch\.tv/\?.*?\bvideo=v
|
player\.twitch\.tv/\?.*?\bvideo=v
|
||||||
)
|
)
|
||||||
(?P<id>\d+)
|
(?P<id>\d+)
|
||||||
@ -458,7 +458,7 @@ class TwitchStreamIE(TwitchBaseIE):
|
|||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:www\.)?twitch\.tv/|
|
(?:(?:www|go)\.)?twitch\.tv/|
|
||||||
player\.twitch\.tv/\?.*?\bchannel=
|
player\.twitch\.tv/\?.*?\bchannel=
|
||||||
)
|
)
|
||||||
(?P<id>[^/#?]+)
|
(?P<id>[^/#?]+)
|
||||||
@ -489,6 +489,9 @@ class TwitchStreamIE(TwitchBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://player.twitch.tv/?channel=lotsofs',
|
'url': 'https://player.twitch.tv/?channel=lotsofs',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://go.twitch.tv/food',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -229,7 +229,7 @@ class TwitterCardIE(TwitterBaseIE):
|
|||||||
|
|
||||||
title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
|
title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
|
||||||
thumbnail = config.get('posterImageUrl') or config.get('image_src')
|
thumbnail = config.get('posterImageUrl') or config.get('image_src')
|
||||||
duration = float_or_none(config.get('duration')) or duration
|
duration = float_or_none(config.get('duration'), scale=1000) or duration
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@ -242,8 +242,9 @@ class TwitterCardIE(TwitterBaseIE):
|
|||||||
|
|
||||||
class TwitterIE(InfoExtractor):
|
class TwitterIE(InfoExtractor):
|
||||||
IE_NAME = 'twitter'
|
IE_NAME = 'twitter'
|
||||||
_VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?P<user_id>[^/]+)/status/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?:i/web|(?P<user_id>[^/]+))/status/(?P<id>\d+)'
|
||||||
_TEMPLATE_URL = 'https://twitter.com/%s/status/%s'
|
_TEMPLATE_URL = 'https://twitter.com/%s/status/%s'
|
||||||
|
_TEMPLATE_STATUSES_URL = 'https://twitter.com/statuses/%s'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://twitter.com/freethenipple/status/643211948184596480',
|
'url': 'https://twitter.com/freethenipple/status/643211948184596480',
|
||||||
@ -255,6 +256,7 @@ class TwitterIE(InfoExtractor):
|
|||||||
'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"',
|
'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"',
|
||||||
'uploader': 'FREE THE NIPPLE',
|
'uploader': 'FREE THE NIPPLE',
|
||||||
'uploader_id': 'freethenipple',
|
'uploader_id': 'freethenipple',
|
||||||
|
'duration': 12.922,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # requires ffmpeg
|
'skip_download': True, # requires ffmpeg
|
||||||
@ -305,11 +307,12 @@ class TwitterIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '700207533655363584',
|
'id': '700207533655363584',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Donte - BEAT PROD: @suhmeduh #Damndaniel',
|
'title': 'あかさ - BEAT PROD: @suhmeduh #Damndaniel',
|
||||||
'description': 'Donte on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
|
'description': 'あかさ on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'uploader': 'Donte',
|
'uploader': 'あかさ',
|
||||||
'uploader_id': 'jaydingeer',
|
'uploader_id': 'jaydingeer',
|
||||||
|
'duration': 30.0,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # requires ffmpeg
|
'skip_download': True, # requires ffmpeg
|
||||||
@ -320,9 +323,9 @@ class TwitterIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'MIOxnrUteUd',
|
'id': 'MIOxnrUteUd',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'FilmDrunk - Vine of the day',
|
'title': 'Vince Mancini - Vine of the day',
|
||||||
'description': 'FilmDrunk on Twitter: "Vine of the day https://t.co/xmTvRdqxWf"',
|
'description': 'Vince Mancini on Twitter: "Vine of the day https://t.co/xmTvRdqxWf"',
|
||||||
'uploader': 'FilmDrunk',
|
'uploader': 'Vince Mancini',
|
||||||
'uploader_id': 'Filmdrunk',
|
'uploader_id': 'Filmdrunk',
|
||||||
'timestamp': 1402826626,
|
'timestamp': 1402826626,
|
||||||
'upload_date': '20140615',
|
'upload_date': '20140615',
|
||||||
@ -337,6 +340,7 @@ class TwitterIE(InfoExtractor):
|
|||||||
'description': 'Captain America on Twitter: "@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI"',
|
'description': 'Captain America on Twitter: "@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI"',
|
||||||
'uploader_id': 'captainamerica',
|
'uploader_id': 'captainamerica',
|
||||||
'uploader': 'Captain America',
|
'uploader': 'Captain America',
|
||||||
|
'duration': 3.17,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # requires ffmpeg
|
'skip_download': True, # requires ffmpeg
|
||||||
@ -364,10 +368,26 @@ class TwitterIE(InfoExtractor):
|
|||||||
'description': 'عالم الأخبار on Twitter: "كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN"',
|
'description': 'عالم الأخبار on Twitter: "كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN"',
|
||||||
'uploader': 'عالم الأخبار',
|
'uploader': 'عالم الأخبار',
|
||||||
'uploader_id': 'news_al3alm',
|
'uploader_id': 'news_al3alm',
|
||||||
|
'duration': 277.4,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'format': 'best[format_id^=http-]',
|
'format': 'best[format_id^=http-]',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://twitter.com/i/web/status/910031516746514432',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '910031516746514432',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
|
'description': 'Préfet de Guadeloupe on Twitter: "[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo"',
|
||||||
|
'uploader': 'Préfet de Guadeloupe',
|
||||||
|
'uploader_id': 'Prefet971',
|
||||||
|
'duration': 47.48,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # requires ffmpeg
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -376,11 +396,15 @@ class TwitterIE(InfoExtractor):
|
|||||||
twid = mobj.group('id')
|
twid = mobj.group('id')
|
||||||
|
|
||||||
webpage, urlh = self._download_webpage_handle(
|
webpage, urlh = self._download_webpage_handle(
|
||||||
self._TEMPLATE_URL % (user_id, twid), twid)
|
self._TEMPLATE_STATUSES_URL % twid, twid)
|
||||||
|
|
||||||
if 'twitter.com/account/suspended' in urlh.geturl():
|
if 'twitter.com/account/suspended' in urlh.geturl():
|
||||||
raise ExtractorError('Account suspended by Twitter.', expected=True)
|
raise ExtractorError('Account suspended by Twitter.', expected=True)
|
||||||
|
|
||||||
|
if user_id is None:
|
||||||
|
mobj = re.match(self._VALID_URL, urlh.geturl())
|
||||||
|
user_id = mobj.group('user_id')
|
||||||
|
|
||||||
username = remove_end(self._og_search_title(webpage), ' on Twitter')
|
username = remove_end(self._og_search_title(webpage), ' on Twitter')
|
||||||
|
|
||||||
title = description = self._og_search_description(webpage).strip('').replace('\n', ' ').strip('“”')
|
title = description = self._og_search_description(webpage).strip('').replace('\n', ' ').strip('“”')
|
||||||
|
@ -42,7 +42,7 @@ class VGTVIE(XstreamIE):
|
|||||||
)
|
)
|
||||||
/?
|
/?
|
||||||
(?:
|
(?:
|
||||||
\#!/(?:video|live)/|
|
(?:\#!/)?(?:video|live)/|
|
||||||
embed?.*id=|
|
embed?.*id=|
|
||||||
articles/
|
articles/
|
||||||
)|
|
)|
|
||||||
@ -146,7 +146,11 @@ class VGTVIE(XstreamIE):
|
|||||||
{
|
{
|
||||||
'url': 'abtv:140026',
|
'url': 'abtv:140026',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.vgtv.no/video/84196/hevnen-er-soet-episode-10-abu',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -23,9 +23,9 @@ class VikiBaseIE(InfoExtractor):
|
|||||||
_API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
|
_API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
|
||||||
_API_URL_TEMPLATE = 'http://api.viki.io%s&sig=%s'
|
_API_URL_TEMPLATE = 'http://api.viki.io%s&sig=%s'
|
||||||
|
|
||||||
_APP = '65535a'
|
_APP = '100005a'
|
||||||
_APP_VERSION = '2.2.5.1428709186'
|
_APP_VERSION = '2.2.5.1428709186'
|
||||||
_APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)'
|
_APP_SECRET = 'MM_d*yP@`&1@]@!AVrXf_o-HVEnoTnm$O-ti4[G~$JDI/Dc-&piU&z&5.;:}95=Iad'
|
||||||
|
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
_NETRC_MACHINE = 'viki'
|
_NETRC_MACHINE = 'viki'
|
||||||
|
@ -221,7 +221,7 @@ class XHamsterEmbedIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_url = self._search_regex(
|
video_url = self._search_regex(
|
||||||
r'href="(https?://xhamster\.com/movies/%s/[^"]*\.html[^"]*)"' % video_id,
|
r'href="(https?://xhamster\.com/(?:movies/{0}/[^"]*\.html|videos/[^/]*-{0})[^"]*)"'.format(video_id),
|
||||||
webpage, 'xhamster url', default=None)
|
webpage, 'xhamster url', default=None)
|
||||||
|
|
||||||
if not video_url:
|
if not video_url:
|
||||||
|
@ -847,7 +847,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
postproc.add_option(
|
postproc.add_option(
|
||||||
'--convert-subs', '--convert-subtitles',
|
'--convert-subs', '--convert-subtitles',
|
||||||
metavar='FORMAT', dest='convertsubtitles', default=None,
|
metavar='FORMAT', dest='convertsubtitles', default=None,
|
||||||
help='Convert the subtitles to other format (currently supported: srt|ass|vtt)')
|
help='Convert the subtitles to other format (currently supported: srt|ass|vtt|lrc)')
|
||||||
|
|
||||||
parser.add_option_group(general)
|
parser.add_option_group(general)
|
||||||
parser.add_option_group(network)
|
parser.add_option_group(network)
|
||||||
|
@ -585,7 +585,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
|||||||
dfxp_file = old_file
|
dfxp_file = old_file
|
||||||
srt_file = subtitles_filename(filename, lang, 'srt')
|
srt_file = subtitles_filename(filename, lang, 'srt')
|
||||||
|
|
||||||
with io.open(dfxp_file, 'rt', encoding='utf-8') as f:
|
with open(dfxp_file, 'rb') as f:
|
||||||
srt_data = dfxp2srt(f.read())
|
srt_data = dfxp2srt(f.read())
|
||||||
|
|
||||||
with io.open(srt_file, 'wt', encoding='utf-8') as f:
|
with io.open(srt_file, 'wt', encoding='utf-8') as f:
|
||||||
|
@ -1815,6 +1815,10 @@ def float_or_none(v, scale=1, invscale=1, default=None):
|
|||||||
return default
|
return default
|
||||||
|
|
||||||
|
|
||||||
|
def bool_or_none(v, default=None):
|
||||||
|
return v if isinstance(v, bool) else default
|
||||||
|
|
||||||
|
|
||||||
def strip_or_none(v):
|
def strip_or_none(v):
|
||||||
return None if v is None else v.strip()
|
return None if v is None else v.strip()
|
||||||
|
|
||||||
@ -2568,14 +2572,18 @@ def srt_subtitles_timecode(seconds):
|
|||||||
|
|
||||||
|
|
||||||
def dfxp2srt(dfxp_data):
|
def dfxp2srt(dfxp_data):
|
||||||
|
'''
|
||||||
|
@param dfxp_data A bytes-like object containing DFXP data
|
||||||
|
@returns A unicode object containing converted SRT data
|
||||||
|
'''
|
||||||
LEGACY_NAMESPACES = (
|
LEGACY_NAMESPACES = (
|
||||||
('http://www.w3.org/ns/ttml', [
|
(b'http://www.w3.org/ns/ttml', [
|
||||||
'http://www.w3.org/2004/11/ttaf1',
|
b'http://www.w3.org/2004/11/ttaf1',
|
||||||
'http://www.w3.org/2006/04/ttaf1',
|
b'http://www.w3.org/2006/04/ttaf1',
|
||||||
'http://www.w3.org/2006/10/ttaf1',
|
b'http://www.w3.org/2006/10/ttaf1',
|
||||||
]),
|
]),
|
||||||
('http://www.w3.org/ns/ttml#styling', [
|
(b'http://www.w3.org/ns/ttml#styling', [
|
||||||
'http://www.w3.org/ns/ttml#style',
|
b'http://www.w3.org/ns/ttml#style',
|
||||||
]),
|
]),
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -2670,7 +2678,7 @@ def dfxp2srt(dfxp_data):
|
|||||||
for ns in v:
|
for ns in v:
|
||||||
dfxp_data = dfxp_data.replace(ns, k)
|
dfxp_data = dfxp_data.replace(ns, k)
|
||||||
|
|
||||||
dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
|
dfxp = compat_etree_fromstring(dfxp_data)
|
||||||
out = []
|
out = []
|
||||||
paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
|
paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
|
||||||
|
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2017.09.02'
|
__version__ = '2017.09.24'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user