mirror of
https://github.com/l1ving/youtube-dl
synced 2025-02-13 14:42:52 +08:00
Merge branch 'master' into Vimeo-issue-16717
This commit is contained in:
commit
899387c453
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.07.21*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.07.29*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.07.21**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.07.29**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2018.07.21
|
[debug] youtube-dl version 2018.07.29
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
7
AUTHORS
7
AUTHORS
@ -239,3 +239,10 @@ Martin Weinelt
|
|||||||
Surya Oktafendri
|
Surya Oktafendri
|
||||||
TingPing
|
TingPing
|
||||||
Alexandre Macabies
|
Alexandre Macabies
|
||||||
|
Bastian de Groot
|
||||||
|
Niklas Haas
|
||||||
|
András Veres-Szentkirályi
|
||||||
|
Enes Solak
|
||||||
|
Nathan Rossi
|
||||||
|
Thomas van der Berg
|
||||||
|
Luca Cherubin
|
||||||
|
18
ChangeLog
18
ChangeLog
@ -1,3 +1,21 @@
|
|||||||
|
version 2018.07.29
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [crunchyroll:playlist] Restrict URL regular expression (#17069, #17076)
|
||||||
|
+ [pornhub] Add support for subtitles (#16924, #17088)
|
||||||
|
* [ceskatelevize] Use https for API call (#16997, #16999)
|
||||||
|
* [dailymotion:playlist] Fix extraction (#16894)
|
||||||
|
* [ted] Improve extraction
|
||||||
|
* [ted] Fix extraction for videos without nativeDownloads (#16756, #17085)
|
||||||
|
* [telecinco] Fix extraction (#17080)
|
||||||
|
* [mitele] Reduce number of requests
|
||||||
|
* [rai] Return non HTTP relinker URL intact (#17055)
|
||||||
|
* [vk] Fix extraction for inline only videos (#16923)
|
||||||
|
* [streamcloud] Fix extraction (#17054)
|
||||||
|
* [facebook] Fix tahoe player extraction with authentication (#16655)
|
||||||
|
+ [puhutv] Add support for puhutv.com (#12712, #16010, #16269)
|
||||||
|
|
||||||
|
|
||||||
version 2018.07.21
|
version 2018.07.21
|
||||||
|
|
||||||
Core
|
Core
|
||||||
|
@ -870,7 +870,7 @@ Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the op
|
|||||||
|
|
||||||
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
|
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
|
||||||
|
|
||||||
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox).
|
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
|
||||||
|
|
||||||
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||||
|
|
||||||
|
@ -672,6 +672,8 @@
|
|||||||
- **PrimeShareTV**
|
- **PrimeShareTV**
|
||||||
- **PromptFile**
|
- **PromptFile**
|
||||||
- **prosiebensat1**: ProSiebenSat.1 Digital
|
- **prosiebensat1**: ProSiebenSat.1 Digital
|
||||||
|
- **puhutv**
|
||||||
|
- **puhutv:serie**
|
||||||
- **Puls4**
|
- **Puls4**
|
||||||
- **Pyvideo**
|
- **Pyvideo**
|
||||||
- **qqmusic**: QQ音乐
|
- **qqmusic**: QQ音乐
|
||||||
|
@ -108,7 +108,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||||||
|
|
||||||
for user_agent in (None, USER_AGENTS['Safari']):
|
for user_agent in (None, USER_AGENTS['Safari']):
|
||||||
req = sanitized_Request(
|
req = sanitized_Request(
|
||||||
'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
|
'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
|
||||||
data=urlencode_postdata(data))
|
data=urlencode_postdata(data))
|
||||||
|
|
||||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||||
|
@ -262,6 +262,9 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
|||||||
# Just test metadata extraction
|
# Just test metadata extraction
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.crunchyroll.com/media-723735',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_FORMAT_IDS = {
|
_FORMAT_IDS = {
|
||||||
@ -580,7 +583,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
|
|
||||||
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||||
IE_NAME = 'crunchyroll:playlist'
|
IE_NAME = 'crunchyroll:playlist'
|
||||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?(?:\?|$)'
|
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
|
'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
|
import functools
|
||||||
import hashlib
|
import hashlib
|
||||||
import itertools
|
import itertools
|
||||||
import json
|
import json
|
||||||
@ -16,11 +17,13 @@ from ..utils import (
|
|||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
mimetype2ext,
|
||||||
|
OnDemandPagedList,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
mimetype2ext,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -343,17 +346,93 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
|
|
||||||
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||||
IE_NAME = 'dailymotion:playlist'
|
IE_NAME = 'dailymotion:playlist'
|
||||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
|
||||||
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
|
|
||||||
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
|
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'SPORT',
|
'title': 'SPORT',
|
||||||
'id': 'xv4bw_nqtv_sport',
|
'id': 'xv4bw',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 20,
|
'playlist_mincount': 20,
|
||||||
}]
|
}]
|
||||||
|
_PAGE_SIZE = 100
|
||||||
|
|
||||||
|
def _fetch_page(self, playlist_id, authorizaion, page):
    """Yield url_result entries for one page of a Dailymotion playlist.

    playlist_id is interpolated into the GraphQL query as the collection
    xid; authorizaion (sic - the parameter name is kept for callers) is
    sent verbatim as the Authorization header; page is incremented by one
    before it is used, so it is presumably a zero-based index.
    """
    page_num = page + 1
    # The literal is kept flush-left: its line breaks and leading
    # whitespace are part of the request body that gets sent.
    graphql_query = '''{
collection(xid: "%s") {
videos(first: %d, page: %d) {
pageInfo {
hasNextPage
nextPage
}
edges {
node {
xid
url
}
}
}
}
}''' % (playlist_id, self._PAGE_SIZE, page_num)
    request_body = json.dumps({
        'query': graphql_query
    }).encode()
    request_headers = {
        'Authorization': authorizaion,
        'Origin': 'https://www.dailymotion.com',
    }
    response = self._download_json(
        'https://graphql.api.dailymotion.com',
        playlist_id, 'Downloading page %d' % page_num,
        data=request_body, headers=request_headers)
    collection_videos = response['data']['collection']['videos']
    for edge in collection_videos['edges']:
        video_node = edge['node']
        yield self.url_result(
            video_node['url'], DailymotionIE.ie_key(), video_node['xid'])
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Return a playlist result whose entries are fetched page by page.

    The playlist page is downloaded to read the API settings embedded in
    __PLAYER_CONFIG__; those settings (or the hard-coded fallbacks) are
    used to request a client-credentials token, which _fetch_page then
    sends with every page request.
    """
    playlist_id = self._match_id(url)
    webpage = self._download_webpage(url, playlist_id)

    # API settings the page embeds for its own player.
    player_config_json = self._search_regex(
        r'__PLAYER_CONFIG__\s*=\s*({.+?});',
        webpage, 'player config')
    player_config = self._parse_json(player_config_json, playlist_id)
    api = player_config['context']['api']

    # Each value falls back to a built-in default when the page omits it.
    token_url = api.get(
        'auth_url', 'https://graphql.api.dailymotion.com/oauth/token')
    token_request = urlencode_postdata({
        'client_id': api.get('client_id', 'f1a362d288c1b98099c7'),
        'client_secret': api.get('client_secret', 'eea605b96e01c796ff369935357eca920c5da4c5'),
        'grant_type': 'client_credentials',
    })
    auth = self._download_json(token_url, playlist_id, data=token_request)

    token_type = auth.get('token_type', 'Bearer')
    authorization = '%s %s' % (token_type, auth['access_token'])

    # Pages are only requested when the paged list is actually consumed.
    page_fetcher = functools.partial(
        self._fetch_page, playlist_id, authorization)
    entries = OnDemandPagedList(page_fetcher, self._PAGE_SIZE)

    return self.playlist_result(
        entries, playlist_id, self._og_search_title(webpage))
|
||||||
|
|
||||||
|
|
||||||
|
class DailymotionUserIE(DailymotionBaseInfoExtractor):
|
||||||
|
IE_NAME = 'dailymotion:user'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
|
||||||
|
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
|
||||||
|
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'nqtv',
|
||||||
|
'title': 'Rémi Gaillard',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 100,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.dailymotion.com/user/UnderProject',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'UnderProject',
|
||||||
|
'title': 'UnderProject',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 1800,
|
||||||
|
'expected_warnings': [
|
||||||
|
'Stopped at duplicated page',
|
||||||
|
],
|
||||||
|
'skip': 'Takes too long time',
|
||||||
|
}]
|
||||||
|
|
||||||
def _extract_entries(self, id):
|
def _extract_entries(self, id):
|
||||||
video_ids = set()
|
video_ids = set()
|
||||||
@ -379,43 +458,6 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
|||||||
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
|
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
|
||||||
break
|
break
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
playlist_id = mobj.group('id')
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'playlist',
|
|
||||||
'id': playlist_id,
|
|
||||||
'title': self._og_search_title(webpage),
|
|
||||||
'entries': self._extract_entries(playlist_id),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class DailymotionUserIE(DailymotionPlaylistIE):
|
|
||||||
IE_NAME = 'dailymotion:user'
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
|
|
||||||
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'nqtv',
|
|
||||||
'title': 'Rémi Gaillard',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 100,
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.dailymotion.com/user/UnderProject',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'UnderProject',
|
|
||||||
'title': 'UnderProject',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 1800,
|
|
||||||
'expected_warnings': [
|
|
||||||
'Stopped at duplicated page',
|
|
||||||
],
|
|
||||||
'skip': 'Takes too long time',
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
user = mobj.group('user')
|
user = mobj.group('user')
|
||||||
|
@ -860,6 +860,10 @@ from .pornhub import (
|
|||||||
from .pornotube import PornotubeIE
|
from .pornotube import PornotubeIE
|
||||||
from .pornovoisines import PornoVoisinesIE
|
from .pornovoisines import PornoVoisinesIE
|
||||||
from .pornoxo import PornoXOIE
|
from .pornoxo import PornoXOIE
|
||||||
|
from .puhutv import (
|
||||||
|
PuhuTVIE,
|
||||||
|
PuhuTVSerieIE,
|
||||||
|
)
|
||||||
from .presstv import PressTVIE
|
from .presstv import PressTVIE
|
||||||
from .primesharetv import PrimeShareTVIE
|
from .primesharetv import PrimeShareTVIE
|
||||||
from .promptfile import PromptFileIE
|
from .promptfile import PromptFileIE
|
||||||
|
@ -355,7 +355,6 @@ class FacebookIE(InfoExtractor):
|
|||||||
tahoe_data = self._download_webpage(
|
tahoe_data = self._download_webpage(
|
||||||
self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
|
self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
|
||||||
data=urlencode_postdata({
|
data=urlencode_postdata({
|
||||||
'__user': 0,
|
|
||||||
'__a': 1,
|
'__a': 1,
|
||||||
'__pc': self._search_regex(
|
'__pc': self._search_regex(
|
||||||
r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
|
r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
|
||||||
@ -363,6 +362,9 @@ class FacebookIE(InfoExtractor):
|
|||||||
'__rev': self._search_regex(
|
'__rev': self._search_regex(
|
||||||
r'client_revision["\']\s*:\s*(\d+),', webpage,
|
r'client_revision["\']\s*:\s*(\d+),', webpage,
|
||||||
'client revision', default='3944515'),
|
'client revision', default='3944515'),
|
||||||
|
'fb_dtsg': self._search_regex(
|
||||||
|
r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"',
|
||||||
|
webpage, 'dtsg token', default=''),
|
||||||
}),
|
}),
|
||||||
headers={
|
headers={
|
||||||
'Content-Type': 'application/x-www-form-urlencoded',
|
'Content-Type': 'application/x-www-form-urlencoded',
|
||||||
|
@ -1,84 +1,14 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import uuid
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .ooyala import OoyalaIE
|
|
||||||
from ..compat import (
|
|
||||||
compat_str,
|
|
||||||
compat_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
extract_attributes,
|
|
||||||
determine_ext,
|
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class MiTeleBaseIE(InfoExtractor):
|
|
||||||
def _get_player_info(self, url, webpage):
|
|
||||||
player_data = extract_attributes(self._search_regex(
|
|
||||||
r'(?s)(<ms-video-player.+?</ms-video-player>)',
|
|
||||||
webpage, 'ms video player'))
|
|
||||||
video_id = player_data['data-media-id']
|
|
||||||
if player_data.get('data-cms-id') == 'ooyala':
|
|
||||||
return self.url_result(
|
|
||||||
'ooyala:%s' % video_id, ie=OoyalaIE.ie_key(), video_id=video_id)
|
|
||||||
config_url = compat_urlparse.urljoin(url, player_data['data-config'])
|
|
||||||
config = self._download_json(
|
|
||||||
config_url, video_id, 'Downloading config JSON')
|
|
||||||
mmc_url = config['services']['mmc']
|
|
||||||
|
|
||||||
duration = None
|
|
||||||
formats = []
|
|
||||||
for m_url in (mmc_url, mmc_url.replace('/flash.json', '/html5.json')):
|
|
||||||
mmc = self._download_json(
|
|
||||||
m_url, video_id, 'Downloading mmc JSON')
|
|
||||||
if not duration:
|
|
||||||
duration = int_or_none(mmc.get('duration'))
|
|
||||||
for location in mmc['locations']:
|
|
||||||
gat = self._proto_relative_url(location.get('gat'), 'http:')
|
|
||||||
gcp = location.get('gcp')
|
|
||||||
ogn = location.get('ogn')
|
|
||||||
if None in (gat, gcp, ogn):
|
|
||||||
continue
|
|
||||||
token_data = {
|
|
||||||
'gcp': gcp,
|
|
||||||
'ogn': ogn,
|
|
||||||
'sta': 0,
|
|
||||||
}
|
|
||||||
media = self._download_json(
|
|
||||||
gat, video_id, data=json.dumps(token_data).encode('utf-8'),
|
|
||||||
headers={
|
|
||||||
'Content-Type': 'application/json;charset=utf-8',
|
|
||||||
'Referer': url,
|
|
||||||
})
|
|
||||||
stream = media.get('stream') or media.get('file')
|
|
||||||
if not stream:
|
|
||||||
continue
|
|
||||||
ext = determine_ext(stream)
|
|
||||||
if ext == 'f4m':
|
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
|
|
||||||
video_id, f4m_id='hds', fatal=False))
|
|
||||||
elif ext == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
stream, video_id, 'mp4', 'm3u8_native',
|
|
||||||
m3u8_id='hls', fatal=False))
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'formats': formats,
|
|
||||||
'thumbnail': player_data.get('data-poster') or config.get('poster', {}).get('imageUrl'),
|
|
||||||
'duration': duration,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class MiTeleIE(InfoExtractor):
|
class MiTeleIE(InfoExtractor):
|
||||||
IE_DESC = 'mitele.es'
|
IE_DESC = 'mitele.es'
|
||||||
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
|
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
|
||||||
@ -86,7 +16,7 @@ class MiTeleIE(InfoExtractor):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
|
'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '57b0dfb9c715da65618b4afa',
|
'id': 'FhYW1iNTE6J6H7NkQRIEzfne6t2quqPg',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Tor, la web invisible',
|
'title': 'Tor, la web invisible',
|
||||||
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
|
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
|
||||||
@ -104,7 +34,7 @@ class MiTeleIE(InfoExtractor):
|
|||||||
# no explicit title
|
# no explicit title
|
||||||
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
|
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '57b0de3dc915da14058b4876',
|
'id': 'oyNG1iNTE6TAPP-JmCjbwfwJqqMMX3Vq',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Cuarto Milenio Temporada 6 Programa 226',
|
'title': 'Cuarto Milenio Temporada 6 Programa 226',
|
||||||
'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
|
'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
|
||||||
@ -128,40 +58,21 @@ class MiTeleIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
gigya_url = self._search_regex(
|
|
||||||
r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s+src="([^"]*)">[^>]*</script>',
|
|
||||||
webpage, 'gigya', default=None)
|
|
||||||
gigya_sc = self._download_webpage(
|
|
||||||
compat_urlparse.urljoin('http://www.mitele.es/', gigya_url),
|
|
||||||
video_id, 'Downloading gigya script')
|
|
||||||
|
|
||||||
# Get a appKey/uuid for getting the session key
|
|
||||||
appKey = self._search_regex(
|
|
||||||
r'constant\s*\(\s*["\']_appGridApplicationKey["\']\s*,\s*["\']([0-9a-f]+)',
|
|
||||||
gigya_sc, 'appKey')
|
|
||||||
|
|
||||||
session_json = self._download_json(
|
|
||||||
'https://appgrid-api.cloud.accedo.tv/session',
|
|
||||||
video_id, 'Downloading session keys', query={
|
|
||||||
'appKey': appKey,
|
|
||||||
'uuid': compat_str(uuid.uuid4()),
|
|
||||||
})
|
|
||||||
|
|
||||||
paths = self._download_json(
|
paths = self._download_json(
|
||||||
'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration',
|
'https://www.mitele.es/amd/agp/web/metadata/general_configuration',
|
||||||
video_id, 'Downloading paths JSON',
|
video_id, 'Downloading paths JSON')
|
||||||
query={'sessionKey': compat_str(session_json['sessionKey'])})
|
|
||||||
|
|
||||||
ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search']
|
ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search']
|
||||||
|
base_url = ooyala_s.get('base_url', 'cdn-search-mediaset.carbyne.ps.ooyala.com')
|
||||||
|
full_path = ooyala_s.get('full_path', '/search/v1/full/providers/')
|
||||||
source = self._download_json(
|
source = self._download_json(
|
||||||
'http://%s%s%s/docs/%s' % (
|
'%s://%s%s%s/docs/%s' % (
|
||||||
ooyala_s['base_url'], ooyala_s['full_path'],
|
ooyala_s.get('protocol', 'https'), base_url, full_path,
|
||||||
ooyala_s['provider_id'], video_id),
|
ooyala_s.get('provider_id', '104951'), video_id),
|
||||||
video_id, 'Downloading data JSON', query={
|
video_id, 'Downloading data JSON', query={
|
||||||
'include_titles': 'Series,Season',
|
'include_titles': 'Series,Season',
|
||||||
'product_name': 'test',
|
'product_name': ooyala_s.get('product_name', 'test'),
|
||||||
'format': 'full',
|
'format': 'full',
|
||||||
})['hits']['hits'][0]['_source']
|
})['hits']['hits'][0]['_source']
|
||||||
|
|
||||||
|
@ -18,6 +18,7 @@ from ..utils import (
|
|||||||
orderedSet,
|
orderedSet,
|
||||||
remove_quotes,
|
remove_quotes,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -68,6 +69,31 @@ class PornHubIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# subtitles
|
||||||
|
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ph5af5fef7c2aa7',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'BFFS - Cute Teen Girls Share Cock On the Floor',
|
||||||
|
'uploader': 'BFFs',
|
||||||
|
'duration': 622,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'dislike_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'age_limit': 18,
|
||||||
|
'tags': list,
|
||||||
|
'categories': list,
|
||||||
|
'subtitles': {
|
||||||
|
'en': [{
|
||||||
|
"ext": 'srt'
|
||||||
|
}]
|
||||||
|
},
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
|
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -139,12 +165,19 @@ class PornHubIE(InfoExtractor):
|
|||||||
|
|
||||||
video_urls = []
|
video_urls = []
|
||||||
video_urls_set = set()
|
video_urls_set = set()
|
||||||
|
subtitles = {}
|
||||||
|
|
||||||
flashvars = self._parse_json(
|
flashvars = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
|
r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
|
||||||
video_id)
|
video_id)
|
||||||
if flashvars:
|
if flashvars:
|
||||||
|
subtitle_url = url_or_none(flashvars.get('closedCaptionsFile'))
|
||||||
|
if subtitle_url:
|
||||||
|
subtitles.setdefault('en', []).append({
|
||||||
|
'url': subtitle_url,
|
||||||
|
'ext': 'srt',
|
||||||
|
})
|
||||||
thumbnail = flashvars.get('image_url')
|
thumbnail = flashvars.get('image_url')
|
||||||
duration = int_or_none(flashvars.get('video_duration'))
|
duration = int_or_none(flashvars.get('video_duration'))
|
||||||
media_definitions = flashvars.get('mediaDefinitions')
|
media_definitions = flashvars.get('mediaDefinitions')
|
||||||
@ -256,6 +289,7 @@ class PornHubIE(InfoExtractor):
|
|||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'tags': tags,
|
'tags': tags,
|
||||||
'categories': categories,
|
'categories': categories,
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
247
youtube_dl/extractor/puhutv.py
Normal file
247
youtube_dl/extractor/puhutv.py
Normal file
@ -0,0 +1,247 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_HTTPError,
|
||||||
|
compat_str,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
float_or_none,
|
||||||
|
parse_resolution,
|
||||||
|
str_or_none,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PuhuTVIE(InfoExtractor):
    """Extractor for single puhutv.com watch pages (URLs ending in -izle)."""

    _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-izle'
    IE_NAME = 'puhutv'
    _TESTS = [{
        # film
        'url': 'https://puhutv.com/sut-kardesler-izle',
        'md5': 'fbd8f2d8e7681f8bcd51b592475a6ae7',
        'info_dict': {
            'id': '5085',
            'display_id': 'sut-kardesler',
            'ext': 'mp4',
            'title': 'Süt Kardeşler',
            'description': 'md5:405fd024df916ca16731114eb18e511a',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 4832.44,
            'creator': 'Arzu Film',
            'timestamp': 1469778212,
            'upload_date': '20160729',
            'release_year': 1976,
            'view_count': int,
            'tags': ['Aile', 'Komedi', 'Klasikler'],
        },
    }, {
        # episode, geo restricted, bypassable with --geo-verification-proxy
        'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle',
        'only_matching': True,
    }, {
        # 4k, with subtitles
        'url': 'https://puhutv.com/dip-1-bolum-izle',
        'only_matching': True,
    }]
    # Maps the subtitle language labels found in the metadata to the keys
    # used in the returned subtitles dict; unlisted labels are used as is.
    _SUBTITLE_LANGS = {
        'English': 'en',
        'Deutsch': 'de',
        'عربى': 'ar'
    }

    def _real_extract(self, url):
        """Build the info dict for one puhutv.com watch page.

        Two JSON documents are fetched: the slug metadata
        (/api/slug/<display_id>-izle) and the list of media for the asset
        (/api/assets/<id>/videos).  An HTTP 403 on the second request is
        reported as a geo restriction; any other ExtractorError is
        re-raised unchanged.
        """
        display_id = self._match_id(url)

        info = self._download_json(
            urljoin(url, '/api/slug/%s-izle' % display_id),
            display_id)['data']

        video_id = compat_str(info['id'])
        title = info.get('name') or info['title']['name']
        if info.get('display_name'):
            title = '%s %s' % (title, info.get('display_name'))

        try:
            videos = self._download_json(
                'https://puhutv.com/api/assets/%s/videos' % video_id,
                display_id, 'Downloading video JSON',
                headers=self.geo_verification_headers())
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                self.raise_geo_restricted()
            raise

        formats = []
        for video in videos['data']['videos']:
            # Same defensive check as the genre and subtitle loops below.
            if not isinstance(video, dict):
                continue
            media_url = url_or_none(video.get('url'))
            if not media_url:
                continue
            playlist = video.get('is_playlist')
            if video.get('stream_type') == 'hls' and playlist is True:
                # Master playlist: let the m3u8 helper enumerate variants.
                formats.extend(self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
                continue
            quality = int_or_none(video.get('quality'))
            f = {
                'url': media_url,
                'ext': 'mp4',
                'height': quality
            }
            video_format = video.get('video_format')
            if video_format == 'hls' and playlist is False:
                format_id = 'hls'
                f['protocol'] = 'm3u8_native'
            elif video_format == 'mp4':
                format_id = 'http'
            else:
                # Any other combination is not turned into a format.
                continue
            if quality:
                format_id += '-%sp' % quality
            f['format_id'] = format_id
            formats.append(f)
        self._sort_formats(formats)

        description = try_get(
            info, lambda x: x['title']['description'],
            compat_str) or info.get('description')
        timestamp = unified_timestamp(info.get('created_at'))
        creator = try_get(
            info, lambda x: x['title']['producer']['name'], compat_str)

        # duration_in_ms is divided by 1000 to obtain seconds.
        duration = float_or_none(
            try_get(info, lambda x: x['content']['duration_in_ms'], int),
            scale=1000)
        view_count = try_get(info, lambda x: x['content']['watch_count'], int)

        images = try_get(
            info, lambda x: x['content']['images']['wide'], dict) or {}
        thumbnails = []
        for image_id, image_url in images.items():
            if not isinstance(image_url, compat_str):
                continue
            # Values lacking a scheme get an https:// prefix.
            if not image_url.startswith(('http', '//')):
                image_url = 'https://%s' % image_url
            t = parse_resolution(image_id)
            t.update({
                'id': image_id,
                'url': image_url
            })
            thumbnails.append(t)

        release_year = try_get(info, lambda x: x['title']['released_at'], int)

        season_number = int_or_none(info.get('season_number'))
        season_id = str_or_none(info.get('season_id'))
        episode_number = int_or_none(info.get('episode_number'))

        tags = []
        for genre in try_get(info, lambda x: x['title']['genres'], list) or []:
            if not isinstance(genre, dict):
                continue
            genre_name = genre.get('name')
            if genre_name and isinstance(genre_name, compat_str):
                tags.append(genre_name)

        subtitles = {}
        for subtitle in try_get(
                info, lambda x: x['content']['subtitles'], list) or []:
            if not isinstance(subtitle, dict):
                continue
            lang = subtitle.get('language')
            sub_url = url_or_none(subtitle.get('url'))
            if not lang or not isinstance(lang, compat_str) or not sub_url:
                continue
            # Append instead of assign so that a second track resolving to
            # the same language key does not silently replace the first.
            subtitles.setdefault(
                self._SUBTITLE_LANGS.get(lang, lang), []).append({
                    'url': sub_url
                })

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'season_id': season_id,
            'season_number': season_number,
            'episode_number': episode_number,
            'release_year': release_year,
            'timestamp': timestamp,
            'creator': creator,
            'view_count': view_count,
            'duration': duration,
            'tags': tags,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'formats': formats
        }
|
||||||
|
|
||||||
|
|
||||||
|
class PuhuTVSerieIE(InfoExtractor):
    """Extractor for puhutv.com ``...-detay`` (detail) pages.

    A detail page is resolved through the ``/api/slug/<id>-detay`` endpoint.
    When the response lists seasons, the result is a playlist whose entries
    are delegated to ``PuhuTVIE``; otherwise the page is treated as a single
    film and a single ``PuhuTVIE`` URL result is returned.
    """

    _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-detay'
    IE_NAME = 'puhutv:serie'
    _TESTS = [{
        'url': 'https://puhutv.com/deniz-yildizi-detay',
        'info_dict': {
            'title': 'Deniz Yıldızı',
            'id': 'deniz-yildizi',
        },
        'playlist_mincount': 205,
    }, {
        # a film detail page that uses the same URL scheme as a series page
        'url': 'https://puhutv.com/kaybedenler-kulubu-detay',
        'only_matching': True,
    }]

    def _extract_entries(self, seasons):
        """Yield one ``url_result`` per episode of every season.

        ``seasons`` is the iterable taken from the detail API response.
        Each season is fetched page by page (40 episodes per request) from
        the ``galadriel.puhutv.com/seasons/<id>`` endpoint until the
        response no longer reports ``hasMore`` as ``True``.

        Malformed items (non-dict seasons/episodes, seasons without an id,
        episodes without a ``slugPath``) are skipped instead of aborting
        the whole playlist.
        """
        for season in seasons:
            if not isinstance(season, dict):
                continue
            season_id = season.get('id')
            if not season_id:
                continue
            page = 1
            has_more = True
            # Strict identity check: anything but a literal True (missing
            # key, null, ...) terminates pagination.
            while has_more is True:
                # Kept in its own name so the outer loop variable is not
                # shadowed by the paged response.
                season_page = self._download_json(
                    'https://galadriel.puhutv.com/seasons/%s' % season_id,
                    season_id, 'Downloading page %s' % page, query={
                        'page': page,
                        'per': 40,
                    })
                episodes = season_page.get('episodes')
                if isinstance(episodes, list):
                    for ep in episodes:
                        if not isinstance(ep, dict):
                            continue
                        slug_path = str_or_none(ep.get('slugPath'))
                        if not slug_path:
                            continue
                        video_id = str_or_none(int_or_none(ep.get('id')))
                        yield self.url_result(
                            'https://puhutv.com/%s' % slug_path,
                            ie=PuhuTVIE.ie_key(), video_id=video_id,
                            video_title=ep.get('name') or ep.get('eventLabel'))
                page += 1
                has_more = season_page.get('hasMore')

    def _real_extract(self, url):
        """Return a playlist for a series page or a URL result for a film."""
        playlist_id = self._match_id(url)

        info = self._download_json(
            urljoin(url, '/api/slug/%s-detay' % playlist_id),
            playlist_id)['data']

        seasons = info.get('seasons')
        if seasons:
            return self.playlist_result(
                self._extract_entries(seasons), playlist_id, info.get('name'))

        # Films share the "-detay" URL scheme with series but carry no
        # seasons; hand the watch page ("-izle") over to PuhuTVIE.
        video_id = info.get('slug') or info['assets'][0]['slug']
        return self.url_result(
            'https://puhutv.com/%s-izle' % video_id,
            PuhuTVIE.ie_key(), video_id)
|
@ -32,6 +32,9 @@ class RaiBaseIE(InfoExtractor):
|
|||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
|
|
||||||
def _extract_relinker_info(self, relinker_url, video_id):
|
def _extract_relinker_info(self, relinker_url, video_id):
|
||||||
|
if not re.match(r'https?://', relinker_url):
|
||||||
|
return {'formats': [{'url': relinker_url}]}
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
geoprotection = None
|
geoprotection = None
|
||||||
is_live = None
|
is_live = None
|
||||||
@ -369,6 +372,10 @@ class RaiIE(RaiBaseIE):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# Direct MMS URL
|
||||||
|
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_from_content_id(self, content_id, url):
|
def _extract_from_content_id(self, content_id, url):
|
||||||
|
@ -72,4 +72,7 @@ class StreamcloudIE(InfoExtractor):
|
|||||||
'title': title,
|
'title': title,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
|
'http_headers': {
|
||||||
|
'Referer': url,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
@ -7,8 +7,10 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
from ..compat import compat_str
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -30,7 +32,7 @@ class TEDIE(InfoExtractor):
|
|||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
|
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
|
||||||
'md5': '0de43ac406aa3e4ea74b66c9c7789b13',
|
'md5': 'b0ce2b05ca215042124fbc9e3886493a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '102',
|
'id': '102',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -42,24 +44,30 @@ class TEDIE(InfoExtractor):
|
|||||||
'uploader': 'Dan Dennett',
|
'uploader': 'Dan Dennett',
|
||||||
'width': 853,
|
'width': 853,
|
||||||
'duration': 1308,
|
'duration': 1308,
|
||||||
}
|
'view_count': int,
|
||||||
}, {
|
'comment_count': int,
|
||||||
'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
|
'tags': list,
|
||||||
'md5': 'b899ac15e345fb39534d913f7606082b',
|
},
|
||||||
'info_dict': {
|
'params': {
|
||||||
'id': 'tSVI8ta_P4w',
|
'skip_download': True,
|
||||||
'ext': 'mp4',
|
},
|
||||||
'title': 'Vishal Sikka: The beauty and power of algorithms',
|
}, {
|
||||||
'thumbnail': r're:^https?://.+\.jpg',
|
# missing HTTP bitrates
|
||||||
'description': 'md5:6261fdfe3e02f4f579cbbfc00aff73f4',
|
'url': 'https://www.ted.com/talks/vishal_sikka_the_beauty_and_power_of_algorithms',
|
||||||
'upload_date': '20140122',
|
'info_dict': {
|
||||||
'uploader_id': 'TEDInstitute',
|
'id': '6069',
|
||||||
'uploader': 'TED Institute',
|
'ext': 'mp4',
|
||||||
|
'title': 'The beauty and power of algorithms',
|
||||||
|
'thumbnail': r're:^https?://.+\.jpg',
|
||||||
|
'description': 'md5:734e352710fb00d840ab87ae31aaf688',
|
||||||
|
'uploader': 'Vishal Sikka',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'add_ie': ['Youtube'],
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
|
'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
|
||||||
'md5': '71b3ab2f4233012dce09d515c9c39ce2',
|
'md5': 'e6b9617c01a7970ceac8bb2c92c346c0',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1972',
|
'id': '1972',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -68,6 +76,9 @@ class TEDIE(InfoExtractor):
|
|||||||
'description': 'md5:5174aed4d0f16021b704120360f72b92',
|
'description': 'md5:5174aed4d0f16021b704120360f72b92',
|
||||||
'duration': 1128,
|
'duration': 1128,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ted.com/playlists/who_are_the_hackers',
|
'url': 'http://www.ted.com/playlists/who_are_the_hackers',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -92,17 +103,17 @@ class TEDIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# YouTube video
|
# no nativeDownloads
|
||||||
'url': 'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond',
|
'url': 'https://www.ted.com/talks/tom_thum_the_orchestra_in_my_mouth',
|
||||||
'add_ie': ['Youtube'],
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'aFBIPO-P7LM',
|
'id': '1792',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'The hidden power of siblings: Jeff Kluger at TEDxAsheville',
|
'title': 'The orchestra in my mouth',
|
||||||
'description': 'md5:3d7a4f50d95ca5dd67104e2a20f43fe1',
|
'description': 'md5:5d1d78650e2f8dfcbb8ebee2951ac29a',
|
||||||
'uploader': 'TEDx Talks',
|
'uploader': 'Tom Thum',
|
||||||
'uploader_id': 'TEDxTalks',
|
'view_count': int,
|
||||||
'upload_date': '20111216',
|
'comment_count': int,
|
||||||
|
'tags': list,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -161,27 +172,16 @@ class TEDIE(InfoExtractor):
|
|||||||
|
|
||||||
info = self._extract_info(webpage)
|
info = self._extract_info(webpage)
|
||||||
|
|
||||||
talk_info = try_get(
|
data = try_get(info, lambda x: x['__INITIAL_DATA__'], dict) or info
|
||||||
info, lambda x: x['__INITIAL_DATA__']['talks'][0],
|
talk_info = data['talks'][0]
|
||||||
dict) or info['talks'][0]
|
|
||||||
|
|
||||||
title = talk_info['title'].strip()
|
title = talk_info['title'].strip()
|
||||||
|
|
||||||
external = talk_info.get('external')
|
|
||||||
if external:
|
|
||||||
service = external['service']
|
|
||||||
self.to_screen('Found video from %s' % service)
|
|
||||||
ext_url = None
|
|
||||||
if service.lower() == 'youtube':
|
|
||||||
ext_url = external.get('code')
|
|
||||||
return {
|
|
||||||
'_type': 'url',
|
|
||||||
'url': ext_url or external['uri'],
|
|
||||||
}
|
|
||||||
|
|
||||||
native_downloads = try_get(
|
native_downloads = try_get(
|
||||||
talk_info, lambda x: x['downloads']['nativeDownloads'],
|
talk_info,
|
||||||
dict) or talk_info['nativeDownloads']
|
(lambda x: x['downloads']['nativeDownloads'],
|
||||||
|
lambda x: x['nativeDownloads']),
|
||||||
|
dict) or {}
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
@ -196,10 +196,24 @@ class TEDIE(InfoExtractor):
|
|||||||
|
|
||||||
player_talk = talk_info['player_talks'][0]
|
player_talk = talk_info['player_talks'][0]
|
||||||
|
|
||||||
|
external = player_talk.get('external')
|
||||||
|
if isinstance(external, dict):
|
||||||
|
service = external.get('service')
|
||||||
|
if isinstance(service, compat_str):
|
||||||
|
ext_url = None
|
||||||
|
if service.lower() == 'youtube':
|
||||||
|
ext_url = external.get('code')
|
||||||
|
return {
|
||||||
|
'_type': 'url',
|
||||||
|
'url': ext_url or external['uri'],
|
||||||
|
}
|
||||||
|
|
||||||
resources_ = player_talk.get('resources') or talk_info.get('resources')
|
resources_ = player_talk.get('resources') or talk_info.get('resources')
|
||||||
|
|
||||||
http_url = None
|
http_url = None
|
||||||
for format_id, resources in resources_.items():
|
for format_id, resources in resources_.items():
|
||||||
|
if not isinstance(resources, dict):
|
||||||
|
continue
|
||||||
if format_id == 'h264':
|
if format_id == 'h264':
|
||||||
for resource in resources:
|
for resource in resources:
|
||||||
h264_url = resource.get('file')
|
h264_url = resource.get('file')
|
||||||
@ -228,8 +242,12 @@ class TEDIE(InfoExtractor):
|
|||||||
'tbr': int_or_none(resource.get('bitrate')),
|
'tbr': int_or_none(resource.get('bitrate')),
|
||||||
})
|
})
|
||||||
elif format_id == 'hls':
|
elif format_id == 'hls':
|
||||||
|
stream_url = url_or_none(resources.get('stream'))
|
||||||
|
if not stream_url:
|
||||||
|
continue
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
resources.get('stream'), video_name, 'mp4', m3u8_id=format_id, fatal=False))
|
stream_url, video_name, 'mp4', m3u8_id=format_id,
|
||||||
|
fatal=False))
|
||||||
|
|
||||||
m3u8_formats = list(filter(
|
m3u8_formats = list(filter(
|
||||||
lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
|
lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
|
||||||
@ -239,9 +257,13 @@ class TEDIE(InfoExtractor):
|
|||||||
bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
|
bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
|
||||||
if not bitrate:
|
if not bitrate:
|
||||||
continue
|
continue
|
||||||
|
bitrate_url = re.sub(r'\d+k', bitrate, http_url)
|
||||||
|
if not self._is_valid_url(
|
||||||
|
bitrate_url, video_name, '%s bitrate' % bitrate):
|
||||||
|
continue
|
||||||
f = m3u8_format.copy()
|
f = m3u8_format.copy()
|
||||||
f.update({
|
f.update({
|
||||||
'url': re.sub(r'\d+k', bitrate, http_url),
|
'url': bitrate_url,
|
||||||
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
|
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
|
||||||
'protocol': 'http',
|
'protocol': 'http',
|
||||||
})
|
})
|
||||||
@ -267,7 +289,11 @@ class TEDIE(InfoExtractor):
|
|||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
'subtitles': self._get_subtitles(video_id, talk_info),
|
'subtitles': self._get_subtitles(video_id, talk_info),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'duration': talk_info.get('duration'),
|
'duration': float_or_none(talk_info.get('duration')),
|
||||||
|
'view_count': int_or_none(data.get('viewed_count')),
|
||||||
|
'comment_count': int_or_none(
|
||||||
|
try_get(data, lambda x: x['comments']['count'])),
|
||||||
|
'tags': try_get(talk_info, lambda x: x['tags'], list),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _get_subtitles(self, video_id, talk_info):
|
def _get_subtitles(self, video_id, talk_info):
|
||||||
|
@ -1,26 +1,43 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .mitele import MiTeleBaseIE
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .ooyala import OoyalaIE
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
determine_ext,
|
||||||
|
int_or_none,
|
||||||
|
str_or_none,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TelecincoIE(MiTeleBaseIE):
|
class TelecincoIE(InfoExtractor):
|
||||||
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
|
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'
|
_VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
|
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
|
||||||
'md5': '8d7b2d5f699ee2709d992a63d5cd1712',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'JEA5ijCnF6p5W08A1rNKn7',
|
'id': '1876350223',
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Bacalao con kokotxas al pil-pil',
|
'title': 'Bacalao con kokotxas al pil-pil',
|
||||||
'description': 'md5:1382dacd32dd4592d478cbdca458e5bb',
|
'description': 'md5:1382dacd32dd4592d478cbdca458e5bb',
|
||||||
'duration': 662,
|
|
||||||
},
|
},
|
||||||
|
'playlist': [{
|
||||||
|
'md5': 'adb28c37238b675dad0f042292f209a7',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'JEA5ijCnF6p5W08A1rNKn7',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Con Martín Berasategui, hacer un bacalao al pil-pil es fácil y divertido',
|
||||||
|
'duration': 662,
|
||||||
|
},
|
||||||
|
}]
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
|
'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
|
||||||
'md5': '284393e5387b3b947b77c613ef04749a',
|
'md5': '9468140ebc300fbb8b9d65dc6e5c4b43',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'jn24Od1zGLG4XUZcnUnZB6',
|
'id': 'jn24Od1zGLG4XUZcnUnZB6',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -30,7 +47,7 @@ class TelecincoIE(MiTeleBaseIE):
|
|||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
|
'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
|
||||||
'md5': '749afab6ea5a136a8806855166ae46a2',
|
'md5': 'ae2dc6b7b50b2392076a51c0f70e01f6',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'aywerkD2Sv1vGNqq9b85Q2',
|
'id': 'aywerkD2Sv1vGNqq9b85Q2',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -50,17 +67,90 @@ class TelecincoIE(MiTeleBaseIE):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _parse_content(self, content, url):
    """Build an info dict (or an Ooyala URL result) for one video item.

    ``content`` is the video descriptor taken from the page's article
    JSON; ``dataMediaId`` and (for non-Ooyala items) ``dataConfig`` are
    required keys. ``url`` is the page URL, used to resolve the relative
    config URL and sent as ``Referer`` when requesting stream tokens.

    Returns either ``self.url_result(...)`` for Ooyala-hosted items or a
    dict with ``id``, ``title``, ``formats``, ``thumbnail`` and
    ``duration``.
    """
    video_id = content['dataMediaId']
    if content.get('dataCmsId') == 'ooyala':
        return self.url_result(
            'ooyala:%s' % video_id, OoyalaIE.ie_key(), video_id)
    config_url = urljoin(url, content['dataConfig'])
    config = self._download_json(
        config_url, video_id, 'Downloading config JSON')
    title = config['info']['title']

    def mmc_url(mmc_type):
        # The config points at one flavour (flash or html5); rewrite the
        # file name to request the flavour we want.
        return re.sub(
            r'/(?:flash|html5)\.json', '/%s.json' % mmc_type,
            config['services']['mmc'])

    duration = None
    formats = []
    for mmc_type in ('flash', 'html5'):
        mmc = self._download_json(
            mmc_url(mmc_type), video_id,
            'Downloading %s mmc JSON' % mmc_type, fatal=False)
        if not mmc:
            continue
        if not duration:
            duration = int_or_none(mmc.get('duration'))
        # This request is best-effort (fatal=False), so a response that
        # lacks "locations" must not abort the whole extraction.
        for location in mmc.get('locations') or []:
            if not isinstance(location, dict):
                continue
            gat = self._proto_relative_url(location.get('gat'), 'http:')
            gcp = location.get('gcp')
            ogn = location.get('ogn')
            if None in (gat, gcp, ogn):
                continue
            token_data = {
                'gcp': gcp,
                'ogn': ogn,
                'sta': 0,
            }
            # POST the token data to the "gat" URL; the JSON answer is
            # expected to carry the actual stream URL.
            media = self._download_json(
                gat, video_id, data=json.dumps(token_data).encode('utf-8'),
                headers={
                    'Content-Type': 'application/json;charset=utf-8',
                    'Referer': url,
                }, fatal=False) or {}
            stream = media.get('stream') or media.get('file')
            if not stream:
                continue
            ext = determine_ext(stream)
            if ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
                    video_id, f4m_id='hds', fatal=False))
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    stream, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
    self._sort_formats(formats)

    return {
        'id': video_id,
        'title': title,
        'formats': formats,
        # "poster" may be present but null, so guard before the nested .get
        'thumbnail': content.get('dataPoster') or (config.get('poster') or {}).get('imageUrl'),
        'duration': duration,
    }
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
title = self._html_search_meta(
|
article = self._parse_json(self._search_regex(
|
||||||
['og:title', 'twitter:title'], webpage, 'title')
|
r'window\.\$REACTBASE_STATE\.article\s*=\s*({.+})',
|
||||||
info = self._get_player_info(url, webpage)
|
webpage, 'article'), display_id)['article']
|
||||||
|
title = article.get('title')
|
||||||
|
description = clean_html(article.get('leadParagraph'))
|
||||||
|
if article.get('editorialType') != 'VID':
|
||||||
|
entries = []
|
||||||
|
for p in article.get('body', []):
|
||||||
|
content = p.get('content')
|
||||||
|
if p.get('type') != 'video' or not content:
|
||||||
|
continue
|
||||||
|
entries.append(self._parse_content(content, url))
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, str_or_none(article.get('id')), title, description)
|
||||||
|
content = article['opening']['content']
|
||||||
|
info = self._parse_content(content, url)
|
||||||
info.update({
|
info.update({
|
||||||
'display_id': display_id,
|
'description': description,
|
||||||
'title': title,
|
|
||||||
'description': self._html_search_meta(
|
|
||||||
['og:description', 'twitter:description'],
|
|
||||||
webpage, 'title', fatal=False),
|
|
||||||
})
|
})
|
||||||
return info
|
return info
|
||||||
|
@ -17,6 +17,7 @@ from ..utils import (
|
|||||||
int_or_none,
|
int_or_none,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
remove_start,
|
remove_start,
|
||||||
|
str_or_none,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
@ -106,10 +107,10 @@ class VKIE(VKBaseIE):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'ProtivoGunz - Хуёвая песня',
|
'title': 'ProtivoGunz - Хуёвая песня',
|
||||||
'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
|
'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
|
||||||
|
'uploader_id': '-77521',
|
||||||
'duration': 195,
|
'duration': 195,
|
||||||
'timestamp': 1329060660,
|
'timestamp': 1329049880,
|
||||||
'upload_date': '20120212',
|
'upload_date': '20120212',
|
||||||
'view_count': int,
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -118,12 +119,12 @@ class VKIE(VKBaseIE):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '165548505',
|
'id': '165548505',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'uploader': 'Tom Cruise',
|
|
||||||
'title': 'No name',
|
'title': 'No name',
|
||||||
|
'uploader': 'Tom Cruise',
|
||||||
|
'uploader_id': '205387401',
|
||||||
'duration': 9,
|
'duration': 9,
|
||||||
'timestamp': 1374374880,
|
'timestamp': 1374364108,
|
||||||
'upload_date': '20130721',
|
'upload_date': '20130720',
|
||||||
'view_count': int,
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -207,10 +208,10 @@ class VKIE(VKBaseIE):
|
|||||||
'id': 'V3K4mi0SYkc',
|
'id': 'V3K4mi0SYkc',
|
||||||
'ext': 'webm',
|
'ext': 'webm',
|
||||||
'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
|
'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
|
||||||
'description': 'md5:d9903938abdc74c738af77f527ca0596',
|
'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
|
||||||
'duration': 178,
|
'duration': 179,
|
||||||
'upload_date': '20130116',
|
'upload_date': '20130116',
|
||||||
'uploader': "Children's Joy Foundation",
|
'uploader': "Children's Joy Foundation Inc.",
|
||||||
'uploader_id': 'thecjf',
|
'uploader_id': 'thecjf',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
@ -222,6 +223,7 @@ class VKIE(VKBaseIE):
|
|||||||
'id': 'k3lz2cmXyRuJQSjGHUv',
|
'id': 'k3lz2cmXyRuJQSjGHUv',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
|
'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
|
||||||
|
# TODO: fix test by fixing dailymotion description extraction
|
||||||
'description': 'md5:c651358f03c56f1150b555c26d90a0fd',
|
'description': 'md5:c651358f03c56f1150b555c26d90a0fd',
|
||||||
'uploader': 'AniLibria.Tv',
|
'uploader': 'AniLibria.Tv',
|
||||||
'upload_date': '20160914',
|
'upload_date': '20160914',
|
||||||
@ -241,9 +243,12 @@ class VKIE(VKBaseIE):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'S-Dance, репетиции к The way show',
|
'title': 'S-Dance, репетиции к The way show',
|
||||||
'uploader': 'THE WAY SHOW | 17 апреля',
|
'uploader': 'THE WAY SHOW | 17 апреля',
|
||||||
'timestamp': 1454870100,
|
'uploader_id': '-110305615',
|
||||||
|
'timestamp': 1454859345,
|
||||||
'upload_date': '20160207',
|
'upload_date': '20160207',
|
||||||
'view_count': int,
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -296,7 +301,7 @@ class VKIE(VKBaseIE):
|
|||||||
video_id = mobj.group('videoid')
|
video_id = mobj.group('videoid')
|
||||||
|
|
||||||
if video_id:
|
if video_id:
|
||||||
info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
|
info_url = 'https://vk.com/al_video.php?act=show_inline&al=1&video=' + video_id
|
||||||
# Some videos (removed?) can only be downloaded with list id specified
|
# Some videos (removed?) can only be downloaded with list id specified
|
||||||
list_id = mobj.group('list_id')
|
list_id = mobj.group('list_id')
|
||||||
if list_id:
|
if list_id:
|
||||||
@ -346,6 +351,9 @@ class VKIE(VKBaseIE):
|
|||||||
|
|
||||||
r'<!>This video is no longer available, because its author has been blocked.':
|
r'<!>This video is no longer available, because its author has been blocked.':
|
||||||
'Video %s is no longer available, because its author has been blocked.',
|
'Video %s is no longer available, because its author has been blocked.',
|
||||||
|
|
||||||
|
r'<!>This video is no longer available, because it has been deleted.':
|
||||||
|
'Video %s is no longer available, because it has been deleted.',
|
||||||
}
|
}
|
||||||
|
|
||||||
for error_re, error_msg in ERRORS.items():
|
for error_re, error_msg in ERRORS.items():
|
||||||
@ -394,7 +402,8 @@ class VKIE(VKBaseIE):
|
|||||||
if not data:
|
if not data:
|
||||||
data = self._parse_json(
|
data = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'<!json>\s*({.+?})\s*<!>', info_page, 'json', default='{}'),
|
[r'<!json>\s*({.+?})\s*<!>', r'<!json>\s*({.+})'],
|
||||||
|
info_page, 'json', default='{}'),
|
||||||
video_id)
|
video_id)
|
||||||
if data:
|
if data:
|
||||||
data = data['player']['params'][0]
|
data = data['player']['params'][0]
|
||||||
@ -416,7 +425,7 @@ class VKIE(VKBaseIE):
|
|||||||
|
|
||||||
timestamp = unified_timestamp(self._html_search_regex(
|
timestamp = unified_timestamp(self._html_search_regex(
|
||||||
r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
|
r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
|
||||||
'upload date', fatal=False))
|
'upload date', default=None)) or int_or_none(data.get('date'))
|
||||||
|
|
||||||
view_count = str_to_int(self._search_regex(
|
view_count = str_to_int(self._search_regex(
|
||||||
r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
|
r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
|
||||||
@ -454,9 +463,12 @@ class VKIE(VKBaseIE):
|
|||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': data.get('jpg'),
|
'thumbnail': data.get('jpg'),
|
||||||
'uploader': data.get('md_author'),
|
'uploader': data.get('md_author'),
|
||||||
|
'uploader_id': str_or_none(data.get('author_id')),
|
||||||
'duration': data.get('duration'),
|
'duration': data.get('duration'),
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
|
'like_count': int_or_none(data.get('liked')),
|
||||||
|
'dislike_count': int_or_none(data.get('nolikes')),
|
||||||
'is_live': is_live,
|
'is_live': is_live,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3569,7 +3569,7 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
|
|||||||
setattr(self, '%s_open' % type,
|
setattr(self, '%s_open' % type,
|
||||||
lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
|
lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
|
||||||
meth(r, proxy, type))
|
meth(r, proxy, type))
|
||||||
return compat_urllib_request.ProxyHandler.__init__(self, proxies)
|
compat_urllib_request.ProxyHandler.__init__(self, proxies)
|
||||||
|
|
||||||
def proxy_open(self, req, proxy, type):
|
def proxy_open(self, req, proxy, type):
|
||||||
req_proxy = req.headers.get('Ytdl-request-proxy')
|
req_proxy = req.headers.get('Ytdl-request-proxy')
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2018.07.21'
|
__version__ = '2018.07.29'
|
||||||
|
Loading…
Reference in New Issue
Block a user