mirror of
https://github.com/l1ving/youtube-dl
synced 2025-02-09 11:42:54 +08:00
Merge branch 'master' into Vimeo-issue-16717
This commit is contained in:
commit
899387c453
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.07.21*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.07.21**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.07.29*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.07.29**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2018.07.21
|
||||
[debug] youtube-dl version 2018.07.29
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
7
AUTHORS
7
AUTHORS
@ -239,3 +239,10 @@ Martin Weinelt
|
||||
Surya Oktafendri
|
||||
TingPing
|
||||
Alexandre Macabies
|
||||
Bastian de Groot
|
||||
Niklas Haas
|
||||
András Veres-Szentkirályi
|
||||
Enes Solak
|
||||
Nathan Rossi
|
||||
Thomas van der Berg
|
||||
Luca Cherubin
|
||||
|
18
ChangeLog
18
ChangeLog
@ -1,3 +1,21 @@
|
||||
version 2018.07.29
|
||||
|
||||
Extractors
|
||||
* [crunchyroll:playlist] Restrict URL regular expression (#17069, #17076)
|
||||
+ [pornhub] Add support for subtitles (#16924, #17088)
|
||||
* [ceskatelevize] Use https for API call (#16997, #16999)
|
||||
* [dailymotion:playlist] Fix extraction (#16894)
|
||||
* [ted] Improve extraction
|
||||
* [ted] Fix extraction for videos without nativeDownloads (#16756, #17085)
|
||||
* [telecinco] Fix extraction (#17080)
|
||||
* [mitele] Reduce number of requests
|
||||
* [rai] Return non HTTP relinker URL intact (#17055)
|
||||
* [vk] Fix extraction for inline only videos (#16923)
|
||||
* [streamcloud] Fix extraction (#17054)
|
||||
* [facebook] Fix tahoe player extraction with authentication (#16655)
|
||||
+ [puhutv] Add support for puhutv.com (#12712, #16010, #16269)
|
||||
|
||||
|
||||
version 2018.07.21
|
||||
|
||||
Core
|
||||
|
@ -870,7 +870,7 @@ Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the op
|
||||
|
||||
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
|
||||
|
||||
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox).
|
||||
In order to extract cookies from a browser, use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
|
||||
|
||||
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||
|
||||
|
@ -672,6 +672,8 @@
|
||||
- **PrimeShareTV**
|
||||
- **PromptFile**
|
||||
- **prosiebensat1**: ProSiebenSat.1 Digital
|
||||
- **puhutv**
|
||||
- **puhutv:serie**
|
||||
- **Puls4**
|
||||
- **Pyvideo**
|
||||
- **qqmusic**: QQ音乐
|
||||
|
@ -108,7 +108,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
|
||||
for user_agent in (None, USER_AGENTS['Safari']):
|
||||
req = sanitized_Request(
|
||||
'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
|
||||
'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
|
||||
data=urlencode_postdata(data))
|
||||
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
|
@ -262,6 +262,9 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
# Just test metadata extraction
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.com/media-723735',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMAT_IDS = {
|
||||
@ -580,7 +583,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
|
||||
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||
IE_NAME = 'crunchyroll:playlist'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?(?:\?|$)'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
|
||||
|
@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import functools
|
||||
import hashlib
|
||||
import itertools
|
||||
import json
|
||||
@ -16,11 +17,13 @@ from ..utils import (
|
||||
error_to_compat_str,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
OnDemandPagedList,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
mimetype2ext,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@ -343,17 +346,93 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
IE_NAME = 'dailymotion:playlist'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>[^/?#&]+)'
|
||||
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
|
||||
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
|
||||
'info_dict': {
|
||||
'title': 'SPORT',
|
||||
'id': 'xv4bw_nqtv_sport',
|
||||
'id': 'xv4bw',
|
||||
},
|
||||
'playlist_mincount': 20,
|
||||
}]
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
def _fetch_page(self, playlist_id, authorizaion, page):
    """Yield one url_result per video on a single page of a playlist.

    The incoming ``page`` index is increased by one before it is sent to
    the GraphQL endpoint and shown in the progress note.

    playlist_id  -- collection xid interpolated into the GraphQL query
    authorizaion -- value sent verbatim as the ``Authorization`` header
                    (the spelling is part of the existing signature)
    page         -- page index as supplied by the caller
    """
    page_number = page + 1
    query = '''{
collection(xid: "%s") {
videos(first: %d, page: %d) {
pageInfo {
hasNextPage
nextPage
}
edges {
node {
xid
url
}
}
}
}
}''' % (playlist_id, self._PAGE_SIZE, page_number)
    payload = json.dumps({
        'query': query
    }).encode()
    request_headers = {
        'Authorization': authorizaion,
        'Origin': 'https://www.dailymotion.com',
    }
    response = self._download_json(
        'https://graphql.api.dailymotion.com',
        playlist_id, 'Downloading page %d' % page_number,
        data=payload, headers=request_headers)
    collection_videos = response['data']['collection']['videos']
    for edge in collection_videos['edges']:
        node = edge['node']
        # Hand every video over to the single-video Dailymotion extractor.
        yield self.url_result(
            node['url'], DailymotionIE.ie_key(), node['xid'])
|
||||
|
||||
def _real_extract(self, url):
    """Build a lazily paged playlist result for a Dailymotion playlist URL.

    The playlist page is downloaded to read the ``__PLAYER_CONFIG__`` JSON,
    whose ``context.api`` section supplies the token endpoint and client
    credentials (with hard-coded fallbacks). The token obtained from that
    endpoint is then bound into ``_fetch_page`` for on-demand paging.
    """
    playlist_id = self._match_id(url)
    webpage = self._download_webpage(url, playlist_id)

    player_config = self._parse_json(
        self._search_regex(
            r'__PLAYER_CONFIG__\s*=\s*({.+?});',
            webpage, 'player config'),
        playlist_id)
    api = player_config['context']['api']

    # Client-credentials grant: no user login is involved.
    token_request = urlencode_postdata({
        'client_id': api.get('client_id', 'f1a362d288c1b98099c7'),
        'client_secret': api.get('client_secret', 'eea605b96e01c796ff369935357eca920c5da4c5'),
        'grant_type': 'client_credentials',
    })
    auth = self._download_json(
        api.get('auth_url', 'https://graphql.api.dailymotion.com/oauth/token'),
        playlist_id, data=token_request)
    authorization = '%s %s' % (
        auth.get('token_type', 'Bearer'), auth['access_token'])

    # Pages are only fetched when the playlist entries are consumed.
    page_fetcher = functools.partial(
        self._fetch_page, playlist_id, authorization)
    entries = OnDemandPagedList(page_fetcher, self._PAGE_SIZE)

    playlist_title = self._og_search_title(webpage)
    return self.playlist_result(entries, playlist_id, playlist_title)
|
||||
|
||||
|
||||
class DailymotionUserIE(DailymotionBaseInfoExtractor):
|
||||
IE_NAME = 'dailymotion:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
|
||||
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
|
||||
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||
'info_dict': {
|
||||
'id': 'nqtv',
|
||||
'title': 'Rémi Gaillard',
|
||||
},
|
||||
'playlist_mincount': 100,
|
||||
}, {
|
||||
'url': 'http://www.dailymotion.com/user/UnderProject',
|
||||
'info_dict': {
|
||||
'id': 'UnderProject',
|
||||
'title': 'UnderProject',
|
||||
},
|
||||
'playlist_mincount': 1800,
|
||||
'expected_warnings': [
|
||||
'Stopped at duplicated page',
|
||||
],
|
||||
'skip': 'Takes too long time',
|
||||
}]
|
||||
|
||||
def _extract_entries(self, id):
|
||||
video_ids = set()
|
||||
@ -379,43 +458,6 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
    """Return a playlist dict for the page at ``url``.

    The playlist id is the ``id`` group of ``_VALID_URL``; the title comes
    from the page's Open Graph title and the entries from
    ``_extract_entries``.
    """
    playlist_id = re.match(self._VALID_URL, url).group('id')
    webpage = self._download_webpage(url, playlist_id)

    playlist_title = self._og_search_title(webpage)
    playlist_entries = self._extract_entries(playlist_id)
    return {
        '_type': 'playlist',
        'id': playlist_id,
        'title': playlist_title,
        'entries': playlist_entries,
    }
|
||||
|
||||
|
||||
class DailymotionUserIE(DailymotionPlaylistIE):
|
||||
IE_NAME = 'dailymotion:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
|
||||
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||
'info_dict': {
|
||||
'id': 'nqtv',
|
||||
'title': 'Rémi Gaillard',
|
||||
},
|
||||
'playlist_mincount': 100,
|
||||
}, {
|
||||
'url': 'http://www.dailymotion.com/user/UnderProject',
|
||||
'info_dict': {
|
||||
'id': 'UnderProject',
|
||||
'title': 'UnderProject',
|
||||
},
|
||||
'playlist_mincount': 1800,
|
||||
'expected_warnings': [
|
||||
'Stopped at duplicated page',
|
||||
],
|
||||
'skip': 'Takes too long time',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
user = mobj.group('user')
|
||||
|
@ -860,6 +860,10 @@ from .pornhub import (
|
||||
from .pornotube import PornotubeIE
|
||||
from .pornovoisines import PornoVoisinesIE
|
||||
from .pornoxo import PornoXOIE
|
||||
from .puhutv import (
|
||||
PuhuTVIE,
|
||||
PuhuTVSerieIE,
|
||||
)
|
||||
from .presstv import PressTVIE
|
||||
from .primesharetv import PrimeShareTVIE
|
||||
from .promptfile import PromptFileIE
|
||||
|
@ -355,7 +355,6 @@ class FacebookIE(InfoExtractor):
|
||||
tahoe_data = self._download_webpage(
|
||||
self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
|
||||
data=urlencode_postdata({
|
||||
'__user': 0,
|
||||
'__a': 1,
|
||||
'__pc': self._search_regex(
|
||||
r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
|
||||
@ -363,6 +362,9 @@ class FacebookIE(InfoExtractor):
|
||||
'__rev': self._search_regex(
|
||||
r'client_revision["\']\s*:\s*(\d+),', webpage,
|
||||
'client revision', default='3944515'),
|
||||
'fb_dtsg': self._search_regex(
|
||||
r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"',
|
||||
webpage, 'dtsg token', default=''),
|
||||
}),
|
||||
headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
|
@ -1,84 +1,14 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
extract_attributes,
|
||||
determine_ext,
|
||||
smuggle_url,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class MiTeleBaseIE(InfoExtractor):
    """Base extractor with the shared ``<ms-video-player>`` parsing logic."""

    def _get_player_info(self, url, webpage):
        """Extract formats and basic metadata from an embedded player.

        url     -- page URL; used to resolve the relative player config URL
                   and sent as ``Referer`` with the token requests
        webpage -- HTML of the page, searched for ``<ms-video-player>``

        Returns a url_result delegating to the Ooyala extractor when the
        player declares ``data-cms-id="ooyala"``; otherwise a dict with
        ``id``, ``formats``, ``thumbnail`` and ``duration``.
        """
        player_data = extract_attributes(self._search_regex(
            r'(?s)(<ms-video-player.+?</ms-video-player>)',
            webpage, 'ms video player'))
        video_id = player_data['data-media-id']
        if player_data.get('data-cms-id') == 'ooyala':
            return self.url_result(
                'ooyala:%s' % video_id, ie=OoyalaIE.ie_key(), video_id=video_id)

        config_url = compat_urlparse.urljoin(url, player_data['data-config'])
        config = self._download_json(
            config_url, video_id, 'Downloading config JSON')
        mmc_url = config['services']['mmc']

        # Query the flash variant and its html5 counterpart. str.replace()
        # returns the URL unchanged when it has no '/flash.json' part; in
        # that case the second request would repeat the first one and
        # duplicate every format, so it is skipped.
        mmc_urls = [mmc_url]
        html5_url = mmc_url.replace('/flash.json', '/html5.json')
        if html5_url != mmc_url:
            mmc_urls.append(html5_url)

        duration = None
        formats = []
        for m_url in mmc_urls:
            mmc = self._download_json(
                m_url, video_id, 'Downloading mmc JSON')
            if not duration:
                duration = int_or_none(mmc.get('duration'))
            for location in mmc['locations']:
                gat = self._proto_relative_url(location.get('gat'), 'http:')
                gcp = location.get('gcp')
                ogn = location.get('ogn')
                # All three values are needed to request the stream.
                if None in (gat, gcp, ogn):
                    continue
                token_data = {
                    'gcp': gcp,
                    'ogn': ogn,
                    'sta': 0,
                }
                media = self._download_json(
                    gat, video_id, data=json.dumps(token_data).encode('utf-8'),
                    headers={
                        'Content-Type': 'application/json;charset=utf-8',
                        'Referer': url,
                    })
                stream = media.get('stream') or media.get('file')
                if not stream:
                    continue
                ext = determine_ext(stream)
                if ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
                        video_id, f4m_id='hds', fatal=False))
                elif ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        stream, video_id, 'mp4', 'm3u8_native',
                        m3u8_id='hls', fatal=False))
        self._sort_formats(formats)

        # 'poster' may be present but null in the config JSON; fall back to
        # an empty dict so the lookup cannot fail on None.
        poster = config.get('poster') or {}

        return {
            'id': video_id,
            'formats': formats,
            'thumbnail': player_data.get('data-poster') or poster.get('imageUrl'),
            'duration': duration,
        }
|
||||
|
||||
|
||||
class MiTeleIE(InfoExtractor):
|
||||
IE_DESC = 'mitele.es'
|
||||
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
|
||||
@ -86,7 +16,7 @@ class MiTeleIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
|
||||
'info_dict': {
|
||||
'id': '57b0dfb9c715da65618b4afa',
|
||||
'id': 'FhYW1iNTE6J6H7NkQRIEzfne6t2quqPg',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tor, la web invisible',
|
||||
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
|
||||
@ -104,7 +34,7 @@ class MiTeleIE(InfoExtractor):
|
||||
# no explicit title
|
||||
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
|
||||
'info_dict': {
|
||||
'id': '57b0de3dc915da14058b4876',
|
||||
'id': 'oyNG1iNTE6TAPP-JmCjbwfwJqqMMX3Vq',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cuarto Milenio Temporada 6 Programa 226',
|
||||
'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
|
||||
@ -128,40 +58,21 @@ class MiTeleIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
gigya_url = self._search_regex(
|
||||
r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s+src="([^"]*)">[^>]*</script>',
|
||||
webpage, 'gigya', default=None)
|
||||
gigya_sc = self._download_webpage(
|
||||
compat_urlparse.urljoin('http://www.mitele.es/', gigya_url),
|
||||
video_id, 'Downloading gigya script')
|
||||
|
||||
# Get a appKey/uuid for getting the session key
|
||||
appKey = self._search_regex(
|
||||
r'constant\s*\(\s*["\']_appGridApplicationKey["\']\s*,\s*["\']([0-9a-f]+)',
|
||||
gigya_sc, 'appKey')
|
||||
|
||||
session_json = self._download_json(
|
||||
'https://appgrid-api.cloud.accedo.tv/session',
|
||||
video_id, 'Downloading session keys', query={
|
||||
'appKey': appKey,
|
||||
'uuid': compat_str(uuid.uuid4()),
|
||||
})
|
||||
|
||||
paths = self._download_json(
|
||||
'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration',
|
||||
video_id, 'Downloading paths JSON',
|
||||
query={'sessionKey': compat_str(session_json['sessionKey'])})
|
||||
'https://www.mitele.es/amd/agp/web/metadata/general_configuration',
|
||||
video_id, 'Downloading paths JSON')
|
||||
|
||||
ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search']
|
||||
base_url = ooyala_s.get('base_url', 'cdn-search-mediaset.carbyne.ps.ooyala.com')
|
||||
full_path = ooyala_s.get('full_path', '/search/v1/full/providers/')
|
||||
source = self._download_json(
|
||||
'http://%s%s%s/docs/%s' % (
|
||||
ooyala_s['base_url'], ooyala_s['full_path'],
|
||||
ooyala_s['provider_id'], video_id),
|
||||
'%s://%s%s%s/docs/%s' % (
|
||||
ooyala_s.get('protocol', 'https'), base_url, full_path,
|
||||
ooyala_s.get('provider_id', '104951'), video_id),
|
||||
video_id, 'Downloading data JSON', query={
|
||||
'include_titles': 'Series,Season',
|
||||
'product_name': 'test',
|
||||
'product_name': ooyala_s.get('product_name', 'test'),
|
||||
'format': 'full',
|
||||
})['hits']['hits'][0]['_source']
|
||||
|
||||
|
@ -18,6 +18,7 @@ from ..utils import (
|
||||
orderedSet,
|
||||
remove_quotes,
|
||||
str_to_int,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -68,6 +69,31 @@ class PornHubIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# subtitles
|
||||
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
|
||||
'info_dict': {
|
||||
'id': 'ph5af5fef7c2aa7',
|
||||
'ext': 'mp4',
|
||||
'title': 'BFFS - Cute Teen Girls Share Cock On the Floor',
|
||||
'uploader': 'BFFs',
|
||||
'duration': 622,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'age_limit': 18,
|
||||
'tags': list,
|
||||
'categories': list,
|
||||
'subtitles': {
|
||||
'en': [{
|
||||
"ext": 'srt'
|
||||
}]
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
|
||||
'only_matching': True,
|
||||
@ -139,12 +165,19 @@ class PornHubIE(InfoExtractor):
|
||||
|
||||
video_urls = []
|
||||
video_urls_set = set()
|
||||
subtitles = {}
|
||||
|
||||
flashvars = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
|
||||
video_id)
|
||||
if flashvars:
|
||||
subtitle_url = url_or_none(flashvars.get('closedCaptionsFile'))
|
||||
if subtitle_url:
|
||||
subtitles.setdefault('en', []).append({
|
||||
'url': subtitle_url,
|
||||
'ext': 'srt',
|
||||
})
|
||||
thumbnail = flashvars.get('image_url')
|
||||
duration = int_or_none(flashvars.get('video_duration'))
|
||||
media_definitions = flashvars.get('mediaDefinitions')
|
||||
@ -256,6 +289,7 @@ class PornHubIE(InfoExtractor):
|
||||
'age_limit': 18,
|
||||
'tags': tags,
|
||||
'categories': categories,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
|
247
youtube_dl/extractor/puhutv.py
Normal file
247
youtube_dl/extractor/puhutv.py
Normal file
@ -0,0 +1,247 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
parse_resolution,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class PuhuTVIE(InfoExtractor):
    """Extractor for single puhutv.com watch pages (URLs ending in ``-izle``)."""

    _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-izle'
    IE_NAME = 'puhutv'
    _TESTS = [{
        # film
        'url': 'https://puhutv.com/sut-kardesler-izle',
        'md5': 'fbd8f2d8e7681f8bcd51b592475a6ae7',
        'info_dict': {
            'id': '5085',
            'display_id': 'sut-kardesler',
            'ext': 'mp4',
            'title': 'Süt Kardeşler',
            'description': 'md5:405fd024df916ca16731114eb18e511a',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 4832.44,
            'creator': 'Arzu Film',
            'timestamp': 1469778212,
            'upload_date': '20160729',
            'release_year': 1976,
            'view_count': int,
            'tags': ['Aile', 'Komedi', 'Klasikler'],
        },
    }, {
        # episode, geo restricted, bypassable with --geo-verification-proxy
        'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle',
        'only_matching': True,
    }, {
        # 4k, with subtitles
        'url': 'https://puhutv.com/dip-1-bolum-izle',
        'only_matching': True,
    }]
    # Maps the language label used by the API to the code used as subtitle key.
    _SUBTITLE_LANGS = {
        'English': 'en',
        'Deutsch': 'de',
        'عربى': 'ar'
    }

    def _real_extract(self, url):
        """Return the info dict for the video behind a ``-izle`` URL.

        Metadata comes from ``/api/slug/<slug>-izle`` and the media list
        from ``/api/assets/<id>/videos``. An HTTP 403 on the latter is
        reported as a geo restriction.
        """
        display_id = self._match_id(url)

        slug_api_url = urljoin(url, '/api/slug/%s-izle' % display_id)
        info = self._download_json(slug_api_url, display_id)['data']

        video_id = compat_str(info['id'])
        title = info.get('name') or info['title']['name']
        display_name = info.get('display_name')
        if display_name:
            title = '%s %s' % (title, display_name)

        try:
            videos = self._download_json(
                'https://puhutv.com/api/assets/%s/videos' % video_id,
                display_id, 'Downloading video JSON',
                headers=self.geo_verification_headers())
        except ExtractorError as err:
            forbidden = (
                isinstance(err.cause, compat_HTTPError)
                and err.cause.code == 403)
            if forbidden:
                self.raise_geo_restricted()
            raise

        formats = []
        for entry in videos['data']['videos']:
            media_url = url_or_none(entry.get('url'))
            if not media_url:
                continue
            is_playlist = entry.get('is_playlist')
            if is_playlist is True and entry.get('stream_type') == 'hls':
                # Master playlist: let the m3u8 helper expand the variants.
                formats.extend(self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
                continue
            height = int_or_none(entry.get('quality'))
            kind = entry.get('video_format')
            if kind == 'hls' and is_playlist is False:
                format_id, extra = 'hls', {'protocol': 'm3u8_native'}
            elif kind == 'mp4':
                format_id, extra = 'http', {}
            else:
                # Anything else is not turned into a format.
                continue
            if height:
                format_id = '%s-%sp' % (format_id, height)
            fmt = {
                'url': media_url,
                'ext': 'mp4',
                'height': height
            }
            fmt.update(extra)
            fmt['format_id'] = format_id
            formats.append(fmt)
        self._sort_formats(formats)

        description = try_get(
            info, lambda i: i['title']['description'], compat_str)
        if not description:
            description = info.get('description')
        timestamp = unified_timestamp(info.get('created_at'))
        creator = try_get(
            info, lambda i: i['title']['producer']['name'], compat_str)

        duration_ms = try_get(
            info, lambda i: i['content']['duration_in_ms'], int)
        duration = float_or_none(duration_ms, scale=1000)
        view_count = try_get(
            info, lambda i: i['content']['watch_count'], int)

        wide_images = try_get(
            info, lambda i: i['content']['images']['wide'], dict) or {}
        thumbnails = []
        for image_id, image_url in wide_images.items():
            if not isinstance(image_url, compat_str):
                continue
            # Values without a scheme or '//' prefix get 'https://' prepended.
            if not image_url.startswith(('http', '//')):
                image_url = 'https://%s' % image_url
            thumb = parse_resolution(image_id)
            thumb['id'] = image_id
            thumb['url'] = image_url
            thumbnails.append(thumb)

        release_year = try_get(
            info, lambda i: i['title']['released_at'], int)

        season_number = int_or_none(info.get('season_number'))
        season_id = str_or_none(info.get('season_id'))
        episode_number = int_or_none(info.get('episode_number'))

        genres = try_get(info, lambda i: i['title']['genres'], list) or []
        genre_names = (
            genre.get('name') for genre in genres
            if isinstance(genre, dict))
        tags = [
            name for name in genre_names
            if name and isinstance(name, compat_str)]

        subtitles = {}
        subtitle_items = try_get(
            info, lambda i: i['content']['subtitles'], list) or []
        for item in subtitle_items:
            if not isinstance(item, dict):
                continue
            lang = item.get('language')
            sub_url = url_or_none(item.get('url'))
            if lang and isinstance(lang, compat_str) and sub_url:
                # Unknown labels are used as the key unchanged.
                subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
                    'url': sub_url
                }]

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'season_id': season_id,
            'season_number': season_number,
            'episode_number': episode_number,
            'release_year': release_year,
            'timestamp': timestamp,
            'creator': creator,
            'view_count': view_count,
            'duration': duration,
            'tags': tags,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'formats': formats
        }
|
||||
|
||||
|
||||
class PuhuTVSerieIE(InfoExtractor):
    """Extractor for puhutv.com detail pages (URLs ending in ``-detay``)."""

    _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-detay'
    IE_NAME = 'puhutv:serie'
    _TESTS = [{
        'url': 'https://puhutv.com/deniz-yildizi-detay',
        'info_dict': {
            'title': 'Deniz Yıldızı',
            'id': 'deniz-yildizi',
        },
        'playlist_mincount': 205,
    }, {
        # a film detail page, which uses the same URL scheme as a series page
        'url': 'https://puhutv.com/kaybedenler-kulubu-detay',
        'only_matching': True,
    }]

    def _extract_entries(self, seasons):
        """Yield a url_result for every episode of every season.

        Seasons without an ``id`` are skipped. Each season is fetched in
        pages of 40 episodes for as long as the response reports
        ``hasMore`` as ``True``.
        """
        for season_info in seasons:
            season_id = season_info.get('id')
            if not season_id:
                continue
            page = 1
            while True:
                season_page = self._download_json(
                    'https://galadriel.puhutv.com/seasons/%s' % season_id,
                    season_id, 'Downloading page %s' % page, query={
                        'page': page,
                        'per': 40,
                    })
                episodes = season_page.get('episodes')
                if isinstance(episodes, list):
                    for episode in episodes:
                        slug_path = str_or_none(episode.get('slugPath'))
                        if not slug_path:
                            continue
                        episode_id = str_or_none(
                            int_or_none(episode.get('id')))
                        episode_title = (
                            episode.get('name') or episode.get('eventLabel'))
                        yield self.url_result(
                            'https://puhutv.com/%s' % slug_path,
                            ie=PuhuTVIE.ie_key(), video_id=episode_id,
                            video_title=episode_title)
                page += 1
                # Only a literal True keeps the paging going.
                if season_page.get('hasMore') is not True:
                    break

    def _real_extract(self, url):
        """Return a playlist for a series, or a url_result for a film."""
        playlist_id = self._match_id(url)

        detail_api_url = urljoin(url, '/api/slug/%s-detay' % playlist_id)
        info = self._download_json(detail_api_url, playlist_id)['data']

        seasons = info.get('seasons')
        if not seasons:
            # Films share this URL scheme with series but have no seasons;
            # hand them over to the single-video extractor.
            video_id = info.get('slug') or info['assets'][0]['slug']
            return self.url_result(
                'https://puhutv.com/%s-izle' % video_id,
                PuhuTVIE.ie_key(), video_id)

        return self.playlist_result(
            self._extract_entries(seasons), playlist_id, info.get('name'))
|
@ -32,6 +32,9 @@ class RaiBaseIE(InfoExtractor):
|
||||
_GEO_BYPASS = False
|
||||
|
||||
def _extract_relinker_info(self, relinker_url, video_id):
|
||||
if not re.match(r'https?://', relinker_url):
|
||||
return {'formats': [{'url': relinker_url}]}
|
||||
|
||||
formats = []
|
||||
geoprotection = None
|
||||
is_live = None
|
||||
@ -369,6 +372,10 @@ class RaiIE(RaiBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# Direct MMS URL
|
||||
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_from_content_id(self, content_id, url):
|
||||
|
@ -72,4 +72,7 @@ class StreamcloudIE(InfoExtractor):
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'thumbnail': thumbnail,
|
||||
'http_headers': {
|
||||
'Referer': url,
|
||||
},
|
||||
}
|
||||
|
@ -7,8 +7,10 @@ from .common import InfoExtractor
|
||||
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -30,7 +32,7 @@ class TEDIE(InfoExtractor):
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
|
||||
'md5': '0de43ac406aa3e4ea74b66c9c7789b13',
|
||||
'md5': 'b0ce2b05ca215042124fbc9e3886493a',
|
||||
'info_dict': {
|
||||
'id': '102',
|
||||
'ext': 'mp4',
|
||||
@ -42,24 +44,30 @@ class TEDIE(InfoExtractor):
|
||||
'uploader': 'Dan Dennett',
|
||||
'width': 853,
|
||||
'duration': 1308,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
|
||||
'md5': 'b899ac15e345fb39534d913f7606082b',
|
||||
'info_dict': {
|
||||
'id': 'tSVI8ta_P4w',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vishal Sikka: The beauty and power of algorithms',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'description': 'md5:6261fdfe3e02f4f579cbbfc00aff73f4',
|
||||
'upload_date': '20140122',
|
||||
'uploader_id': 'TEDInstitute',
|
||||
'uploader': 'TED Institute',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'tags': list,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# missing HTTP bitrates
|
||||
'url': 'https://www.ted.com/talks/vishal_sikka_the_beauty_and_power_of_algorithms',
|
||||
'info_dict': {
|
||||
'id': '6069',
|
||||
'ext': 'mp4',
|
||||
'title': 'The beauty and power of algorithms',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'description': 'md5:734e352710fb00d840ab87ae31aaf688',
|
||||
'uploader': 'Vishal Sikka',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
|
||||
'md5': '71b3ab2f4233012dce09d515c9c39ce2',
|
||||
'md5': 'e6b9617c01a7970ceac8bb2c92c346c0',
|
||||
'info_dict': {
|
||||
'id': '1972',
|
||||
'ext': 'mp4',
|
||||
@ -68,6 +76,9 @@ class TEDIE(InfoExtractor):
|
||||
'description': 'md5:5174aed4d0f16021b704120360f72b92',
|
||||
'duration': 1128,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ted.com/playlists/who_are_the_hackers',
|
||||
'info_dict': {
|
||||
@ -92,17 +103,17 @@ class TEDIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# YouTube video
|
||||
'url': 'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond',
|
||||
'add_ie': ['Youtube'],
|
||||
# no nativeDownloads
|
||||
'url': 'https://www.ted.com/talks/tom_thum_the_orchestra_in_my_mouth',
|
||||
'info_dict': {
|
||||
'id': 'aFBIPO-P7LM',
|
||||
'id': '1792',
|
||||
'ext': 'mp4',
|
||||
'title': 'The hidden power of siblings: Jeff Kluger at TEDxAsheville',
|
||||
'description': 'md5:3d7a4f50d95ca5dd67104e2a20f43fe1',
|
||||
'uploader': 'TEDx Talks',
|
||||
'uploader_id': 'TEDxTalks',
|
||||
'upload_date': '20111216',
|
||||
'title': 'The orchestra in my mouth',
|
||||
'description': 'md5:5d1d78650e2f8dfcbb8ebee2951ac29a',
|
||||
'uploader': 'Tom Thum',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'tags': list,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -161,27 +172,16 @@ class TEDIE(InfoExtractor):
|
||||
|
||||
info = self._extract_info(webpage)
|
||||
|
||||
talk_info = try_get(
|
||||
info, lambda x: x['__INITIAL_DATA__']['talks'][0],
|
||||
dict) or info['talks'][0]
|
||||
data = try_get(info, lambda x: x['__INITIAL_DATA__'], dict) or info
|
||||
talk_info = data['talks'][0]
|
||||
|
||||
title = talk_info['title'].strip()
|
||||
|
||||
external = talk_info.get('external')
|
||||
if external:
|
||||
service = external['service']
|
||||
self.to_screen('Found video from %s' % service)
|
||||
ext_url = None
|
||||
if service.lower() == 'youtube':
|
||||
ext_url = external.get('code')
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': ext_url or external['uri'],
|
||||
}
|
||||
|
||||
native_downloads = try_get(
|
||||
talk_info, lambda x: x['downloads']['nativeDownloads'],
|
||||
dict) or talk_info['nativeDownloads']
|
||||
talk_info,
|
||||
(lambda x: x['downloads']['nativeDownloads'],
|
||||
lambda x: x['nativeDownloads']),
|
||||
dict) or {}
|
||||
|
||||
formats = [{
|
||||
'url': format_url,
|
||||
@ -196,10 +196,24 @@ class TEDIE(InfoExtractor):
|
||||
|
||||
player_talk = talk_info['player_talks'][0]
|
||||
|
||||
external = player_talk.get('external')
|
||||
if isinstance(external, dict):
|
||||
service = external.get('service')
|
||||
if isinstance(service, compat_str):
|
||||
ext_url = None
|
||||
if service.lower() == 'youtube':
|
||||
ext_url = external.get('code')
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': ext_url or external['uri'],
|
||||
}
|
||||
|
||||
resources_ = player_talk.get('resources') or talk_info.get('resources')
|
||||
|
||||
http_url = None
|
||||
for format_id, resources in resources_.items():
|
||||
if not isinstance(resources, dict):
|
||||
continue
|
||||
if format_id == 'h264':
|
||||
for resource in resources:
|
||||
h264_url = resource.get('file')
|
||||
@ -228,8 +242,12 @@ class TEDIE(InfoExtractor):
|
||||
'tbr': int_or_none(resource.get('bitrate')),
|
||||
})
|
||||
elif format_id == 'hls':
|
||||
stream_url = url_or_none(resources.get('stream'))
|
||||
if not stream_url:
|
||||
continue
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
resources.get('stream'), video_name, 'mp4', m3u8_id=format_id, fatal=False))
|
||||
stream_url, video_name, 'mp4', m3u8_id=format_id,
|
||||
fatal=False))
|
||||
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
|
||||
@ -239,9 +257,13 @@ class TEDIE(InfoExtractor):
|
||||
bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
|
||||
if not bitrate:
|
||||
continue
|
||||
bitrate_url = re.sub(r'\d+k', bitrate, http_url)
|
||||
if not self._is_valid_url(
|
||||
bitrate_url, video_name, '%s bitrate' % bitrate):
|
||||
continue
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
'url': re.sub(r'\d+k', bitrate, http_url),
|
||||
'url': bitrate_url,
|
||||
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
@ -267,7 +289,11 @@ class TEDIE(InfoExtractor):
|
||||
'description': self._og_search_description(webpage),
|
||||
'subtitles': self._get_subtitles(video_id, talk_info),
|
||||
'formats': formats,
|
||||
'duration': talk_info.get('duration'),
|
||||
'duration': float_or_none(talk_info.get('duration')),
|
||||
'view_count': int_or_none(data.get('viewed_count')),
|
||||
'comment_count': int_or_none(
|
||||
try_get(data, lambda x: x['comments']['count'])),
|
||||
'tags': try_get(talk_info, lambda x: x['tags'], list),
|
||||
}
|
||||
|
||||
def _get_subtitles(self, video_id, talk_info):
|
||||
|
@ -1,26 +1,43 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .mitele import MiTeleBaseIE
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class TelecincoIE(MiTeleBaseIE):
|
||||
class TelecincoIE(InfoExtractor):
|
||||
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
|
||||
'md5': '8d7b2d5f699ee2709d992a63d5cd1712',
|
||||
'info_dict': {
|
||||
'id': 'JEA5ijCnF6p5W08A1rNKn7',
|
||||
'ext': 'mp4',
|
||||
'id': '1876350223',
|
||||
'title': 'Bacalao con kokotxas al pil-pil',
|
||||
'description': 'md5:1382dacd32dd4592d478cbdca458e5bb',
|
||||
'duration': 662,
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'adb28c37238b675dad0f042292f209a7',
|
||||
'info_dict': {
|
||||
'id': 'JEA5ijCnF6p5W08A1rNKn7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Con Martín Berasategui, hacer un bacalao al pil-pil es fácil y divertido',
|
||||
'duration': 662,
|
||||
},
|
||||
}]
|
||||
}, {
|
||||
'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
|
||||
'md5': '284393e5387b3b947b77c613ef04749a',
|
||||
'md5': '9468140ebc300fbb8b9d65dc6e5c4b43',
|
||||
'info_dict': {
|
||||
'id': 'jn24Od1zGLG4XUZcnUnZB6',
|
||||
'ext': 'mp4',
|
||||
@ -30,7 +47,7 @@ class TelecincoIE(MiTeleBaseIE):
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
|
||||
'md5': '749afab6ea5a136a8806855166ae46a2',
|
||||
'md5': 'ae2dc6b7b50b2392076a51c0f70e01f6',
|
||||
'info_dict': {
|
||||
'id': 'aywerkD2Sv1vGNqq9b85Q2',
|
||||
'ext': 'mp4',
|
||||
@ -50,17 +67,90 @@ class TelecincoIE(MiTeleBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _parse_content(self, content, url):
|
||||
video_id = content['dataMediaId']
|
||||
if content.get('dataCmsId') == 'ooyala':
|
||||
return self.url_result(
|
||||
'ooyala:%s' % video_id, OoyalaIE.ie_key(), video_id)
|
||||
config_url = urljoin(url, content['dataConfig'])
|
||||
config = self._download_json(
|
||||
config_url, video_id, 'Downloading config JSON')
|
||||
title = config['info']['title']
|
||||
|
||||
def mmc_url(mmc_type):
|
||||
return re.sub(
|
||||
r'/(?:flash|html5)\.json', '/%s.json' % mmc_type,
|
||||
config['services']['mmc'])
|
||||
|
||||
duration = None
|
||||
formats = []
|
||||
for mmc_type in ('flash', 'html5'):
|
||||
mmc = self._download_json(
|
||||
mmc_url(mmc_type), video_id,
|
||||
'Downloading %s mmc JSON' % mmc_type, fatal=False)
|
||||
if not mmc:
|
||||
continue
|
||||
if not duration:
|
||||
duration = int_or_none(mmc.get('duration'))
|
||||
for location in mmc['locations']:
|
||||
gat = self._proto_relative_url(location.get('gat'), 'http:')
|
||||
gcp = location.get('gcp')
|
||||
ogn = location.get('ogn')
|
||||
if None in (gat, gcp, ogn):
|
||||
continue
|
||||
token_data = {
|
||||
'gcp': gcp,
|
||||
'ogn': ogn,
|
||||
'sta': 0,
|
||||
}
|
||||
media = self._download_json(
|
||||
gat, video_id, data=json.dumps(token_data).encode('utf-8'),
|
||||
headers={
|
||||
'Content-Type': 'application/json;charset=utf-8',
|
||||
'Referer': url,
|
||||
}, fatal=False) or {}
|
||||
stream = media.get('stream') or media.get('file')
|
||||
if not stream:
|
||||
continue
|
||||
ext = determine_ext(stream)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': content.get('dataPoster') or config.get('poster', {}).get('imageUrl'),
|
||||
'duration': duration,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
title = self._html_search_meta(
|
||||
['og:title', 'twitter:title'], webpage, 'title')
|
||||
info = self._get_player_info(url, webpage)
|
||||
article = self._parse_json(self._search_regex(
|
||||
r'window\.\$REACTBASE_STATE\.article\s*=\s*({.+})',
|
||||
webpage, 'article'), display_id)['article']
|
||||
title = article.get('title')
|
||||
description = clean_html(article.get('leadParagraph'))
|
||||
if article.get('editorialType') != 'VID':
|
||||
entries = []
|
||||
for p in article.get('body', []):
|
||||
content = p.get('content')
|
||||
if p.get('type') != 'video' or not content:
|
||||
continue
|
||||
entries.append(self._parse_content(content, url))
|
||||
return self.playlist_result(
|
||||
entries, str_or_none(article.get('id')), title, description)
|
||||
content = article['opening']['content']
|
||||
info = self._parse_content(content, url)
|
||||
info.update({
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': self._html_search_meta(
|
||||
['og:description', 'twitter:description'],
|
||||
webpage, 'title', fatal=False),
|
||||
'description': description,
|
||||
})
|
||||
return info
|
||||
|
@ -17,6 +17,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
remove_start,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
@ -106,10 +107,10 @@ class VKIE(VKBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'ProtivoGunz - Хуёвая песня',
|
||||
'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
|
||||
'uploader_id': '-77521',
|
||||
'duration': 195,
|
||||
'timestamp': 1329060660,
|
||||
'timestamp': 1329049880,
|
||||
'upload_date': '20120212',
|
||||
'view_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -118,12 +119,12 @@ class VKIE(VKBaseIE):
|
||||
'info_dict': {
|
||||
'id': '165548505',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Tom Cruise',
|
||||
'title': 'No name',
|
||||
'uploader': 'Tom Cruise',
|
||||
'uploader_id': '205387401',
|
||||
'duration': 9,
|
||||
'timestamp': 1374374880,
|
||||
'upload_date': '20130721',
|
||||
'view_count': int,
|
||||
'timestamp': 1374364108,
|
||||
'upload_date': '20130720',
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -207,10 +208,10 @@ class VKIE(VKBaseIE):
|
||||
'id': 'V3K4mi0SYkc',
|
||||
'ext': 'webm',
|
||||
'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
|
||||
'description': 'md5:d9903938abdc74c738af77f527ca0596',
|
||||
'duration': 178,
|
||||
'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
|
||||
'duration': 179,
|
||||
'upload_date': '20130116',
|
||||
'uploader': "Children's Joy Foundation",
|
||||
'uploader': "Children's Joy Foundation Inc.",
|
||||
'uploader_id': 'thecjf',
|
||||
'view_count': int,
|
||||
},
|
||||
@ -222,6 +223,7 @@ class VKIE(VKBaseIE):
|
||||
'id': 'k3lz2cmXyRuJQSjGHUv',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
|
||||
# TODO: fix test by fixing dailymotion description extraction
|
||||
'description': 'md5:c651358f03c56f1150b555c26d90a0fd',
|
||||
'uploader': 'AniLibria.Tv',
|
||||
'upload_date': '20160914',
|
||||
@ -241,9 +243,12 @@ class VKIE(VKBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'S-Dance, репетиции к The way show',
|
||||
'uploader': 'THE WAY SHOW | 17 апреля',
|
||||
'timestamp': 1454870100,
|
||||
'uploader_id': '-110305615',
|
||||
'timestamp': 1454859345,
|
||||
'upload_date': '20160207',
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -296,7 +301,7 @@ class VKIE(VKBaseIE):
|
||||
video_id = mobj.group('videoid')
|
||||
|
||||
if video_id:
|
||||
info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
|
||||
info_url = 'https://vk.com/al_video.php?act=show_inline&al=1&video=' + video_id
|
||||
# Some videos (removed?) can only be downloaded with list id specified
|
||||
list_id = mobj.group('list_id')
|
||||
if list_id:
|
||||
@ -346,6 +351,9 @@ class VKIE(VKBaseIE):
|
||||
|
||||
r'<!>This video is no longer available, because its author has been blocked.':
|
||||
'Video %s is no longer available, because its author has been blocked.',
|
||||
|
||||
r'<!>This video is no longer available, because it has been deleted.':
|
||||
'Video %s is no longer available, because it has been deleted.',
|
||||
}
|
||||
|
||||
for error_re, error_msg in ERRORS.items():
|
||||
@ -394,7 +402,8 @@ class VKIE(VKBaseIE):
|
||||
if not data:
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'<!json>\s*({.+?})\s*<!>', info_page, 'json', default='{}'),
|
||||
[r'<!json>\s*({.+?})\s*<!>', r'<!json>\s*({.+})'],
|
||||
info_page, 'json', default='{}'),
|
||||
video_id)
|
||||
if data:
|
||||
data = data['player']['params'][0]
|
||||
@ -416,7 +425,7 @@ class VKIE(VKBaseIE):
|
||||
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
|
||||
'upload date', fatal=False))
|
||||
'upload date', default=None)) or int_or_none(data.get('date'))
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
|
||||
@ -454,9 +463,12 @@ class VKIE(VKBaseIE):
|
||||
'title': title,
|
||||
'thumbnail': data.get('jpg'),
|
||||
'uploader': data.get('md_author'),
|
||||
'uploader_id': str_or_none(data.get('author_id')),
|
||||
'duration': data.get('duration'),
|
||||
'timestamp': timestamp,
|
||||
'view_count': view_count,
|
||||
'like_count': int_or_none(data.get('liked')),
|
||||
'dislike_count': int_or_none(data.get('nolikes')),
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
|
@ -3569,7 +3569,7 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
|
||||
setattr(self, '%s_open' % type,
|
||||
lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
|
||||
meth(r, proxy, type))
|
||||
return compat_urllib_request.ProxyHandler.__init__(self, proxies)
|
||||
compat_urllib_request.ProxyHandler.__init__(self, proxies)
|
||||
|
||||
def proxy_open(self, req, proxy, type):
|
||||
req_proxy = req.headers.get('Ytdl-request-proxy')
|
||||
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2018.07.21'
|
||||
__version__ = '2018.07.29'
|
||||
|
Loading…
Reference in New Issue
Block a user