mirror of
https://github.com/l1ving/youtube-dl
synced 2025-03-11 07:07:46 +08:00
Merge branch 'master' into fix.25.12.2018
# Conflicts: # youtube_dl/version.py
This commit is contained in:
commit
7ef4a71efa
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.30.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.30.1**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.02.08*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.02.08**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2019.01.30.1
|
||||
[debug] youtube-dl version 2019.02.08
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
29
ChangeLog
29
ChangeLog
@ -1,3 +1,32 @@
|
||||
version 2019.02.08
|
||||
|
||||
Core
|
||||
* [utils] Improve JSON-LD regular expression (#18058)
|
||||
* [YoutubeDL] Fallback to ie_key of matching extractor while making
|
||||
download archive id when no explicit ie_key is provided (#19022)
|
||||
|
||||
Extractors
|
||||
+ [malltv] Add support for mall.tv (#18058, #17856)
|
||||
+ [spankbang:playlist] Add support for playlists (#19145)
|
||||
* [spankbang] Extend URL regular expression
|
||||
* [trutv] Fix extraction (#17336)
|
||||
* [toutv] Fix authentication (#16398, #18700)
|
||||
* [pornhub] Fix tags and categories extraction (#13720, #19135)
|
||||
* [pornhd] Fix formats extraction
|
||||
+ [pornhd] Extract like count (#19123, #19125)
|
||||
* [radiocanada] Switch to the new media requests (#19115)
|
||||
+ [teachable] Add support for courses.workitdaily.com (#18871)
|
||||
- [vporn] Remove extractor (#16276)
|
||||
+ [soundcloud:pagedplaylist] Add ie and title to entries (#19022, #19086)
|
||||
+ [drtuber] Extract duration (#19078)
|
||||
* [soundcloud] Fix paged playlists extraction, add support for albums and update client id
|
||||
* [soundcloud] Update client id
|
||||
* [drtv] Improve preference (#19079)
|
||||
+ [openload] Add support for openload.pw and oload.pw (#18930)
|
||||
+ [openload] Add support for oload.info (#19073)
|
||||
* [crackle] Authorize media detail request (#16931)
|
||||
|
||||
|
||||
version 2019.01.30.1
|
||||
|
||||
Core
|
||||
|
@ -476,6 +476,7 @@
|
||||
- **mailru:music**: Музыка@Mail.Ru
|
||||
- **mailru:music:search**: Музыка@Mail.Ru
|
||||
- **MakerTV**
|
||||
- **MallTV**
|
||||
- **mangomolo:live**
|
||||
- **mangomolo:video**
|
||||
- **ManyVids**
|
||||
@ -827,6 +828,7 @@
|
||||
- **southpark.nl**
|
||||
- **southparkstudios.dk**
|
||||
- **SpankBang**
|
||||
- **SpankBangPlaylist**
|
||||
- **Spankwire**
|
||||
- **Spiegel**
|
||||
- **Spiegel:Article**: Articles on spiegel.de
|
||||
@ -1057,7 +1059,6 @@
|
||||
- **Voot**
|
||||
- **VoxMedia**
|
||||
- **VoxMediaVolume**
|
||||
- **Vporn**
|
||||
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
- **Vrak**
|
||||
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
|
||||
|
@ -61,6 +61,7 @@ class TestInfoExtractor(unittest.TestCase):
|
||||
<meta content='Foo' property=og:foobar>
|
||||
<meta name="og:test1" content='foo > < bar'/>
|
||||
<meta name="og:test2" content="foo >//< bar"/>
|
||||
<meta property=og-test3 content='Ill-formatted opengraph'/>
|
||||
'''
|
||||
self.assertEqual(ie._og_search_title(html), 'Foo')
|
||||
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
||||
@ -69,6 +70,7 @@ class TestInfoExtractor(unittest.TestCase):
|
||||
self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
|
||||
self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
|
||||
self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
|
||||
self.assertEqual(ie._og_search_property('test3', html), 'Ill-formatted opengraph')
|
||||
self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
|
||||
self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
|
||||
self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
|
||||
|
@ -82,6 +82,7 @@ from .utils import (
|
||||
sanitize_url,
|
||||
sanitized_Request,
|
||||
std_headers,
|
||||
str_or_none,
|
||||
subtitles_filename,
|
||||
UnavailableVideoError,
|
||||
url_basename,
|
||||
@ -2067,9 +2068,12 @@ class YoutubeDL(object):
|
||||
# and backwards compatibility with prior versions
|
||||
extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
|
||||
if extractor is None:
|
||||
url = str_or_none(info_dict.get('url'))
|
||||
if not url:
|
||||
return
|
||||
# Try to find matching extractor for the URL and take its ie_key
|
||||
for ie in self._ies:
|
||||
if ie.suitable(info_dict['url']):
|
||||
if ie.suitable(url):
|
||||
extractor = ie.ie_key()
|
||||
break
|
||||
else:
|
||||
|
@ -1058,7 +1058,7 @@ class InfoExtractor(object):
|
||||
@staticmethod
|
||||
def _og_regexes(prop):
|
||||
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
|
||||
property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)'
|
||||
property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)'
|
||||
% {'prop': re.escape(prop)})
|
||||
template = r'<meta[^>]+?%s[^>]+?%s'
|
||||
return [
|
||||
|
@ -619,6 +619,7 @@ from .mailru import (
|
||||
MailRuMusicSearchIE,
|
||||
)
|
||||
from .makertv import MakerTVIE
|
||||
from .malltv import MallTVIE
|
||||
from .mangomolo import (
|
||||
MangomoloVideoIE,
|
||||
MangomoloLiveIE,
|
||||
@ -1058,7 +1059,10 @@ from .southpark import (
|
||||
SouthParkEsIE,
|
||||
SouthParkNlIE
|
||||
)
|
||||
from .spankbang import SpankBangIE
|
||||
from .spankbang import (
|
||||
SpankBangIE,
|
||||
SpankBangPlaylistIE,
|
||||
)
|
||||
from .spankwire import SpankwireIE
|
||||
from .spiegel import SpiegelIE, SpiegelArticleIE
|
||||
from .spiegeltv import SpiegeltvIE
|
||||
@ -1167,6 +1171,7 @@ from .toutv import TouTvIE
|
||||
from .toypics import ToypicsUserIE, ToypicsIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
from .trilulilu import TriluliluIE
|
||||
from .trunews import TruNewsIE
|
||||
from .trutv import TruTVIE
|
||||
from .tube8 import Tube8IE
|
||||
from .tubitv import TubiTvIE
|
||||
|
@ -27,6 +27,10 @@ class ImgurIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://i.imgur.com/crGpqCV.mp4',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# no title
|
||||
'url': 'https://i.imgur.com/jxBXAMC.gifv',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -87,7 +91,7 @@ class ImgurIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': self._og_search_title(webpage),
|
||||
'title': self._og_search_title(webpage, default=video_id),
|
||||
}
|
||||
|
||||
|
||||
|
@ -34,12 +34,15 @@ class LinkedInLearningBaseIE(InfoExtractor):
|
||||
'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value,
|
||||
}, query=query)['elements'][0]
|
||||
|
||||
def _get_video_id(self, urn, course_slug, video_slug):
|
||||
def _get_urn_id(self, video_data):
|
||||
urn = video_data.get('urn')
|
||||
if urn:
|
||||
mobj = re.search(r'urn:li:lyndaCourse:\d+,(\d+)', urn)
|
||||
if mobj:
|
||||
return mobj.group(1)
|
||||
return '%s/%s' % (course_slug, video_slug)
|
||||
|
||||
def _get_video_id(self, video_data, course_slug, video_slug):
|
||||
return self._get_urn_id(video_data) or '%s/%s' % (course_slug, video_slug)
|
||||
|
||||
def _real_initialize(self):
|
||||
email, password = self._get_login_info()
|
||||
@ -123,7 +126,7 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
|
||||
self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr'))
|
||||
|
||||
return {
|
||||
'id': self._get_video_id(video_data.get('urn'), course_slug, video_slug),
|
||||
'id': self._get_video_id(video_data, course_slug, video_slug),
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': video_data.get('defaultThumbnail'),
|
||||
@ -154,18 +157,21 @@ class LinkedInLearningCourseIE(LinkedInLearningBaseIE):
|
||||
course_data = self._call_api(course_slug, 'chapters,description,title')
|
||||
|
||||
entries = []
|
||||
for chapter in course_data.get('chapters', []):
|
||||
for chapter_number, chapter in enumerate(course_data.get('chapters', []), 1):
|
||||
chapter_title = chapter.get('title')
|
||||
chapter_id = self._get_urn_id(chapter)
|
||||
for video in chapter.get('videos', []):
|
||||
video_slug = video.get('slug')
|
||||
if not video_slug:
|
||||
continue
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'id': self._get_video_id(video.get('urn'), course_slug, video_slug),
|
||||
'id': self._get_video_id(video, course_slug, video_slug),
|
||||
'title': video.get('title'),
|
||||
'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug),
|
||||
'chapter': chapter_title,
|
||||
'chapter_number': chapter_number,
|
||||
'chapter_id': chapter_id,
|
||||
'ie_key': LinkedInLearningIE.ie_key(),
|
||||
})
|
||||
|
||||
|
53
youtube_dl/extractor/malltv.py
Normal file
53
youtube_dl/extractor/malltv.py
Normal file
@ -0,0 +1,53 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import merge_dicts
|
||||
|
||||
|
||||
class MallTVIE(InfoExtractor):
    """Information extractor for mall.tv video pages.

    The last path component of the URL is used as the display id.  The real
    video id is read from the ``<source>`` tag of the downloaded page, whose
    ``src`` ends in ``/<id>/index``.
    """

    _VALID_URL = r'https?://(?:www\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
        'md5': '1c4a37f080e1f3023103a7b43458e518',
        'info_dict': {
            'id': 't0zzt0',
            'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
            'ext': 'mp4',
            'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
            'description': 'md5:25fc0ec42a72ba602b602c683fa29deb',
            'duration': 216,
            'timestamp': 1538870400,
            'upload_date': '20181007',
            'view_count': int,
        }
    }, {
        'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for its video."""
        display_id = self._match_id(url)

        page = self._download_webpage(
            url, display_id, headers=self.geo_verification_headers())

        # Group 1 is everything from '<source' up to and including
        # '/<id>/index'; the named group 'id' is the video id itself.
        source_re = r'(<source[^>]+\bsrc=(?:(["\'])(?:(?!\2).)+|[^\s]+)/(?P<id>[\da-z]+)/index)\b'
        video_id = self._search_regex(
            source_re, page, 'video id', group='id')

        # Append '.m3u8' right after '.../index' in the markup before it is
        # handed to the HTML5 media parser (presumably so the source is
        # recognised as an HLS manifest -- confirm against the parser).
        patched_page = re.sub(source_re, r'\1.m3u8', page)
        entries = self._parse_html5_media_entries(
            url, patched_page, video_id,
            m3u8_id='hls', m3u8_entry_protocol='m3u8_native')
        media = entries[0]

        json_ld = self._search_json_ld(page, video_id, default={})

        page_meta = {
            'id': video_id,
            'display_id': display_id,
            'title': self._og_search_title(page, default=None) or display_id,
            'description': self._og_search_description(page, default=None),
            'thumbnail': self._og_search_thumbnail(page, default=None),
        }
        return merge_dicts(media, json_ld, page_meta)
|
@ -4,9 +4,11 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@ -14,7 +16,7 @@ class PornHdIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
|
||||
'md5': 'c8b964b1f0a4b5f7f28ae3a5c9f86ad5',
|
||||
'md5': '87f1540746c1d32ec7a2305c12b96b25',
|
||||
'info_dict': {
|
||||
'id': '9864',
|
||||
'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
|
||||
@ -23,6 +25,7 @@ class PornHdIE(InfoExtractor):
|
||||
'description': 'md5:3748420395e03e31ac96857a8f125b2b',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
@ -37,6 +40,7 @@ class PornHdIE(InfoExtractor):
|
||||
'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'age_limit': 18,
|
||||
},
|
||||
'skip': 'Not available anymore',
|
||||
@ -65,12 +69,14 @@ class PornHdIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for format_id, video_url in sources.items():
|
||||
video_url = urljoin(url, video_url)
|
||||
if not video_url:
|
||||
continue
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]', format_id, 'height', default=None))
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'ext': determine_ext(video_url, 'mp4'),
|
||||
'format_id': format_id,
|
||||
'height': height,
|
||||
})
|
||||
@ -85,6 +91,11 @@ class PornHdIE(InfoExtractor):
|
||||
r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
|
||||
'thumbnail', fatal=False, group='url')
|
||||
|
||||
like_count = int_or_none(self._search_regex(
|
||||
(r'(\d+)\s*</11[^>]+>(?: |\s)*\blikes',
|
||||
r'class=["\']save-count["\'][^>]*>\s*(\d+)'),
|
||||
webpage, 'like count', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
@ -92,6 +103,7 @@ class PornHdIE(InfoExtractor):
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'formats': formats,
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
@ -16,7 +16,6 @@ from .openload import PhantomJSwrapper
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
orderedSet,
|
||||
remove_quotes,
|
||||
str_to_int,
|
||||
@ -303,14 +302,12 @@ class PornHubIE(PornHubBaseIE):
|
||||
comment_count = self._extract_count(
|
||||
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||
|
||||
page_params = self._parse_json(self._search_regex(
|
||||
r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})',
|
||||
webpage, 'page parameters', group='data', default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False)
|
||||
tags = categories = None
|
||||
if page_params:
|
||||
tags = page_params.get('tags', '').split(',')
|
||||
categories = page_params.get('categories', '').split(',')
|
||||
def extract_list(meta_key):
|
||||
div = self._search_regex(
|
||||
r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
|
||||
% meta_key, webpage, meta_key, default=None)
|
||||
if div:
|
||||
return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@ -325,8 +322,8 @@ class PornHubIE(PornHubBaseIE):
|
||||
'comment_count': comment_count,
|
||||
'formats': formats,
|
||||
'age_limit': 18,
|
||||
'tags': tags,
|
||||
'categories': categories,
|
||||
'tags': extract_list('tags'),
|
||||
'categories': extract_list('categories'),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
@ -4,16 +4,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
find_xpath_attr,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
xpath_element,
|
||||
ExtractorError,
|
||||
determine_protocol,
|
||||
unsmuggle_url,
|
||||
)
|
||||
|
||||
|
||||
@ -61,107 +57,67 @@ class RadioCanadaIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
_GEO_COUNTRIES = ['CA']
|
||||
_access_token = None
|
||||
_claims = None
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
app_code, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
metadata = self._download_xml(
|
||||
'http://api.radio-canada.ca/metaMedia/v1/index.ashx',
|
||||
video_id, note='Downloading metadata XML', query={
|
||||
def _call_api(self, path, video_id=None, app_code=None, query=None):
|
||||
if not query:
|
||||
query = {}
|
||||
query.update({
|
||||
'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb',
|
||||
'output': 'json',
|
||||
})
|
||||
if video_id:
|
||||
query.update({
|
||||
'appCode': app_code,
|
||||
'idMedia': video_id,
|
||||
})
|
||||
if self._access_token:
|
||||
query['access_token'] = self._access_token
|
||||
try:
|
||||
return self._download_json(
|
||||
'https://services.radio-canada.ca/media/' + path, video_id, query=query)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 422):
|
||||
data = self._parse_json(e.cause.read().decode(), None)
|
||||
error = data.get('error_description') or data['errorMessage']['text']
|
||||
raise ExtractorError(error, expected=True)
|
||||
raise
|
||||
|
||||
def _extract_info(self, app_code, video_id):
|
||||
metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas']
|
||||
|
||||
def get_meta(name):
|
||||
el = find_xpath_attr(metadata, './/Meta', 'name', name)
|
||||
return el.text if el is not None else None
|
||||
for meta in metas:
|
||||
if meta.get('name') == name:
|
||||
text = meta.get('text')
|
||||
if text:
|
||||
return text
|
||||
|
||||
# protectionType does not necessarily mean the video is DRM protected (see
|
||||
# https://github.com/rg3/youtube-dl/pull/18609).
|
||||
if get_meta('protectionType'):
|
||||
self.report_warning('This video is probably DRM protected.')
|
||||
|
||||
device_types = ['ipad']
|
||||
if not smuggled_data:
|
||||
device_types.append('flash')
|
||||
device_types.append('android')
|
||||
|
||||
formats = []
|
||||
error = None
|
||||
# TODO: extract f4m formats
|
||||
# f4m formats can be extracted using flashhd device_type but they produce unplayable file
|
||||
for device_type in device_types:
|
||||
validation_url = 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx'
|
||||
query = {
|
||||
'appCode': app_code,
|
||||
'idMedia': video_id,
|
||||
'connectionType': 'broadband',
|
||||
'multibitrate': 'true',
|
||||
'deviceType': device_type,
|
||||
}
|
||||
if smuggled_data:
|
||||
validation_url = 'https://services.radio-canada.ca/media/validation/v2/'
|
||||
query.update(smuggled_data)
|
||||
else:
|
||||
query.update({
|
||||
# paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction
|
||||
'paysJ391wsHjbOJwvCs26toz': 'CA',
|
||||
'bypasslock': 'NZt5K62gRqfc',
|
||||
})
|
||||
v_data = self._download_xml(validation_url, video_id, note='Downloading %s XML' % device_type, query=query, fatal=False)
|
||||
v_url = xpath_text(v_data, 'url')
|
||||
if not v_url:
|
||||
continue
|
||||
if v_url == 'null':
|
||||
error = xpath_text(v_data, 'message')
|
||||
continue
|
||||
ext = determine_ext(v_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
v_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
v_url, video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
ext = determine_ext(v_url)
|
||||
bitrates = xpath_element(v_data, 'bitrates')
|
||||
for url_e in bitrates.findall('url'):
|
||||
tbr = int_or_none(url_e.get('bitrate'))
|
||||
if not tbr:
|
||||
continue
|
||||
f_url = re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url)
|
||||
protocol = determine_protocol({'url': f_url})
|
||||
f = {
|
||||
'format_id': '%s-%d' % (protocol, tbr),
|
||||
'url': f_url,
|
||||
'ext': 'flv' if protocol == 'rtmp' else ext,
|
||||
'protocol': protocol,
|
||||
'width': int_or_none(url_e.get('width')),
|
||||
'height': int_or_none(url_e.get('height')),
|
||||
'tbr': tbr,
|
||||
}
|
||||
mobj = re.match(r'(?P<url>rtmp://[^/]+/[^/]+)/(?P<playpath>[^?]+)(?P<auth>\?.+)', f_url)
|
||||
if mobj:
|
||||
f.update({
|
||||
'url': mobj.group('url') + mobj.group('auth'),
|
||||
'play_path': mobj.group('playpath'),
|
||||
})
|
||||
formats.append(f)
|
||||
if protocol == 'rtsp':
|
||||
base_url = self._search_regex(
|
||||
r'rtsp://([^?]+)', f_url, 'base url', default=None)
|
||||
if base_url:
|
||||
base_url = 'http://' + base_url
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
base_url + '/playlist.m3u8', video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
base_url + '/manifest.f4m', video_id,
|
||||
f4m_id='hds', fatal=False))
|
||||
if not formats and error:
|
||||
query = {
|
||||
'connectionType': 'hd',
|
||||
'deviceType': 'ipad',
|
||||
'multibitrate': 'true',
|
||||
}
|
||||
if self._claims:
|
||||
query['claims'] = self._claims
|
||||
v_data = self._call_api('validation/v2/', video_id, app_code, query)
|
||||
v_url = v_data.get('url')
|
||||
if not v_url:
|
||||
error = v_data['message']
|
||||
if error == "Le contenu sélectionné n'est pas disponible dans votre pays":
|
||||
raise self.raise_geo_restricted(error, self._GEO_COUNTRIES)
|
||||
if error == 'Le contenu sélectionné est disponible seulement en premium':
|
||||
self.raise_login_required(error)
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
formats = self._extract_m3u8_formats(v_url, video_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
@ -186,11 +142,14 @@ class RadioCanadaIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self._extract_info(*re.match(self._VALID_URL, url).groups())
|
||||
|
||||
|
||||
class RadioCanadaAudioVideoIE(InfoExtractor):
|
||||
'radiocanada:audiovideo'
|
||||
_VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
|
||||
'info_dict': {
|
||||
'id': '7527184',
|
||||
@ -203,7 +162,10 @@ class RadioCanadaAudioVideoIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self.url_result('radiocanada:medianet:%s' % self._match_id(url))
|
||||
|
@ -21,7 +21,17 @@ from ..utils import (
|
||||
|
||||
|
||||
class RutubeBaseIE(InfoExtractor):
|
||||
def _extract_video(self, video, video_id=None, require_title=True):
|
||||
def _download_api_info(self, video_id, query=None):
|
||||
if not query:
|
||||
query = {}
|
||||
query['format'] = 'json'
|
||||
return self._download_json(
|
||||
'http://rutube.ru/api/video/%s/' % video_id,
|
||||
video_id, 'Downloading video JSON',
|
||||
'Unable to download video JSON', query=query)
|
||||
|
||||
@staticmethod
|
||||
def _extract_info(video, video_id=None, require_title=True):
|
||||
title = video['title'] if require_title else video.get('title')
|
||||
|
||||
age_limit = video.get('is_adult')
|
||||
@ -32,7 +42,7 @@ class RutubeBaseIE(InfoExtractor):
|
||||
category = try_get(video, lambda x: x['category']['name'])
|
||||
|
||||
return {
|
||||
'id': video.get('id') or video_id,
|
||||
'id': video.get('id') or video_id if video_id else video['id'],
|
||||
'title': title,
|
||||
'description': video.get('description'),
|
||||
'thumbnail': video.get('thumbnail_url'),
|
||||
@ -47,6 +57,42 @@ class RutubeBaseIE(InfoExtractor):
|
||||
'is_live': bool_or_none(video.get('is_livestream')),
|
||||
}
|
||||
|
||||
def _download_and_extract_info(self, video_id, query=None):
|
||||
return self._extract_info(
|
||||
self._download_api_info(video_id, query=query), video_id)
|
||||
|
||||
def _download_api_options(self, video_id, query=None):
|
||||
if not query:
|
||||
query = {}
|
||||
query['format'] = 'json'
|
||||
return self._download_json(
|
||||
'http://rutube.ru/api/play/options/%s/' % video_id,
|
||||
video_id, 'Downloading options JSON',
|
||||
'Unable to download options JSON',
|
||||
headers=self.geo_verification_headers(), query=query)
|
||||
|
||||
def _extract_formats(self, options, video_id):
|
||||
formats = []
|
||||
for format_id, format_url in options['video_balancer'].items():
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url, video_id, f4m_id=format_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
def _download_and_extract_formats(self, video_id, query=None):
|
||||
return self._extract_formats(
|
||||
self._download_api_options(video_id, query=query), video_id)
|
||||
|
||||
|
||||
class RutubeIE(RutubeBaseIE):
|
||||
IE_NAME = 'rutube'
|
||||
@ -55,13 +101,13 @@ class RutubeIE(RutubeBaseIE):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
|
||||
'md5': '79938ade01294ef7e27574890d0d3769',
|
||||
'md5': '1d24f180fac7a02f3900712e5a5764d6',
|
||||
'info_dict': {
|
||||
'id': '3eac3b4561676c17df9132a9a1e62e3e',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Раненный кенгуру забежал в аптеку',
|
||||
'description': 'http://www.ntdtv.ru ',
|
||||
'duration': 80,
|
||||
'duration': 81,
|
||||
'uploader': 'NTDRussian',
|
||||
'uploader_id': '29790',
|
||||
'timestamp': 1381943602,
|
||||
@ -94,39 +140,12 @@ class RutubeIE(RutubeBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video = self._download_json(
|
||||
'http://rutube.ru/api/video/%s/?format=json' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
info = self._extract_video(video, video_id)
|
||||
|
||||
options = self._download_json(
|
||||
'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
|
||||
video_id, 'Downloading options JSON',
|
||||
headers=self.geo_verification_headers())
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in options['video_balancer'].items():
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url, video_id, f4m_id=format_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
info['formats'] = formats
|
||||
info = self._download_and_extract_info(video_id)
|
||||
info['formats'] = self._download_and_extract_formats(video_id)
|
||||
return info
|
||||
|
||||
|
||||
class RutubeEmbedIE(InfoExtractor):
|
||||
class RutubeEmbedIE(RutubeBaseIE):
|
||||
IE_NAME = 'rutube:embed'
|
||||
IE_DESC = 'Rutube embedded videos'
|
||||
_VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'
|
||||
@ -135,7 +154,7 @@ class RutubeEmbedIE(InfoExtractor):
|
||||
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
|
||||
'info_dict': {
|
||||
'id': 'a10e53b86e8f349080f718582ce4c661',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1387830582,
|
||||
'upload_date': '20131223',
|
||||
'uploader_id': '297833',
|
||||
@ -149,16 +168,26 @@ class RutubeEmbedIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://rutube.ru/play/embed/8083783',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# private video
|
||||
'url': 'https://rutube.ru/play/embed/10631925?p=IbAigKqWd1do4mjaM5XLIQ',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
embed_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, embed_id)
|
||||
|
||||
canonical_url = self._html_search_regex(
|
||||
r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage,
|
||||
'Canonical URL')
|
||||
return self.url_result(canonical_url, RutubeIE.ie_key())
|
||||
# Query may contain private videos token and should be passed to API
|
||||
# requests (see #19163)
|
||||
query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||
options = self._download_api_options(embed_id, query)
|
||||
video_id = options['effective_video']
|
||||
formats = self._extract_formats(options, video_id)
|
||||
info = self._download_and_extract_info(video_id, query)
|
||||
info.update({
|
||||
'extractor_key': 'Rutube',
|
||||
'formats': formats,
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class RutubePlaylistBaseIE(RutubeBaseIE):
|
||||
@ -181,7 +210,7 @@ class RutubePlaylistBaseIE(RutubeBaseIE):
|
||||
video_url = url_or_none(result.get('video_url'))
|
||||
if not video_url:
|
||||
continue
|
||||
entry = self._extract_video(result, require_title=False)
|
||||
entry = self._extract_info(result, require_title=False)
|
||||
entry.update({
|
||||
'_type': 'url',
|
||||
'url': video_url,
|
||||
|
@ -16,7 +16,8 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
@ -51,12 +52,17 @@ class SoundcloudIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '62986583',
|
||||
'ext': 'mp3',
|
||||
'upload_date': '20121011',
|
||||
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
|
||||
'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
|
||||
'uploader': 'E.T. ExTerrestrial Music',
|
||||
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
|
||||
'timestamp': 1349920598,
|
||||
'upload_date': '20121011',
|
||||
'duration': 143,
|
||||
'license': 'all-rights-reserved',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
}
|
||||
},
|
||||
# not streamable song
|
||||
@ -68,9 +74,14 @@ class SoundcloudIE(InfoExtractor):
|
||||
'title': 'Goldrushed',
|
||||
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
|
||||
'uploader': 'The Royal Concept',
|
||||
'timestamp': 1337635207,
|
||||
'upload_date': '20120521',
|
||||
'duration': 227,
|
||||
'duration': 30,
|
||||
'license': 'all-rights-reserved',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
'params': {
|
||||
# rtmp
|
||||
@ -85,11 +96,16 @@ class SoundcloudIE(InfoExtractor):
|
||||
'id': '123998367',
|
||||
'ext': 'mp3',
|
||||
'title': 'Youtube - Dl Test Video \'\' Ä↭',
|
||||
'uploader': 'jaimeMF',
|
||||
'description': 'test chars: \"\'/\\ä↭',
|
||||
'uploader': 'jaimeMF',
|
||||
'timestamp': 1386604920,
|
||||
'upload_date': '20131209',
|
||||
'duration': 9,
|
||||
'license': 'all-rights-reserved',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
},
|
||||
# private link (alt format)
|
||||
@ -100,11 +116,16 @@ class SoundcloudIE(InfoExtractor):
|
||||
'id': '123998367',
|
||||
'ext': 'mp3',
|
||||
'title': 'Youtube - Dl Test Video \'\' Ä↭',
|
||||
'uploader': 'jaimeMF',
|
||||
'description': 'test chars: \"\'/\\ä↭',
|
||||
'uploader': 'jaimeMF',
|
||||
'timestamp': 1386604920,
|
||||
'upload_date': '20131209',
|
||||
'duration': 9,
|
||||
'license': 'all-rights-reserved',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
},
|
||||
# downloadable song
|
||||
@ -117,9 +138,14 @@ class SoundcloudIE(InfoExtractor):
|
||||
'title': 'Bus Brakes',
|
||||
'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
|
||||
'uploader': 'oddsamples',
|
||||
'timestamp': 1389232924,
|
||||
'upload_date': '20140109',
|
||||
'duration': 17,
|
||||
'license': 'cc-by-sa',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
},
|
||||
# private link, downloadable format
|
||||
@ -132,9 +158,14 @@ class SoundcloudIE(InfoExtractor):
|
||||
'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
|
||||
'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
|
||||
'uploader': 'Ori Uplift Music',
|
||||
'timestamp': 1504206263,
|
||||
'upload_date': '20170831',
|
||||
'duration': 7449,
|
||||
'license': 'all-rights-reserved',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
},
|
||||
# no album art, use avatar pic for thumbnail
|
||||
@ -147,10 +178,15 @@ class SoundcloudIE(InfoExtractor):
|
||||
'title': 'Sideways (Prod. Mad Real)',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'uploader': 'garyvee',
|
||||
'timestamp': 1488152409,
|
||||
'upload_date': '20170226',
|
||||
'duration': 207,
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'license': 'all-rights-reserved',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -176,22 +212,33 @@ class SoundcloudIE(InfoExtractor):
|
||||
|
||||
def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None):
|
||||
track_id = compat_str(info['id'])
|
||||
title = info['title']
|
||||
name = full_title or track_id
|
||||
if quiet:
|
||||
self.report_extraction(name)
|
||||
thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url')
|
||||
if isinstance(thumbnail, compat_str):
|
||||
thumbnail = thumbnail.replace('-large', '-t500x500')
|
||||
username = try_get(info, lambda x: x['user']['username'], compat_str)
|
||||
|
||||
def extract_count(key):
|
||||
return int_or_none(info.get('%s_count' % key))
|
||||
|
||||
result = {
|
||||
'id': track_id,
|
||||
'uploader': info.get('user', {}).get('username'),
|
||||
'upload_date': unified_strdate(info.get('created_at')),
|
||||
'title': info['title'],
|
||||
'uploader': username,
|
||||
'timestamp': unified_timestamp(info.get('created_at')),
|
||||
'title': title,
|
||||
'description': info.get('description'),
|
||||
'thumbnail': thumbnail,
|
||||
'duration': int_or_none(info.get('duration'), 1000),
|
||||
'webpage_url': info.get('permalink_url'),
|
||||
'license': info.get('license'),
|
||||
'view_count': extract_count('playback'),
|
||||
'like_count': extract_count('favoritings'),
|
||||
'comment_count': extract_count('comment'),
|
||||
'repost_count': extract_count('reposts'),
|
||||
'genre': info.get('genre'),
|
||||
}
|
||||
formats = []
|
||||
query = {'client_id': self._CLIENT_ID}
|
||||
|
@ -5,6 +5,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
parse_resolution,
|
||||
str_to_int,
|
||||
@ -12,7 +13,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class SpankBangIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|m|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)\b'
|
||||
_TESTS = [{
|
||||
'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
|
||||
'md5': '1cc433e1d6aa14bc376535b8679302f7',
|
||||
@ -41,13 +42,22 @@ class SpankBangIE(InfoExtractor):
|
||||
# 4k
|
||||
'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://m.spankbang.com/3vvn/play/fantasy+solo/480p/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://m.spankbang.com/3vvn/play',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://spankbang.com/2y3td/embed/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id, headers={
|
||||
'Cookie': 'country=US'
|
||||
})
|
||||
webpage = self._download_webpage(
|
||||
url.replace('/%s/embed' % video_id, '/%s/video' % video_id),
|
||||
video_id, headers={'Cookie': 'country=US'})
|
||||
|
||||
if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
|
||||
raise ExtractorError(
|
||||
@ -94,3 +104,33 @@ class SpankBangIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
||||
|
||||
class SpankBangPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/[^/]+'
|
||||
_TEST = {
|
||||
'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties',
|
||||
'info_dict': {
|
||||
'id': 'ug0k',
|
||||
'title': 'Big Ass Titties',
|
||||
},
|
||||
'playlist_mincount': 50,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})
|
||||
|
||||
entries = [self.url_result(
|
||||
'https://spankbang.com/%s/video' % video_id,
|
||||
ie=SpankBangIE.ie_key(), video_id=video_id)
|
||||
for video_id in orderedSet(re.findall(
|
||||
r'<a[^>]+\bhref=["\']/?([\da-z]+)/play/', webpage))]
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h1>([^<]+)\s+playlist</h1>', webpage, 'playlist title',
|
||||
fatal=False)
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title)
|
||||
|
@ -3,22 +3,19 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .radiocanada import RadioCanadaIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
urlencode_postdata,
|
||||
extract_attributes,
|
||||
smuggle_url,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class TouTvIE(InfoExtractor):
|
||||
class TouTvIE(RadioCanadaIE):
|
||||
_NETRC_MACHINE = 'toutv'
|
||||
IE_NAME = 'tou.tv'
|
||||
_VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+[EC][0-9]+)?)'
|
||||
_access_token = None
|
||||
_claims = None
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17',
|
||||
@ -46,18 +43,14 @@ class TouTvIE(InfoExtractor):
|
||||
email, password = self._get_login_info()
|
||||
if email is None:
|
||||
return
|
||||
state = 'http://ici.tou.tv/'
|
||||
webpage = self._download_webpage(state, None, 'Downloading homepage')
|
||||
toutvlogin = self._parse_json(self._search_regex(
|
||||
r'(?s)toutvlogin\s*=\s*({.+?});', webpage, 'toutvlogin'), None, js_to_json)
|
||||
authorize_url = toutvlogin['host'] + '/auth/oauth/v2/authorize'
|
||||
login_webpage = self._download_webpage(
|
||||
authorize_url, None, 'Downloading login page', query={
|
||||
'client_id': toutvlogin['clientId'],
|
||||
'redirect_uri': 'https://ici.tou.tv/login/loginCallback',
|
||||
'https://services.radio-canada.ca/auth/oauth/v2/authorize',
|
||||
None, 'Downloading login page', query={
|
||||
'client_id': '4dd36440-09d5-4468-8923-b6d91174ad36',
|
||||
'redirect_uri': 'https://ici.tou.tv/logincallback',
|
||||
'response_type': 'token',
|
||||
'scope': 'media-drmt openid profile email id.write media-validation.read.privileged',
|
||||
'state': state,
|
||||
'scope': 'id.write media-validation.read',
|
||||
'state': '/',
|
||||
})
|
||||
|
||||
def extract_form_url_and_data(wp, default_form_url, form_spec_re=''):
|
||||
@ -86,12 +79,7 @@ class TouTvIE(InfoExtractor):
|
||||
self._access_token = self._search_regex(
|
||||
r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
|
||||
urlh.geturl(), 'access token')
|
||||
self._claims = self._download_json(
|
||||
'https://services.radio-canada.ca/media/validation/v2/getClaims',
|
||||
None, 'Extracting Claims', query={
|
||||
'token': self._access_token,
|
||||
'access_token': self._access_token,
|
||||
})['claims']
|
||||
self._claims = self._call_api('validation/v2/getClaims')['claims']
|
||||
|
||||
def _real_extract(self, url):
|
||||
path = self._match_id(url)
|
||||
@ -102,19 +90,10 @@ class TouTvIE(InfoExtractor):
|
||||
self.report_warning('This video is probably DRM protected.', path)
|
||||
video_id = metadata['IdMedia']
|
||||
details = metadata['Details']
|
||||
title = details['OriginalTitle']
|
||||
video_url = 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id)
|
||||
if self._access_token and self._claims:
|
||||
video_url = smuggle_url(video_url, {
|
||||
'access_token': self._access_token,
|
||||
'claims': self._claims,
|
||||
})
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': video_url,
|
||||
return merge_dicts({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': details.get('OriginalTitle'),
|
||||
'thumbnail': details.get('ImageUrl'),
|
||||
'duration': int_or_none(details.get('LengthInSeconds')),
|
||||
}
|
||||
}, self._extract_info(metadata.get('AppCode', 'toutv'), video_id))
|
||||
|
75
youtube_dl/extractor/trunews.py
Normal file
75
youtube_dl/extractor/trunews.py
Normal file
@ -0,0 +1,75 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class TruNewsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?trunews\.com/stream/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.trunews.com/stream/will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
|
||||
'md5': 'a19c024c3906ff954fac9b96ce66bb08',
|
||||
'info_dict': {
|
||||
'id': '5c5a21e65d3c196e1c0020cc',
|
||||
'display_id': 'will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
|
||||
'ext': 'mp4',
|
||||
'title': "Will Democrats Stage a Circus During President Trump's State of the Union Speech?",
|
||||
'description': 'md5:c583b72147cc92cf21f56a31aff7a670',
|
||||
'duration': 3685,
|
||||
'timestamp': 1549411440,
|
||||
'upload_date': '20190206',
|
||||
},
|
||||
'add_ie': ['Zype'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
video = self._download_json(
|
||||
'https://api.zype.com/videos', display_id, query={
|
||||
'app_key': 'PUVKp9WgGUb3-JUw6EqafLx8tFVP6VKZTWbUOR-HOm__g4fNDt1bCsm_LgYf_k9H',
|
||||
'per_page': 1,
|
||||
'active': 'true',
|
||||
'friendly_title': display_id,
|
||||
})['response'][0]
|
||||
|
||||
zype_id = video['_id']
|
||||
|
||||
thumbnails = []
|
||||
thumbnails_list = video.get('thumbnails')
|
||||
if isinstance(thumbnails_list, list):
|
||||
for thumbnail in thumbnails_list:
|
||||
if not isinstance(thumbnail, dict):
|
||||
continue
|
||||
thumbnail_url = url_or_none(thumbnail.get('url'))
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': thumbnail_url,
|
||||
'width': int_or_none(thumbnail.get('width')),
|
||||
'height': int_or_none(thumbnail.get('height')),
|
||||
})
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': update_url_query(
|
||||
'https://player.zype.com/embed/%s.js' % zype_id,
|
||||
{'api_key': 'X5XnahkjCwJrT_l5zUqypnaLEObotyvtUKJWWlONxDoHVjP8vqxlArLV8llxMbyt'}),
|
||||
'ie_key': 'Zype',
|
||||
'id': zype_id,
|
||||
'display_id': display_id,
|
||||
'title': video.get('title'),
|
||||
'description': dict_get(video, ('description', 'ott_description', 'short_description')),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'timestamp': unified_timestamp(video.get('published_at')),
|
||||
'average_rating': float_or_none(video.get('rating')),
|
||||
'view_count': int_or_none(video.get('request_count')),
|
||||
'thumbnails': thumbnails,
|
||||
}
|
@ -4,44 +4,72 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class TruTVIE(TurnerBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?trutv\.com(?:(?P<path>/shows/[^/]+/videos/[^/?#]+?)\.html|/full-episodes/[^/]+/(?P<id>\d+))'
|
||||
_VALID_URL = r'https?://(?:www\.)?trutv\.com/(?:shows|full-episodes)/(?P<series_slug>[0-9A-Za-z-]+)/(?:videos/(?P<clip_slug>[0-9A-Za-z-]+)|(?P<id>\d+))'
|
||||
_TEST = {
|
||||
'url': 'http://www.trutv.com/shows/10-things/videos/you-wont-believe-these-sports-bets.html',
|
||||
'md5': '2cdc844f317579fed1a7251b087ff417',
|
||||
'url': 'https://www.trutv.com/shows/the-carbonaro-effect/videos/sunlight-activated-flower.html',
|
||||
'info_dict': {
|
||||
'id': '/shows/10-things/videos/you-wont-believe-these-sports-bets',
|
||||
'id': 'f16c03beec1e84cd7d1a51f11d8fcc29124cc7f1',
|
||||
'ext': 'mp4',
|
||||
'title': 'You Won\'t Believe These Sports Bets',
|
||||
'description': 'Jamie Lee sits down with a bookie to discuss the bizarre world of illegal sports betting.',
|
||||
'upload_date': '20130305',
|
||||
}
|
||||
'title': 'Sunlight-Activated Flower',
|
||||
'description': "A customer is stunned when he sees Michael's sunlight-activated flower.",
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
path, video_id = re.match(self._VALID_URL, url).groups()
|
||||
auth_required = False
|
||||
if path:
|
||||
data_src = 'http://www.trutv.com/video/cvp/v2/xml/content.xml?id=%s.xml' % path
|
||||
series_slug, clip_slug, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
if video_id:
|
||||
path = 'episode'
|
||||
display_id = video_id
|
||||
else:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_id = self._search_regex(
|
||||
r"TTV\.TVE\.episodeId\s*=\s*'([^']+)';",
|
||||
webpage, 'video id', default=video_id)
|
||||
auth_required = self._search_regex(
|
||||
r'TTV\.TVE\.authRequired\s*=\s*(true|false);',
|
||||
webpage, 'auth required', default='false') == 'true'
|
||||
data_src = 'http://www.trutv.com/tveverywhere/services/cvpXML.do?titleId=' + video_id
|
||||
return self._extract_cvp_info(
|
||||
data_src, path, {
|
||||
'secure': {
|
||||
'media_src': 'http://androidhls-secure.cdn.turner.com/trutv/big',
|
||||
'tokenizer_src': 'http://www.trutv.com/tveverywhere/processors/services/token_ipadAdobe.do',
|
||||
},
|
||||
}, {
|
||||
path = 'series/clip'
|
||||
display_id = clip_slug
|
||||
|
||||
data = self._download_json(
|
||||
'https://api.trutv.com/v2/web/%s/%s/%s' % (path, series_slug, display_id),
|
||||
display_id)
|
||||
video_data = data['episode'] if video_id else data['info']
|
||||
media_id = video_data['mediaId']
|
||||
title = video_data['title'].strip()
|
||||
|
||||
info = self._extract_ngtv_info(
|
||||
media_id, {}, {
|
||||
'url': url,
|
||||
'site_name': 'truTV',
|
||||
'auth_required': auth_required,
|
||||
'auth_required': video_data.get('isAuthRequired'),
|
||||
})
|
||||
|
||||
thumbnails = []
|
||||
for image in video_data.get('images', []):
|
||||
image_url = image.get('srcUrl')
|
||||
if not image_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': image_url,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
})
|
||||
|
||||
info.update({
|
||||
'id': media_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': video_data.get('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': parse_iso8601(video_data.get('publicationDate')),
|
||||
'series': video_data.get('showTitle'),
|
||||
'season_number': int_or_none(video_data.get('seasonNum')),
|
||||
'episode_number': int_or_none(video_data.get('episodeNum')),
|
||||
})
|
||||
return info
|
||||
|
@ -493,10 +493,9 @@ class TVPlayHomeIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-asset-id\s*=\s*["\'](\d{5,7})\b', webpage, 'video id',
|
||||
default=None)
|
||||
r'data-asset-id\s*=\s*["\'](\d{5,})\b', webpage, 'video id')
|
||||
|
||||
if video_id:
|
||||
if len(video_id) < 8:
|
||||
return self.url_result(
|
||||
'mtg:%s' % video_id, ie=TVPlayIE.ie_key(), video_id=video_id)
|
||||
|
||||
@ -537,8 +536,9 @@ class TVPlayHomeIE(InfoExtractor):
|
||||
r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number',
|
||||
default=None))
|
||||
episode = self._search_regex(
|
||||
r'(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'episode',
|
||||
default=None, group='value')
|
||||
(r'\bepisode\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
|
||||
r'data-subtitle\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
|
||||
'episode', default=None, group='value')
|
||||
episode_number = int_or_none(self._search_regex(
|
||||
r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number',
|
||||
default=None))
|
||||
|
@ -136,7 +136,12 @@ class TwitchBaseIE(InfoExtractor):
|
||||
source = next(f for f in formats if f['format_id'] == 'Source')
|
||||
source['preference'] = 10
|
||||
except StopIteration:
|
||||
pass # No Source stream present
|
||||
for f in formats:
|
||||
if '/chunked/' in f['url']:
|
||||
f.update({
|
||||
'source_preference': 10,
|
||||
'format_note': 'Source',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
|
@ -184,7 +184,7 @@ DATE_FORMATS_MONTH_FIRST.extend([
|
||||
])
|
||||
|
||||
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
|
||||
JSON_LD_RE = r'(?is)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
|
||||
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
|
||||
|
||||
|
||||
def preferredencoding():
|
||||
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = 'vc.2019.02.03.1'
|
||||
__version__ = 'vc.2019.02.14'
|
||||
|
Loading…
x
Reference in New Issue
Block a user