1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-11 07:27:14 +08:00

Merge branch 'master' into fix.25.12.2018

# Conflicts:
#	youtube_dl/version.py
This commit is contained in:
Avi Peretz 2019-02-14 14:18:53 +02:00
commit 7ef4a71efa
23 changed files with 527 additions and 249 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.30.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.02.08*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.30.1** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.02.08**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2019.01.30.1 [debug] youtube-dl version 2019.02.08
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -1,3 +1,32 @@
version 2019.02.08
Core
* [utils] Improve JSON-LD regular expression (#18058)
* [YoutubeDL] Fallback to ie_key of matching extractor while making
download archive id when no explicit ie_key is provided (#19022)
Extractors
+ [malltv] Add support for mall.tv (#18058, #17856)
+ [spankbang:playlist] Add support for playlists (#19145)
* [spankbang] Extend URL regular expression
* [trutv] Fix extraction (#17336)
* [toutv] Fix authentication (#16398, #18700)
* [pornhub] Fix tags and categories extraction (#13720, #19135)
* [pornhd] Fix formats extraction
+ [pornhd] Extract like count (#19123, #19125)
* [radiocanada] Switch to the new media requests (#19115)
+ [teachable] Add support for courses.workitdaily.com (#18871)
- [vporn] Remove extractor (#16276)
+ [soundcloud:pagedplaylist] Add ie and title to entries (#19022, #19086)
+ [drtuber] Extract duration (#19078)
* [soundcloud] Fix paged playlists extraction, add support for albums and update client id
* [soundcloud] Update client id
* [drtv] Improve preference (#19079)
+ [openload] Add support for openload.pw and oload.pw (#18930)
+ [openload] Add support for oload.info (#19073)
* [crackle] Authorize media detail request (#16931)
version 2019.01.30.1 version 2019.01.30.1
Core Core

View File

@ -476,6 +476,7 @@
- **mailru:music**: Музыка@Mail.Ru - **mailru:music**: Музыка@Mail.Ru
- **mailru:music:search**: Музыка@Mail.Ru - **mailru:music:search**: Музыка@Mail.Ru
- **MakerTV** - **MakerTV**
- **MallTV**
- **mangomolo:live** - **mangomolo:live**
- **mangomolo:video** - **mangomolo:video**
- **ManyVids** - **ManyVids**
@ -827,6 +828,7 @@
- **southpark.nl** - **southpark.nl**
- **southparkstudios.dk** - **southparkstudios.dk**
- **SpankBang** - **SpankBang**
- **SpankBangPlaylist**
- **Spankwire** - **Spankwire**
- **Spiegel** - **Spiegel**
- **Spiegel:Article**: Articles on spiegel.de - **Spiegel:Article**: Articles on spiegel.de
@ -1057,7 +1059,6 @@
- **Voot** - **Voot**
- **VoxMedia** - **VoxMedia**
- **VoxMediaVolume** - **VoxMediaVolume**
- **Vporn**
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **Vrak** - **Vrak**
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be - **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be

View File

@ -61,6 +61,7 @@ class TestInfoExtractor(unittest.TestCase):
<meta content='Foo' property=og:foobar> <meta content='Foo' property=og:foobar>
<meta name="og:test1" content='foo > < bar'/> <meta name="og:test1" content='foo > < bar'/>
<meta name="og:test2" content="foo >//< bar"/> <meta name="og:test2" content="foo >//< bar"/>
<meta property=og-test3 content='Ill-formatted opengraph'/>
''' '''
self.assertEqual(ie._og_search_title(html), 'Foo') self.assertEqual(ie._og_search_title(html), 'Foo')
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ') self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
@ -69,6 +70,7 @@ class TestInfoExtractor(unittest.TestCase):
self.assertEqual(ie._og_search_property('foobar', html), 'Foo') self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar') self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar') self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
self.assertEqual(ie._og_search_property('test3', html), 'Ill-formatted opengraph')
self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar') self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True) self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True) self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)

View File

@ -82,6 +82,7 @@ from .utils import (
sanitize_url, sanitize_url,
sanitized_Request, sanitized_Request,
std_headers, std_headers,
str_or_none,
subtitles_filename, subtitles_filename,
UnavailableVideoError, UnavailableVideoError,
url_basename, url_basename,
@ -2067,9 +2068,12 @@ class YoutubeDL(object):
# and backwards compatibility with prior versions # and backwards compatibility with prior versions
extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
if extractor is None: if extractor is None:
url = str_or_none(info_dict.get('url'))
if not url:
return
# Try to find matching extractor for the URL and take its ie_key # Try to find matching extractor for the URL and take its ie_key
for ie in self._ies: for ie in self._ies:
if ie.suitable(info_dict['url']): if ie.suitable(url):
extractor = ie.ie_key() extractor = ie.ie_key()
break break
else: else:

View File

@ -1058,7 +1058,7 @@ class InfoExtractor(object):
@staticmethod @staticmethod
def _og_regexes(prop): def _og_regexes(prop):
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))' content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)' property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)'
% {'prop': re.escape(prop)}) % {'prop': re.escape(prop)})
template = r'<meta[^>]+?%s[^>]+?%s' template = r'<meta[^>]+?%s[^>]+?%s'
return [ return [

View File

@ -619,6 +619,7 @@ from .mailru import (
MailRuMusicSearchIE, MailRuMusicSearchIE,
) )
from .makertv import MakerTVIE from .makertv import MakerTVIE
from .malltv import MallTVIE
from .mangomolo import ( from .mangomolo import (
MangomoloVideoIE, MangomoloVideoIE,
MangomoloLiveIE, MangomoloLiveIE,
@ -1058,7 +1059,10 @@ from .southpark import (
SouthParkEsIE, SouthParkEsIE,
SouthParkNlIE SouthParkNlIE
) )
from .spankbang import SpankBangIE from .spankbang import (
SpankBangIE,
SpankBangPlaylistIE,
)
from .spankwire import SpankwireIE from .spankwire import SpankwireIE
from .spiegel import SpiegelIE, SpiegelArticleIE from .spiegel import SpiegelIE, SpiegelArticleIE
from .spiegeltv import SpiegeltvIE from .spiegeltv import SpiegeltvIE
@ -1167,6 +1171,7 @@ from .toutv import TouTvIE
from .toypics import ToypicsUserIE, ToypicsIE from .toypics import ToypicsUserIE, ToypicsIE
from .traileraddict import TrailerAddictIE from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE from .trilulilu import TriluliluIE
from .trunews import TruNewsIE
from .trutv import TruTVIE from .trutv import TruTVIE
from .tube8 import Tube8IE from .tube8 import Tube8IE
from .tubitv import TubiTvIE from .tubitv import TubiTvIE

View File

@ -27,6 +27,10 @@ class ImgurIE(InfoExtractor):
}, { }, {
'url': 'https://i.imgur.com/crGpqCV.mp4', 'url': 'https://i.imgur.com/crGpqCV.mp4',
'only_matching': True, 'only_matching': True,
}, {
# no title
'url': 'https://i.imgur.com/jxBXAMC.gifv',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -87,7 +91,7 @@ class ImgurIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'title': self._og_search_title(webpage), 'title': self._og_search_title(webpage, default=video_id),
} }

View File

@ -34,12 +34,15 @@ class LinkedInLearningBaseIE(InfoExtractor):
'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value, 'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value,
}, query=query)['elements'][0] }, query=query)['elements'][0]
def _get_video_id(self, urn, course_slug, video_slug): def _get_urn_id(self, video_data):
urn = video_data.get('urn')
if urn: if urn:
mobj = re.search(r'urn:li:lyndaCourse:\d+,(\d+)', urn) mobj = re.search(r'urn:li:lyndaCourse:\d+,(\d+)', urn)
if mobj: if mobj:
return mobj.group(1) return mobj.group(1)
return '%s/%s' % (course_slug, video_slug)
def _get_video_id(self, video_data, course_slug, video_slug):
    """Return the id to use for a video.

    The value produced by ``_get_urn_id(video_data)`` is used when it is
    truthy; otherwise the id falls back to ``'<course_slug>/<video_slug>'``.
    """
    urn_id = self._get_urn_id(video_data)
    if urn_id:
        return urn_id
    return '%s/%s' % (course_slug, video_slug)
def _real_initialize(self): def _real_initialize(self):
email, password = self._get_login_info() email, password = self._get_login_info()
@ -123,7 +126,7 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr')) self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr'))
return { return {
'id': self._get_video_id(video_data.get('urn'), course_slug, video_slug), 'id': self._get_video_id(video_data, course_slug, video_slug),
'title': title, 'title': title,
'formats': formats, 'formats': formats,
'thumbnail': video_data.get('defaultThumbnail'), 'thumbnail': video_data.get('defaultThumbnail'),
@ -154,18 +157,21 @@ class LinkedInLearningCourseIE(LinkedInLearningBaseIE):
course_data = self._call_api(course_slug, 'chapters,description,title') course_data = self._call_api(course_slug, 'chapters,description,title')
entries = [] entries = []
for chapter in course_data.get('chapters', []): for chapter_number, chapter in enumerate(course_data.get('chapters', []), 1):
chapter_title = chapter.get('title') chapter_title = chapter.get('title')
chapter_id = self._get_urn_id(chapter)
for video in chapter.get('videos', []): for video in chapter.get('videos', []):
video_slug = video.get('slug') video_slug = video.get('slug')
if not video_slug: if not video_slug:
continue continue
entries.append({ entries.append({
'_type': 'url_transparent', '_type': 'url_transparent',
'id': self._get_video_id(video.get('urn'), course_slug, video_slug), 'id': self._get_video_id(video, course_slug, video_slug),
'title': video.get('title'), 'title': video.get('title'),
'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug), 'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug),
'chapter': chapter_title, 'chapter': chapter_title,
'chapter_number': chapter_number,
'chapter_id': chapter_id,
'ie_key': LinkedInLearningIE.ie_key(), 'ie_key': LinkedInLearningIE.ie_key(),
}) })

View File

@ -0,0 +1,53 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import merge_dicts
class MallTVIE(InfoExtractor):
    """Information extractor for video pages on mall.tv."""

    # The last path component of the page URL is captured as the ``id`` group;
    # _real_extract uses it as the display id.
    _VALID_URL = r'https?://(?:www\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
        'md5': '1c4a37f080e1f3023103a7b43458e518',
        'info_dict': {
            'id': 't0zzt0',
            'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
            'ext': 'mp4',
            'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
            'description': 'md5:25fc0ec42a72ba602b602c683fa29deb',
            'duration': 216,
            'timestamp': 1538870400,
            'upload_date': '20181007',
            'view_count': int,
        }
    }, {
        'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a single mall.tv page.

        The result combines the first parsed HTML5 media entry, any JSON-LD
        metadata found on the page, and id/title/description/thumbnail
        values taken from the page itself, merged via ``merge_dicts`` in
        that argument order.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(
            url, display_id, headers=self.geo_verification_headers())

        # Group 1 spans a <source> tag up to and including ".../<id>/index";
        # the named group ``id`` is the path component right before "index".
        source_re = r'(<source[^>]+\bsrc=(?:(["\'])(?:(?!\2).)+|[^\s]+)/(?P<id>[\da-z]+)/index)\b'
        video_id = self._search_regex(
            source_re, webpage, 'video id', group='id')

        # Append ".m3u8" right after "index" in every matching <source> before
        # handing the page to the HTML5 media parser.
        # NOTE(review): presumably this lets the parser recognise the source
        # as an HLS manifest -- confirm against _parse_html5_media_entries.
        patched_webpage = re.sub(source_re, r'\1.m3u8', webpage)
        media_entries = self._parse_html5_media_entries(
            url, patched_webpage, video_id,
            m3u8_id='hls', m3u8_entry_protocol='m3u8_native')
        # Only the first media entry is used
        media = media_entries[0]

        json_ld_info = self._search_json_ld(webpage, video_id, default={})

        # The display id doubles as the title when no OpenGraph title exists
        og_title = self._og_search_title(webpage, default=None)
        page_info = {
            'id': video_id,
            'display_id': display_id,
            'title': og_title or display_id,
            'description': self._og_search_description(webpage, default=None),
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
        }
        return merge_dicts(media, json_ld_info, page_info)

View File

@ -4,9 +4,11 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
js_to_json, js_to_json,
urljoin,
) )
@ -14,7 +16,7 @@ class PornHdIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?' _VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?'
_TESTS = [{ _TESTS = [{
'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video', 'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
'md5': 'c8b964b1f0a4b5f7f28ae3a5c9f86ad5', 'md5': '87f1540746c1d32ec7a2305c12b96b25',
'info_dict': { 'info_dict': {
'id': '9864', 'id': '9864',
'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video', 'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
@ -23,6 +25,7 @@ class PornHdIE(InfoExtractor):
'description': 'md5:3748420395e03e31ac96857a8f125b2b', 'description': 'md5:3748420395e03e31ac96857a8f125b2b',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'view_count': int, 'view_count': int,
'like_count': int,
'age_limit': 18, 'age_limit': 18,
} }
}, { }, {
@ -37,6 +40,7 @@ class PornHdIE(InfoExtractor):
'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294', 'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'view_count': int, 'view_count': int,
'like_count': int,
'age_limit': 18, 'age_limit': 18,
}, },
'skip': 'Not available anymore', 'skip': 'Not available anymore',
@ -65,12 +69,14 @@ class PornHdIE(InfoExtractor):
formats = [] formats = []
for format_id, video_url in sources.items(): for format_id, video_url in sources.items():
video_url = urljoin(url, video_url)
if not video_url: if not video_url:
continue continue
height = int_or_none(self._search_regex( height = int_or_none(self._search_regex(
r'^(\d+)[pP]', format_id, 'height', default=None)) r'^(\d+)[pP]', format_id, 'height', default=None))
formats.append({ formats.append({
'url': video_url, 'url': video_url,
'ext': determine_ext(video_url, 'mp4'),
'format_id': format_id, 'format_id': format_id,
'height': height, 'height': height,
}) })
@ -85,6 +91,11 @@ class PornHdIE(InfoExtractor):
r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage, r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
'thumbnail', fatal=False, group='url') 'thumbnail', fatal=False, group='url')
like_count = int_or_none(self._search_regex(
(r'(\d+)\s*</11[^>]+>(?:&nbsp;|\s)*\blikes',
r'class=["\']save-count["\'][^>]*>\s*(\d+)'),
webpage, 'like count', fatal=False))
return { return {
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
@ -92,6 +103,7 @@ class PornHdIE(InfoExtractor):
'description': description, 'description': description,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'view_count': view_count, 'view_count': view_count,
'like_count': like_count,
'formats': formats, 'formats': formats,
'age_limit': 18, 'age_limit': 18,
} }

View File

@ -16,7 +16,6 @@ from .openload import PhantomJSwrapper
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
js_to_json,
orderedSet, orderedSet,
remove_quotes, remove_quotes,
str_to_int, str_to_int,
@ -303,14 +302,12 @@ class PornHubIE(PornHubBaseIE):
comment_count = self._extract_count( comment_count = self._extract_count(
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
page_params = self._parse_json(self._search_regex( def extract_list(meta_key):
r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})', div = self._search_regex(
webpage, 'page parameters', group='data', default='{}'), r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
video_id, transform_source=js_to_json, fatal=False) % meta_key, webpage, meta_key, default=None)
tags = categories = None if div:
if page_params: return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
tags = page_params.get('tags', '').split(',')
categories = page_params.get('categories', '').split(',')
return { return {
'id': video_id, 'id': video_id,
@ -325,8 +322,8 @@ class PornHubIE(PornHubBaseIE):
'comment_count': comment_count, 'comment_count': comment_count,
'formats': formats, 'formats': formats,
'age_limit': 18, 'age_limit': 18,
'tags': tags, 'tags': extract_list('tags'),
'categories': categories, 'categories': extract_list('categories'),
'subtitles': subtitles, 'subtitles': subtitles,
} }

View File

@ -4,16 +4,12 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import ( from ..utils import (
xpath_text,
find_xpath_attr,
determine_ext, determine_ext,
ExtractorError,
int_or_none, int_or_none,
unified_strdate, unified_strdate,
xpath_element,
ExtractorError,
determine_protocol,
unsmuggle_url,
) )
@ -61,107 +57,67 @@ class RadioCanadaIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
} }
] ]
_GEO_COUNTRIES = ['CA']
_access_token = None
_claims = None
def _real_extract(self, url): def _call_api(self, path, video_id=None, app_code=None, query=None):
url, smuggled_data = unsmuggle_url(url, {}) if not query:
app_code, video_id = re.match(self._VALID_URL, url).groups() query = {}
query.update({
metadata = self._download_xml( 'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb',
'http://api.radio-canada.ca/metaMedia/v1/index.ashx', 'output': 'json',
video_id, note='Downloading metadata XML', query={ })
if video_id:
query.update({
'appCode': app_code, 'appCode': app_code,
'idMedia': video_id, 'idMedia': video_id,
}) })
if self._access_token:
query['access_token'] = self._access_token
try:
return self._download_json(
'https://services.radio-canada.ca/media/' + path, video_id, query=query)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 422):
data = self._parse_json(e.cause.read().decode(), None)
error = data.get('error_description') or data['errorMessage']['text']
raise ExtractorError(error, expected=True)
raise
def _extract_info(self, app_code, video_id):
metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas']
def get_meta(name): def get_meta(name):
el = find_xpath_attr(metadata, './/Meta', 'name', name) for meta in metas:
return el.text if el is not None else None if meta.get('name') == name:
text = meta.get('text')
if text:
return text
# protectionType does not necessarily mean the video is DRM protected (see # protectionType does not necessarily mean the video is DRM protected (see
# https://github.com/rg3/youtube-dl/pull/18609). # https://github.com/rg3/youtube-dl/pull/18609).
if get_meta('protectionType'): if get_meta('protectionType'):
self.report_warning('This video is probably DRM protected.') self.report_warning('This video is probably DRM protected.')
device_types = ['ipad'] query = {
if not smuggled_data: 'connectionType': 'hd',
device_types.append('flash') 'deviceType': 'ipad',
device_types.append('android') 'multibitrate': 'true',
}
formats = [] if self._claims:
error = None query['claims'] = self._claims
# TODO: extract f4m formats v_data = self._call_api('validation/v2/', video_id, app_code, query)
# f4m formats can be extracted using flashhd device_type but they produce unplayable file v_url = v_data.get('url')
for device_type in device_types: if not v_url:
validation_url = 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx' error = v_data['message']
query = { if error == "Le contenu sélectionné n'est pas disponible dans votre pays":
'appCode': app_code, raise self.raise_geo_restricted(error, self._GEO_COUNTRIES)
'idMedia': video_id, if error == 'Le contenu sélectionné est disponible seulement en premium':
'connectionType': 'broadband', self.raise_login_required(error)
'multibitrate': 'true',
'deviceType': device_type,
}
if smuggled_data:
validation_url = 'https://services.radio-canada.ca/media/validation/v2/'
query.update(smuggled_data)
else:
query.update({
# paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction
'paysJ391wsHjbOJwvCs26toz': 'CA',
'bypasslock': 'NZt5K62gRqfc',
})
v_data = self._download_xml(validation_url, video_id, note='Downloading %s XML' % device_type, query=query, fatal=False)
v_url = xpath_text(v_data, 'url')
if not v_url:
continue
if v_url == 'null':
error = xpath_text(v_data, 'message')
continue
ext = determine_ext(v_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
v_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
v_url, video_id, f4m_id='hds', fatal=False))
else:
ext = determine_ext(v_url)
bitrates = xpath_element(v_data, 'bitrates')
for url_e in bitrates.findall('url'):
tbr = int_or_none(url_e.get('bitrate'))
if not tbr:
continue
f_url = re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url)
protocol = determine_protocol({'url': f_url})
f = {
'format_id': '%s-%d' % (protocol, tbr),
'url': f_url,
'ext': 'flv' if protocol == 'rtmp' else ext,
'protocol': protocol,
'width': int_or_none(url_e.get('width')),
'height': int_or_none(url_e.get('height')),
'tbr': tbr,
}
mobj = re.match(r'(?P<url>rtmp://[^/]+/[^/]+)/(?P<playpath>[^?]+)(?P<auth>\?.+)', f_url)
if mobj:
f.update({
'url': mobj.group('url') + mobj.group('auth'),
'play_path': mobj.group('playpath'),
})
formats.append(f)
if protocol == 'rtsp':
base_url = self._search_regex(
r'rtsp://([^?]+)', f_url, 'base url', default=None)
if base_url:
base_url = 'http://' + base_url
formats.extend(self._extract_m3u8_formats(
base_url + '/playlist.m3u8', video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
base_url + '/manifest.f4m', video_id,
f4m_id='hds', fatal=False))
if not formats and error:
raise ExtractorError( raise ExtractorError(
'%s said: %s' % (self.IE_NAME, error), expected=True) '%s said: %s' % (self.IE_NAME, error), expected=True)
formats = self._extract_m3u8_formats(v_url, video_id, 'mp4')
self._sort_formats(formats) self._sort_formats(formats)
subtitles = {} subtitles = {}
@ -186,11 +142,14 @@ class RadioCanadaIE(InfoExtractor):
'formats': formats, 'formats': formats,
} }
def _real_extract(self, url):
    """Match ``url`` against ``_VALID_URL`` and extract info from its groups.

    The captured groups are passed positionally to ``_extract_info``.
    """
    mobj = re.match(self._VALID_URL, url)
    return self._extract_info(*mobj.groups())
class RadioCanadaAudioVideoIE(InfoExtractor): class RadioCanadaAudioVideoIE(InfoExtractor):
'radiocanada:audiovideo' 'radiocanada:audiovideo'
_VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)' _VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)'
_TEST = { _TESTS = [{
'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam', 'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
'info_dict': { 'info_dict': {
'id': '7527184', 'id': '7527184',
@ -203,7 +162,10 @@ class RadioCanadaAudioVideoIE(InfoExtractor):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
} }, {
'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
return self.url_result('radiocanada:medianet:%s' % self._match_id(url)) return self.url_result('radiocanada:medianet:%s' % self._match_id(url))

View File

@ -21,7 +21,17 @@ from ..utils import (
class RutubeBaseIE(InfoExtractor): class RutubeBaseIE(InfoExtractor):
def _extract_video(self, video, video_id=None, require_title=True): def _download_api_info(self, video_id, query=None):
if not query:
query = {}
query['format'] = 'json'
return self._download_json(
'http://rutube.ru/api/video/%s/' % video_id,
video_id, 'Downloading video JSON',
'Unable to download video JSON', query=query)
@staticmethod
def _extract_info(video, video_id=None, require_title=True):
title = video['title'] if require_title else video.get('title') title = video['title'] if require_title else video.get('title')
age_limit = video.get('is_adult') age_limit = video.get('is_adult')
@ -32,7 +42,7 @@ class RutubeBaseIE(InfoExtractor):
category = try_get(video, lambda x: x['category']['name']) category = try_get(video, lambda x: x['category']['name'])
return { return {
'id': video.get('id') or video_id, 'id': video.get('id') or video_id if video_id else video['id'],
'title': title, 'title': title,
'description': video.get('description'), 'description': video.get('description'),
'thumbnail': video.get('thumbnail_url'), 'thumbnail': video.get('thumbnail_url'),
@ -47,6 +57,42 @@ class RutubeBaseIE(InfoExtractor):
'is_live': bool_or_none(video.get('is_livestream')), 'is_live': bool_or_none(video.get('is_livestream')),
} }
def _download_and_extract_info(self, video_id, query=None):
    """Download the API info for ``video_id`` and turn it into an info dict.

    ``query`` is forwarded unchanged to ``_download_api_info``; the
    downloaded data and ``video_id`` are then passed to ``_extract_info``.
    """
    video = self._download_api_info(video_id, query=query)
    return self._extract_info(video, video_id)
def _download_api_options(self, video_id, query=None):
    """Download the play options JSON for ``video_id`` from the Rutube API.

    ``query`` may carry extra query parameters; ``format=json`` is always
    set on top of them. The caller's dict is copied, never modified in
    place, so it can safely be reused for further requests. Geo
    verification headers are sent along with the request.

    Returns the result of ``_download_json`` for the options endpoint.
    """
    # Work on a copy so that forcing the format does not leak into the
    # dict owned by the caller
    query = dict(query or {})
    query['format'] = 'json'
    return self._download_json(
        'http://rutube.ru/api/play/options/%s/' % video_id,
        video_id, 'Downloading options JSON',
        'Unable to download options JSON',
        headers=self.geo_verification_headers(), query=query)
def _extract_formats(self, options, video_id):
    """Build the sorted list of formats from downloaded play options.

    Every ``format id -> URL`` pair in ``options['video_balancer']`` is
    handled according to the URL's extension: ``m3u8`` and ``f4m`` URLs are
    expanded through the respective manifest helpers (non-fatally), and any
    other URL becomes a single direct format entry.
    """
    formats = []
    for balancer_id, balancer_url in options['video_balancer'].items():
        kind = determine_ext(balancer_url)
        if kind == 'm3u8':
            found = self._extract_m3u8_formats(
                balancer_url, video_id, 'mp4', m3u8_id=balancer_id, fatal=False)
        elif kind == 'f4m':
            found = self._extract_f4m_formats(
                balancer_url, video_id, f4m_id=balancer_id, fatal=False)
        else:
            # Neither manifest type: use the URL directly as one format
            found = [{
                'url': balancer_url,
                'format_id': balancer_id,
            }]
        formats.extend(found)
    self._sort_formats(formats)
    return formats
def _download_and_extract_formats(self, video_id, query=None):
    """Download the play options for ``video_id`` and extract its formats.

    ``query`` is forwarded unchanged to ``_download_api_options``; the
    downloaded options and ``video_id`` are then passed to
    ``_extract_formats``.
    """
    options = self._download_api_options(video_id, query=query)
    return self._extract_formats(options, video_id)
class RutubeIE(RutubeBaseIE): class RutubeIE(RutubeBaseIE):
IE_NAME = 'rutube' IE_NAME = 'rutube'
@ -55,13 +101,13 @@ class RutubeIE(RutubeBaseIE):
_TESTS = [{ _TESTS = [{
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
'md5': '79938ade01294ef7e27574890d0d3769', 'md5': '1d24f180fac7a02f3900712e5a5764d6',
'info_dict': { 'info_dict': {
'id': '3eac3b4561676c17df9132a9a1e62e3e', 'id': '3eac3b4561676c17df9132a9a1e62e3e',
'ext': 'flv', 'ext': 'mp4',
'title': 'Раненный кенгуру забежал в аптеку', 'title': 'Раненный кенгуру забежал в аптеку',
'description': 'http://www.ntdtv.ru ', 'description': 'http://www.ntdtv.ru ',
'duration': 80, 'duration': 81,
'uploader': 'NTDRussian', 'uploader': 'NTDRussian',
'uploader_id': '29790', 'uploader_id': '29790',
'timestamp': 1381943602, 'timestamp': 1381943602,
@ -94,39 +140,12 @@ class RutubeIE(RutubeBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
info = self._download_and_extract_info(video_id)
video = self._download_json( info['formats'] = self._download_and_extract_formats(video_id)
'http://rutube.ru/api/video/%s/?format=json' % video_id,
video_id, 'Downloading video JSON')
info = self._extract_video(video, video_id)
options = self._download_json(
'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
video_id, 'Downloading options JSON',
headers=self.geo_verification_headers())
formats = []
for format_id, format_url in options['video_balancer'].items():
ext = determine_ext(format_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
format_url, video_id, f4m_id=format_id, fatal=False))
else:
formats.append({
'url': format_url,
'format_id': format_id,
})
self._sort_formats(formats)
info['formats'] = formats
return info return info
class RutubeEmbedIE(InfoExtractor): class RutubeEmbedIE(RutubeBaseIE):
IE_NAME = 'rutube:embed' IE_NAME = 'rutube:embed'
IE_DESC = 'Rutube embedded videos' IE_DESC = 'Rutube embedded videos'
_VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)' _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'
@ -135,7 +154,7 @@ class RutubeEmbedIE(InfoExtractor):
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', 'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
'info_dict': { 'info_dict': {
'id': 'a10e53b86e8f349080f718582ce4c661', 'id': 'a10e53b86e8f349080f718582ce4c661',
'ext': 'flv', 'ext': 'mp4',
'timestamp': 1387830582, 'timestamp': 1387830582,
'upload_date': '20131223', 'upload_date': '20131223',
'uploader_id': '297833', 'uploader_id': '297833',
@ -149,16 +168,26 @@ class RutubeEmbedIE(InfoExtractor):
}, { }, {
'url': 'http://rutube.ru/play/embed/8083783', 'url': 'http://rutube.ru/play/embed/8083783',
'only_matching': True, 'only_matching': True,
}, {
# private video
'url': 'https://rutube.ru/play/embed/10631925?p=IbAigKqWd1do4mjaM5XLIQ',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
embed_id = self._match_id(url) embed_id = self._match_id(url)
webpage = self._download_webpage(url, embed_id) # Query may contain private videos token and should be passed to API
# requests (see #19163)
canonical_url = self._html_search_regex( query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage, options = self._download_api_options(embed_id, query)
'Canonical URL') video_id = options['effective_video']
return self.url_result(canonical_url, RutubeIE.ie_key()) formats = self._extract_formats(options, video_id)
info = self._download_and_extract_info(video_id, query)
info.update({
'extractor_key': 'Rutube',
'formats': formats,
})
return info
class RutubePlaylistBaseIE(RutubeBaseIE): class RutubePlaylistBaseIE(RutubeBaseIE):
@ -181,7 +210,7 @@ class RutubePlaylistBaseIE(RutubeBaseIE):
video_url = url_or_none(result.get('video_url')) video_url = url_or_none(result.get('video_url'))
if not video_url: if not video_url:
continue continue
entry = self._extract_video(result, require_title=False) entry = self._extract_info(result, require_title=False)
entry.update({ entry.update({
'_type': 'url', '_type': 'url',
'url': video_url, 'url': video_url,

View File

@ -16,7 +16,8 @@ from ..compat import (
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
unified_strdate, try_get,
unified_timestamp,
update_url_query, update_url_query,
url_or_none, url_or_none,
) )
@ -51,12 +52,17 @@ class SoundcloudIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '62986583', 'id': '62986583',
'ext': 'mp3', 'ext': 'mp3',
'upload_date': '20121011', 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d', 'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
'uploader': 'E.T. ExTerrestrial Music', 'uploader': 'E.T. ExTerrestrial Music',
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', 'timestamp': 1349920598,
'upload_date': '20121011',
'duration': 143, 'duration': 143,
'license': 'all-rights-reserved', 'license': 'all-rights-reserved',
'view_count': int,
'like_count': int,
'comment_count': int,
'repost_count': int,
} }
}, },
# not streamable song # not streamable song
@ -68,9 +74,14 @@ class SoundcloudIE(InfoExtractor):
'title': 'Goldrushed', 'title': 'Goldrushed',
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com', 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
'uploader': 'The Royal Concept', 'uploader': 'The Royal Concept',
'timestamp': 1337635207,
'upload_date': '20120521', 'upload_date': '20120521',
'duration': 227, 'duration': 30,
'license': 'all-rights-reserved', 'license': 'all-rights-reserved',
'view_count': int,
'like_count': int,
'comment_count': int,
'repost_count': int,
}, },
'params': { 'params': {
# rtmp # rtmp
@ -85,11 +96,16 @@ class SoundcloudIE(InfoExtractor):
'id': '123998367', 'id': '123998367',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Youtube - Dl Test Video \'\' Ä↭', 'title': 'Youtube - Dl Test Video \'\' Ä↭',
'uploader': 'jaimeMF',
'description': 'test chars: \"\'/\\ä↭', 'description': 'test chars: \"\'/\\ä↭',
'uploader': 'jaimeMF',
'timestamp': 1386604920,
'upload_date': '20131209', 'upload_date': '20131209',
'duration': 9, 'duration': 9,
'license': 'all-rights-reserved', 'license': 'all-rights-reserved',
'view_count': int,
'like_count': int,
'comment_count': int,
'repost_count': int,
}, },
}, },
# private link (alt format) # private link (alt format)
@ -100,11 +116,16 @@ class SoundcloudIE(InfoExtractor):
'id': '123998367', 'id': '123998367',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Youtube - Dl Test Video \'\' Ä↭', 'title': 'Youtube - Dl Test Video \'\' Ä↭',
'uploader': 'jaimeMF',
'description': 'test chars: \"\'/\\ä↭', 'description': 'test chars: \"\'/\\ä↭',
'uploader': 'jaimeMF',
'timestamp': 1386604920,
'upload_date': '20131209', 'upload_date': '20131209',
'duration': 9, 'duration': 9,
'license': 'all-rights-reserved', 'license': 'all-rights-reserved',
'view_count': int,
'like_count': int,
'comment_count': int,
'repost_count': int,
}, },
}, },
# downloadable song # downloadable song
@ -117,9 +138,14 @@ class SoundcloudIE(InfoExtractor):
'title': 'Bus Brakes', 'title': 'Bus Brakes',
'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66', 'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
'uploader': 'oddsamples', 'uploader': 'oddsamples',
'timestamp': 1389232924,
'upload_date': '20140109', 'upload_date': '20140109',
'duration': 17, 'duration': 17,
'license': 'cc-by-sa', 'license': 'cc-by-sa',
'view_count': int,
'like_count': int,
'comment_count': int,
'repost_count': int,
}, },
}, },
# private link, downloadable format # private link, downloadable format
@ -132,9 +158,14 @@ class SoundcloudIE(InfoExtractor):
'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]', 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366', 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
'uploader': 'Ori Uplift Music', 'uploader': 'Ori Uplift Music',
'timestamp': 1504206263,
'upload_date': '20170831', 'upload_date': '20170831',
'duration': 7449, 'duration': 7449,
'license': 'all-rights-reserved', 'license': 'all-rights-reserved',
'view_count': int,
'like_count': int,
'comment_count': int,
'repost_count': int,
}, },
}, },
# no album art, use avatar pic for thumbnail # no album art, use avatar pic for thumbnail
@ -147,10 +178,15 @@ class SoundcloudIE(InfoExtractor):
'title': 'Sideways (Prod. Mad Real)', 'title': 'Sideways (Prod. Mad Real)',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'uploader': 'garyvee', 'uploader': 'garyvee',
'timestamp': 1488152409,
'upload_date': '20170226', 'upload_date': '20170226',
'duration': 207, 'duration': 207,
'thumbnail': r're:https?://.*\.jpg', 'thumbnail': r're:https?://.*\.jpg',
'license': 'all-rights-reserved', 'license': 'all-rights-reserved',
'view_count': int,
'like_count': int,
'comment_count': int,
'repost_count': int,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -176,22 +212,33 @@ class SoundcloudIE(InfoExtractor):
def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None): def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None):
track_id = compat_str(info['id']) track_id = compat_str(info['id'])
title = info['title']
name = full_title or track_id name = full_title or track_id
if quiet: if quiet:
self.report_extraction(name) self.report_extraction(name)
thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url') thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url')
if isinstance(thumbnail, compat_str): if isinstance(thumbnail, compat_str):
thumbnail = thumbnail.replace('-large', '-t500x500') thumbnail = thumbnail.replace('-large', '-t500x500')
username = try_get(info, lambda x: x['user']['username'], compat_str)
def extract_count(key):
return int_or_none(info.get('%s_count' % key))
result = { result = {
'id': track_id, 'id': track_id,
'uploader': info.get('user', {}).get('username'), 'uploader': username,
'upload_date': unified_strdate(info.get('created_at')), 'timestamp': unified_timestamp(info.get('created_at')),
'title': info['title'], 'title': title,
'description': info.get('description'), 'description': info.get('description'),
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'duration': int_or_none(info.get('duration'), 1000), 'duration': int_or_none(info.get('duration'), 1000),
'webpage_url': info.get('permalink_url'), 'webpage_url': info.get('permalink_url'),
'license': info.get('license'), 'license': info.get('license'),
'view_count': extract_count('playback'),
'like_count': extract_count('favoritings'),
'comment_count': extract_count('comment'),
'repost_count': extract_count('reposts'),
'genre': info.get('genre'),
} }
formats = [] formats = []
query = {'client_id': self._CLIENT_ID} query = {'client_id': self._CLIENT_ID}

View File

@ -5,6 +5,7 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
orderedSet,
parse_duration, parse_duration,
parse_resolution, parse_resolution,
str_to_int, str_to_int,
@ -12,7 +13,7 @@ from ..utils import (
class SpankBangIE(InfoExtractor): class SpankBangIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|m|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video' _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)\b'
_TESTS = [{ _TESTS = [{
'url': 'http://spankbang.com/3vvn/video/fantasy+solo', 'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
'md5': '1cc433e1d6aa14bc376535b8679302f7', 'md5': '1cc433e1d6aa14bc376535b8679302f7',
@ -41,13 +42,22 @@ class SpankBangIE(InfoExtractor):
# 4k # 4k
'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k', 'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://m.spankbang.com/3vvn/play/fantasy+solo/480p/',
'only_matching': True,
}, {
'url': 'https://m.spankbang.com/3vvn/play',
'only_matching': True,
}, {
'url': 'https://spankbang.com/2y3td/embed/',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id, headers={ webpage = self._download_webpage(
'Cookie': 'country=US' url.replace('/%s/embed' % video_id, '/%s/video' % video_id),
}) video_id, headers={'Cookie': 'country=US'})
if re.search(r'<[^>]+\bid=["\']video_removed', webpage): if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
raise ExtractorError( raise ExtractorError(
@ -94,3 +104,33 @@ class SpankBangIE(InfoExtractor):
'formats': formats, 'formats': formats,
'age_limit': age_limit, 'age_limit': age_limit,
} }
class SpankBangPlaylistIE(InfoExtractor):
    """Extractor for spankbang.com playlist pages.

    Collects the video ids linked from the playlist page and returns them
    as a playlist of URL results handled by SpankBangIE.
    """

    _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/[^/]+'
    _TEST = {
        'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties',
        'info_dict': {
            'id': 'ug0k',
            'title': 'Big Ass Titties',
        },
        'playlist_mincount': 50,
    }

    def _real_extract(self, url):
        """Download the playlist page and build a playlist result from it."""
        playlist_id = self._match_id(url)

        # NOTE(review): presumably the "mobile=on" cookie selects the page
        # layout that exposes the "/<id>/play/" links matched below - confirm.
        page = self._download_webpage(
            url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})

        # Every anchor pointing at "<id>/play/" contributes one video id;
        # orderedSet() drops duplicates while keeping first-seen order.
        linked_ids = re.findall(
            r'<a[^>]+\bhref=["\']/?([\da-z]+)/play/', page)

        entries = []
        for video_id in orderedSet(linked_ids):
            entries.append(self.url_result(
                'https://spankbang.com/%s/video' % video_id,
                ie=SpankBangIE.ie_key(), video_id=video_id))

        # The title is optional: a missing heading must not abort extraction.
        playlist_title = self._html_search_regex(
            r'<h1>([^<]+)\s+playlist</h1>', page, 'playlist title',
            fatal=False)

        return self.playlist_result(entries, playlist_id, playlist_title)

View File

@ -3,22 +3,19 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .radiocanada import RadioCanadaIE
from ..utils import ( from ..utils import (
int_or_none,
js_to_json,
urlencode_postdata,
extract_attributes, extract_attributes,
smuggle_url, int_or_none,
merge_dicts,
urlencode_postdata,
) )
class TouTvIE(InfoExtractor): class TouTvIE(RadioCanadaIE):
_NETRC_MACHINE = 'toutv' _NETRC_MACHINE = 'toutv'
IE_NAME = 'tou.tv' IE_NAME = 'tou.tv'
_VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+[EC][0-9]+)?)' _VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+[EC][0-9]+)?)'
_access_token = None
_claims = None
_TESTS = [{ _TESTS = [{
'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17', 'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17',
@ -46,18 +43,14 @@ class TouTvIE(InfoExtractor):
email, password = self._get_login_info() email, password = self._get_login_info()
if email is None: if email is None:
return return
state = 'http://ici.tou.tv/'
webpage = self._download_webpage(state, None, 'Downloading homepage')
toutvlogin = self._parse_json(self._search_regex(
r'(?s)toutvlogin\s*=\s*({.+?});', webpage, 'toutvlogin'), None, js_to_json)
authorize_url = toutvlogin['host'] + '/auth/oauth/v2/authorize'
login_webpage = self._download_webpage( login_webpage = self._download_webpage(
authorize_url, None, 'Downloading login page', query={ 'https://services.radio-canada.ca/auth/oauth/v2/authorize',
'client_id': toutvlogin['clientId'], None, 'Downloading login page', query={
'redirect_uri': 'https://ici.tou.tv/login/loginCallback', 'client_id': '4dd36440-09d5-4468-8923-b6d91174ad36',
'redirect_uri': 'https://ici.tou.tv/logincallback',
'response_type': 'token', 'response_type': 'token',
'scope': 'media-drmt openid profile email id.write media-validation.read.privileged', 'scope': 'id.write media-validation.read',
'state': state, 'state': '/',
}) })
def extract_form_url_and_data(wp, default_form_url, form_spec_re=''): def extract_form_url_and_data(wp, default_form_url, form_spec_re=''):
@ -86,12 +79,7 @@ class TouTvIE(InfoExtractor):
self._access_token = self._search_regex( self._access_token = self._search_regex(
r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
urlh.geturl(), 'access token') urlh.geturl(), 'access token')
self._claims = self._download_json( self._claims = self._call_api('validation/v2/getClaims')['claims']
'https://services.radio-canada.ca/media/validation/v2/getClaims',
None, 'Extracting Claims', query={
'token': self._access_token,
'access_token': self._access_token,
})['claims']
def _real_extract(self, url): def _real_extract(self, url):
path = self._match_id(url) path = self._match_id(url)
@ -102,19 +90,10 @@ class TouTvIE(InfoExtractor):
self.report_warning('This video is probably DRM protected.', path) self.report_warning('This video is probably DRM protected.', path)
video_id = metadata['IdMedia'] video_id = metadata['IdMedia']
details = metadata['Details'] details = metadata['Details']
title = details['OriginalTitle']
video_url = 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id)
if self._access_token and self._claims:
video_url = smuggle_url(video_url, {
'access_token': self._access_token,
'claims': self._claims,
})
return { return merge_dicts({
'_type': 'url_transparent',
'url': video_url,
'id': video_id, 'id': video_id,
'title': title, 'title': details.get('OriginalTitle'),
'thumbnail': details.get('ImageUrl'), 'thumbnail': details.get('ImageUrl'),
'duration': int_or_none(details.get('LengthInSeconds')), 'duration': int_or_none(details.get('LengthInSeconds')),
} }, self._extract_info(metadata.get('AppCode', 'toutv'), video_id))

View File

@ -0,0 +1,75 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
dict_get,
float_or_none,
int_or_none,
unified_timestamp,
update_url_query,
url_or_none,
)
class TruNewsIE(InfoExtractor):
    """Extractor for trunews.com stream pages.

    The page slug is looked up through the Zype videos API; the actual media
    extraction is delegated to the Zype extractor via a ``url_transparent``
    result that carries the metadata gathered here.
    """

    _VALID_URL = r'https?://(?:www\.)?trunews\.com/stream/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.trunews.com/stream/will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
        'md5': 'a19c024c3906ff954fac9b96ce66bb08',
        'info_dict': {
            'id': '5c5a21e65d3c196e1c0020cc',
            'display_id': 'will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
            'ext': 'mp4',
            'title': "Will Democrats Stage a Circus During President Trump's State of the Union Speech?",
            'description': 'md5:c583b72147cc92cf21f56a31aff7a670',
            'duration': 3685,
            'timestamp': 1549411440,
            'upload_date': '20190206',
        },
        'add_ie': ['Zype'],
    }

    def _real_extract(self, url):
        """Resolve the stream slug to a Zype video and hand it to Zype."""
        display_id = self._match_id(url)

        # The URL slug is sent as "friendly_title"; only the first hit of the
        # response list is used.
        api_query = {
            'app_key': 'PUVKp9WgGUb3-JUw6EqafLx8tFVP6VKZTWbUOR-HOm__g4fNDt1bCsm_LgYf_k9H',
            'per_page': 1,
            'active': 'true',
            'friendly_title': display_id,
        }
        api_response = self._download_json(
            'https://api.zype.com/videos', display_id, query=api_query)
        video = api_response['response'][0]

        zype_id = video['_id']

        # Keep only well-formed thumbnail entries: dicts with a usable URL.
        thumbnails = []
        candidates = video.get('thumbnails')
        if not isinstance(candidates, list):
            candidates = []
        for candidate in candidates:
            if isinstance(candidate, dict):
                image_url = url_or_none(candidate.get('url'))
            else:
                image_url = None
            if image_url:
                thumbnails.append({
                    'url': image_url,
                    'width': int_or_none(candidate.get('width')),
                    'height': int_or_none(candidate.get('height')),
                })

        embed_url = update_url_query(
            'https://player.zype.com/embed/%s.js' % zype_id,
            {'api_key': 'X5XnahkjCwJrT_l5zUqypnaLEObotyvtUKJWWlONxDoHVjP8vqxlArLV8llxMbyt'})

        # First non-empty of the three description fields wins.
        description = dict_get(
            video, ('description', 'ott_description', 'short_description'))

        return {
            '_type': 'url_transparent',
            'url': embed_url,
            'ie_key': 'Zype',
            'id': zype_id,
            'display_id': display_id,
            'title': video.get('title'),
            'description': description,
            'duration': int_or_none(video.get('duration')),
            'timestamp': unified_timestamp(video.get('published_at')),
            'average_rating': float_or_none(video.get('rating')),
            'view_count': int_or_none(video.get('request_count')),
            'thumbnails': thumbnails,
        }

View File

@ -4,44 +4,72 @@ from __future__ import unicode_literals
import re import re
from .turner import TurnerBaseIE from .turner import TurnerBaseIE
from ..utils import (
int_or_none,
parse_iso8601,
)
class TruTVIE(TurnerBaseIE): class TruTVIE(TurnerBaseIE):
_VALID_URL = r'https?://(?:www\.)?trutv\.com(?:(?P<path>/shows/[^/]+/videos/[^/?#]+?)\.html|/full-episodes/[^/]+/(?P<id>\d+))' _VALID_URL = r'https?://(?:www\.)?trutv\.com/(?:shows|full-episodes)/(?P<series_slug>[0-9A-Za-z-]+)/(?:videos/(?P<clip_slug>[0-9A-Za-z-]+)|(?P<id>\d+))'
_TEST = { _TEST = {
'url': 'http://www.trutv.com/shows/10-things/videos/you-wont-believe-these-sports-bets.html', 'url': 'https://www.trutv.com/shows/the-carbonaro-effect/videos/sunlight-activated-flower.html',
'md5': '2cdc844f317579fed1a7251b087ff417',
'info_dict': { 'info_dict': {
'id': '/shows/10-things/videos/you-wont-believe-these-sports-bets', 'id': 'f16c03beec1e84cd7d1a51f11d8fcc29124cc7f1',
'ext': 'mp4', 'ext': 'mp4',
'title': 'You Won\'t Believe These Sports Bets', 'title': 'Sunlight-Activated Flower',
'description': 'Jamie Lee sits down with a bookie to discuss the bizarre world of illegal sports betting.', 'description': "A customer is stunned when he sees Michael's sunlight-activated flower.",
'upload_date': '20130305', },
} 'params': {
# m3u8 download
'skip_download': True,
},
} }
def _real_extract(self, url): def _real_extract(self, url):
path, video_id = re.match(self._VALID_URL, url).groups() series_slug, clip_slug, video_id = re.match(self._VALID_URL, url).groups()
auth_required = False
if path: if video_id:
data_src = 'http://www.trutv.com/video/cvp/v2/xml/content.xml?id=%s.xml' % path path = 'episode'
display_id = video_id
else: else:
webpage = self._download_webpage(url, video_id) path = 'series/clip'
video_id = self._search_regex( display_id = clip_slug
r"TTV\.TVE\.episodeId\s*=\s*'([^']+)';",
webpage, 'video id', default=video_id) data = self._download_json(
auth_required = self._search_regex( 'https://api.trutv.com/v2/web/%s/%s/%s' % (path, series_slug, display_id),
r'TTV\.TVE\.authRequired\s*=\s*(true|false);', display_id)
webpage, 'auth required', default='false') == 'true' video_data = data['episode'] if video_id else data['info']
data_src = 'http://www.trutv.com/tveverywhere/services/cvpXML.do?titleId=' + video_id media_id = video_data['mediaId']
return self._extract_cvp_info( title = video_data['title'].strip()
data_src, path, {
'secure': { info = self._extract_ngtv_info(
'media_src': 'http://androidhls-secure.cdn.turner.com/trutv/big', media_id, {}, {
'tokenizer_src': 'http://www.trutv.com/tveverywhere/processors/services/token_ipadAdobe.do',
},
}, {
'url': url, 'url': url,
'site_name': 'truTV', 'site_name': 'truTV',
'auth_required': auth_required, 'auth_required': video_data.get('isAuthRequired'),
}) })
thumbnails = []
for image in video_data.get('images', []):
image_url = image.get('srcUrl')
if not image_url:
continue
thumbnails.append({
'url': image_url,
'width': int_or_none(image.get('width')),
'height': int_or_none(image.get('height')),
})
info.update({
'id': media_id,
'display_id': display_id,
'title': title,
'description': video_data.get('description'),
'thumbnails': thumbnails,
'timestamp': parse_iso8601(video_data.get('publicationDate')),
'series': video_data.get('showTitle'),
'season_number': int_or_none(video_data.get('seasonNum')),
'episode_number': int_or_none(video_data.get('episodeNum')),
})
return info

View File

@ -493,10 +493,9 @@ class TVPlayHomeIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_id = self._search_regex( video_id = self._search_regex(
r'data-asset-id\s*=\s*["\'](\d{5,7})\b', webpage, 'video id', r'data-asset-id\s*=\s*["\'](\d{5,})\b', webpage, 'video id')
default=None)
if video_id: if len(video_id) < 8:
return self.url_result( return self.url_result(
'mtg:%s' % video_id, ie=TVPlayIE.ie_key(), video_id=video_id) 'mtg:%s' % video_id, ie=TVPlayIE.ie_key(), video_id=video_id)
@ -537,8 +536,9 @@ class TVPlayHomeIE(InfoExtractor):
r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number', r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number',
default=None)) default=None))
episode = self._search_regex( episode = self._search_regex(
r'(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'episode', (r'\bepisode\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
default=None, group='value') r'data-subtitle\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
'episode', default=None, group='value')
episode_number = int_or_none(self._search_regex( episode_number = int_or_none(self._search_regex(
r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number', r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number',
default=None)) default=None))

View File

@ -136,7 +136,12 @@ class TwitchBaseIE(InfoExtractor):
source = next(f for f in formats if f['format_id'] == 'Source') source = next(f for f in formats if f['format_id'] == 'Source')
source['preference'] = 10 source['preference'] = 10
except StopIteration: except StopIteration:
pass # No Source stream present for f in formats:
if '/chunked/' in f['url']:
f.update({
'source_preference': 10,
'format_note': 'Source',
})
self._sort_formats(formats) self._sort_formats(formats)

View File

@ -184,7 +184,7 @@ DATE_FORMATS_MONTH_FIRST.extend([
]) ])
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)" PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>' JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
def preferredencoding(): def preferredencoding():

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = 'vc.2019.02.03.1' __version__ = 'vc.2019.02.14'