1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-01-25 03:53:00 +08:00

Merge pull request #3 from rg3/master

updating again.
This commit is contained in:
Kade 2017-11-13 12:54:22 -05:00 committed by GitHub
commit e0e6429fac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
45 changed files with 722 additions and 285 deletions

View File

@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.20*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.20**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.11.06*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.11.06**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2017.10.20
[debug] youtube-dl version 2017.11.06
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@ -1,3 +1,50 @@
version <unreleased>
Extractors
+ [wsj] Recognize another URL pattern (#14704)
version 2017.11.06
Core
+ [extractor/common] Add protocol for f4m formats
* [f4m] Prefer baseURL for relative URLs (#14660)
* [extractor/common] Respect URL query in _extract_wowza_formats (#14645)
Extractors
+ [hotstar:playlist] Add support for playlists (#12465)
* [hotstar] Bypass geo restriction (#14672)
- [22tracks] Remove extractor (#11024, #14628)
+ [skysport] Add support for ooyala videos protected with embed_token (#14641)
* [gamespot] Extract formats referenced with new data fields (#14652)
* [spankbang] Detect unavailable videos (#14644)
version 2017.10.29
Core
* [extractor/common] Prefix format id for audio only HLS formats
+ [utils] Add support for zero years and months in parse_duration
Extractors
* [egghead] Fix extraction (#14388)
+ [fxnetworks] Extract series metadata (#14603)
+ [younow] Add support for younow.com (#9255, #9432, #12436)
* [dctptv] Fix extraction (#14599)
* [youtube] Restrict embed regex (#14600)
* [vimeo] Restrict iframe embed regex (#14600)
* [soundgasm] Improve extraction (#14588)
- [myvideo] Remove extractor (#8557)
+ [nbc] Add support for classic-tv videos (#14575)
+ [vrtnu] Add support for cookies authentication and simplify (#11873)
+ [canvas] Add support for vrt.be/vrtnu (#11873)
* [twitch:clips] Fix title extraction (#14566)
+ [ndtv] Add support for sub-sites (#14534)
* [dramafever] Fix login error message extraction
+ [nick] Add support for more nickelodeon sites (no, dk, se, ch, fr, es, pt,
ro, hu) (#14553)
version 2017.10.20
Core

View File

@ -3,8 +3,6 @@
- **1up.com**
- **20min**
- **220.ro**
- **22tracks:genre**
- **22tracks:track**
- **24video**
- **3qsdn**: 3Q SDN
- **3sat**
@ -342,6 +340,7 @@
- **HornBunny**
- **HotNewHipHop**
- **HotStar**
- **hotstar:playlist**
- **Howcast**
- **HowStuffWorks**
- **HRTi**
@ -498,7 +497,6 @@
- **MySpace:album**
- **MySpass**
- **Myvi**
- **myvideo** (Currently broken)
- **MyVidster**
- **n-tv.de**
- **natgeo**
@ -977,6 +975,7 @@
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **Vrak**
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
- **VrtNU**: VrtNU.be
- **vrv**
- **vrv:series**
- **VShare**
@ -1035,6 +1034,9 @@
- **YouJizz**
- **youku**: 优酷
- **youku:show**
- **YouNowChannel**
- **YouNowLive**
- **YouNowMoment**
- **YouPorn**
- **YourUpload**
- **youtube**: YouTube.com

View File

@ -574,6 +574,32 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
def test_parse_f4m_formats(self):
    # Parse every bundled F4M fixture and compare the sorted result with the
    # expected format list.
    # Each case: (fixture name under test/testdata/f4m, manifest URL,
    # expected formats).
    cases = (
        (
            # https://github.com/rg3/youtube-dl/issues/14660
            'custom_base_url',
            'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
            [{
                'manifest_url': 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
                'ext': 'flv',
                'format_id': '2148',
                'protocol': 'f4m',
                'tbr': 2148,
                'width': 1280,
                'height': 720,
            }],
        ),
    )

    for fixture_name, manifest_url, expected in cases:
        fixture_path = './test/testdata/f4m/%s.f4m' % fixture_name
        with io.open(fixture_path, mode='r', encoding='utf-8') as fixture:
            # The fixture is read as text and re-encoded before parsing,
            # exactly as the raw bytes are handed to the XML parser.
            manifest = compat_etree_fromstring(fixture.read().encode('utf-8'))
            parsed = self.ie._parse_f4m_formats(manifest, manifest_url, None)
            self.ie._sort_formats(parsed)
            expect_value(self, parsed, expected, None)
if __name__ == '__main__':
unittest.main()

View File

@ -540,6 +540,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_duration('87 Min.'), 5220)
self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88)
def test_fix_xml_ampersands(self):
self.assertEqual(

10
test/testdata/f4m/custom_base_url.f4m vendored Normal file
View File

@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<manifest xmlns="http://ns.adobe.com/f4m/1.0">
<streamType>recorded</streamType>
<baseURL>http://vod.livestream.com/events/0000000000673980/</baseURL>
<duration>269.293</duration>
<bootstrapInfo profile="named" id="bootstrap_1">AAAAm2Fic3QAAAAAAAAAAQAAAAPoAAAAAAAEG+0AAAAAAAAAAAAAAAAAAQAAABlhc3J0AAAAAAAAAAABAAAAAQAAAC4BAAAAVmFmcnQAAAAAAAAD6AAAAAAEAAAAAQAAAAAAAAAAAAAXcAAAAC0AAAAAAAQHQAAAE5UAAAAuAAAAAAAEGtUAAAEYAAAAAAAAAAAAAAAAAAAAAAA=</bootstrapInfo>
<media url="b90f532f-b0f6-4f4e-8289-706d490b2fd8_2292" bootstrapInfoId="bootstrap_1" bitrate="2148" width="1280" height="720" videoCodec="avc1.4d401f" audioCodec="mp4a.40.2">
<metadata>AgAKb25NZXRhRGF0YQgAAAAIAAhkdXJhdGlvbgBAcNSwIMSbpgAFd2lkdGgAQJQAAAAAAAAABmhlaWdodABAhoAAAAAAAAAJZnJhbWVyYXRlAEA4/7DoLwW3AA12aWRlb2RhdGFyYXRlAECe1DLgjcobAAx2aWRlb2NvZGVjaWQAQBwAAAAAAAAADWF1ZGlvZGF0YXJhdGUAQGSimlvaPKQADGF1ZGlvY29kZWNpZABAJAAAAAAAAAAACQ==</metadata>
</media>
</manifest>

View File

@ -243,8 +243,17 @@ def remove_encrypted_media(media):
media))
def _add_ns(prop):
return '{http://ns.adobe.com/f4m/1.0}%s' % prop
def _add_ns(prop, ver=1):
    """Qualify *prop* with the Adobe F4M XML namespace of major version *ver*.

    Returns the tag in ElementTree's ``{namespace}name`` form.
    """
    namespace = 'http://ns.adobe.com/f4m/%d.0' % ver
    return '{%s}%s' % (namespace, prop)
def get_base_url(manifest):
    """Return the manifest's ``baseURL`` text with surrounding whitespace removed.

    The element is looked up under both the F4M 1.0 and 2.0 namespaces.
    When the lookup yields nothing (or an empty string) that falsy value is
    returned unchanged.
    """
    candidates = [_add_ns('baseURL'), _add_ns('baseURL', 2)]
    found = xpath_text(manifest, candidates, 'base URL', default=None)
    return found.strip() if found else found
class F4mFD(FragmentFD):
@ -330,13 +339,13 @@ class F4mFD(FragmentFD):
rate, media = list(filter(
lambda f: int(f[0]) == requested_bitrate, formats))[0]
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
# Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
man_base_url = get_base_url(doc) or man_url
base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
# From Adobe F4M 3.0 spec:
# The <baseURL> element SHALL be the base URL for all relative
# (HTTP-based) URLs in the manifest. If <baseURL> is not present, said
# URLs should be relative to the location of the containing document.
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url)
boot_info, bootstrap_url = self._parse_bootstrap_node(
bootstrap_node, man_base_url)
live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:

View File

@ -78,7 +78,7 @@ class AnimeOnDemandIE(InfoExtractor):
post_url = urljoin(self._LOGIN_URL, post_url)
response = self._download_webpage(
post_url, None, 'Logging in as %s' % username,
post_url, None, 'Logging in',
data=urlencode_postdata(login_form), headers={
'Referer': self._LOGIN_URL,
})

View File

@ -87,7 +87,7 @@ class AtresPlayerIE(InfoExtractor):
self._LOGIN_URL, urlencode_postdata(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
response = self._download_webpage(
request, None, 'Logging in as %s' % username)
request, None, 'Logging in')
error = self._html_search_regex(
r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>',

View File

@ -59,7 +59,7 @@ class BambuserIE(InfoExtractor):
self._LOGIN_URL, urlencode_postdata(login_form))
request.add_header('Referer', self._LOGIN_URL)
response = self._download_webpage(
request, None, 'Logging in as %s' % username)
request, None, 'Logging in')
login_error = self._html_search_regex(
r'(?s)<div class="messages error">(.+?)</div>',

View File

@ -31,7 +31,7 @@ class CartoonNetworkIE(TurnerBaseIE):
'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, {
'secure': {
'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big',
'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do',
'tokenizer_src': 'https://token.vgtf.net/token/token_mobile',
},
}, {
'url': url,

View File

@ -93,7 +93,7 @@ class CCMAIE(InfoExtractor):
'description': clean_html(informacio.get('descripcio')),
'duration': duration,
'timestamp': timestamp,
'thumnails': thumbnails,
'thumbnails': thumbnails,
'subtitles': subtitles,
'formats': formats,
}

View File

@ -29,7 +29,10 @@ from ..compat import (
compat_urlparse,
compat_xml_parse_error,
)
from ..downloader.f4m import remove_encrypted_media
from ..downloader.f4m import (
get_base_url,
remove_encrypted_media,
)
from ..utils import (
NO_DEFAULT,
age_restricted,
@ -1255,11 +1258,8 @@ class InfoExtractor(object):
media_nodes = remove_encrypted_media(media_nodes)
if not media_nodes:
return formats
base_url = xpath_text(
manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
'base URL', default=None)
if base_url:
base_url = base_url.strip()
manifest_base_url = get_base_url(manifest)
bootstrap_info = xpath_element(
manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
@ -1291,7 +1291,7 @@ class InfoExtractor(object):
continue
manifest_url = (
media_url if media_url.startswith('http://') or media_url.startswith('https://')
else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
# If media_url is itself a f4m manifest do the recursive extraction
# since bitrates in parent manifest (this one) and media_url manifest
# may differ leading to inability to resolve the format by requested
@ -1326,6 +1326,7 @@ class InfoExtractor(object):
'url': manifest_url,
'manifest_url': manifest_url,
'ext': 'flv' if bootstrap_info is not None else None,
'protocol': 'f4m',
'tbr': tbr,
'width': width,
'height': height,
@ -1417,7 +1418,7 @@ class InfoExtractor(object):
media_url = media.get('URI')
if media_url:
format_id = []
for v in (group_id, name):
for v in (m3u8_id, group_id, name):
if v:
format_id.append(v)
f = {
@ -2249,27 +2250,35 @@ class InfoExtractor(object):
return formats
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
query = compat_urlparse.urlparse(url).query
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
url_base = self._search_regex(
r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url')
http_base_url = '%s:%s' % ('http', url_base)
formats = []
def manifest_url(manifest):
m_url = '%s/%s' % (http_base_url, manifest)
if query:
m_url += '?%s' % query
return m_url
if 'm3u8' not in skip_protocols:
formats.extend(self._extract_m3u8_formats(
http_base_url + '/playlist.m3u8', video_id, 'mp4',
manifest_url('playlist.m3u8'), video_id, 'mp4',
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
if 'f4m' not in skip_protocols:
formats.extend(self._extract_f4m_formats(
http_base_url + '/manifest.f4m',
manifest_url('manifest.f4m'),
video_id, f4m_id='hds', fatal=False))
if 'dash' not in skip_protocols:
formats.extend(self._extract_mpd_formats(
http_base_url + '/manifest.mpd',
manifest_url('manifest.mpd'),
video_id, mpd_id='dash', fatal=False))
if re.search(r'(?:/smil:|\.smil)', url_base):
if 'smil' not in skip_protocols:
rtmp_formats = self._extract_smil_formats(
http_base_url + '/jwplayer.smil',
manifest_url('jwplayer.smil'),
video_id, fatal=False)
for rtmp_format in rtmp_formats:
rtsp_format = rtmp_format.copy()

View File

@ -43,6 +43,17 @@ class CrunchyrollBaseIE(InfoExtractor):
if username is None:
return
self._download_webpage(
'https://www.crunchyroll.com/?a=formhandler',
None, 'Logging in', 'Wrong login info',
data=urlencode_postdata({
'formname': 'RpcApiUser_Login',
'next_url': 'https://www.crunchyroll.com/acct/membership',
'name': username,
'password': password,
}))
'''
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
@ -86,6 +97,7 @@ class CrunchyrollBaseIE(InfoExtractor):
raise ExtractorError('Unable to login: %s' % error, expected=True)
raise ExtractorError('Unable to log in')
'''
def _real_initialize(self):
self._login()

View File

@ -2,53 +2,85 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import unified_strdate
from ..compat import compat_str
from ..utils import (
float_or_none,
unified_strdate,
)
class DctpTvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P<id>.+?)/$'
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
'md5': '174dd4a8a6225cf5655952f969cfbe24',
'info_dict': {
'id': '95eaa4f33dad413aa17b4ee613cccc6c',
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
'ext': 'mp4',
'ext': 'flv',
'title': 'Videoinstallation für eine Kaufhausfassade',
'description': 'Kurzfilm',
'upload_date': '20110407',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 71.24,
},
'params': {
# rtmp download
'skip_download': True,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
display_id = self._match_id(url)
object_id = self._html_search_meta('DC.identifier', webpage)
webpage = self._download_webpage(url, display_id)
servers_json = self._download_json(
'http://www.dctp.tv/elastic_streaming_client/get_streaming_server/',
video_id, note='Downloading server list')
server = servers_json[0]['server']
m3u8_path = self._search_regex(
r'\'([^\'"]+/playlist\.m3u8)"', webpage, 'm3u8 path')
formats = self._extract_m3u8_formats(
'http://%s%s' % (server, m3u8_path), video_id, ext='mp4',
entry_protocol='m3u8_native')
video_id = self._html_search_meta(
'DC.identifier', webpage, 'video id',
default=None) or self._search_regex(
r'id=["\']uuid[^>]+>([^<]+)<', webpage, 'video id')
title = self._og_search_title(webpage)
servers = self._download_json(
    'http://www.dctp.tv/streaming_servers/', display_id,
    note='Downloading server list', fatal=False)

# Use the first server entry whose endpoint is a string mentioning
# "cloudfront". next() is given a default so that a server list without
# such an entry no longer raises StopIteration; in that case, as when the
# list could not be downloaded at all, the known endpoint below is used.
endpoint = None
if servers:
    endpoint = next(
        (server['endpoint']
         for server in servers
         if isinstance(server.get('endpoint'), compat_str) and
         'cloudfront' in server['endpoint']),
        None)
if not endpoint:
    endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
app = self._search_regex(
r'^rtmpe?://[^/]+/(?P<app>.*)$', endpoint, 'app')
formats = [{
'url': endpoint,
'app': app,
'play_path': 'mp4:%s_dctp_0500_4x3.m4v' % video_id,
'page_url': url,
'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-109.swf',
'ext': 'flv',
}]
description = self._html_search_meta('DC.description', webpage)
upload_date = unified_strdate(
self._html_search_meta('DC.date.created', webpage))
thumbnail = self._og_search_thumbnail(webpage)
# The page exposes the duration in milliseconds inside the element whose id
# is "duration_in_ms"; scale=1000 converts it to seconds.
# [^>]+> skips the remainder of the opening tag (closing quote and any
# further attributes), mirroring the uuid regex used for the video id.
# The previous "[^+]>" only matched when exactly one character separated
# the id value from ">".
duration = float_or_none(self._search_regex(
    r'id=["\']duration_in_ms[^>]+>(\d+)', webpage, 'duration',
    default=None), scale=1000)
return {
'id': object_id,
'id': video_id,
'title': title,
'formats': formats,
'display_id': video_id,
'display_id': display_id,
'description': description,
'upload_date': upload_date,
'thumbnail': thumbnail,
'duration': duration,
}

View File

@ -54,7 +54,7 @@ class DramaFeverBaseIE(AMPIE):
request = sanitized_Request(
self._LOGIN_URL, urlencode_postdata(login_form))
response = self._download_webpage(
request, None, 'Logging in as %s' % username)
request, None, 'Logging in')
if all(logout_pattern not in response
for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):

View File

@ -2,7 +2,9 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
int_or_none,
try_get,
unified_timestamp,
@ -17,7 +19,7 @@ class EggheadCourseIE(InfoExtractor):
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
'playlist_count': 29,
'info_dict': {
'id': 'professor-frisby-introduces-composable-functional-javascript',
'id': '72',
'title': 'Professor Frisby Introduces Composable Functional JavaScript',
'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
},
@ -26,14 +28,28 @@ class EggheadCourseIE(InfoExtractor):
def _real_extract(self, url):
playlist_id = self._match_id(url)
course = self._download_json(
'https://egghead.io/api/v1/series/%s' % playlist_id, playlist_id)
lessons = self._download_json(
'https://egghead.io/api/v1/series/%s/lessons' % playlist_id,
playlist_id, 'Downloading course lessons JSON')
entries = [
self.url_result(
'wistia:%s' % lesson['wistia_id'], ie='Wistia',
video_id=lesson['wistia_id'], video_title=lesson.get('title'))
for lesson in course['lessons'] if lesson.get('wistia_id')]
entries = []
for lesson in lessons:
lesson_url = lesson.get('http_url')
if not lesson_url or not isinstance(lesson_url, compat_str):
continue
lesson_id = lesson.get('id')
if lesson_id:
lesson_id = compat_str(lesson_id)
entries.append(self.url_result(
lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))
course = self._download_json(
'https://egghead.io/api/v1/series/%s' % playlist_id,
playlist_id, 'Downloading course JSON', fatal=False) or {}
playlist_id = course.get('id')
if playlist_id:
playlist_id = compat_str(playlist_id)
return self.playlist_result(
entries, playlist_id, course.get('title'),
@ -43,11 +59,12 @@ class EggheadCourseIE(InfoExtractor):
class EggheadLessonIE(InfoExtractor):
IE_DESC = 'egghead.io lesson'
IE_NAME = 'egghead:lesson'
_VALID_URL = r'https://egghead\.io/lessons/(?P<id>[^/?#&]+)'
_TEST = {
_VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
'info_dict': {
'id': 'fv5yotjxcg',
'id': '1196',
'display_id': 'javascript-linear-data-flow-with-container-style-types-box',
'ext': 'mp4',
'title': 'Create linear data flow with container style types (Box)',
'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e',
@ -60,25 +77,51 @@ class EggheadLessonIE(InfoExtractor):
},
'params': {
'skip_download': True,
'format': 'bestvideo',
},
}
}, {
'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
'only_matching': True,
}]
def _real_extract(self, url):
lesson_id = self._match_id(url)
display_id = self._match_id(url)
lesson = self._download_json(
'https://egghead.io/api/v1/lessons/%s' % lesson_id, lesson_id)
'https://egghead.io/api/v1/lessons/%s' % display_id, display_id)
lesson_id = compat_str(lesson['id'])
title = lesson['title']
formats = []
for _, format_url in lesson['media_urls'].items():
if not format_url or not isinstance(format_url, compat_str):
continue
ext = determine_ext(format_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, lesson_id, 'mp4', entry_protocol='m3u8',
m3u8_id='hls', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
format_url, lesson_id, mpd_id='dash', fatal=False))
else:
formats.append({
'url': format_url,
})
self._sort_formats(formats)
return {
'_type': 'url_transparent',
'ie_key': 'Wistia',
'url': 'wistia:%s' % lesson['wistia_id'],
'id': lesson['wistia_id'],
'title': lesson.get('title'),
'id': lesson_id,
'display_id': display_id,
'title': title,
'description': lesson.get('summary'),
'thumbnail': lesson.get('thumb_nail'),
'timestamp': unified_timestamp(lesson.get('published_at')),
'duration': int_or_none(lesson.get('duration')),
'view_count': int_or_none(lesson.get('plays_count')),
'tags': try_get(lesson, lambda x: x['tag_list'], list),
'series': try_get(
lesson, lambda x: x['series']['title'], compat_str),
'formats': formats,
}

View File

@ -432,7 +432,10 @@ from .hitbox import HitboxIE, HitboxLiveIE
from .hitrecord import HitRecordIE
from .hornbunny import HornBunnyIE
from .hotnewhiphop import HotNewHipHopIE
from .hotstar import HotStarIE
from .hotstar import (
HotStarIE,
HotStarPlaylistIE,
)
from .howcast import HowcastIE
from .howstuffworks import HowStuffWorksIE
from .hrti import (
@ -1110,10 +1113,6 @@ from .tvplayer import TVPlayerIE
from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE
from .twentymin import TwentyMinutenIE
from .twentytwotracks import (
TwentyTwoTracksIE,
TwentyTwoTracksGenreIE
)
from .twitch import (
TwitchVideoIE,
TwitchChapterIE,
@ -1335,6 +1334,11 @@ from .youku import (
YoukuIE,
YoukuShowIE,
)
from .younow import (
YouNowLiveIE,
YouNowChannelIE,
YouNowMomentIE,
)
from .youporn import YouPornIE
from .yourupload import YourUploadIE
from .youtube import (

View File

@ -57,7 +57,7 @@ class FunimationIE(InfoExtractor):
try:
data = self._download_json(
'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/',
None, 'Logging in as %s' % username, data=urlencode_postdata({
None, 'Logging in', data=urlencode_postdata({
'username': username,
'password': password,
}))

View File

@ -3,27 +3,31 @@ from __future__ import unicode_literals
from .adobepass import AdobePassIE
from ..utils import (
update_url_query,
extract_attributes,
int_or_none,
parse_age_limit,
smuggle_url,
update_url_query,
)
class FXNetworksIE(AdobePassIE):
_VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.fxnetworks.com/video/719841347694',
'md5': '1447d4722e42ebca19e5232ab93abb22',
'url': 'http://www.fxnetworks.com/video/1032565827847',
'md5': '8d99b97b4aa7a202f55b6ed47ea7e703',
'info_dict': {
'id': '719841347694',
'id': 'dRzwHC_MMqIv',
'ext': 'mp4',
'title': 'Vanpage',
'description': 'F*ck settling down. You\'re the Worst returns for an all new season August 31st on FXX.',
'title': 'First Look: Better Things - Season 2',
'description': 'Because real life is like a fart. Watch this FIRST LOOK to see what inspired the new season of Better Things.',
'age_limit': 14,
'uploader': 'NEWA-FNG-FX',
'upload_date': '20160706',
'timestamp': 1467844741,
'upload_date': '20170825',
'timestamp': 1503686274,
'episode_number': 0,
'season_number': 2,
'series': 'Better Things',
},
'add_ie': ['ThePlatform'],
}, {
@ -64,6 +68,9 @@ class FXNetworksIE(AdobePassIE):
'id': video_id,
'title': title,
'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
'series': video_data.get('data-show-title'),
'episode_number': int_or_none(video_data.get('data-episode')),
'season_number': int_or_none(video_data.get('data-season')),
'thumbnail': video_data.get('data-large-thumb'),
'age_limit': parse_age_limit(rating),
'ie_key': 'ThePlatform',

View File

@ -14,7 +14,7 @@ from ..utils import (
class GameSpotIE(OnceIE):
_VALID_URL = r'https?://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'
_VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
@ -35,6 +35,12 @@ class GameSpotIE(OnceIE):
'params': {
'skip_download': True, # m3u8 downloads
},
}, {
'url': 'https://www.gamespot.com/videos/embed/6439218/',
'only_matching': True,
}, {
'url': 'https://www.gamespot.com/articles/the-last-of-us-2-receives-new-ps4-trailer/1100-6454469/',
'only_matching': True,
}]
def _real_extract(self, url):
@ -52,7 +58,7 @@ class GameSpotIE(OnceIE):
manifest_url = f4m_url
formats.extend(self._extract_f4m_formats(
f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False))
m3u8_url = streams.get('m3u8_stream')
m3u8_url = dict_get(streams, ('m3u8_stream', 'adaptive_stream'))
if m3u8_url:
manifest_url = m3u8_url
m3u8_formats = self._extract_m3u8_formats(
@ -60,7 +66,7 @@ class GameSpotIE(OnceIE):
m3u8_id='hls', fatal=False)
formats.extend(m3u8_formats)
progressive_url = dict_get(
streams, ('progressive_hd', 'progressive_high', 'progressive_low'))
streams, ('progressive_hd', 'progressive_high', 'progressive_low', 'other_lr'))
if progressive_url and manifest_url:
qualities_basename = self._search_regex(
r'/([^/]+)\.csmil/',
@ -105,7 +111,8 @@ class GameSpotIE(OnceIE):
onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri')
if onceux_url:
formats.extend(self._extract_once_formats(re.sub(
r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url)))
r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url),
http_formats_preference=-1))
if not formats:
for quality in ['sd', 'hd']:

View File

@ -1,22 +1,47 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
determine_ext,
ExtractorError,
int_or_none,
)
class HotStarIE(InfoExtractor):
class HotStarBaseIE(InfoExtractor):
    # Shared base for the hotstar.com extractors: unwraps the API response
    # envelope and provides the content-details lookup.
    _GEO_COUNTRIES = ['IN']

    def _download_json(self, *args, **kwargs):
        """Download a Hotstar API document and return its ``resultObj``.

        Arguments are forwarded unchanged to the parent ``_download_json``.

        Returns ``None`` when the download produced nothing or when the API
        reported an error and the caller passed ``fatal=False``.

        Raises ExtractorError (expected) carrying the API's
        ``errorDescription`` when ``resultCode`` is not ``'OK'`` and the
        request is fatal.
        """
        response = super(HotStarBaseIE, self)._download_json(*args, **kwargs)
        # NOTE(review): presumably the parent returns a falsy value when a
        # non-fatal download fails - confirm. Guarding here avoids indexing
        # into a non-dict in that case.
        if not response:
            return None
        if response['resultCode'] != 'OK':
            # Requests are fatal unless the caller opts out. Without the
            # explicit default, an omitted ``fatal`` keyword was treated as
            # non-fatal and API errors were silently turned into None.
            # NOTE(review): only a keyword ``fatal`` is inspected; a
            # positional one is not visible here.
            if kwargs.get('fatal', True):
                raise ExtractorError(
                    response['errorDescription'], expected=True)
            return None
        return response['resultObj']

    def _download_content_info(self, content_id):
        """Return the first ``contentInfo`` entry for *content_id*."""
        return self._download_json(
            'https://account.hotstar.com/AVS/besc', content_id, query={
                'action': 'GetAggregatedContentDetails',
                'appVersion': '5.0.40',
                'channel': 'PCTV',
                'contentId': content_id,
            })['contentInfo'][0]
class HotStarIE(HotStarBaseIE):
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
_TESTS = [{
'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
'info_dict': {
'id': '1000076273',
'ext': 'mp4',
'title': 'On Air With AIB - English',
'title': 'On Air With AIB',
'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
'timestamp': 1447227000,
'upload_date': '20151111',
@ -34,23 +59,11 @@ class HotStarIE(InfoExtractor):
'only_matching': True,
}]
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True, query=None):
json_data = super(HotStarIE, self)._download_json(
url_or_request, video_id, note, fatal=fatal, query=query)
if json_data['resultCode'] != 'OK':
if fatal:
raise ExtractorError(json_data['errorDescription'])
return None
return json_data['resultObj']
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
'http://account.hotstar.com/AVS/besc', video_id, query={
'action': 'GetAggregatedContentDetails',
'channel': 'PCTV',
'contentId': video_id,
})['contentInfo'][0]
video_data = self._download_content_info(video_id)
title = video_data['episodeTitle']
if video_data.get('encrypted') == 'Y':
@ -99,3 +112,51 @@ class HotStarIE(InfoExtractor):
'episode_number': int_or_none(video_data.get('episodeNumber')),
'series': video_data.get('contentTitle'),
}
class HotStarPlaylistIE(HotStarBaseIE):
    # Playlist extractor for hotstar.com TV show listing pages
    # (/tv/<show>/<content id>/<type>/<id>).
    IE_NAME = 'hotstar:playlist'
    _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com/tv/[^/]+/(?P<content_id>\d+))/(?P<type>[^/]+)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.hotstar.com/tv/pratidaan/14982/episodes/14812/9993',
        'info_dict': {
            'id': '14812',
        },
        'playlist_mincount': 75,
    }, {
        'url': 'http://www.hotstar.com/tv/pratidaan/14982/popular-clips/9998/9998',
        'only_matching': True,
    }]

    # URL path segment -> value sent as the search API's ``type`` parameter.
    _ITEM_TYPES = {
        'episodes': 'EPISODE',
        'popular-clips': 'CLIPS',
    }

    def _real_extract(self, url):
        """Build a playlist of HotStar video URLs for the show behind *url*."""
        show_url, content_id, listing_kind = re.match(
            self._VALID_URL, url).group('url', 'content_id', 'type')

        # The playlist id is the show's categoryId taken from the content
        # details, not the numeric id found in the URL.
        details = self._download_content_info(content_id)
        playlist_id = compat_str(details['categoryId'])

        search_query = {
            'action': 'SearchContents',
            'appVersion': '5.0.40',
            'channel': 'PCTV',
            'moreFilters': 'series:%s;' % playlist_id,
            'query': '*',
            'searchOrder': 'last_broadcast_date desc,year desc,title asc',
            # Unknown listing kinds are searched as episodes.
            'type': self._ITEM_TYPES.get(listing_kind, 'EPISODE'),
        }
        collection = self._download_json(
            'https://search.hotstar.com/AVS/besc', playlist_id,
            query=search_query)

        entries = []
        for doc in collection['response']['docs']:
            doc_id = doc.get('contentId')
            if not doc_id:
                continue
            entries.append(self.url_result(
                '%s/_/%s' % (show_url, doc_id),
                ie=HotStarIE.ie_key(), video_id=doc_id))

        return self.playlist_result(entries, playlist_id)

View File

@ -1,5 +1,6 @@
from __future__ import unicode_literals
import itertools
import re
from .common import InfoExtractor
@ -7,7 +8,6 @@ from ..compat import compat_str
from ..utils import (
get_element_by_attribute,
int_or_none,
limit_length,
lowercase_escape,
try_get,
)
@ -212,7 +212,7 @@ class InstagramIE(InfoExtractor):
class InstagramUserIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
_VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])'
IE_DESC = 'Instagram user profile'
IE_NAME = 'instagram:user'
_TEST = {
@ -221,82 +221,79 @@ class InstagramUserIE(InfoExtractor):
'id': 'porsche',
'title': 'porsche',
},
'playlist_mincount': 2,
'playlist': [{
'info_dict': {
'id': '614605558512799803_462752227',
'ext': 'mp4',
'title': '#Porsche Intelligent Performance.',
'thumbnail': r're:^https?://.*\.jpg',
'uploader': 'Porsche',
'uploader_id': 'porsche',
'timestamp': 1387486713,
'upload_date': '20131219',
},
}],
'playlist_count': 5,
'params': {
'extract_flat': True,
'skip_download': True,
'playlistend': 5,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
uploader_id = mobj.group('username')
def _entries(self, uploader_id):
query = {
'__a': 1,
}
entries = []
page_count = 0
media_url = 'http://instagram.com/%s/media' % uploader_id
while True:
def get_count(kind):
return int_or_none(try_get(
node, lambda x: x['%ss' % kind]['count']))
for page_num in itertools.count(1):
page = self._download_json(
media_url, uploader_id,
note='Downloading page %d ' % (page_count + 1),
)
page_count += 1
'https://instagram.com/%s/' % uploader_id, uploader_id,
note='Downloading page %d' % page_num,
fatal=False, query=query)
if not page:
break
for it in page['items']:
if it.get('type') != 'video':
nodes = try_get(page, lambda x: x['user']['media']['nodes'], list)
if not nodes:
break
max_id = None
for node in nodes:
node_id = node.get('id')
if node_id:
max_id = node_id
if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True:
continue
video_id = node.get('code')
if not video_id:
continue
like_count = int_or_none(it.get('likes', {}).get('count'))
user = it.get('user', {})
formats = [{
'format_id': k,
'height': v.get('height'),
'width': v.get('width'),
'url': v['url'],
} for k, v in it['videos'].items()]
self._sort_formats(formats)
info = self.url_result(
'https://instagram.com/p/%s/' % video_id,
ie=InstagramIE.ie_key(), video_id=video_id)
thumbnails_el = it.get('images', {})
thumbnail = thumbnails_el.get('thumbnail', {}).get('url')
description = try_get(
node, [lambda x: x['caption'], lambda x: x['text']['id']],
compat_str)
thumbnail = node.get('thumbnail_src') or node.get('display_src')
timestamp = int_or_none(node.get('date'))
# In some cases caption is null, which corresponds to None
# in python. As a result, it.get('caption', {}) gives None
title = (it.get('caption') or {}).get('text', it['id'])
comment_count = get_count('comment')
like_count = get_count('like')
view_count = int_or_none(node.get('video_views'))
entries.append({
'id': it['id'],
'title': limit_length(title, 80),
'formats': formats,
info.update({
'description': description,
'thumbnail': thumbnail,
'webpage_url': it.get('link'),
'uploader': user.get('full_name'),
'uploader_id': user.get('username'),
'timestamp': timestamp,
'comment_count': comment_count,
'like_count': like_count,
'timestamp': int_or_none(it.get('created_time')),
'view_count': view_count,
})
if not page['items']:
break
max_id = page['items'][-1]['id'].split('_')[0]
media_url = (
'http://instagram.com/%s/media?max_id=%s' % (
uploader_id, max_id))
yield info
return {
'_type': 'playlist',
'entries': entries,
'id': uploader_id,
'title': uploader_id,
}
if not max_id:
break
query['max_id'] = max_id
def _real_extract(self, url):
uploader_id = self._match_id(url)
return self.playlist_result(
self._entries(uploader_id), uploader_id, uploader_id)

View File

@ -70,7 +70,7 @@ class NocoIE(InfoExtractor):
return
login = self._download_json(
self._LOGIN_URL, None, 'Logging in as %s' % username,
self._LOGIN_URL, None, 'Logging in',
data=urlencode_postdata({
'a': 'login',
'cookie': '1',

View File

@ -11,7 +11,7 @@ class OnceIE(InfoExtractor):
ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8'
PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4'
def _extract_once_formats(self, url):
def _extract_once_formats(self, url, http_formats_preference=None):
domain_id, application_id, media_item_id = re.match(
OnceIE._VALID_URL, url).groups()
formats = self._extract_m3u8_formats(
@ -35,6 +35,7 @@ class OnceIE(InfoExtractor):
'format_id': adaptive_format['format_id'].replace(
'hls', 'http'),
'protocol': 'http',
'preference': http_formats_preference,
})
progressive_formats.append(progressive_format)
self._check_formats(progressive_formats, media_item_id)

View File

@ -33,7 +33,7 @@ class PandaTVIE(InfoExtractor):
video_id = self._match_id(url)
config = self._download_json(
'https://www.panda.tv/api_room?roomid=%s' % video_id, video_id)
'https://www.panda.tv/api_room_v2?roomid=%s' % video_id, video_id)
error_code = config.get('errno', 0)
if error_code is not 0:
@ -66,6 +66,11 @@ class PandaTVIE(InfoExtractor):
plflag1 = '4'
live_panda = 'live_panda' if plflag0 < 1 else ''
plflag_auth = self._parse_json(video_info['plflag_list'], video_id)
sign = plflag_auth['auth']['sign']
ts = plflag_auth['auth']['time']
rid = plflag_auth['auth']['rid']
quality_key = qualities(['OD', 'HD', 'SD'])
suffix = ['_small', '_mid', '']
formats = []
@ -77,8 +82,8 @@ class PandaTVIE(InfoExtractor):
continue
for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))):
formats.append({
'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s'
% (pl, plflag1, room_key, live_panda, suffix[quality], ext),
'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s?sign=%s&ts=%s&rid=%s'
% (pl, plflag1, room_key, live_panda, suffix[quality], ext, sign, ts, rid),
'format_id': '%s-%s' % (k, ext),
'quality': quality,
'source_preference': pref,

View File

@ -67,7 +67,7 @@ class PatreonIE(InfoExtractor):
'https://www.patreon.com/processLogin',
compat_urllib_parse_urlencode(login_form).encode('utf-8')
)
login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
login_page = self._download_webpage(request, None, note='Logging in')
if re.search(r'onLoginFailed', login_page):
raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)

View File

@ -116,7 +116,7 @@ class PluralsightIE(PluralsightBaseIE):
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
response = self._download_webpage(
post_url, None, 'Logging in as %s' % username,
post_url, None, 'Logging in',
data=urlencode_postdata(login_form),
headers={'Content-Type': 'application/x-www-form-urlencoded'})

View File

@ -68,7 +68,7 @@ class RoosterTeethIE(InfoExtractor):
login_request = self._download_webpage(
self._LOGIN_URL, None,
note='Logging in as %s' % username,
note='Logging in',
data=urlencode_postdata(login_form),
headers={
'Referer': self._LOGIN_URL,

View File

@ -61,7 +61,7 @@ class SafariBaseIE(InfoExtractor):
request = sanitized_Request(
self._LOGIN_URL, urlencode_postdata(login_form), headers=headers)
login_page = self._download_webpage(
request, None, 'Logging in as %s' % username)
request, None, 'Logging in')
if not is_logged(login_page):
raise ExtractorError(

View File

@ -2,7 +2,12 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import strip_or_none
from ..utils import (
extract_attributes,
smuggle_url,
strip_or_none,
urljoin,
)
class SkySportsIE(InfoExtractor):
@ -22,12 +27,22 @@ class SkySportsIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_data = extract_attributes(self._search_regex(
r'(<div.+?class="sdc-article-video__media-ooyala"[^>]+>)', webpage, 'video data'))
video_url = 'ooyala:%s' % video_data['data-video-id']
if video_data.get('data-token-required') == 'true':
token_fetch_options = self._parse_json(video_data.get('data-token-fetch-options', '{}'), video_id, fatal=False) or {}
token_fetch_url = token_fetch_options.get('url')
if token_fetch_url:
embed_token = self._download_webpage(urljoin(url, token_fetch_url), video_id, fatal=False)
if embed_token:
video_url = smuggle_url(video_url, {'embed_token': embed_token.strip('"')})
return {
'_type': 'url_transparent',
'id': video_id,
'url': 'ooyala:%s' % self._search_regex(
r'data-video-id="([^"]+)"', webpage, 'ooyala id'),
'url': video_url,
'title': self._og_search_title(webpage),
'description': strip_or_none(self._og_search_description(webpage)),
'ie_key': 'Ooyala',

View File

@ -8,36 +8,49 @@ from .common import InfoExtractor
class SoundgasmIE(InfoExtractor):
IE_NAME = 'soundgasm'
_VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)'
_VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_-]+)/(?P<display_id>[0-9a-zA-Z_-]+)'
_TEST = {
'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
'md5': '010082a2c802c5275bb00030743e75ad',
'info_dict': {
'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9',
'ext': 'm4a',
'title': 'ytdl_Piano-sample',
'description': 'Royalty Free Sample Music'
'title': 'Piano sample',
'description': 'Royalty Free Sample Music',
'uploader': 'ytdl',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('title')
audio_title = mobj.group('user') + '_' + mobj.group('title')
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
audio_url = self._html_search_regex(
r'(?s)m4a\:\s"([^"]+)"', webpage, 'audio URL')
audio_id = re.split(r'\/|\.', audio_url)[-2]
r'(?s)m4a\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
'audio URL', group='url')
title = self._search_regex(
r'<div[^>]+\bclass=["\']jp-title[^>]+>([^<]+)',
webpage, 'title', default=display_id)
description = self._html_search_regex(
r'(?s)<li>Description:\s(.*?)<\/li>', webpage, 'description',
fatal=False)
(r'(?s)<div[^>]+\bclass=["\']jp-description[^>]+>(.+?)</div>',
r'(?s)<li>Description:\s(.*?)<\/li>'),
webpage, 'description', fatal=False)
audio_id = self._search_regex(
r'/([^/]+)\.m4a', audio_url, 'audio id', default=display_id)
return {
'id': audio_id,
'display_id': display_id,
'url': audio_url,
'title': audio_title,
'description': description
'vcodec': 'none',
'title': title,
'description': description,
'uploader': mobj.group('user'),
}

View File

@ -3,6 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class SpankBangIE(InfoExtractor):
@ -33,6 +34,10 @@ class SpankBangIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
raise ExtractorError(
'Video %s is not available' % video_id, expected=True)
stream_key = self._html_search_regex(
r'''var\s+stream_key\s*=\s*['"](.+?)['"]''',
webpage, 'stream key')

View File

@ -32,6 +32,8 @@ class TVAIE(InfoExtractor):
video_data = self._download_json(
'https://videos.tva.ca/proxy/item/_' + video_id, video_id, headers={
'Accept': 'application/json',
}, query={
'appId': '5955fc5f23eec60006c951f1',
})
def get_attribute(key):

View File

@ -1,86 +0,0 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
# 22Tracks regularly replaces the audio tracks that can be streamed on their
# site. The tracks usually expire after one month, so we can't add tests.
class TwentyTwoTracksIE(InfoExtractor):
    """Extractor for a single track URL on 22tracks.com."""

    _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/(?P<id>\d+)'
    IE_NAME = '22tracks:track'

    _API_BASE = 'http://22tracks.com/api'

    def _extract_info(self, city, genre_name, track_id=None):
        """Walk the cities -> genres -> tracks API chain.

        Returns the matching track object when `track_id` is given,
        otherwise a two-element list ``[genre title, tracks]``.
        """
        # The id shown in progress messages: the track if any, else the genre.
        item_id = track_id or genre_name

        cities = self._download_json(
            '%s/cities' % self._API_BASE, item_id,
            'Downloading cities info',
            'Unable to download cities info')
        matching_city_ids = [
            entry['id'] for entry in cities if entry['slug'] == city]
        city_id = matching_city_ids[0]

        genres = self._download_json(
            '%s/genres/%s' % (self._API_BASE, city_id), item_id,
            'Downloading %s genres info' % city,
            'Unable to download %s genres info' % city)
        matching_genres = [
            entry for entry in genres if entry['slug'] == genre_name]
        genre = matching_genres[0]
        genre_id = genre['id']

        tracks = self._download_json(
            '%s/tracks/%s' % (self._API_BASE, genre_id), item_id,
            'Downloading %s genre tracks info' % genre_name,
            'Unable to download track info')

        if track_id:
            matching_tracks = [
                entry for entry in tracks if entry['id'] == item_id]
            return matching_tracks[0]
        return [genre['title'], tracks]

    def _get_track_url(self, filename, track_id):
        """Fetch a streaming token for `filename` and build the audio URL."""
        token = self._download_json(
            'http://22tracks.com/token.php?desktop=true&u=/128/%s' % filename,
            track_id, 'Downloading token', 'Unable to download token')
        url_parts = (token['filename'], token['st'], token['e'])
        return 'http://audio.22tracks.com%s?st=%s&e=%d' % url_parts

    def _extract_track_info(self, track_info, track_id):
        """Turn a track object from the API into an info dict."""
        download_url = self._get_track_url(track_info['filename'], track_id)
        artist = track_info['artist'].strip()
        track_title = track_info['title'].strip()
        published = track_info.get('published_at') or track_info.get('created')
        return {
            'id': track_id,
            'url': download_url,
            'ext': 'mp3',
            'title': '%s - %s' % (artist, track_title),
            'duration': int_or_none(track_info.get('duration')),
            'timestamp': int_or_none(published)
        }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        city, genre, track_id = mobj.group('city', 'genre', 'id')

        track_info = self._extract_info(city, genre, track_id)
        return self._extract_track_info(track_info, track_id)
class TwentyTwoTracksGenreIE(TwentyTwoTracksIE):
    """Extractor for a 22tracks.com genre page, returned as a playlist."""

    _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/?$'
    IE_NAME = '22tracks:genre'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        city, genre = mobj.group('city', 'genre')

        genre_title, tracks = self._extract_info(city, genre)

        # One info dict per track in the genre, in API order.
        entries = []
        for track_info in tracks:
            entries.append(
                self._extract_track_info(track_info, track_info['id']))

        return self.playlist_result(entries, genre, genre_title)

View File

@ -101,7 +101,7 @@ class TwitchBaseIE(InfoExtractor):
fail(clean_html(login_page))
redirect_page, handle = login_step(
login_page, handle, 'Logging in as %s' % username, {
login_page, handle, 'Logging in', {
'username': username,
'password': password,
})

View File

@ -164,7 +164,7 @@ class UdemyIE(InfoExtractor):
})
response = self._download_webpage(
self._LOGIN_URL, None, 'Logging in as %s' % username,
self._LOGIN_URL, None, 'Logging in',
data=urlencode_postdata(login_form),
headers={
'Referer': self._ORIGIN_URL,

View File

@ -99,7 +99,7 @@ class VikiBaseIE(InfoExtractor):
login = self._call_api(
'sessions.json', None,
'Logging in as %s' % username, post_data=login_form)
'Logging in', post_data=login_form)
self._token = login.get('token')
if not self._token:

View File

@ -412,7 +412,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
urls = []
# Look for embedded (iframe) Vimeo player
for mobj in re.finditer(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1',
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1',
webpage):
urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url))
PLAIN_EMBED_RE = (

View File

@ -67,7 +67,7 @@ class VKBaseIE(InfoExtractor):
login_page = self._download_webpage(
'https://login.vk.com/?act=login', None,
note='Logging in as %s' % username,
note='Logging in',
data=urlencode_postdata(login_form))
if re.search(r'onLoginFailed', login_page):

View File

@ -13,7 +13,7 @@ class WSJIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:
https?://video-api\.wsj\.com/api-video/player/iframe\.html\?.*?\bguid=|
https?://(?:www\.)?(?:wsj|barrons)\.com/video/[^/]+/|
https?://(?:www\.)?(?:wsj|barrons)\.com/video/(?:[^/]+/)+|
wsj:
)
(?P<id>[a-fA-F0-9-]{36})
@ -38,6 +38,9 @@ class WSJIE(InfoExtractor):
}, {
'url': 'http://www.barrons.com/video/capitalism-deserves-more-respect-from-millennials/F301217E-6F46-43AE-B8D2-B7180D642EE9.html',
'only_matching': True,
}, {
'url': 'https://www.wsj.com/video/series/a-brief-history-of/the-modern-cell-carrier-how-we-got-here/980E2187-401D-48A1-B82B-1486CEE06CB9',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -0,0 +1,202 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
try_get,
)
CDN_API_BASE = 'https://cdn.younow.com/php/api'
MOMENT_URL_FORMAT = '%s/moment/fetch/id=%%s' % CDN_API_BASE
class YouNowLiveIE(InfoExtractor):
    """Extractor for the live broadcast of a YouNow user page.

    Handles ``younow.com/<user>`` URLs. Channel and moment URLs also match
    ``_VALID_URL`` (it is not anchored at the end), so ``suitable`` defers
    to the dedicated extractors for those.
    """

    _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.younow.com/AmandaPadeezy',
        'info_dict': {
            'id': 'AmandaPadeezy',
            'ext': 'mp4',
            'is_live': True,
            'title': 'March 26, 2017',
            'thumbnail': r're:^https?://.*\.jpg$',
            'tags': ['girls'],
            'categories': ['girls'],
            'uploader': 'AmandaPadeezy',
            'uploader_id': '6716501',
            'uploader_url': 'https://www.younow.com/AmandaPadeezy',
            'creator': 'AmandaPadeezy',
        },
        'skip': True,
    }

    @classmethod
    def suitable(cls, url):
        # Give the more specific channel/moment extractors precedence.
        return (False
                if YouNowChannelIE.suitable(url) or YouNowMomentIE.suitable(url)
                else super(YouNowLiveIE, cls).suitable(url))

    def _real_extract(self, url):
        """Return the info dict for the broadcast of the user in `url`.

        Raises ExtractorError (expected) when the API reports a non-zero
        error code.
        """
        username = self._match_id(url)

        data = self._download_json(
            'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
            % username, username)

        if data.get('errorCode') != 0:
            # The error payload may lack a message; fall back to a generic
            # one instead of masking the failure with a KeyError.
            raise ExtractorError(
                data.get('errorMsg') or 'Unable to retrieve broadcast info',
                expected=True)

        # Prefer the profile name reported by the API over the URL segment.
        uploader = try_get(
            data, lambda x: x['user']['profileUrlString'],
            compat_str) or username

        return {
            'id': uploader,
            'is_live': True,
            'title': self._live_title(uploader),
            'thumbnail': data.get('awsUrl'),
            'tags': data.get('tags'),
            'categories': data.get('tags'),
            'uploader': uploader,
            'uploader_id': data.get('userId'),
            'uploader_url': 'https://www.younow.com/%s' % username,
            'creator': uploader,
            'view_count': int_or_none(data.get('viewers')),
            'like_count': int_or_none(data.get('likes')),
            'formats': [{
                'url': '%s/broadcast/videoPath/hls=1/broadcastId=%s/channelId=%s'
                       % (CDN_API_BASE, data['broadcastId'], data['userId']),
                'ext': 'mp4',
                'protocol': 'm3u8',
            }],
        }
def _extract_moment(item, fatal=True):
moment_id = item.get('momentId')
if not moment_id:
if not fatal:
return
raise ExtractorError('Unable to extract moment id')
moment_id = compat_str(moment_id)
title = item.get('text')
if not title:
title = 'YouNow %s' % (
item.get('momentType') or item.get('titleType') or 'moment')
uploader = try_get(item, lambda x: x['owner']['name'], compat_str)
uploader_id = try_get(item, lambda x: x['owner']['userId'])
uploader_url = 'https://www.younow.com/%s' % uploader if uploader else None
entry = {
'extractor_key': 'YouNowMoment',
'id': moment_id,
'title': title,
'view_count': int_or_none(item.get('views')),
'like_count': int_or_none(item.get('likes')),
'timestamp': int_or_none(item.get('created')),
'creator': uploader,
'uploader': uploader,
'uploader_id': uploader_id,
'uploader_url': uploader_url,
'formats': [{
'url': 'https://hls.younow.com/momentsplaylists/live/%s/%s.m3u8'
% (moment_id, moment_id),
'ext': 'mp4',
'protocol': 'm3u8_native',
}],
}
return entry
class YouNowChannelIE(InfoExtractor):
    """Extractor for a YouNow channel page, returned as a playlist of moments."""

    _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/]+)/channel'
    _TEST = {
        'url': 'https://www.younow.com/its_Kateee_/channel',
        'info_dict': {
            'id': '14629760',
            'title': 'its_Kateee_ moments'
        },
        'playlist_mincount': 8,
    }

    def _entries(self, username, channel_id):
        """Yield one info dict per moment of the channel, page by page.

        Paging uses the ``created`` value of the last item on a page as the
        ``createdBefore`` cursor of the next request. Iteration stops on an
        empty page or when a page provides no usable cursor.
        """
        created_before = 0
        for page_num in itertools.count(1):
            info = self._download_json(
                '%s/moment/profile/channelId=%s/createdBefore=%d/records=20'
                % (CDN_API_BASE, channel_id, created_before), username,
                note='Downloading moments page %d' % page_num)
            items = info.get('items')
            if not items or not isinstance(items, list):
                break
            # Reset per page so that a page without any dict item cannot
            # leave the old cursor in place and re-request the same page
            # forever.
            next_created_before = None
            for item in items:
                if not isinstance(item, dict):
                    continue
                item_type = item.get('type')
                if item_type == 'moment':
                    entry = _extract_moment(item, fatal=False)
                    if entry:
                        yield entry
                elif item_type == 'collection':
                    moments = item.get('momentsIds')
                    if isinstance(moments, list):
                        for moment_id in moments:
                            m = self._download_json(
                                MOMENT_URL_FORMAT % moment_id, username,
                                note='Downloading %s moment JSON' % moment_id,
                                fatal=False)
                            if m and isinstance(m, dict) and m.get('item'):
                                # Best effort, like the download above: one
                                # malformed moment must not abort the
                                # whole playlist.
                                entry = _extract_moment(
                                    m['item'], fatal=False)
                                if entry:
                                    yield entry
                next_created_before = int_or_none(item.get('created'))
            if next_created_before is None:
                break
            created_before = next_created_before

    def _real_extract(self, url):
        """Resolve the user id for the URL's username and return the playlist."""
        username = self._match_id(url)
        channel_id = compat_str(self._download_json(
            'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
            % username, username, note='Downloading user information')['userId'])
        return self.playlist_result(
            self._entries(username, channel_id), channel_id,
            '%s moments' % username)
class YouNowMomentIE(InfoExtractor):
    """Extractor for a single YouNow moment URL."""

    _VALID_URL = r'https?://(?:www\.)?younow\.com/[^/]+/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.younow.com/GABO.../20712117/36319236/3b316doc/m',
        'md5': 'a30c70eadb9fb39a1aa3c8c0d22a0807',
        'info_dict': {
            'id': '20712117',
            'ext': 'mp4',
            'title': 'YouNow capture',
            'view_count': int,
            'like_count': int,
            'timestamp': 1490432040,
            'upload_date': '20170325',
            'uploader': 'GABO...',
            'uploader_id': 35917228,
        },
    }

    @classmethod
    def suitable(cls, url):
        # Channel URLs also match _VALID_URL; leave them to YouNowChannelIE.
        if YouNowChannelIE.suitable(url):
            return False
        return super(YouNowMomentIE, cls).suitable(url)

    def _real_extract(self, url):
        moment_id = self._match_id(url)
        response = self._download_json(
            MOMENT_URL_FORMAT % moment_id, moment_id)
        moment = response['item']
        return _extract_moment(moment)

View File

@ -1391,7 +1391,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
)
(["\'])
(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
(?:embed|v|p)/.+?)
(?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
\1''', webpage)]
# lazyYT YouTube embed

View File

@ -1835,10 +1835,20 @@ def parse_duration(s):
days, hours, mins, secs, ms = m.groups()
else:
m = re.match(
r'''(?ix)(?:P?T)?
r'''(?ix)(?:P?
(?:
[0-9]+\s*y(?:ears?)?\s*
)?
(?:
[0-9]+\s*m(?:onths?)?\s*
)?
(?:
[0-9]+\s*w(?:eeks?)?\s*
)?
(?:
(?P<days>[0-9]+)\s*d(?:ays?)?\s*
)?
T)?
(?:
(?P<hours>[0-9]+)\s*h(?:ours?)?\s*
)?

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2017.10.20'
__version__ = '2017.11.06'