1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-16 18:27:24 +08:00

Merge pull request #8 from rg3/master

update
This commit is contained in:
siddht1 2016-05-10 11:56:43 +05:30
commit b1d9071b37
41 changed files with 1407 additions and 396 deletions

View File

@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.24**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.01**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.04.24
[debug] youtube-dl version 2016.05.01
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@ -168,3 +168,7 @@ José Joaquín Atria
Viťas Strádal
Kagami Hiiragi
Philip Huppert
blahgeek
Kevin Deldycke
inondle
Tomáš Čech

View File

@ -1,7 +1,7 @@
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
clean:
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi *.mkv *.webm CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
find . -name "*.pyc" -delete
find . -name "*.class" -delete

View File

@ -465,7 +465,7 @@ The basic usage is not to set any template arguments when downloading a single f
- `display_id`: An alternative identifier for the video
- `uploader`: Full name of the video uploader
- `license`: License name the video is licensed under
- `creator`: The main artist who created the video
- `creator`: The creator of the video
- `release_date`: The date (YYYYMMDD) when the video was released
- `timestamp`: UNIX timestamp of the moment the video became available
- `upload_date`: Video upload date (YYYYMMDD)

View File

@ -338,7 +338,6 @@
- **mailru**: Видео@Mail.Ru
- **MakersChannel**
- **MakerTV**
- **Malemotion**
- **MatchTV**
- **MDR**: MDR.DE and KiKA
- **media.ccc.de**
@ -375,8 +374,8 @@
- **mtvservices:embedded**
- **MuenchenTV**: münchen.tv
- **MusicPlayOn**
- **muzu.tv**
- **Mwave**
- **MwaveMeetGreet**
- **MySpace**
- **MySpace:album**
- **MySpass**
@ -554,7 +553,6 @@
- **SenateISVP**
- **ServingSys**
- **Sexu**
- **SexyKarma**: Sexy Karma and Watch Indian Porn
- **Shahid**
- **Shared**: shared.sx and vivo.sx
- **ShareSix**
@ -567,8 +565,6 @@
- **smotri:broadcast**: Smotri.com broadcasts
- **smotri:community**: Smotri.com community videos
- **smotri:user**: Smotri.com user videos
- **SnagFilms**
- **SnagFilmsEmbed**
- **Snotr**
- **Sohu**
- **soundcloud**
@ -610,6 +606,7 @@
- **Syfy**
- **SztvHu**
- **Tagesschau**
- **tagesschau:player**
- **Tapely**
- **Tass**
- **TDSLifeway**
@ -725,6 +722,8 @@
- **Vidzi**
- **vier**
- **vier:videos**
- **ViewLift**
- **ViewLiftEmbed**
- **Viewster**
- **Viidea**
- **viki**
@ -756,6 +755,7 @@
- **Walla**
- **WashingtonPost**
- **wat.tv**
- **WatchIndianPorn**: Watch Indian Porn
- **WDR**
- **wdr:mobile**
- **WDRMaus**: Sendung mit der Maus
@ -775,6 +775,10 @@
- **XFileShare**: XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me
- **XHamster**
- **XHamsterEmbed**
- **xiami:album**: 虾米音乐 - 专辑
- **xiami:artist**: 虾米音乐 - 歌手
- **xiami:collection**: 虾米音乐 - 精选集
- **xiami:song**: 虾米音乐
- **XMinus**
- **XNXX**
- **Xstream**

View File

@ -10,9 +10,9 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.utils import get_filesystem_encoding
from youtube_dl.compat import (
compat_getenv,
compat_setenv,
compat_etree_fromstring,
compat_expanduser,
compat_shlex_split,
@ -26,19 +26,22 @@ from youtube_dl.compat import (
class TestCompat(unittest.TestCase):
def test_compat_getenv(self):
test_str = 'тест'
os.environ['YOUTUBE-DL-TEST'] = (
test_str if sys.version_info >= (3, 0)
else test_str.encode(get_filesystem_encoding()))
compat_setenv('YOUTUBE-DL-TEST', test_str)
self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str)
def test_compat_setenv(self):
    # Round-trip check: a non-ASCII value written with compat_setenv must be
    # returned unchanged by compat_getenv.
    test_var = 'YOUTUBE-DL-TEST'
    test_str = 'тест'
    compat_setenv(test_var, test_str)
    # One read-back inside the assertion is enough; a separate call whose
    # result is discarded verifies nothing.
    self.assertEqual(compat_getenv(test_var), test_str)
def test_compat_expanduser(self):
old_home = os.environ.get('HOME')
test_str = 'C:\Documents and Settings\тест\Application Data'
os.environ['HOME'] = (
test_str if sys.version_info >= (3, 0)
else test_str.encode(get_filesystem_encoding()))
compat_setenv('HOME', test_str)
self.assertEqual(compat_expanduser('~'), test_str)
os.environ['HOME'] = old_home
compat_setenv('HOME', old_home or '')
def test_all_present(self):
import youtube_dl.compat

View File

@ -139,8 +139,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True))
self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True))
tests = 'a\xe4b\u4e2d\u56fd\u7684c'
self.assertEqual(sanitize_filename(tests, restricted=True), 'a_b_c')
tests = 'aäb\u4e2d\u56fd\u7684c'
self.assertEqual(sanitize_filename(tests, restricted=True), 'aab_c')
self.assertTrue(sanitize_filename('\xf6', restricted=True) != '') # No empty filename
forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
@ -155,6 +155,10 @@ class TestUtil(unittest.TestCase):
self.assertTrue(sanitize_filename('-', restricted=True) != '')
self.assertTrue(sanitize_filename(':', restricted=True) != '')
self.assertEqual(sanitize_filename(
'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', restricted=True),
'AAAAAAAECEEEEIIIIDNOOOOOOUUUUYPssaaaaaaaeceeeeiiiionoooooouuuuypy')
def test_sanitize_ids(self):
self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')

View File

@ -580,7 +580,7 @@ class YoutubeDL(object):
is_id=(k == 'id'))
template_dict = dict((k, sanitize(k, v))
for k, v in template_dict.items()
if v is not None)
if v is not None and not isinstance(v, (list, tuple, dict)))
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
@ -1639,7 +1639,7 @@ class YoutubeDL(object):
# Just a single file
success = dl(filename, info_dict)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_error('unable to download video data: %s' % str(err))
self.report_error('unable to download video data: %s' % error_to_compat_str(err))
return
except (OSError, IOError) as err:
raise UnavailableVideoError(err)
@ -2018,6 +2018,7 @@ class YoutubeDL(object):
if opts_cookiefile is None:
self.cookiejar = compat_cookiejar.CookieJar()
else:
opts_cookiefile = compat_expanduser(opts_cookiefile)
self.cookiejar = compat_cookiejar.MozillaCookieJar(
opts_cookiefile)
if os.access(opts_cookiefile, os.R_OK):

View File

@ -86,7 +86,9 @@ def _real_main(argv=None):
if opts.batchfile == '-':
batchfd = sys.stdin
else:
batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore')
batchfd = io.open(
compat_expanduser(opts.batchfile),
'r', encoding='utf-8', errors='ignore')
batch_urls = read_batch_urls(batchfd)
if opts.verbose:
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
@ -404,7 +406,7 @@ def _real_main(argv=None):
try:
if opts.load_info_filename is not None:
retcode = ydl.download_with_info_file(opts.load_info_filename)
retcode = ydl.download_with_info_file(compat_expanduser(opts.load_info_filename))
else:
retcode = ydl.download(all_urls)
except MaxDownloadsReached:

View File

@ -373,6 +373,9 @@ compat_os_name = os._name if os.name == 'java' else os.name
if sys.version_info >= (3, 0):
compat_getenv = os.getenv
compat_expanduser = os.path.expanduser
def compat_setenv(key, value, env=os.environ):
    """Store ``value`` under ``key`` in the mapping ``env``.

    ``env`` defaults to ``os.environ``; callers may pass another mapping
    (for example a copy of the environment) to leave ``os.environ`` untouched.
    """
    env[key] = value
else:
# Environment variables should be decoded with filesystem encoding.
# Otherwise it will fail if any non-ASCII characters are present (see #3854 #3217 #2918)
@ -384,6 +387,12 @@ else:
env = env.decode(get_filesystem_encoding())
return env
def compat_setenv(key, value, env=os.environ):
    """Store ``value`` under ``key`` in ``env``, encoding text first.

    Any key or value that is a ``compat_str`` is encoded with the filesystem
    encoding before it is stored; other objects are stored as given.
    ``env`` defaults to ``os.environ``.
    """
    # Imported at call time rather than at module level, as in the original.
    from .utils import get_filesystem_encoding

    def _to_env_str(text):
        if not isinstance(text, compat_str):
            return text
        return text.encode(get_filesystem_encoding())

    env[_to_env_str(key)] = _to_env_str(value)
# HACK: The default implementations of os.path.expanduser from cpython do not decode
# environment variables with filesystem encoding. We will work around this by
# providing adjusted implementations.
@ -604,6 +613,7 @@ __all__ = [
'compat_os_name',
'compat_parse_qs',
'compat_print',
'compat_setenv',
'compat_shlex_split',
'compat_socket_create_connection',
'compat_str',

View File

@ -6,6 +6,7 @@ import sys
import re
from .common import FileDownloader
from ..compat import compat_setenv
from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
from ..utils import (
cli_option,
@ -198,6 +199,18 @@ class FFmpegFD(ExternalFD):
'-headers',
''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
env = None
proxy = self.params.get('proxy')
if proxy:
if not re.match(r'^[\da-zA-Z]+://', proxy):
proxy = 'http://%s' % proxy
# Since December 2015 ffmpeg supports -http_proxy option (see
# http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
# We could switch to the following code if we are able to detect version properly
# args += ['-http_proxy', proxy]
env = os.environ.copy()
compat_setenv('HTTP_PROXY', proxy, env=env)
protocol = info_dict.get('protocol')
if protocol == 'rtmp':
@ -224,7 +237,7 @@ class FFmpegFD(ExternalFD):
args += ['-rtmp_live', 'live']
args += ['-i', url, '-c', 'copy']
if protocol == 'm3u8':
if protocol in ('m3u8', 'm3u8_native'):
if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
args += ['-f', 'mpegts']
else:
@ -239,7 +252,7 @@ class FFmpegFD(ExternalFD):
self._debug_cmd(args)
proc = subprocess.Popen(args, stdin=subprocess.PIPE)
proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
try:
retval = proc.wait()
except KeyboardInterrupt:

View File

@ -4,6 +4,7 @@ import os.path
import re
from .fragment import FragmentFD
from .external import FFmpegFD
from ..compat import compat_urlparse
from ..utils import (
@ -17,12 +18,39 @@ class HlsFD(FragmentFD):
FD_NAME = 'hlsnative'
@staticmethod
def can_download(manifest):
    """Return True if ``manifest`` uses none of the unsupported HLS features.

    ``manifest`` is the decoded m3u8 playlist text. Each pattern below is
    searched for anywhere in that text; the first hit makes the result False.
    """
    unsupported_patterns = (
        # encrypted streams [1]
        r'#EXT-X-KEY:METHOD=(?!NONE)',
        # playlists composed of byte ranges of media files [2]
        r'#EXT-X-BYTERANGE',
        # The live stream check [3], r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', is
        # deliberately left out: the heuristic does not always work (e.g. geo
        # restricted to Germany
        # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
        # media segments may be appended to the end of event media playlists [4]
        r'#EXT-X-PLAYLIST-TYPE:EVENT',
        # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
        # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
        # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
        # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
    )
    for pattern in unsupported_patterns:
        if re.search(pattern, manifest):
            return False
    return True
def real_download(self, filename, info_dict):
man_url = info_dict['url']
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
manifest = self.ydl.urlopen(man_url).read()
s = manifest.decode('utf-8', 'ignore')
if not self.can_download(s):
self.report_warning(
'hlsnative has detected features it does not support, '
'extraction will be delegated to ffmpeg')
fd = FFmpegFD(self.ydl, self.params)
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
return fd.real_download(filename, info_dict)
fragment_urls = []
for line in s.splitlines():
line = line.strip()

View File

@ -12,7 +12,7 @@ from ..utils import (
class AolIE(InfoExtractor):
IE_NAME = 'on.aol.com'
_VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/.*-)(?P<id>[^/?-]+)'
_VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/(?:[^/]+/)*(?:[^/?#&]+-)?)(?P<id>[^/?#&]+)'
_TESTS = [{
# video with 5min ID
@ -53,6 +53,12 @@ class AolIE(InfoExtractor):
}, {
'url': 'http://on.aol.com/shows/park-bench-shw518173474-559a1b9be4b0c3bfad3357a7?context=SH:SHW518173474:PL4327:1460619712763',
'only_matching': True,
}, {
'url': 'http://on.aol.com/video/519442220',
'only_matching': True,
}, {
'url': 'aol-video:5707d6b8e4b090497b04f706',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -161,24 +161,53 @@ class ArteTVPlus7IE(InfoExtractor):
'es': 'E[ESP]',
}
langcode = LANGS.get(lang, lang)
formats = []
for format_id, format_dict in player_info['VSR'].items():
f = dict(format_dict)
versionCode = f.get('versionCode')
langcode = LANGS.get(lang, lang)
lang_rexs = [r'VO?%s-' % re.escape(langcode), r'VO?.-ST%s$' % re.escape(langcode)]
lang_pref = None
if versionCode:
matched_lang_rexs = [r for r in lang_rexs if re.match(r, versionCode)]
lang_pref = -10 if not matched_lang_rexs else 10 * len(matched_lang_rexs)
source_pref = 0
if versionCode is not None:
# The original version with subtitles has lower relevance
if re.match(r'VO-ST(F|A|E)', versionCode):
source_pref -= 10
# The version with sourds/mal subtitles has also lower relevance
elif re.match(r'VO?(F|A|E)-STM\1', versionCode):
source_pref -= 9
l = re.escape(langcode)
# Language preference from most to least priority
# Reference: section 5.6.3 of
# http://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-05.pdf
PREFERENCES = (
# original version in requested language, without subtitles
r'VO{0}$'.format(l),
# original version in requested language, with partial subtitles in requested language
r'VO{0}-ST{0}$'.format(l),
# original version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
r'VO{0}-STM{0}$'.format(l),
# non-original (dubbed) version in requested language, without subtitles
r'V{0}$'.format(l),
# non-original (dubbed) version in requested language, with partial subtitles in requested language
r'V{0}-ST{0}$'.format(l),
# non-original (dubbed) version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
r'V{0}-STM{0}$'.format(l),
# original version in requested language, with partial subtitles in different language
r'VO{0}-ST(?!{0}).+?$'.format(l),
# original version in requested language, with subtitles for the deaf and hard-of-hearing in different language
r'VO{0}-STM(?!{0}).+?$'.format(l),
# original version in different language, with partial subtitles in requested language
r'VO(?:(?!{0}).+?)?-ST{0}$'.format(l),
# original version in different language, with subtitles for the deaf and hard-of-hearing in requested language
r'VO(?:(?!{0}).+?)?-STM{0}$'.format(l),
# original version in different language, without subtitles
r'VO(?:(?!{0}))?$'.format(l),
# original version in different language, with partial subtitles in different language
r'VO(?:(?!{0}).+?)?-ST(?!{0}).+?$'.format(l),
# original version in different language, with subtitles for the deaf and hard-of-hearing in different language
r'VO(?:(?!{0}).+?)?-STM(?!{0}).+?$'.format(l),
)
for pref, p in enumerate(PREFERENCES):
if re.match(p, versionCode):
lang_pref = len(PREFERENCES) - pref
break
else:
lang_pref = -1
format = {
'format_id': format_id,
'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
@ -188,7 +217,6 @@ class ArteTVPlus7IE(InfoExtractor):
'height': int_or_none(f.get('height')),
'tbr': int_or_none(f.get('bitrate')),
'quality': qfunc(f.get('quality')),
'source_preference': source_pref,
}
if f.get('mediaType') == 'rtmp':

View File

@ -0,0 +1,39 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class BIQLEIE(InfoExtractor):
    # Extractor for biqle.com / biqle.org / biqle.ru "watch" pages. The page
    # itself is only used to find the daxab.com player iframe; the result
    # points at that embed URL.
    _VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
    _TESTS = [{
        'url': 'http://www.biqle.ru/watch/847655_160197695',
        'md5': 'ad5f746a874ccded7b8f211aeea96637',
        'info_dict': {
            'id': '160197695',
            'ext': 'mp4',
            'title': 'Foo Fighters - The Pretender (Live at Wembley Stadium)',
            'uploader': 'Andrey Rogozin',
            'upload_date': '20110605',
        }
    }, {
        'url': 'https://biqle.org/watch/-44781847_168547604',
        'md5': '7f24e72af1db0edf7c1aaba513174f97',
        'info_dict': {
            'id': '168547604',
            'ext': 'mp4',
            'title': 'Ребенок в шоке от автоматической мойки',
            'uploader': 'Dmitry Kotov',
        }
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result for the daxab.com iframe on the page."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)
        # The iframe src may be protocol-relative ("//daxab.com/...") or use
        # an explicit "http:" scheme.
        iframe_src = self._search_regex(
            r'<iframe.+?src="((?:http:)?//daxab\.com/[^"]+)".*?></iframe>',
            page, 'embed url')
        embed_url = self._proto_relative_url(iframe_src)

        return {
            '_type': 'url_transparent',
            'url': embed_url,
        }

View File

@ -1,13 +1,9 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
qualities,
unified_strdate,
parse_iso8601,
)
@ -19,14 +15,14 @@ class CCCIE(InfoExtractor):
'url': 'https://media.ccc.de/v/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor#video',
'md5': '3a1eda8f3a29515d27f5adb967d7e740',
'info_dict': {
'id': '30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor',
'id': '1839',
'ext': 'mp4',
'title': 'Introduction to Processor Design',
'description': 'md5:80be298773966f66d56cb11260b879af',
'description': 'md5:df55f6d073d4ceae55aae6f2fd98a0ac',
'thumbnail': 're:^https?://.*\.jpg$',
'view_count': int,
'upload_date': '20131228',
'duration': 3660,
'timestamp': 1388188800,
'duration': 3710,
}
}, {
'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download',
@ -34,79 +30,48 @@ class CCCIE(InfoExtractor):
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
event_id = self._search_regex("data-id='(\d+)'", webpage, 'event id')
event_data = self._download_json('https://media.ccc.de/public/events/%s' % event_id, event_id)
if self._downloader.params.get('prefer_free_formats'):
preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd'])
else:
preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd'])
title = self._html_search_regex(
r'(?s)<h1>(.*?)</h1>', webpage, 'title')
description = self._html_search_regex(
r'(?s)<h3>About</h3>(.+?)<h3>',
webpage, 'description', fatal=False)
upload_date = unified_strdate(self._html_search_regex(
r"(?s)<span[^>]+class='[^']*fa-calendar-o'[^>]*>(.+?)</span>",
webpage, 'upload date', fatal=False))
view_count = int_or_none(self._html_search_regex(
r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>",
webpage, 'view count', fatal=False))
duration = parse_duration(self._html_search_regex(
r'(?s)<span[^>]+class=(["\']).*?fa-clock-o.*?\1[^>]*></span>(?P<duration>.+?)</li',
webpage, 'duration', fatal=False, group='duration'))
matches = re.finditer(r'''(?xs)
<(?:span|div)\s+class='label\s+filetype'>(?P<format>[^<]*)</(?:span|div)>\s*
<(?:span|div)\s+class='label\s+filetype'>(?P<lang>[^<]*)</(?:span|div)>\s*
<a\s+download\s+href='(?P<http_url>[^']+)'>\s*
(?:
.*?
<a\s+(?:download\s+)?href='(?P<torrent_url>[^']+\.torrent)'
)?''', webpage)
formats = []
for m in matches:
format = m.group('format')
format_id = self._search_regex(
r'.*/([a-z0-9_-]+)/[^/]*$',
m.group('http_url'), 'format id', default=None)
if format_id:
format_id = m.group('lang') + '-' + format_id
vcodec = 'h264' if 'h264' in format_id else (
'none' if format_id in ('mp3', 'opus') else None
for recording in event_data.get('recordings', []):
recording_url = recording.get('recording_url')
if not recording_url:
continue
language = recording.get('language')
folder = recording.get('folder')
format_id = None
if language:
format_id = language
if folder:
if language:
format_id += '-' + folder
else:
format_id = folder
vcodec = 'h264' if 'h264' in folder else (
'none' if folder in ('mp3', 'opus') else None
)
formats.append({
'format_id': format_id,
'format': format,
'language': m.group('lang'),
'url': m.group('http_url'),
'url': recording_url,
'width': int_or_none(recording.get('width')),
'height': int_or_none(recording.get('height')),
'filesize': int_or_none(recording.get('size'), invscale=1024 * 1024),
'language': language,
'vcodec': vcodec,
'preference': preference(format_id),
})
if m.group('torrent_url'):
formats.append({
'format_id': 'torrent-%s' % (format if format_id is None else format_id),
'format': '%s (torrent)' % format,
'proto': 'torrent',
'format_note': '(unsupported; will just download the .torrent file)',
'vcodec': vcodec,
'preference': -100 + preference(format_id),
'url': m.group('torrent_url'),
})
self._sort_formats(formats)
thumbnail = self._html_search_regex(
r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'view_count': view_count,
'upload_date': upload_date,
'duration': duration,
'id': event_id,
'display_id': display_id,
'title': event_data['title'],
'description': event_data.get('description'),
'thumbnail': event_data.get('thumb_url'),
'timestamp': parse_iso8601(event_data.get('date')),
'duration': int_or_none(event_data.get('length')),
'tags': event_data.get('tags'),
'formats': formats,
}

View File

@ -33,19 +33,33 @@ class CeskaTelevizeIE(InfoExtractor):
'skip_download': True,
},
}, {
'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
'info_dict': {
'id': '61924494876844374',
'id': '61924494877028507',
'ext': 'mp4',
'title': 'První republika: Zpěvačka z Dupárny Bobina',
'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.',
'title': 'Hyde Park Civilizace: Bonus 01 - En',
'description': 'English Subtittles',
'thumbnail': 're:^https?://.*\.jpg',
'duration': 88.4,
'duration': 81.3,
},
'params': {
# m3u8 download
'skip_download': True,
},
}, {
# live stream
'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
'info_dict': {
'id': 402,
'ext': 'mp4',
'title': 're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'is_live': True,
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': 'Georestricted to Czech Republic',
}, {
# video with 18+ caution trailer
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
@ -118,19 +132,21 @@ class CeskaTelevizeIE(InfoExtractor):
req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
req.add_header('Referer', url)
playlist_title = self._og_search_title(webpage)
playlist_description = self._og_search_description(webpage)
playlist_title = self._og_search_title(webpage, default=None)
playlist_description = self._og_search_description(webpage, default=None)
playlist = self._download_json(req, playlist_id)['playlist']
playlist_len = len(playlist)
entries = []
for item in playlist:
is_live = item.get('type') == 'LIVE'
formats = []
for format_id, stream_url in item['streamUrls'].items():
formats.extend(self._extract_m3u8_formats(
stream_url, playlist_id, 'mp4',
entry_protocol='m3u8_native', fatal=False))
entry_protocol='m3u8' if is_live else 'm3u8_native',
fatal=False))
self._sort_formats(formats)
item_id = item.get('id') or item['assetId']
@ -145,14 +161,22 @@ class CeskaTelevizeIE(InfoExtractor):
if subs:
subtitles = self.extract_subtitles(episode_id, subs)
if playlist_len == 1:
final_title = playlist_title or title
if is_live:
final_title = self._live_title(final_title)
else:
final_title = '%s (%s)' % (playlist_title, title)
entries.append({
'id': item_id,
'title': playlist_title if playlist_len == 1 else '%s (%s)' % (playlist_title, title),
'title': final_title,
'description': playlist_description if playlist_len == 1 else None,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
'subtitles': subtitles,
'is_live': is_live,
})
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)

View File

@ -163,7 +163,7 @@ class InfoExtractor(object):
description: Full video description.
uploader: Full name of the video uploader.
license: License name the video is licensed under.
creator: The main artist who created the video.
creator: The creator of the video.
release_date: The date (YYYYMMDD) when the video was released.
timestamp: UNIX timestamp of the moment the video became available.
upload_date: Video upload date (YYYYMMDD).

View File

@ -0,0 +1,61 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
determine_protocol,
)
class DailyMailIE(InfoExtractor):
    # Extractor for dailymail.co.uk video pages. Player options are read from
    # the page's data-opts attribute and the renditions from a JSON document
    # of video sources.
    _VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/video/[^/]+/video-(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.dailymail.co.uk/video/sciencetech/video-1288527/Turn-video-impressionist-masterpiece.html',
        'md5': '2f639d446394f53f3a33658b518b6615',
        'info_dict': {
            'id': '1288527',
            'ext': 'mp4',
            'title': 'Turn any video into an impressionist masterpiece',
            'description': 'md5:88ddbcb504367987b2708bb38677c9d2',
        }
    }

    def _real_extract(self, url):
        """Build the info dict (id, title, description, thumbnail, formats) for a video page."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        opts_json = self._search_regex(
            r"data-opts='({.+?})'", webpage, 'video data')
        video_data = self._parse_json(opts_json, video_id)
        title = video_data['title']

        # Use the sources URL from the player options when there is one,
        # otherwise fall back to the per-video API endpoint.
        sources_url = video_data.get('sources', {}).get('url')
        if not sources_url:
            sources_url = 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id
        video_sources = self._download_json(sources_url, video_id)

        formats = []
        for rendition in video_sources['renditions']:
            rendition_url = rendition.get('url')
            if not rendition_url:
                # Renditions without a URL cannot be downloaded.
                continue
            tbr = int_or_none(rendition.get('encodingRate'), 1000)
            container = rendition.get('videoContainer')
            # Renditions in an 'M2TS' container are treated as HLS streams.
            if container == 'M2TS':
                protocol = 'm3u8_native'
                id_prefix = 'hls'
                ext = 'mp4'
            else:
                protocol = determine_protocol({'url': rendition_url})
                id_prefix = protocol
                ext = None
            bitrate_suffix = '-%d' % tbr if tbr else ''
            formats.append({
                'format_id': id_prefix + bitrate_suffix,
                'url': rendition_url,
                'width': int_or_none(rendition.get('frameWidth')),
                'height': int_or_none(rendition.get('frameHeight')),
                'tbr': tbr,
                'vcodec': rendition.get('videoCodec'),
                'container': container,
                'protocol': protocol,
                'ext': ext,
            })
        self._sort_formats(formats)

        thumbnail = video_data.get('poster') or video_data.get('thumbnail')
        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('descr'),
            'thumbnail': thumbnail,
            'formats': formats,
        }

View File

@ -71,7 +71,7 @@ class DiscoveryIE(InfoExtractor):
entries = []
for idx, video_info in enumerate(info['playlist']):
subtitles = []
subtitles = {}
caption_url = video_info.get('captionsUrl')
if caption_url:
subtitles = {

View File

@ -75,6 +75,7 @@ from .bigflix import BigflixIE
from .bild import BildIE
from .bilibili import BiliBiliIE
from .biobiochiletv import BioBioChileTVIE
from .biqle import BIQLEIE
from .bleacherreport import (
BleacherReportIE,
BleacherReportCMSIE,
@ -157,6 +158,7 @@ from .cspan import CSpanIE
from .ctsnews import CtsNewsIE
from .cultureunplugged import CultureUnpluggedIE
from .cwtv import CWTVIE
from .dailymail import DailyMailIE
from .dailymotion import (
DailymotionIE,
DailymotionPlaylistIE,
@ -382,6 +384,7 @@ from .limelight import (
LimelightChannelIE,
LimelightChannelListIE,
)
from .litv import LiTVIE
from .liveleak import LiveLeakIE
from .livestream import (
LivestreamIE,
@ -406,6 +409,10 @@ from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
from .mgoon import MgoonIE
from .mgtv import MGTVIE
from .microsoftvirtualacademy import (
MicrosoftVirtualAcademyIE,
MicrosoftVirtualAcademyCourseIE,
)
from .minhateca import MinhatecaIE
from .ministrygrid import MinistryGridIE
from .minoto import MinotoIE
@ -560,7 +567,10 @@ from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
from .pbs import PBSIE
from .people import PeopleIE
from .periscope import PeriscopeIE
from .periscope import (
PeriscopeIE,
PeriscopeUserIE,
)
from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
@ -724,7 +734,10 @@ from .svt import (
from .swrmediathek import SWRMediathekIE
from .syfy import SyfyIE
from .sztvhu import SztvHuIE
from .tagesschau import TagesschauIE
from .tagesschau import (
TagesschauPlayerIE,
TagesschauIE,
)
from .tapely import TapelyIE
from .tass import TassIE
from .tdslifeway import TDSLifewayIE
@ -846,7 +859,10 @@ from .veehd import VeeHDIE
from .veoh import VeohIE
from .vessel import VesselIE
from .vesti import VestiIE
from .vevo import VevoIE
from .vevo import (
VevoIE,
VevoPlaylistIE,
)
from .vgtv import (
BTArticleIE,
BTVestlendingenIE,

View File

@ -1,20 +1,19 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
class FczenitIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?fc-zenit\.ru/video/gl(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:www\.)?fc-zenit\.ru/video/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://fc-zenit.ru/video/gl6785/',
'md5': '458bacc24549173fe5a5aa29174a5606',
'url': 'http://fc-zenit.ru/video/41044/',
'md5': '0e3fab421b455e970fa1aa3891e57df0',
'info_dict': {
'id': '6785',
'id': '41044',
'ext': 'mp4',
'title': '«Зенит-ТВ»: как Олег Шатов играл против «Урала»',
'title': 'Так пишется история: казанский разгром ЦСКА на «Зенит-ТВ»',
},
}
@ -22,15 +21,23 @@ class FczenitIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_title = self._html_search_regex(r'<div class=\"photoalbum__title\">([^<]+)', webpage, 'title')
video_title = self._html_search_regex(
r'<[^>]+class=\"photoalbum__title\">([^<]+)', webpage, 'title')
bitrates_raw = self._html_search_regex(r'bitrates:.*\n(.*)\]', webpage, 'video URL')
bitrates = re.findall(r'url:.?\'(.+?)\'.*?bitrate:.?([0-9]{3}?)', bitrates_raw)
video_items = self._parse_json(self._search_regex(
r'arrPath\s*=\s*JSON\.parse\(\'(.+)\'\)', webpage, 'video items'),
video_id)
def merge_dicts(*dicts):
ret = {}
for a_dict in dicts:
ret.update(a_dict)
return ret
formats = [{
'url': furl,
'tbr': tbr,
} for furl, tbr in bitrates]
'url': compat_urlparse.urljoin(url, video_url),
'tbr': int(tbr),
} for tbr, video_url in merge_dicts(*video_items).items()]
self._sort_formats(formats)

View File

@ -24,13 +24,28 @@ class FlickrIE(InfoExtractor):
'upload_date': '20110423',
'uploader_id': '10922353@N03',
'uploader': 'Forest Wander',
'uploader_url': 'https://www.flickr.com/photos/forestwander-nature-pictures/',
'comment_count': int,
'view_count': int,
'tags': list,
'license': 'Attribution-ShareAlike',
}
}
_API_BASE_URL = 'https://api.flickr.com/services/rest?'
# https://help.yahoo.com/kb/flickr/SLN25525.html
_LICENSES = {
'0': 'All Rights Reserved',
'1': 'Attribution-NonCommercial-ShareAlike',
'2': 'Attribution-NonCommercial',
'3': 'Attribution-NonCommercial-NoDerivs',
'4': 'Attribution',
'5': 'Attribution-ShareAlike',
'6': 'Attribution-NoDerivs',
'7': 'No known copyright restrictions',
'8': 'United States government work',
'9': 'Public Domain Dedication (CC0)',
'10': 'Public Domain Work',
}
def _call_api(self, method, video_id, api_key, note, secret=None):
query = {
@ -75,6 +90,9 @@ class FlickrIE(InfoExtractor):
self._sort_formats(formats)
owner = video_info.get('owner', {})
uploader_id = owner.get('nsid')
uploader_path = owner.get('path_alias') or uploader_id
uploader_url = 'https://www.flickr.com/photos/%s/' % uploader_path if uploader_path else None
return {
'id': video_id,
@ -83,11 +101,13 @@ class FlickrIE(InfoExtractor):
'formats': formats,
'timestamp': int_or_none(video_info.get('dateuploaded')),
'duration': int_or_none(video_info.get('video', {}).get('duration')),
'uploader_id': owner.get('nsid'),
'uploader_id': uploader_id,
'uploader': owner.get('realname'),
'uploader_url': uploader_url,
'comment_count': int_or_none(video_info.get('comments', {}).get('_content')),
'view_count': int_or_none(video_info.get('views')),
'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])]
'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])],
'license': self._LICENSES.get(video_info.get('license')),
}
else:
raise ExtractorError('not a video', expected=True)

View File

@ -283,6 +283,8 @@ class KuwoCategoryIE(InfoExtractor):
category_desc = remove_start(
get_element_by_id('intro', webpage).strip(),
'%s简介:' % category_name)
if category_desc == '暂无':
category_desc = None
jsonm = self._parse_json(self._html_search_regex(
r'var\s+jsonm\s*=\s*([^;]+);', webpage, 'category songs'), category_id)

View File

@ -0,0 +1,137 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
smuggle_url,
unsmuggle_url,
)
class LiTVIE(InfoExtractor):
    """Information extractor for LiTV VOD pages (``www.litv.tv/vod/...``).

    When the page embeds a season list and ``noplaylist`` is not set, the
    result is a playlist with one entry per episode; otherwise the single
    video named by the ``id`` query parameter is extracted.
    """

    _VALID_URL = r'https?://www\.litv\.tv/vod/[^/]+/content\.do\?.*?\bid=(?P<id>[^&]+)'

    # Filled with (content type, content id) to build an episode page URL.
    _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s'

    _TESTS = [{
        'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
        'info_dict': {
            'id': 'VOD00041606',
            'title': '花千骨',
        },
        'playlist_count': 50,
    }, {
        'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
        'info_dict': {
            'id': 'VOD00041610',
            'ext': 'mp4',
            'title': '花千骨第1集',
            'thumbnail': r're:https?://.*\.jpg$',
            'description': 'md5:c7017aa144c87467c4fb2909c4b05d6f',
            'episode_number': 1,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,  # m3u8 download
        },
        'skip': 'Georestricted to Taiwan',
    }]

    def _extract_playlist(self, season_list, video_id, vod_data, view_data, prompt=True):
        """Build a playlist result with one URL entry per episode.

        season_list -- one season dict; its 'contentId' becomes the playlist
                       id and its 'episode' list supplies the entries
        video_id    -- id of the requested video (only used in the prompt)
        vod_data    -- unused here; kept for interface compatibility
        view_data   -- page variables; 'title' and 'contentType' are read
        prompt      -- whether to print the --no-playlist hint
        """
        episode_title = view_data['title']
        content_id = season_list['contentId']

        if prompt:
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (content_id, video_id))

        all_episodes = [
            self.url_result(smuggle_url(
                self._URL_TEMPLATE % (view_data['contentType'], episode['contentId']),
                {'force_noplaylist': True}))  # To prevent infinite recursion
            for episode in season_list['episode']]

        return self.playlist_result(all_episodes, content_id, episode_title)

    def _real_extract(self, url):
        """Extract either the episode playlist or the single video at *url*.

        Raises ExtractorError when the site reports an error or returns no
        stream URL; geo restriction is reported via raise_geo_restricted().
        """
        url, data = unsmuggle_url(url, {})

        video_id = self._match_id(url)

        noplaylist = self._downloader.params.get('noplaylist')
        noplaylist_prompt = True
        # Episode URLs produced by _extract_playlist carry this flag so that
        # they are extracted as single videos, silently.
        if 'force_noplaylist' in data:
            noplaylist = data['force_noplaylist']
            noplaylist_prompt = False

        webpage = self._download_webpage(url, video_id)

        # Collect `viewData.<name> = "<value>"` assignments from the page.
        view_data = dict(
            (name, value) for name, _, value in re.findall(
                r'viewData\.([a-zA-Z]+)\s*=\s*(["\'])([^"\']+)\2',
                webpage))

        vod_data = self._parse_json(self._search_regex(
            r'var\s+vod\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
            video_id)

        season_list = list(vod_data.get('seasonList', {}).values())
        if season_list:
            if not noplaylist:
                return self._extract_playlist(
                    season_list[0], video_id, vod_data, view_data,
                    prompt=noplaylist_prompt)

            if noplaylist_prompt:
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

        # In browsers `getMainUrl` request is always issued. Usually this
        # endpoint gives the same result as the data embedded in the webpage.
        # If georestricted, there are no embedded data, so an extra request is
        # necessary to get the error code
        video_data = self._parse_json(self._search_regex(
            r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
            webpage, 'video data', default='{}'), video_id)
        if not video_data:
            payload = {
                'assetId': view_data['assetId'],
                'watchDevices': vod_data['watchDevices'],
                'contentType': view_data['contentType'],
            }
            video_data = self._download_json(
                'https://www.litv.tv/vod/getMainUrl', video_id,
                data=json.dumps(payload).encode('utf-8'),
                headers={'Content-Type': 'application/json'})

        if not video_data.get('fullpath'):
            error_msg = video_data.get('errorMessage')
            if error_msg == 'vod.error.outsideregionerror':
                self.raise_geo_restricted('This video is available in Taiwan only')
            if error_msg:
                raise ExtractorError('%s said: %s' % (self.IE_NAME, error_msg), expected=True)
            raise ExtractorError('Unexpected result from %s' % self.IE_NAME)

        formats = self._extract_m3u8_formats(
            video_data['fullpath'], video_id, ext='mp4', m3u8_id='hls')
        for a_format in formats:
            # LiTV HLS segments do not like compression
            a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = True

        title = view_data['title'] + view_data.get('secondaryMark', '')
        description = view_data.get('description')
        thumbnail = view_data.get('imageFile')
        categories = [item['name'] for item in vod_data.get('category', [])]
        episode = int_or_none(view_data.get('episode'))

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'categories': categories,
            'episode_number': episode,
        }

View File

@ -17,7 +17,8 @@ class LiveLeakIE(InfoExtractor):
'ext': 'flv',
'description': 'extremely bad day for this guy..!',
'uploader': 'ljfriel2',
'title': 'Most unlucky car accident'
'title': 'Most unlucky car accident',
'thumbnail': 're:^https?://.*\.jpg$'
}
}, {
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
@ -28,6 +29,7 @@ class LiveLeakIE(InfoExtractor):
'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
'uploader': 'ARD_Stinkt',
'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
'thumbnail': 're:^https?://.*\.jpg$'
}
}, {
'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
@ -49,7 +51,8 @@ class LiveLeakIE(InfoExtractor):
'ext': 'mp4',
'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.',
'uploader': 'bony333',
'title': 'Crazy Hungarian tourist films close call waterspout in Croatia'
'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
'thumbnail': 're:^https?://.*\.jpg$'
}
}]
@ -72,6 +75,7 @@ class LiveLeakIE(InfoExtractor):
age_limit = int_or_none(self._search_regex(
r'you confirm that you are ([0-9]+) years and over.',
webpage, 'age limit', default=None))
video_thumbnail = self._og_search_thumbnail(webpage)
sources_raw = self._search_regex(
r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None)
@ -124,4 +128,5 @@ class LiveLeakIE(InfoExtractor):
'uploader': video_uploader,
'formats': formats,
'age_limit': age_limit,
'thumbnail': video_thumbnail,
}

View File

@ -0,0 +1,192 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_xpath,
)
from ..utils import (
int_or_none,
parse_duration,
smuggle_url,
unsmuggle_url,
xpath_text,
)
class MicrosoftVirtualAcademyBaseIE(InfoExtractor):
    # Helpers shared by the Microsoft Virtual Academy extractors.

    def _extract_base_url(self, course_id, display_id):
        # Ask the products API for the JSON value describing where the
        # content of `course_id` lives; callers use it as a URL prefix.
        api_url = 'https://api-mlxprod.microsoft.com/services/products/anonymous/%s' % course_id
        return self._download_json(
            api_url, display_id, 'Downloading course base URL')

    def _extract_chapter_and_title(self, title):
        # Split a "<number> | <title>" string into (int chapter, title).
        # Returns (None, None) for an empty title and (None, title) when
        # the string carries no chapter prefix.
        if not title:
            return None, None
        mobj = re.search(r'(?P<chapter>\d+)\s*\|\s*(?P<title>.+)', title)
        if not mobj:
            return None, title
        return int(mobj.group('chapter')), mobj.group('title')
class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
    # Single-video extractor. Accepts site URLs carrying `?l=<video id>` as
    # well as the internal `mva:<course id>:<video id>` form.
    IE_NAME = 'mva'
    IE_DESC = 'Microsoft Virtual Academy videos'

    _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)' % IE_NAME

    _TESTS = [{
        'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788?l=gfVXISmEB_6804984382',
        'md5': '7826c44fc31678b12ad8db11f6b5abb9',
        'info_dict': {
            'id': 'gfVXISmEB_6804984382',
            'ext': 'mp4',
            'title': 'Course Introduction',
            'formats': 'mincount:3',
            'subtitles': {
                'en': [{
                    'ext': 'ttml',
                }],
            },
        }
    }, {
        'url': 'mva:11788:gfVXISmEB_6804984382',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})

        match = re.match(self._VALID_URL, url)
        course_id = match.group('course_id')
        video_id = match.group('id')

        # A smuggled base URL saves one API round trip per video.
        base_url = smuggled_data.get('base_url')
        if not base_url:
            base_url = self._extract_base_url(course_id, video_id)

        settings_url = '%s/content/content_%s/videosettings.xml?v=1' % (base_url, video_id)
        settings = self._download_xml(
            settings_url, video_id, 'Downloading video settings XML')

        raw_title = xpath_text(settings, './/Title', 'title', fatal=True)
        title = self._extract_chapter_and_title(raw_title)[1]

        formats = []
        for media_sources in settings.findall(compat_xpath('.//MediaSources')):
            # Smooth streaming sources are skipped altogether.
            if sources_is_smooth(media_sources):
                continue
            for media_source in media_sources.findall(compat_xpath('./MediaSource')):
                video_url = media_source.text
                if not video_url or not video_url.startswith('http'):
                    continue

                video_mode = media_source.get('videoMode')
                height = int_or_none(self._search_regex(
                    r'^(\d+)[pP]$', video_mode or '', 'height', default=None))

                # `codec` is either "<audio>,<video>" or a lone video codec.
                acodec = vcodec = None
                codec = media_source.get('codec')
                codecs = codec.split(',') if codec else []
                if len(codecs) == 2:
                    acodec, vcodec = codecs
                elif len(codecs) == 1:
                    vcodec = codecs[0]

                formats.append({
                    'url': video_url,
                    'format_id': video_mode,
                    'height': height,
                    'acodec': acodec,
                    'vcodec': vcodec,
                })
        self._sort_formats(formats)

        # Every marker resource with a path is reported as an 'en' subtitle.
        en_subtitles = []
        for marker in settings.findall(compat_xpath('.//MarkerResourceSource')):
            subtitle_path = marker.text
            if subtitle_path:
                en_subtitles.append({
                    'url': '%s/%s' % (base_url, subtitle_path),
                    'ext': marker.get('type'),
                })
        subtitles = {'en': en_subtitles} if en_subtitles else {}

        return {
            'id': video_id,
            'title': title,
            'subtitles': subtitles,
            'formats': formats
        }


def sources_is_smooth(media_sources):
    # True when a <MediaSources> element is flagged as smooth streaming.
    return media_sources.get('videoType') == 'smoothstreaming'
class MicrosoftVirtualAcademyCourseIE(MicrosoftVirtualAcademyBaseIE):
    # Course (playlist) extractor: emits one url_transparent entry per video
    # item of the course manifest.
    IE_NAME = 'mva:course'
    IE_DESC = 'Microsoft Virtual Academy courses'

    _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)' % IE_NAME

    _TESTS = [{
        'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
        'info_dict': {
            'id': '11788',
            'title': 'Microsoft Azure Fundamentals: Virtual Machines',
        },
        'playlist_count': 36,
    }, {
        # with emphasized chapters
        'url': 'https://mva.microsoft.com/en-US/training-courses/developing-windows-10-games-with-construct-2-16335',
        'info_dict': {
            'id': '16335',
            'title': 'Developing Windows 10 Games with Construct 2',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.microsoftvirtualacademy.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
        'only_matching': True,
    }, {
        'url': 'mva:course:11788',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # URLs the single-video extractor accepts are never handled here.
        if MicrosoftVirtualAcademyIE.suitable(url):
            return False
        return super(MicrosoftVirtualAcademyCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        match = re.match(self._VALID_URL, url)
        course_id = match.group('id')
        display_id = match.group('display_id')

        base_url = self._extract_base_url(course_id, display_id)

        manifest_json = self._download_json(
            '%s/imsmanifestlite.json' % base_url,
            display_id, 'Downloading course manifest JSON')
        manifest = manifest_json['manifest']

        # Only the first organization of the manifest is walked.
        organization = manifest['organizations']['organization'][0]

        entries = []
        for chapter in organization['item']:
            chapter_number, chapter_title = self._extract_chapter_and_title(
                chapter.get('title'))
            chapter_id = chapter.get('@identifier')

            for lesson in chapter.get('item', []):
                lesson_id = lesson.get('@identifier')
                if not lesson_id:
                    continue
                metadata = lesson.get('resource', {}).get('metadata') or {}
                # Non-video learning resources are ignored.
                if metadata.get('learningresourcetype') != 'Video':
                    continue

                lesson_title = self._extract_chapter_and_title(lesson.get('title'))[1]
                duration = parse_duration(metadata.get('duration'))
                description = metadata.get('description')

                # The base URL is smuggled along so that the video extractor
                # can reuse it.
                entry = {
                    '_type': 'url_transparent',
                    'url': smuggle_url(
                        'mva:%s:%s' % (course_id, lesson_id), {'base_url': base_url}),
                    'title': lesson_title,
                    'description': description,
                    'duration': duration,
                    'chapter': chapter_title,
                    'chapter_number': chapter_number,
                    'chapter_id': chapter_id,
                }
                entries.append(entry)

        course_title = organization.get('title')
        if not course_title:
            course_title = manifest.get('metadata', {}).get('title')

        return self.playlist_result(entries, course_id, course_title)

View File

@ -7,6 +7,7 @@ from ..utils import parse_iso8601
class PeriscopeIE(InfoExtractor):
IE_DESC = 'Periscope'
IE_NAME = 'periscope'
_VALID_URL = r'https?://(?:www\.)?periscope\.tv/[^/]+/(?P<id>[^/?#]+)'
# Alive example URLs can be found here http://onperiscope.com/
_TESTS = [{
@ -79,3 +80,39 @@ class PeriscopeIE(InfoExtractor):
'thumbnails': thumbnails,
'formats': formats,
}
class PeriscopeUserIE(InfoExtractor):
    # Playlist extractor for a Periscope user page: every broadcast listed
    # in the page's `user-broadcasts` meta tag becomes one URL entry.
    _VALID_URL = r'https?://www\.periscope\.tv/(?P<id>[^/]+)/?$'
    IE_DESC = 'Periscope user videos'
    IE_NAME = 'periscope:user'

    _TEST = {
        'url': 'https://www.periscope.tv/LularoeHusbandMike/',
        'info_dict': {
            'id': 'LularoeHusbandMike',
            'title': 'LULAROE HUSBAND MIKE',
        },
        # Periscope only shows videos in the last 24 hours, so it's possible to
        # get 0 videos
        'playlist_mincount': 0,
    }

    def _real_extract(self, url):
        user_id = self._match_id(url)

        webpage = self._download_webpage(url, user_id)

        # Both meta tags hold JSON; a missing tag is read as an empty object.
        broadcast_meta = self._html_search_meta(
            'broadcast-data', webpage, default='{}')
        broadcast_data = self._parse_json(broadcast_meta, user_id)
        username = broadcast_data.get('user', {}).get('display_name')

        user_broadcasts_meta = self._html_search_meta(
            'user-broadcasts', webpage, default='{}')
        user_broadcasts = self._parse_json(user_broadcasts_meta, user_id)

        entries = []
        for broadcast in user_broadcasts.get('broadcasts', []):
            broadcast_url = 'https://www.periscope.tv/%s/%s' % (user_id, broadcast['id'])
            entries.append(self.url_result(broadcast_url))

        return self.playlist_result(entries, user_id, username)

View File

@ -1,7 +1,12 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import ExtractorError
from ..utils import (
ExtractorError,
int_or_none,
str_to_int,
unified_strdate,
)
class RedTubeIE(InfoExtractor):
@ -13,6 +18,9 @@ class RedTubeIE(InfoExtractor):
'id': '66418',
'ext': 'mp4',
'title': 'Sucked on a toilet',
'upload_date': '20120831',
'duration': 596,
'view_count': int,
'age_limit': 18,
}
}
@ -24,12 +32,39 @@ class RedTubeIE(InfoExtractor):
if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):
raise ExtractorError('Video %s has been removed' % video_id, expected=True)
title = self._html_search_regex(
(r'<h1 class="videoTitle[^"]*">(?P<title>.+?)</h1>',
r'videoTitle\s*:\s*(["\'])(?P<title>)\1'),
webpage, 'title', group='title')
formats = []
sources = self._parse_json(
self._search_regex(
r'sources\s*:\s*({.+?})', webpage, 'source', default='{}'),
video_id, fatal=False)
if sources and isinstance(sources, dict):
for format_id, format_url in sources.items():
if format_url:
formats.append({
'url': format_url,
'format_id': format_id,
'height': int_or_none(format_id),
})
else:
video_url = self._html_search_regex(
r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
video_title = self._html_search_regex(
r'<h1 class="videoTitle[^"]*">(.+?)</h1>',
webpage, 'title')
video_thumbnail = self._og_search_thumbnail(webpage)
formats.append({'url': video_url})
self._sort_formats(formats)
thumbnail = self._og_search_thumbnail(webpage)
upload_date = unified_strdate(self._search_regex(
r'<span[^>]+class="added-time"[^>]*>ADDED ([^<]+)<',
webpage, 'upload date', fatal=False))
duration = int_or_none(self._search_regex(
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
view_count = str_to_int(self._search_regex(
r'<span[^>]*>VIEWS</span></td>\s*<td>([\d,.]+)',
webpage, 'view count', fatal=False))
# No self-labeling, but they describe themselves as
# "Home of Videos Porno"
@ -37,9 +72,12 @@ class RedTubeIE(InfoExtractor):
return {
'id': video_id,
'url': video_url,
'ext': 'mp4',
'title': video_title,
'thumbnail': video_thumbnail,
'title': title,
'thumbnail': thumbnail,
'upload_date': upload_date,
'duration': duration,
'view_count': view_count,
'age_limit': age_limit,
'formats': formats,
}

View File

@ -4,42 +4,178 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import parse_filesize
from ..utils import (
determine_ext,
js_to_json,
parse_iso8601,
parse_filesize,
)
class TagesschauPlayerIE(InfoExtractor):
    """Information extractor for tagesschau.de ``~player`` pages.

    Handles both ``audio-<id>~player*.html`` and ``video-<id>~player*.html``
    URLs; formats are scraped from the media objects embedded in the page.
    """

    IE_NAME = 'tagesschau:player'
    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?P<kind>audio|video)/(?P=kind)-(?P<id>\d+)~player(?:_[^/?#&]+)?\.html'

    _TESTS = [{
        'url': 'http://www.tagesschau.de/multimedia/video/video-179517~player.html',
        'md5': '8d09548d5c15debad38bee3a4d15ca21',
        'info_dict': {
            'id': '179517',
            'ext': 'mp4',
            'title': 'Marie Kristin Boese, ARD Berlin, über den zukünftigen Kurs der AfD',
            'thumbnail': r're:^https?:.*\.jpg$',
            'formats': 'mincount:6',
        },
    }, {
        'url': 'https://www.tagesschau.de/multimedia/audio/audio-29417~player.html',
        'md5': '76e6eec6ebd40740671cf0a2c88617e5',
        'info_dict': {
            'id': '29417',
            'ext': 'mp3',
            'title': 'Trabi - Bye, bye Rennpappe',
            'thumbnail': r're:^https?:.*\.jpg$',
            'formats': 'mincount:2',
        },
    }, {
        'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417~player_autoplay-true.html',
        'only_matching': True,
    }]

    # Extra format fields keyed by the `quality` value of a media object.
    _FORMATS = {
        'xs': {'quality': 0},
        's': {'width': 320, 'height': 180, 'quality': 1},
        'm': {'width': 512, 'height': 288, 'quality': 2},
        'l': {'width': 960, 'height': 540, 'quality': 3},
        'xl': {'width': 1280, 'height': 720, 'quality': 4},
        'xxl': {'quality': 5},
    }

    def _extract_via_api(self, kind, video_id):
        """Extract title, timestamp and formats from the JSON API.

        kind     -- 'audio' or 'video'; selects the API path and marks audio
                    formats as having no video codec
        video_id -- numeric id of the media item

        Not called by _real_extract at present (see the note there).
        """
        info = self._download_json(
            'https://www.tagesschau.de/api/multimedia/{0}/{0}-{1}.json'.format(kind, video_id),
            video_id)
        title = info['headline']
        formats = []
        for media in info['mediadata']:
            for format_id, format_url in media.items():
                if determine_ext(format_url) == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, video_id, 'mp4',
                        entry_protocol='m3u8_native', m3u8_id='hls'))
                else:
                    formats.append({
                        'url': format_url,
                        'format_id': format_id,
                        'vcodec': 'none' if kind == 'audio' else None,
                    })
        self._sort_formats(formats)
        timestamp = parse_iso8601(info.get('date'))
        return {
            'id': video_id,
            'title': title,
            'timestamp': timestamp,
            'formats': formats,
        }

    def _real_extract(self, url):
        """Scrape title, thumbnail and formats from the player page."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # kind = mobj.group('kind').lower()
        # if kind == 'video':
        #     return self._extract_via_api(kind, video_id)

        # JSON api does not provide some audio formats (e.g. ogg) thus
        # extracting audio via webpage

        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage).strip()
        formats = []

        for media_json in re.findall(r'({src\s*:\s*["\']http[^}]+type\s*:[^}]+})', webpage):
            media = self._parse_json(js_to_json(media_json), video_id, fatal=False)
            if not media:
                continue
            src = media.get('src')
            if not src:
                # Skip only this entry; a bare `return` here would abort the
                # whole extraction and hand back None.
                continue
            quality = media.get('quality')
            kind = media.get('type', '').split('/')[0]
            ext = determine_ext(src)
            f = {
                'url': src,
                'format_id': '%s_%s' % (quality, ext) if quality else ext,
                'ext': ext,
                'vcodec': 'none' if kind == 'audio' else None,
            }
            f.update(self._FORMATS.get(quality, {}))
            formats.append(f)

        self._sort_formats(formats)

        thumbnail = self._og_search_thumbnail(webpage)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }
class TagesschauIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:[^/]+/)*?[^/#?]+?(?P<id>-?[0-9]+)(?:~_?[^/#?]+?)?\.html'
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/(?P<path>[^/]+/(?:[^/]+/)*?(?P<id>[^/#?]+?(?:-?[0-9]+)?))(?:~_?[^/#?]+?)?\.html'
_TESTS = [{
'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
'md5': '917a228bc7df7850783bc47979673a09',
'md5': 'f7c27a0eff3bfe8c7727e65f8fe1b1e6',
'info_dict': {
'id': '102143',
'id': 'video-102143',
'ext': 'mp4',
'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
'description': 'md5:171feccd9d9b3dd54d05d501568f6359',
'description': '18.07.2015 20:10 Uhr',
'thumbnail': 're:^https?:.*\.jpg$',
},
}, {
'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
'md5': '3c54c1f6243d279b706bde660ceec633',
'info_dict': {
'id': '5727',
'id': 'ts-5727',
'ext': 'mp4',
'description': 'md5:695c01bfd98b7e313c501386327aea59',
'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr',
'description': 'md5:695c01bfd98b7e313c501386327aea59',
'thumbnail': 're:^https?:.*\.jpg$',
},
}, {
'url': 'http://www.tagesschau.de/multimedia/politikimradio/audio-18407.html',
'md5': 'aef45de271c4bf0a5db834aa40bf774c',
# exclusive audio
'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417.html',
'md5': '76e6eec6ebd40740671cf0a2c88617e5',
'info_dict': {
'id': '18407',
'id': 'audio-29417',
'ext': 'mp3',
'title': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich',
'description': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich',
'title': 'Trabi - Bye, bye Rennpappe',
'description': 'md5:8687dda862cbbe2cfb2df09b56341317',
'thumbnail': 're:^https?:.*\.jpg$',
},
}, {
# audio in article
'url': 'http://www.tagesschau.de/inland/bnd-303.html',
'md5': 'e0916c623e85fc1d2b26b78f299d3958',
'info_dict': {
'id': 'bnd-303',
'ext': 'mp3',
'title': 'Viele Baustellen für neuen BND-Chef',
'description': 'md5:1e69a54be3e1255b2b07cdbce5bcd8b4',
'thumbnail': 're:^https?:.*\.jpg$',
},
}, {
'url': 'http://www.tagesschau.de/inland/afd-parteitag-135.html',
'info_dict': {
'id': 'afd-parteitag-135',
'title': 'Möchtegern-Underdog mit Machtanspruch',
},
'playlist_count': 2,
}, {
'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
'only_matching': True,
@ -62,68 +198,38 @@ class TagesschauIE(InfoExtractor):
'url': 'http://www.tagesschau.de/multimedia/video/video-102303~_bab-sendung-211.html',
'only_matching': True,
}, {
'url': 'http://www.tagesschau.de/multimedia/video/video-179517~player.html',
'url': 'http://www.tagesschau.de/100sekunden/index.html',
'only_matching': True,
}, {
# playlist article with collapsing sections
'url': 'http://www.tagesschau.de/wirtschaft/faq-freihandelszone-eu-usa-101.html',
'only_matching': True,
}]
_FORMATS = {
'xs': {'quality': 0},
's': {'width': 320, 'height': 180, 'quality': 1},
'm': {'width': 512, 'height': 288, 'quality': 2},
'l': {'width': 960, 'height': 540, 'quality': 3},
'xl': {'width': 1280, 'height': 720, 'quality': 4},
'xxl': {'quality': 5},
}
@classmethod
def suitable(cls, url):
return False if TagesschauPlayerIE.suitable(url) else super(TagesschauIE, cls).suitable(url)
def _real_extract(self, url):
video_id = self._match_id(url)
display_id = video_id.lstrip('-')
webpage = self._download_webpage(url, display_id)
player_url = self._html_search_meta(
'twitter:player', webpage, 'player URL', default=None)
if player_url:
playerpage = self._download_webpage(
player_url, display_id, 'Downloading player page')
formats = []
for media in re.finditer(
r'''(?x)
(?P<q_url>["\'])(?P<url>http://media.+?)(?P=q_url)
,\s*type:(?P<q_type>["\'])(?P<type>video|audio)/(?P<ext>.+?)(?P=q_type)
(?:,\s*quality:(?P<q_quality>["\'])(?P<quality>.+?)(?P=q_quality))?
''', playerpage):
url = media.group('url')
type_ = media.group('type')
ext = media.group('ext')
res = media.group('quality')
f = {
'format_id': '%s_%s' % (res, ext) if res else ext,
'url': url,
'ext': ext,
'vcodec': 'none' if type_ == 'audio' else None,
}
f.update(self._FORMATS.get(res, {}))
formats.append(f)
thumbnail = self._og_search_thumbnail(playerpage)
title = self._og_search_title(webpage).strip()
description = self._og_search_description(webpage).strip()
else:
download_text = self._search_regex(
r'(?s)<p>Wir bieten dieses Video in folgenden Formaten zum Download an:</p>\s*<div class="controls">(.*?)</div>\s*<p>',
webpage, 'download links')
def _extract_formats(self, download_text, media_kind):
links = re.finditer(
r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>',
download_text)
formats = []
for l in links:
link_url = l.group('url')
if not link_url:
continue
format_id = self._search_regex(
r'.*/[^/.]+\.([^/]+)\.[^/.]+', l.group('url'), 'format ID')
r'.*/[^/.]+\.([^/]+)\.[^/.]+$', link_url, 'format ID',
default=determine_ext(link_url))
format = {
'format_id': format_id,
'url': l.group('url'),
'format_name': l.group('name'),
}
title = l.group('title')
if title:
if media_kind.lower() == 'video':
m = re.match(
r'''(?x)
Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
@ -131,7 +237,7 @@ class TagesschauIE(InfoExtractor):
(?P<vbr>[0-9]+)kbps&\#10;
Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
Gr&ouml;&szlig;e:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
l.group('title'))
title)
if m:
format.update({
'format_note': m.group('audio_desc'),
@ -142,13 +248,57 @@ class TagesschauIE(InfoExtractor):
'vbr': int(m.group('vbr')),
'filesize_approx': parse_filesize(m.group('filesize_approx')),
})
else:
m = re.match(
r'(?P<format>.+?)-Format\s*:\s*(?P<abr>\d+)kbps\s*,\s*(?P<note>.+)',
title)
if m:
format.update({
'format_note': '%s, %s' % (m.group('format'), m.group('note')),
'vcodec': 'none',
'abr': int(m.group('abr')),
})
formats.append(format)
self._sort_formats(formats)
return formats
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') or mobj.group('path')
display_id = video_id.lstrip('-')
webpage = self._download_webpage(url, display_id)
title = self._html_search_regex(
r'<span[^>]*class="headline"[^>]*>(.+?)</span>',
webpage, 'title', default=None) or self._og_search_title(webpage)
DOWNLOAD_REGEX = r'(?s)<p>Wir bieten dieses (?P<kind>Video|Audio) in folgenden Formaten zum Download an:</p>\s*<div class="controls">(?P<links>.*?)</div>\s*<p>'
webpage_type = self._og_search_property('type', webpage, default=None)
if webpage_type == 'website': # Article
entries = []
for num, (entry_title, media_kind, download_text) in enumerate(re.findall(
r'(?s)<p[^>]+class="infotext"[^>]*>\s*(?:<a[^>]+>)?\s*<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX,
webpage), 1):
entries.append({
'id': '%s-%d' % (display_id, num),
'title': '%s' % entry_title,
'formats': self._extract_formats(download_text, media_kind),
})
if len(entries) > 1:
return self.playlist_result(entries, display_id, title)
formats = entries[0]['formats']
else: # Assume single video
download_text = self._search_regex(
DOWNLOAD_REGEX, webpage, 'download links', group='links')
media_kind = self._search_regex(
DOWNLOAD_REGEX, webpage, 'media kind', default='Video', group='kind')
formats = self._extract_formats(download_text, media_kind)
thumbnail = self._og_search_thumbnail(webpage)
description = self._html_search_regex(
r'(?s)<p class="teasertext">(.*?)</p>',
webpage, 'description', default=None)
title = self._html_search_regex(
r'<span class="headline".*?>(.*?)</span>', webpage, 'title')
self._sort_formats(formats)

View File

@ -2,14 +2,16 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import remove_end
from ..utils import (
determine_ext,
remove_end,
)
class TelegraafIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/tv/(?:[^/]+/)+(?P<id>\d+)/[^/]+\.html'
_TEST = {
'url': 'http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html',
'md5': '83245a9779bcc4a24454bfd53c65b6dc',
'info_dict': {
'id': '24353229',
'ext': 'mp4',
@ -18,18 +20,60 @@ class TelegraafIE(InfoExtractor):
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 33,
},
'params': {
# m3u8 download
'skip_download': True,
},
}
def _real_extract(self, url):
playlist_id = self._match_id(url)
video_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
webpage = self._download_webpage(url, video_id)
player_url = self._html_search_regex(
r'<iframe[^>]+src="([^"]+")', webpage, 'player URL')
player_page = self._download_webpage(
player_url, video_id, note='Download player webpage')
playlist_url = self._search_regex(
r"iframe\.loadPlayer\('([^']+)'", webpage, 'player')
r'playlist\s*:\s*"([^"]+)"', player_page, 'playlist URL')
playlist_data = self._download_json(playlist_url, video_id)
item = playlist_data['items'][0]
formats = []
locations = item['locations']
for location in locations.get('adaptive', []):
manifest_url = location['src']
ext = determine_ext(manifest_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
manifest_url, video_id, ext='mp4', m3u8_id='hls'))
elif ext == 'mpd':
# TODO: Current DASH formats are broken - $Time$ pattern in
# <SegmentTemplate> not implemented yet
continue
else:
self.report_warning('Unknown adaptive format %s' % ext)
for location in locations.get('progressive', []):
formats.append({
'url': location['sources'][0]['src'],
'width': location.get('width'),
'height': location.get('height'),
'format_id': 'http-%s' % location['label'],
})
self._sort_formats(formats)
entries = self._extract_xspf_playlist(playlist_url, playlist_id)
title = remove_end(self._og_search_title(webpage), ' - VIDEO')
description = self._og_search_description(webpage)
duration = item.get('duration')
thumbnail = item.get('poster')
return self.playlist_result(entries, playlist_id, title, description)
return {
'id': video_id,
'title': title,
'description': description,
'formats': formats,
'duration': duration,
'thumbnail': thumbnail,
}

View File

@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_urllib_parse_urlencode,
compat_urllib_request,
compat_urlparse,
)
@ -84,18 +83,19 @@ class UdemyIE(InfoExtractor):
if enroll_url:
webpage = self._download_webpage(
combine_url(base_url, enroll_url),
course_id, 'Enrolling in the course')
course_id, 'Enrolling in the course',
headers={'Referer': base_url})
if '>You have enrolled in' in webpage:
self.to_screen('%s: Successfully enrolled in the course' % course_id)
def _download_lecture(self, course_id, lecture_id):
return self._download_json(
'https://www.udemy.com/api-2.0/users/me/subscribed-courses/%s/lectures/%s?%s' % (
course_id, lecture_id, compat_urllib_parse_urlencode({
'https://www.udemy.com/api-2.0/users/me/subscribed-courses/%s/lectures/%s?'
% (course_id, lecture_id),
lecture_id, 'Downloading lecture JSON', query={
'fields[lecture]': 'title,description,view_html,asset',
'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,data',
})),
lecture_id, 'Downloading lecture JSON')
})
def _handle_error(self, response):
if not isinstance(response, dict):
@ -155,13 +155,13 @@ class UdemyIE(InfoExtractor):
'password': password,
})
request = sanitized_Request(
self._LOGIN_URL, urlencode_postdata(login_form))
request.add_header('Referer', self._ORIGIN_URL)
request.add_header('Origin', self._ORIGIN_URL)
response = self._download_webpage(
request, None, 'Logging in as %s' % username)
self._LOGIN_URL, None, 'Logging in as %s' % username,
data=urlencode_postdata(login_form),
headers={
'Referer': self._ORIGIN_URL,
'Origin': self._ORIGIN_URL,
})
if not is_logged(response):
error = self._html_search_regex(

View File

@ -3,7 +3,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_etree_fromstring
from ..compat import (
compat_etree_fromstring,
compat_str,
compat_urlparse,
)
from ..utils import (
ExtractorError,
int_or_none,
@ -12,13 +16,22 @@ from ..utils import (
)
class VevoIE(InfoExtractor):
class VevoBaseIE(InfoExtractor):
    """Base class holding the helper shared by the Vevo extractors."""

    def _extract_json(self, webpage, video_id, item):
        """Return entry *item* from the initial store embedded in *webpage*.

        The store is the JSON object assigned to ``window.__INITIAL_STORE__``
        inside a script tag; *item* is looked up under its ``default`` key.
        *video_id* is only forwarded to ``_parse_json``.
        """
        store_json = self._search_regex(
            r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>',
            webpage, 'initial store')
        initial_store = self._parse_json(store_json, video_id)
        return initial_store['default'][item]
class VevoIE(VevoBaseIE):
'''
Accepts urls from vevo.com or in the format 'vevo:{id}'
(currently used by MTVIE and MySpaceIE)
'''
_VALID_URL = r'''(?x)
(?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?|
(?:https?://www\.vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?|
https?://cache\.vevo\.com/m/html/embed\.html\?video=|
https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
vevo:)
@ -30,11 +43,15 @@ class VevoIE(InfoExtractor):
'info_dict': {
'id': 'GB1101300280',
'ext': 'mp4',
'title': 'Somebody to Die For',
'title': 'Hurts - Somebody to Die For',
'timestamp': 1372057200,
'upload_date': '20130624',
'uploader': 'Hurts',
'timestamp': 1372057200,
'track': 'Somebody to Die For',
'artist': 'Hurts',
'genre': 'Pop',
},
'expected_warnings': ['Unable to download SMIL file'],
}, {
'note': 'v3 SMIL format',
'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
@ -42,23 +59,31 @@ class VevoIE(InfoExtractor):
'info_dict': {
'id': 'USUV71302923',
'ext': 'mp4',
'title': 'I Wish I Could Break Your Heart',
'title': 'Cassadee Pope - I Wish I Could Break Your Heart',
'timestamp': 1392796919,
'upload_date': '20140219',
'uploader': 'Cassadee Pope',
'timestamp': 1392796919,
'track': 'I Wish I Could Break Your Heart',
'artist': 'Cassadee Pope',
'genre': 'Country',
},
'expected_warnings': ['Unable to download SMIL file'],
}, {
'note': 'Age-limited video',
'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
'info_dict': {
'id': 'USRV81300282',
'ext': 'mp4',
'title': 'Tunnel Vision (Explicit)',
'upload_date': '20130703',
'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
'age_limit': 18,
'uploader': 'Justin Timberlake',
'timestamp': 1372888800,
'upload_date': '20130703',
'uploader': 'Justin Timberlake',
'track': 'Tunnel Vision (Explicit)',
'artist': 'Justin Timberlake',
'genre': 'Pop',
},
'expected_warnings': ['Unable to download SMIL file'],
}, {
'note': 'No video_info',
'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
@ -66,12 +91,36 @@ class VevoIE(InfoExtractor):
'info_dict': {
'id': 'USUV71503000',
'ext': 'mp4',
'title': 'Till I Die',
'upload_date': '20151207',
'title': 'K Camp - Till I Die',
'age_limit': 18,
'uploader': 'K Camp',
'timestamp': 1449468000,
'upload_date': '20151207',
'uploader': 'K Camp',
'track': 'Till I Die',
'artist': 'K Camp',
'genre': 'Rap/Hip-Hop',
},
}, {
'note': 'Only available via webpage',
'url': 'http://www.vevo.com/watch/GBUV71600656',
'md5': '67e79210613865b66a47c33baa5e37fe',
'info_dict': {
'id': 'GBUV71600656',
'ext': 'mp4',
'title': 'ABC - Viva Love',
'age_limit': 0,
'timestamp': 1461830400,
'upload_date': '20160428',
'uploader': 'ABC',
'track': 'Viva Love',
'artist': 'ABC',
'genre': 'Pop',
},
'expected_warnings': ['Failed to download video versions info'],
}, {
# no genres available
'url': 'http://www.vevo.com/watch/INS171400764',
'only_matching': True,
}]
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com'
_SOURCE_TYPES = {
@ -140,30 +189,31 @@ class VevoIE(InfoExtractor):
errnote='Unable to retrieve oauth token')
if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage:
raise ExtractorError(
'%s said: This page is currently unavailable in your region.' % self.IE_NAME, expected=True)
self.raise_geo_restricted(
'%s said: This page is currently unavailable in your region' % self.IE_NAME)
auth_info = self._parse_json(webpage, video_id)
self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token']
def _call_api(self, path, video_id, note, errnote, fatal=True):
return self._download_json(self._api_url_template % path, video_id, note, errnote)
def _call_api(self, path, *args, **kwargs):
return self._download_json(self._api_url_template % path, *args, **kwargs)
def _real_extract(self, url):
video_id = self._match_id(url)
json_url = 'http://api.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
response = self._download_json(
json_url, video_id, 'Downloading video info', 'Unable to download info')
json_url, video_id, 'Downloading video info',
'Unable to download info', fatal=False) or {}
video_info = response.get('video') or {}
video_versions = video_info.get('videoVersions')
artist = None
featured_artist = None
uploader = None
timestamp = None
view_count = None
formats = []
if not video_info:
if response.get('statusCode') != 909:
if response and response.get('statusCode') != 909:
ytid = response.get('errorInfo', {}).get('ytid')
if ytid:
self.report_warning(
@ -183,12 +233,19 @@ class VevoIE(InfoExtractor):
video_versions = self._call_api(
'video/%s/streams' % video_id, video_id,
'Downloading video versions info',
'Failed to download video versions info')
'Failed to download video versions info',
fatal=False)
# Some videos are only available via webpage (e.g.
# https://github.com/rg3/youtube-dl/issues/9366)
if not video_versions:
webpage = self._download_webpage(url, video_id)
video_versions = self._extract_json(webpage, video_id, 'streams')[video_id][0]
timestamp = parse_iso8601(video_info.get('releaseDate'))
artists = video_info.get('artists')
if artists:
uploader = artists[0]['name']
artist = uploader = artists[0]['name']
view_count = int_or_none(video_info.get('views', {}).get('total'))
for video_version in video_versions:
@ -241,7 +298,11 @@ class VevoIE(InfoExtractor):
scale=1000)
artists = video_info.get('mainArtists')
if artists:
uploader = artists[0]['artistName']
artist = uploader = artists[0]['artistName']
featured_artists = video_info.get('featuredArtists')
if featured_artists:
featured_artist = featured_artists[0]['artistName']
smil_parsed = False
for video_version in video_info['videoVersions']:
@ -278,7 +339,15 @@ class VevoIE(InfoExtractor):
smil_parsed = True
self._sort_formats(formats)
title = video_info['title']
track = video_info['title']
if featured_artist:
artist = '%s ft. %s' % (artist, featured_artist)
title = '%s - %s' % (artist, track) if artist else track
genres = video_info.get('genres')
genre = (
genres[0] if genres and isinstance(genres, list) and
isinstance(genres[0], compat_str) else None)
is_explicit = video_info.get('isExplicit')
if is_explicit is True:
@ -300,4 +369,75 @@ class VevoIE(InfoExtractor):
'duration': duration,
'view_count': view_count,
'age_limit': age_limit,
'track': track,
'artist': uploader,
'genre': genre,
}
class VevoPlaylistIE(VevoBaseIE):
    """Extractor for vevo.com ``/watch/playlist/<id>`` and ``/watch/genre/<id>`` URLs."""

    _VALID_URL = r'https?://www\.vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29',
        'info_dict': {
            'id': 'dadbf4e7-b99f-4184-9670-6f0e547b6a29',
            'title': 'Best-Of: Birdman',
        },
        'playlist_count': 10,
    }, {
        'url': 'http://www.vevo.com/watch/genre/rock',
        'info_dict': {
            'id': 'rock',
            'title': 'Rock',
        },
        'playlist_count': 20,
    }, {
        'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29?index=0',
        'md5': '32dcdfddddf9ec6917fc88ca26d36282',
        'info_dict': {
            'id': 'USCMV1100073',
            'ext': 'mp4',
            'title': 'Birdman - Y.U. MAD',
            'timestamp': 1323417600,
            'upload_date': '20111209',
            'uploader': 'Birdman',
            'track': 'Y.U. MAD',
            'artist': 'Birdman',
            'genre': 'Rap/Hip-Hop',
        },
        'expected_warnings': ['Unable to download SMIL file'],
    }, {
        'url': 'http://www.vevo.com/watch/genre/rock?index=0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve *url* to a single video or to a playlist of Vevo videos.

        When the URL carries a non-empty ``index`` query parameter and the
        page exposes a ``vevo://video/<id>`` meta content value, the result
        is a reference to that one video handled by ``VevoIE``.  Otherwise
        every id listed under the playlist's ``isrcs`` key becomes an entry.
        """
        match = re.match(self._VALID_URL, url)
        playlist_kind = match.group('kind')
        playlist_id = match.group('id')

        webpage = self._download_webpage(url, playlist_id)

        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if query.get('index', [None])[0]:
            # The page for an indexed playlist URL may name the selected
            # video directly; prefer it over the whole playlist.
            video_id = self._search_regex(
                r'<meta[^>]+content=(["\'])vevo://video/(?P<id>.+?)\1[^>]*>',
                webpage, 'video id', default=None, group='id')
            if video_id:
                return self.url_result('vevo:%s' % video_id, VevoIE.ie_key())

        playlists = self._extract_json(webpage, playlist_id, '%ss' % playlist_kind)

        if playlist_kind == 'playlist':
            # For playlists the first stored value is taken rather than a
            # lookup by the id from the URL.
            playlist = list(playlists.values())[0]
        else:
            playlist = playlists[playlist_id]

        entries = []
        for isrc in playlist['isrcs']:
            entries.append(self.url_result('vevo:%s' % isrc, VevoIE.ie_key()))

        result_id = playlist.get('playlistId') or playlist_id
        return self.playlist_result(
            entries, result_id, playlist.get('name'), playlist.get('description'))

View File

@ -26,12 +26,16 @@ class VKIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:
(?:m\.)?vk\.com/video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|
(?:
(?:m\.)?vk\.com/video_|
(?:www\.)?daxab.com/
)
ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
(?:
(?:m\.)?vk\.com/(?:.+?\?.*?z=)?video|
(?:www\.)?biqle\.ru/watch/
(?:www\.)?daxab.com/embed/
)
(?P<videoid>[^s].*?)(?:\?(?:.*\blist=(?P<list_id>[\da-f]+))?|%2F|$)
(?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>[\da-f]+))?
)
'''
_NETRC_MACHINE = 'vk'
@ -75,7 +79,8 @@ class VKIE(InfoExtractor):
'duration': 101,
'upload_date': '20120730',
'view_count': int,
}
},
'skip': 'This video has been removed from public access.',
},
{
# VIDEO NOW REMOVED
@ -142,7 +147,7 @@ class VKIE(InfoExtractor):
'id': 'V3K4mi0SYkc',
'ext': 'webm',
'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
'description': 'md5:d9903938abdc74c738af77f527ca0596',
'duration': 178,
'upload_date': '20130116',
'uploader': "Children's Joy Foundation",
@ -173,11 +178,6 @@ class VKIE(InfoExtractor):
'url': 'https://vk.com/video205387401_164765225',
'only_matching': True,
},
{
# vk wrapper
'url': 'http://www.biqle.ru/watch/847655_160197695',
'only_matching': True,
},
{
# pladform embed
'url': 'https://vk.com/video-76116461_171554880',
@ -217,20 +217,22 @@ class VKIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
if not video_id:
video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
info_url = url
if video_id:
info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
# Some videos (removed?) can only be downloaded with list id specified
list_id = mobj.group('list_id')
if list_id:
info_url += '&list=%s' % list_id
else:
info_url = 'http://vk.com/video_ext.php?' + mobj.group('embed_query')
video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
info_page = self._download_webpage(info_url, video_id)
error_message = self._html_search_regex(
r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
[r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'],
info_page, 'error message', default=None)
if error_message:
raise ExtractorError(error_message, expected=True)
@ -305,17 +307,17 @@ class VKIE(InfoExtractor):
view_count = None
views = self._html_search_regex(
r'"mv_views_count_number"[^>]*>(.+?\bviews?)<',
info_page, 'view count', fatal=False)
info_page, 'view count', default=None)
if views:
view_count = str_to_int(self._search_regex(
r'([\d,.]+)', views, 'view count', fatal=False))
formats = []
for k, v in data.items():
if not k.startswith('url') and k != 'extra_data' or not v:
if not k.startswith('url') and not k.startswith('cache') and k != 'extra_data' or not v:
continue
height = int_or_none(self._search_regex(
r'^url(\d+)', k, 'height', default=None))
r'^(?:url|cache)(\d+)', k, 'height', default=None))
formats.append({
'format_id': k,
'url': v,

View File

@ -13,12 +13,21 @@ from ..utils import (
class XFileShareIE(InfoExtractor):
IE_DESC = 'XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me'
_VALID_URL = r'''(?x)
https?://(?P<host>(?:www\.)?
(?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net|filehoot\.com|vidto\.me|powerwatch\.pw))/
(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
'''
_SITES = (
('daclips.in', 'DaClips'),
('filehoot.com', 'FileHoot'),
('gorillavid.in', 'GorillaVid'),
('movpod.in', 'MovPod'),
('powerwatch.pw', 'PowerWatch'),
('rapidvideo.ws', 'Rapidvideo.ws'),
('thevideobee.to', 'TheVideoBee'),
('vidto.me', 'Vidto'),
('streamin.to', 'Streamin.To'),
)
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
_VALID_URL = (r'https?://(?P<host>(?:www\.)?(?:%s))/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
% '|'.join(re.escape(site) for site in list(zip(*_SITES))[0]))
_FILE_NOT_FOUND_REGEX = r'>(?:404 - )?File Not Found<'
@ -43,25 +52,6 @@ class XFileShareIE(InfoExtractor):
'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc',
'thumbnail': 're:http://.*\.jpg',
}
}, {
# video with countdown timeout
'url': 'http://fastvideo.in/1qmdn1lmsmbw',
'md5': '8b87ec3f6564a3108a0e8e66594842ba',
'info_dict': {
'id': '1qmdn1lmsmbw',
'ext': 'mp4',
'title': 'Man of Steel - Trailer',
'thumbnail': 're:http://.*\.jpg',
},
}, {
'url': 'http://realvid.net/ctn2y6p2eviw',
'md5': 'b2166d2cf192efd6b6d764c18fd3710e',
'info_dict': {
'id': 'ctn2y6p2eviw',
'ext': 'flv',
'title': 'rdx 1955',
'thumbnail': 're:http://.*\.jpg',
},
}, {
'url': 'http://movpod.in/0wguyyxi1yca',
'only_matching': True,

View File

@ -9,6 +9,11 @@ from ..utils import int_or_none
class XiamiBaseIE(InfoExtractor):
_API_BASE_URL = 'http://www.xiami.com/song/playlist/cat/json/id'
def _download_webpage(self, *args, **kwargs):
    """Download a page and check it for Xiami's geo-restriction notice.

    All arguments are passed through unchanged to the parent class'
    ``_download_webpage``.  If the downloaded page contains the
    "not available in your country" notice, ``raise_geo_restricted`` is
    called; otherwise the downloaded page is returned to the caller.
    """
    webpage = super(XiamiBaseIE, self)._download_webpage(*args, **kwargs)
    if '>Xiami is currently not available in your country.<' in webpage:
        self.raise_geo_restricted('Xiami is currently not available in your country')
    # An override has to hand the page back: without an explicit return,
    # every caller would receive None instead of the downloaded content.
    return webpage
def _extract_track(self, track, track_id=None):
title = track['title']
track_url = self._decrypt(track['location'])
@ -81,7 +86,8 @@ class XiamiSongIE(XiamiBaseIE):
'ext': 'lrc',
}],
},
}
},
'skip': 'Georestricted',
}, {
'url': 'http://www.xiami.com/song/1775256504',
'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc',
@ -100,7 +106,8 @@ class XiamiSongIE(XiamiBaseIE):
'ext': 'lrc',
}],
},
}
},
'skip': 'Georestricted',
}]
def _real_extract(self, url):
@ -124,6 +131,7 @@ class XiamiAlbumIE(XiamiPlaylistBaseIE):
'id': '2100300444',
},
'playlist_count': 10,
'skip': 'Georestricted',
}, {
'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9',
'only_matching': True,
@ -141,6 +149,7 @@ class XiamiArtistIE(XiamiPlaylistBaseIE):
'id': '2132',
},
'playlist_count': 20,
'skip': 'Georestricted',
}
@ -155,4 +164,5 @@ class XiamiCollectionIE(XiamiPlaylistBaseIE):
'id': '156527391',
},
'playlist_mincount': 29,
'skip': 'Georestricted',
}

View File

@ -10,8 +10,6 @@ from ..utils import (
ExtractorError,
int_or_none,
float_or_none,
sanitized_Request,
urlencode_postdata,
)
@ -177,7 +175,7 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
IE_NAME = 'yandexmusic:playlist'
IE_DESC = 'Яндекс.Музыка - Плейлист'
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/users/[^/]+/playlists/(?P<id>\d+)'
_VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/users/(?P<user>[^/]+)/playlists/(?P<id>\d+)'
_TESTS = [{
'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
@ -196,47 +194,64 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
'id': '1036',
'title': 'Музыка 90-х',
},
'playlist_count': 310,
'playlist_mincount': 300,
'skip': 'Travis CI servers blocked by YandexMusic',
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
tld = mobj.group('tld')
user = mobj.group('user')
playlist_id = mobj.group('id')
webpage = self._download_webpage(url, playlist_id)
playlist = self._download_json(
'https://music.yandex.%s/handlers/playlist.jsx' % tld,
playlist_id, 'Downloading missing tracks JSON',
fatal=False,
headers={
'Referer': url,
'X-Requested-With': 'XMLHttpRequest',
'X-Retpath-Y': url,
},
query={
'owner': user,
'kinds': playlist_id,
'light': 'true',
'lang': tld,
'external-domain': 'music.yandex.%s' % tld,
'overembed': 'false',
})['playlist']
mu = self._parse_json(
self._search_regex(
r'var\s+Mu\s*=\s*({.+?});\s*</script>', webpage, 'player'),
playlist_id)
tracks, track_ids = playlist['tracks'], map(compat_str, playlist['trackIds'])
playlist = mu['pageData']['playlist']
tracks, track_ids = playlist['tracks'], playlist['trackIds']
# tracks dictionary shipped with webpage is limited to 150 tracks,
# tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
# missing tracks should be retrieved manually.
if len(tracks) < len(track_ids):
present_track_ids = set([compat_str(track['id']) for track in tracks if track.get('id')])
missing_track_ids = set(map(compat_str, track_ids)) - set(present_track_ids)
request = sanitized_Request(
'https://music.yandex.ru/handlers/track-entries.jsx',
urlencode_postdata({
'entries': ','.join(missing_track_ids),
'lang': mu.get('settings', {}).get('lang', 'en'),
'external-domain': 'music.yandex.ru',
'overembed': 'false',
'sign': mu.get('authData', {}).get('user', {}).get('sign'),
'strict': 'true',
}))
request.add_header('Referer', url)
request.add_header('X-Requested-With', 'XMLHttpRequest')
present_track_ids = set([
compat_str(track['id'])
for track in tracks if track.get('id')])
missing_track_ids = [
track_id for track_id in track_ids
if track_id not in present_track_ids]
missing_tracks = self._download_json(
request, playlist_id, 'Downloading missing tracks JSON', fatal=False)
'https://music.yandex.%s/handlers/track-entries.jsx' % tld,
playlist_id, 'Downloading missing tracks JSON',
fatal=False,
headers={
'Referer': url,
'X-Requested-With': 'XMLHttpRequest',
},
query={
'entries': ','.join(missing_track_ids),
'lang': tld,
'external-domain': 'music.yandex.%s' % tld,
'overembed': 'false',
'strict': 'true',
})
if missing_tracks:
tracks.extend(missing_tracks)
return self.playlist_result(
self._build_playlist(tracks),
compat_str(playlist_id),
playlist['title'], playlist.get('description'))
playlist.get('title'), playlist.get('description'))

View File

@ -1326,9 +1326,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if video_description:
video_description = re.sub(r'''(?x)
<a\s+
(?:[a-zA-Z-]+="[^"]+"\s+)*?
(?:[a-zA-Z-]+="[^"]*"\s+)*?
(?:title|href)="([^"]+)"\s+
(?:[a-zA-Z-]+="[^"]+"\s+)*?
(?:[a-zA-Z-]+="[^"]*"\s+)*?
class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*>
[^<]+\.{3}\s*
</a>

View File

@ -389,23 +389,30 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
class FFmpegMetadataPP(FFmpegPostProcessor):
def run(self, info):
metadata = {}
if info.get('title') is not None:
metadata['title'] = info['title']
if info.get('upload_date') is not None:
metadata['date'] = info['upload_date']
if info.get('artist') is not None:
metadata['artist'] = info['artist']
elif info.get('uploader') is not None:
metadata['artist'] = info['uploader']
elif info.get('uploader_id') is not None:
metadata['artist'] = info['uploader_id']
if info.get('description') is not None:
metadata['description'] = info['description']
metadata['comment'] = info['description']
if info.get('webpage_url') is not None:
metadata['purl'] = info['webpage_url']
if info.get('album') is not None:
metadata['album'] = info['album']
def add(meta_list, info_list=None):
    """Copy the first available info field into the given metadata keys.

    *meta_list* and *info_list* may each be a single name or a list/tuple
    of names; when *info_list* is empty or omitted, the names in
    *meta_list* are used for the lookup as well.  The first name in
    *info_list* whose value in the enclosing ``info`` is not None is
    stored in the enclosing ``metadata`` under every name in *meta_list*;
    later info names are ignored.
    """
    def as_sequence(names):
        # A bare name is treated as a one-element tuple.
        return names if isinstance(names, (list, tuple)) else (names,)

    info_fields = as_sequence(info_list if info_list else meta_list)
    meta_fields = as_sequence(meta_list)

    for info_field in info_fields:
        value = info.get(info_field)
        if value is None:
            continue
        for meta_field in meta_fields:
            metadata[meta_field] = value
        # Only the first info field that is set is used.
        return
add('title', ('track', 'title'))
add('date', 'upload_date')
add(('description', 'comment'), 'description')
add('purl', 'webpage_url')
add('track', 'track_number')
add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
add('genre')
add('album')
add('album_artist')
add('disc', 'disc_number')
if not metadata:
self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')

View File

@ -14,8 +14,8 @@ import email.utils
import errno
import functools
import gzip
import itertools
import io
import itertools
import json
import locale
import math
@ -24,8 +24,8 @@ import os
import pipes
import platform
import re
import ssl
import socket
import ssl
import struct
import subprocess
import sys
@ -89,6 +89,11 @@ KNOWN_EXTENSIONS = (
'wav',
'f4f', 'f4m', 'm3u8', 'smil')
# Maps accented Latin characters to plain ASCII replacements; needed for
# sanitizing filenames in restricted mode.  The replacements are paired
# position-by-position with the accented characters; the two-letter
# replacements ('AE', 'ss', 'ae') are separate list items so that each
# still pairs with exactly one source character.
ACCENT_CHARS = dict(zip(
    'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ',
    list('AAAAAA') + ['AE'] + list('CEEEEIIIIDNOOOOOOUUUUYP') + ['ss'] +
    list('aaaaaa') + ['ae'] + list('ceeeeiiiionoooooouuuuypy')))
def preferredencoding():
"""Get preferred encoding.
@ -251,9 +256,9 @@ def get_element_by_attribute(attribute, value, html):
m = re.search(r'''(?xs)
<([a-zA-Z0-9:._-]+)
(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
\s+%s=['"]?%s['"]?
(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
\s*>
(?P<content>.*?)
</\1>
@ -365,6 +370,8 @@ def sanitize_filename(s, restricted=False, is_id=False):
Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
"""
def replace_insane(char):
if restricted and char in ACCENT_CHARS:
return ACCENT_CHARS[char]
if char == '?' or ord(char) < 32 or ord(char) == 127:
return ''
elif char == '"':

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2016.04.24'
__version__ = '2016.05.01'