1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-13 03:27:15 +08:00

Merge pull request #12 from rg3/master

update 30 may
This commit is contained in:
siddht1 2016-05-30 13:08:31 +05:30
commit 846fc61450
23 changed files with 520 additions and 191 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.30.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21.2** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.30.2**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.05.21.2 [debug] youtube-dl version 2016.05.30.2
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

6
.gitignore vendored
View File

@ -28,12 +28,16 @@ updates_key.pem
*.mp4 *.mp4
*.m4a *.m4a
*.m4v *.m4v
*.mp3
*.part *.part
*.swp *.swp
test/testdata test/testdata
test/local_parameters.json test/local_parameters.json
.tox .tox
youtube-dl.zsh youtube-dl.zsh
# IntelliJ related files
.idea .idea
.idea/* *.iml
tmp/ tmp/

View File

@ -14,7 +14,6 @@ script: nosetests test --verbose
notifications: notifications:
email: email:
- filippo.valsorda@gmail.com - filippo.valsorda@gmail.com
- phihag@phihag.de
- yasoob.khld@gmail.com - yasoob.khld@gmail.com
# irc: # irc:
# channels: # channels:

View File

@ -1,7 +1,7 @@
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
clean: clean:
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
find . -name "*.pyc" -delete find . -name "*.pyc" -delete
find . -name "*.class" -delete find . -name "*.class" -delete
@ -69,7 +69,7 @@ README.txt: README.md
pandoc -f markdown -t plain README.md -o README.txt pandoc -f markdown -t plain README.md -o README.txt
youtube-dl.1: README.md youtube-dl.1: README.md
$(PYTHON) devscripts/prepare_manpage.py >youtube-dl.1.temp.md $(PYTHON) devscripts/prepare_manpage.py youtube-dl.1.temp.md
pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1 pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1
rm -f youtube-dl.1.temp.md rm -f youtube-dl.1.temp.md

View File

@ -73,8 +73,8 @@ which means you can modify it, redistribute it or use it however you like.
repairs broken URLs, but emits an error if repairs broken URLs, but emits an error if
this is not possible instead of searching. this is not possible instead of searching.
--ignore-config Do not read configuration files. When given --ignore-config Do not read configuration files. When given
in the global configuration file /etc in the global configuration file
/youtube-dl.conf: Do not read the user /etc/youtube-dl.conf: Do not read the user
configuration in ~/.config/youtube- configuration in ~/.config/youtube-
dl/config (%APPDATA%/youtube-dl/config.txt dl/config (%APPDATA%/youtube-dl/config.txt
on Windows) on Windows)
@ -256,11 +256,12 @@ which means you can modify it, redistribute it or use it however you like.
jar in jar in
--cache-dir DIR Location in the filesystem where youtube-dl --cache-dir DIR Location in the filesystem where youtube-dl
can store some downloaded information can store some downloaded information
permanently. By default $XDG_CACHE_HOME permanently. By default
/youtube-dl or ~/.cache/youtube-dl . At the $XDG_CACHE_HOME/youtube-dl or
moment, only YouTube player files (for ~/.cache/youtube-dl . At the moment, only
videos with obfuscated signatures) are YouTube player files (for videos with
cached, but that may change. obfuscated signatures) are cached, but that
may change.
--no-cache-dir Disable filesystem caching --no-cache-dir Disable filesystem caching
--rm-cache-dir Delete all filesystem cache files --rm-cache-dir Delete all filesystem cache files
@ -784,9 +785,9 @@ means you're using an outdated version of Python. Please update to Python 2.6 or
Since June 2012 ([#342](https://github.com/rg3/youtube-dl/issues/342)) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`. Since June 2012 ([#342](https://github.com/rg3/youtube-dl/issues/342)) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
### The exe throws a *Runtime error from Visual C++* ### The exe throws an error due to missing `MSVCR100.dll`
To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29). To run the exe you need to install first the [Microsoft Visual C++ 2010 Redistributable Package (x86)](https://www.microsoft.com/en-US/download/details.aspx?id=5555).
### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files? ### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files?

View File

@ -1,17 +1,42 @@
#!/usr/bin/python3 #!/usr/bin/python3
from http.server import HTTPServer, BaseHTTPRequestHandler
from socketserver import ThreadingMixIn
import argparse import argparse
import ctypes import ctypes
import functools import functools
import shutil
import subprocess
import sys import sys
import tempfile
import threading import threading
import traceback import traceback
import os.path import os.path
sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__)))))
from youtube_dl.compat import (
compat_http_server,
compat_str,
compat_urlparse,
)
class BuildHTTPServer(ThreadingMixIn, HTTPServer): # These are not used outside of buildserver.py thus not in compat.py
try:
import winreg as compat_winreg
except ImportError: # Python 2
import _winreg as compat_winreg
try:
import socketserver as compat_socketserver
except ImportError: # Python 2
import SocketServer as compat_socketserver
try:
compat_input = raw_input
except NameError: # Python 3
compat_input = input
class BuildHTTPServer(compat_socketserver.ThreadingMixIn, compat_http_server.HTTPServer):
allow_reuse_address = True allow_reuse_address = True
@ -191,7 +216,7 @@ def main(args=None):
action='store_const', dest='action', const='service', action='store_const', dest='action', const='service',
help='Run as a Windows service') help='Run as a Windows service')
parser.add_argument('-b', '--bind', metavar='<host:port>', parser.add_argument('-b', '--bind', metavar='<host:port>',
action='store', default='localhost:8142', action='store', default='0.0.0.0:8142',
help='Bind to host:port (default %default)') help='Bind to host:port (default %default)')
options = parser.parse_args(args=args) options = parser.parse_args(args=args)
@ -216,7 +241,7 @@ def main(args=None):
srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler) srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler)
thr = threading.Thread(target=srv.serve_forever) thr = threading.Thread(target=srv.serve_forever)
thr.start() thr.start()
input('Press ENTER to shut down') compat_input('Press ENTER to shut down')
srv.shutdown() srv.shutdown()
thr.join() thr.join()
@ -231,8 +256,6 @@ def rmtree(path):
os.remove(fname) os.remove(fname)
os.rmdir(path) os.rmdir(path)
#==============================================================================
class BuildError(Exception): class BuildError(Exception):
def __init__(self, output, code=500): def __init__(self, output, code=500):
@ -249,15 +272,25 @@ class HTTPError(BuildError):
class PythonBuilder(object): class PythonBuilder(object):
def __init__(self, **kwargs): def __init__(self, **kwargs):
pythonVersion = kwargs.pop('python', '2.7') python_version = kwargs.pop('python', '3.4')
try: python_path = None
key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % pythonVersion) for node in ('Wow6432Node\\', ''):
try: try:
self.pythonPath, _ = _winreg.QueryValueEx(key, '') key = compat_winreg.OpenKey(
finally: compat_winreg.HKEY_LOCAL_MACHINE,
_winreg.CloseKey(key) r'SOFTWARE\%sPython\PythonCore\%s\InstallPath' % (node, python_version))
except Exception: try:
raise BuildError('No such Python version: %s' % pythonVersion) python_path, _ = compat_winreg.QueryValueEx(key, '')
finally:
compat_winreg.CloseKey(key)
break
except Exception:
pass
if not python_path:
raise BuildError('No such Python version: %s' % python_version)
self.pythonPath = python_path
super(PythonBuilder, self).__init__(**kwargs) super(PythonBuilder, self).__init__(**kwargs)
@ -305,8 +338,10 @@ class YoutubeDLBuilder(object):
def build(self): def build(self):
try: try:
subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], proc = subprocess.Popen([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], stdin=subprocess.PIPE, cwd=self.buildPath)
cwd=self.buildPath) proc.wait()
#subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
# cwd=self.buildPath)
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
raise BuildError(e.output) raise BuildError(e.output)
@ -369,12 +404,12 @@ class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, Clea
pass pass
class BuildHTTPRequestHandler(BaseHTTPRequestHandler): class BuildHTTPRequestHandler(compat_http_server.BaseHTTPRequestHandler):
actionDict = {'build': Builder, 'download': Builder} # They're the same, no more caching. actionDict = {'build': Builder, 'download': Builder} # They're the same, no more caching.
def do_GET(self): def do_GET(self):
path = urlparse.urlparse(self.path) path = compat_urlparse.urlparse(self.path)
paramDict = dict([(key, value[0]) for key, value in urlparse.parse_qs(path.query).items()]) paramDict = dict([(key, value[0]) for key, value in compat_urlparse.parse_qs(path.query).items()])
action, _, path = path.path.strip('/').partition('/') action, _, path = path.path.strip('/').partition('/')
if path: if path:
path = path.split('/') path = path.split('/')
@ -388,7 +423,7 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
builder.close() builder.close()
except BuildError as e: except BuildError as e:
self.send_response(e.code) self.send_response(e.code)
msg = unicode(e).encode('UTF-8') msg = compat_str(e).encode('UTF-8')
self.send_header('Content-Type', 'text/plain; charset=UTF-8') self.send_header('Content-Type', 'text/plain; charset=UTF-8')
self.send_header('Content-Length', len(msg)) self.send_header('Content-Length', len(msg))
self.end_headers() self.end_headers()
@ -400,7 +435,5 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
else: else:
self.send_response(500, 'Malformed URL') self.send_response(500, 'Malformed URL')
#==============================================================================
if __name__ == '__main__': if __name__ == '__main__':
main() main()

View File

@ -1,13 +1,46 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import io import io
import optparse
import os.path import os.path
import sys
import re import re
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
README_FILE = os.path.join(ROOT_DIR, 'README.md') README_FILE = os.path.join(ROOT_DIR, 'README.md')
PREFIX = '''%YOUTUBE-DL(1)
# NAME
youtube\-dl \- download videos from youtube.com or other video platforms
# SYNOPSIS
**youtube-dl** \[OPTIONS\] URL [URL...]
'''
def main():
parser = optparse.OptionParser(usage='%prog OUTFILE.md')
options, args = parser.parse_args()
if len(args) != 1:
parser.error('Expected an output filename')
outfile, = args
with io.open(README_FILE, encoding='utf-8') as f:
readme = f.read()
readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
readme = PREFIX + readme
readme = filter_options(readme)
with io.open(outfile, 'w', encoding='utf-8') as outf:
outf.write(readme)
def filter_options(readme): def filter_options(readme):
ret = '' ret = ''
@ -37,27 +70,5 @@ def filter_options(readme):
return ret return ret
with io.open(README_FILE, encoding='utf-8') as f: if __name__ == '__main__':
readme = f.read() main()
PREFIX = '''%YOUTUBE-DL(1)
# NAME
youtube\-dl \- download videos from youtube.com or other video platforms
# SYNOPSIS
**youtube-dl** \[OPTIONS\] URL [URL...]
'''
readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
readme = PREFIX + readme
readme = filter_options(readme)
if sys.version_info < (3, 0):
print(readme.encode('utf-8'))
else:
print(readme)

View File

@ -6,7 +6,7 @@
# * the git config user.signingkey is properly set # * the git config user.signingkey is properly set
# You will need # You will need
# pip install coverage nose rsa # pip install coverage nose rsa wheel
# TODO # TODO
# release notes # release notes
@ -15,10 +15,28 @@
set -e set -e
skip_tests=true skip_tests=true
if [ "$1" = '--run-tests' ]; then buildserver='localhost:8142'
skip_tests=false
shift while true
fi do
case "$1" in
--run-tests)
skip_tests=false
shift
;;
--buildserver)
buildserver="$2"
shift 2
;;
--*)
echo "ERROR: unknown option $1"
exit 1
;;
*)
break
;;
esac
done
if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
version="$1" version="$1"
@ -35,6 +53,7 @@ if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $us
if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi
if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi
if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi
/bin/echo -e "\n### First of all, testing..." /bin/echo -e "\n### First of all, testing..."
make clean make clean
@ -66,7 +85,7 @@ git push origin "$version"
REV=$(git rev-parse HEAD) REV=$(git rev-parse HEAD)
make youtube-dl youtube-dl.tar.gz make youtube-dl youtube-dl.tar.gz
read -p "VM running? (y/n) " -n 1 read -p "VM running? (y/n) " -n 1
wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe wget "http://$buildserver/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
mkdir -p "build/$version" mkdir -p "build/$version"
mv youtube-dl youtube-dl.exe "build/$version" mv youtube-dl youtube-dl.exe "build/$version"
mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz" mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"

View File

@ -43,8 +43,8 @@
- **appletrailers:section** - **appletrailers:section**
- **archive.org**: archive.org videos - **archive.org**: archive.org videos
- **ARD** - **ARD**
- **ARD:mediathek**: Saarländischer Rundfunk
- **ARD:mediathek** - **ARD:mediathek**
- **ARD:mediathek**: Saarländischer Rundfunk
- **arte.tv** - **arte.tv**
- **arte.tv:+7** - **arte.tv:+7**
- **arte.tv:cinema** - **arte.tv:cinema**
@ -136,6 +136,7 @@
- **ComedyCentral** - **ComedyCentral**
- **ComedyCentralShows**: The Daily Show / The Colbert Report - **ComedyCentralShows**: The Daily Show / The Colbert Report
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
- **Coub**
- **Cracked** - **Cracked**
- **Crackle** - **Crackle**
- **Criterion** - **Criterion**
@ -205,6 +206,7 @@
- **exfm**: ex.fm - **exfm**: ex.fm
- **ExpoTV** - **ExpoTV**
- **ExtremeTube** - **ExtremeTube**
- **EyedoTV**
- **facebook** - **facebook**
- **faz.net** - **faz.net**
- **fc2** - **fc2**
@ -326,8 +328,8 @@
- **LePlaylist** - **LePlaylist**
- **LetvCloud**: 乐视云 - **LetvCloud**: 乐视云
- **Libsyn** - **Libsyn**
- **life**: Life.ru
- **life:embed** - **life:embed**
- **lifenews**: LIFE | NEWS
- **limelight** - **limelight**
- **limelight:channel** - **limelight:channel**
- **limelight:channel_list** - **limelight:channel_list**
@ -512,6 +514,8 @@
- **R7** - **R7**
- **radio.de** - **radio.de**
- **radiobremen** - **radiobremen**
- **radiocanada**
- **RadioCanadaAudioVideo**
- **radiofrance** - **radiofrance**
- **RadioJavan** - **RadioJavan**
- **Rai** - **Rai**
@ -521,6 +525,7 @@
- **RedTube** - **RedTube**
- **RegioTV** - **RegioTV**
- **Restudy** - **Restudy**
- **Reuters**
- **ReverbNation** - **ReverbNation**
- **Revision3** - **Revision3**
- **RICE** - **RICE**
@ -682,8 +687,8 @@
- **TVCArticle** - **TVCArticle**
- **tvigle**: Интернет-телевидение Tvigle.ru - **tvigle**: Интернет-телевидение Tvigle.ru
- **tvland.com** - **tvland.com**
- **tvp.pl** - **tvp**: Telewizja Polska
- **tvp.pl:Series** - **tvp:series**
- **TVPlay**: TV3Play and related services - **TVPlay**: TV3Play and related services
- **Tweakers** - **Tweakers**
- **twitch:chapter** - **twitch:chapter**
@ -766,7 +771,8 @@
- **VuClip** - **VuClip**
- **vulture.com** - **vulture.com**
- **Walla** - **Walla**
- **WashingtonPost** - **washingtonpost**
- **washingtonpost:article**
- **wat.tv** - **wat.tv**
- **WatchIndianPorn**: Watch Indian Porn - **WatchIndianPorn**: Watch Indian Porn
- **WDR** - **WDR**

View File

@ -319,7 +319,7 @@ class F4mFD(FragmentFD):
doc = compat_etree_fromstring(manifest) doc = compat_etree_fromstring(manifest)
formats = [(int(f.attrib.get('bitrate', -1)), f) formats = [(int(f.attrib.get('bitrate', -1)), f)
for f in self._get_unencrypted_media(doc)] for f in self._get_unencrypted_media(doc)]
if requested_bitrate is None: if requested_bitrate is None or len(formats) == 1:
# get the best format # get the best format
formats = sorted(formats, key=lambda f: f[0]) formats = sorted(formats, key=lambda f: f[0])
rate, media = formats[-1] rate, media = formats[-1]

View File

@ -1,34 +1,42 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import calendar
import datetime
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import (
compat_etree_fromstring,
compat_str,
compat_parse_qs,
compat_xml_parse_error,
)
from ..utils import ( from ..utils import (
int_or_none,
unescapeHTML,
ExtractorError, ExtractorError,
int_or_none,
float_or_none,
xpath_text, xpath_text,
) )
class BiliBiliIE(InfoExtractor): class BiliBiliIE(InfoExtractor):
_VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?' _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.bilibili.tv/video/av1074402/', 'url': 'http://www.bilibili.tv/video/av1074402/',
'md5': '2c301e4dab317596e837c3e7633e7d86', 'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
'info_dict': { 'info_dict': {
'id': '1554319', 'id': '1554319',
'ext': 'flv', 'ext': 'flv',
'title': '【金坷垃】金泡沫', 'title': '【金坷垃】金泡沫',
'duration': 308313, 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
'duration': 308.067,
'timestamp': 1398012660,
'upload_date': '20140420', 'upload_date': '20140420',
'thumbnail': 're:^https?://.+\.jpg', 'thumbnail': 're:^https?://.+\.jpg',
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
'timestamp': 1397983878,
'uploader': '菊子桑', 'uploader': '菊子桑',
'uploader_id': '156160',
}, },
}, { }, {
'url': 'http://www.bilibili.com/video/av1041170/', 'url': 'http://www.bilibili.com/video/av1041170/',
@ -36,75 +44,110 @@ class BiliBiliIE(InfoExtractor):
'id': '1041170', 'id': '1041170',
'title': '【BD1080P】刀语【诸神&异域】', 'title': '【BD1080P】刀语【诸神&异域】',
'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~', 'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
'uploader': '枫叶逝去',
'timestamp': 1396501299,
}, },
'playlist_count': 9, 'playlist_count': 9,
}] }]
# BiliBili blocks keys from time to time. The current key is extracted from
# the Android client
# TODO: find the sign algorithm used in the flash player
_APP_KEY = '86385cdc024c0f6c'
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
page_num = mobj.group('page_num') or '1'
view_data = self._download_json( webpage = self._download_webpage(url, video_id)
'http://api.bilibili.com/view?type=json&appkey=8e9fc618fbd41e28&id=%s&page=%s' % (video_id, page_num),
video_id)
if 'error' in view_data:
raise ExtractorError('%s said: %s' % (self.IE_NAME, view_data['error']), expected=True)
cid = view_data['cid'] params = compat_parse_qs(self._search_regex(
title = unescapeHTML(view_data['title']) [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
webpage, 'player parameters'))
cid = params['cid'][0]
doc = self._download_xml( info_xml_str = self._download_webpage(
'http://interface.bilibili.com/v_cdn_play?appkey=8e9fc618fbd41e28&cid=%s' % cid, 'http://interface.bilibili.com/v_cdn_play',
cid, cid, query={'appkey': self._APP_KEY, 'cid': cid},
'Downloading page %s/%s' % (page_num, view_data['pages']) note='Downloading video info page')
)
if xpath_text(doc, './result') == 'error': err_msg = None
raise ExtractorError('%s said: %s' % (self.IE_NAME, xpath_text(doc, './message')), expected=True) durls = None
info_xml = None
try:
info_xml = compat_etree_fromstring(info_xml_str.encode('utf-8'))
except compat_xml_parse_error:
info_json = self._parse_json(info_xml_str, video_id, fatal=False)
err_msg = (info_json or {}).get('error_text')
else:
err_msg = xpath_text(info_xml, './message')
if info_xml is not None:
durls = info_xml.findall('./durl')
if not durls:
if err_msg:
raise ExtractorError('%s said: %s' % (self.IE_NAME, err_msg), expected=True)
else:
raise ExtractorError('No videos found!')
entries = [] entries = []
for durl in doc.findall('./durl'): for durl in durls:
size = xpath_text(durl, ['./filesize', './size']) size = xpath_text(durl, ['./filesize', './size'])
formats = [{ formats = [{
'url': durl.find('./url').text, 'url': durl.find('./url').text,
'filesize': int_or_none(size), 'filesize': int_or_none(size),
'ext': 'flv',
}] }]
backup_urls = durl.find('./backup_url') for backup_url in durl.findall('./backup_url/url'):
if backup_urls is not None: formats.append({
for backup_url in backup_urls.findall('./url'): 'url': backup_url.text,
formats.append({'url': backup_url.text}) # backup URLs have lower priorities
formats.reverse() 'preference': -2 if 'hd.mp4' in backup_url.text else -3,
})
self._sort_formats(formats)
entries.append({ entries.append({
'id': '%s_part%s' % (cid, xpath_text(durl, './order')), 'id': '%s_part%s' % (cid, xpath_text(durl, './order')),
'title': title,
'duration': int_or_none(xpath_text(durl, './length'), 1000), 'duration': int_or_none(xpath_text(durl, './length'), 1000),
'formats': formats, 'formats': formats,
}) })
title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
description = self._html_search_meta('description', webpage)
datetime_str = self._html_search_regex(
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)
if datetime_str:
timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple())
# TODO 'view_count' requires deobfuscating Javascript
info = { info = {
'id': compat_str(cid), 'id': compat_str(cid),
'title': title, 'title': title,
'description': view_data.get('description'), 'description': description,
'thumbnail': view_data.get('pic'), 'timestamp': timestamp,
'uploader': view_data.get('author'), 'thumbnail': self._html_search_meta('thumbnailUrl', webpage),
'timestamp': int_or_none(view_data.get('created')), 'duration': float_or_none(xpath_text(info_xml, './timelength'), scale=1000),
'view_count': int_or_none(view_data.get('play')),
'duration': int_or_none(xpath_text(doc, './timelength')),
} }
uploader_mobj = re.search(
r'<a[^>]+href="https?://space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
webpage)
if uploader_mobj:
info.update({
'uploader': uploader_mobj.group('name'),
'uploader_id': uploader_mobj.group('id'),
})
for entry in entries:
entry.update(info)
if len(entries) == 1: if len(entries) == 1:
entries[0].update(info)
return entries[0] return entries[0]
else: else:
info.update({ return {
'_type': 'multi_video', '_type': 'multi_video',
'id': video_id, 'id': video_id,
'title': title,
'description': description,
'entries': entries, 'entries': entries,
}) }
return info

View File

@ -987,7 +987,7 @@ class InfoExtractor(object):
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None, def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
transform_source=lambda s: fix_xml_ampersands(s).strip(), transform_source=lambda s: fix_xml_ampersands(s).strip(),
fatal=True): fatal=True, m3u8_id=None):
manifest = self._download_xml( manifest = self._download_xml(
manifest_url, video_id, 'Downloading f4m manifest', manifest_url, video_id, 'Downloading f4m manifest',
'Unable to download f4m manifest', 'Unable to download f4m manifest',
@ -1001,11 +1001,11 @@ class InfoExtractor(object):
return self._parse_f4m_formats( return self._parse_f4m_formats(
manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id, manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id,
transform_source=transform_source, fatal=fatal) transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id)
def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None, def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
transform_source=lambda s: fix_xml_ampersands(s).strip(), transform_source=lambda s: fix_xml_ampersands(s).strip(),
fatal=True): fatal=True, m3u8_id=None):
# currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0') akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
if akamai_pv is not None and ';' in akamai_pv.text: if akamai_pv is not None and ';' in akamai_pv.text:
@ -1029,9 +1029,26 @@ class InfoExtractor(object):
'base URL', default=None) 'base URL', default=None)
if base_url: if base_url:
base_url = base_url.strip() base_url = base_url.strip()
bootstrap_info = xpath_text(
manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
'bootstrap info', default=None)
for i, media_el in enumerate(media_nodes): for i, media_el in enumerate(media_nodes):
if manifest_version == '2.0': tbr = int_or_none(media_el.attrib.get('bitrate'))
media_url = media_el.attrib.get('href') or media_el.attrib.get('url') width = int_or_none(media_el.attrib.get('width'))
height = int_or_none(media_el.attrib.get('height'))
format_id = '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)]))
# If <bootstrapInfo> is present, the specified f4m is a
# stream-level manifest, and only set-level manifests may refer to
# external resources. See section 11.4 and section 4 of F4M spec
if bootstrap_info is None:
media_url = None
# @href is introduced in 2.0, see section 11.6 of F4M spec
if manifest_version == '2.0':
media_url = media_el.attrib.get('href')
if media_url is None:
media_url = media_el.attrib.get('url')
if not media_url: if not media_url:
continue continue
manifest_url = ( manifest_url = (
@ -1041,19 +1058,37 @@ class InfoExtractor(object):
# since bitrates in parent manifest (this one) and media_url manifest # since bitrates in parent manifest (this one) and media_url manifest
# may differ leading to inability to resolve the format by requested # may differ leading to inability to resolve the format by requested
# bitrate in f4m downloader # bitrate in f4m downloader
if determine_ext(manifest_url) == 'f4m': ext = determine_ext(manifest_url)
formats.extend(self._extract_f4m_formats( if ext == 'f4m':
f4m_formats = self._extract_f4m_formats(
manifest_url, video_id, preference=preference, f4m_id=f4m_id, manifest_url, video_id, preference=preference, f4m_id=f4m_id,
transform_source=transform_source, fatal=fatal)) transform_source=transform_source, fatal=fatal)
# Sometimes stream-level manifest contains single media entry that
# does not contain any quality metadata (e.g. http://matchtv.ru/#live-player).
# At the same time parent's media entry in set-level manifest may
# contain it. We will copy it from parent in such cases.
if len(f4m_formats) == 1:
f = f4m_formats[0]
f.update({
'tbr': f.get('tbr') or tbr,
'width': f.get('width') or width,
'height': f.get('height') or height,
'format_id': f.get('format_id') if not tbr else format_id,
})
formats.extend(f4m_formats)
continue
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
manifest_url, video_id, 'mp4', preference=preference,
m3u8_id=m3u8_id, fatal=fatal))
continue continue
tbr = int_or_none(media_el.attrib.get('bitrate'))
formats.append({ formats.append({
'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])), 'format_id': format_id,
'url': manifest_url, 'url': manifest_url,
'ext': 'flv', 'ext': 'flv' if bootstrap_info else None,
'tbr': tbr, 'tbr': tbr,
'width': int_or_none(media_el.attrib.get('width')), 'width': width,
'height': int_or_none(media_el.attrib.get('height')), 'height': height,
'preference': preference, 'preference': preference,
}) })
return formats return formats

View File

@ -0,0 +1,143 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
parse_iso8601,
qualities,
)
class CoubIE(InfoExtractor):
    """Extractor for coub.com short looping videos.

    Accepts regular view/embed URLs, legacy Flash-player embed URLs and
    the internal ``coub:<id>`` scheme.  Metadata and media URLs come from
    the public JSON API at ``/api/v2/coubs/<id>.json``.
    """

    _VALID_URL = r'(?:coub:|https?://(?:coub\.com/(?:view|embed|coubs)/|c-cdn\.coub\.com/fb-player\.swf\?.*\bcoub(?:ID|id)=))(?P<id>[\da-z]+)'

    _TESTS = [{
        'url': 'http://coub.com/view/5u5n1',
        'info_dict': {
            'id': '5u5n1',
            'ext': 'mp4',
            'title': 'The Matrix Moonwalk',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 4.6,
            'timestamp': 1428527772,
            'upload_date': '20150408',
            'uploader': 'Артём Лоскутников',
            'uploader_id': 'artyom.loskutnikov',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'age_limit': 0,
        },
    }, {
        'url': 'http://c-cdn.coub.com/fb-player.swf?bot_type=vk&coubID=7w5a4',
        'only_matching': True,
    }, {
        'url': 'coub:5u5n1',
        'only_matching': True,
    }, {
        # longer video id
        'url': 'http://coub.com/view/237d5l5h',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        coub = self._download_json(
            'http://coub.com/api/v2/coubs/%s.json' % video_id, video_id)

        # The API reports errors in-band rather than via HTTP status.
        api_error = coub.get('error')
        if api_error:
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, api_error), expected=True)

        title = coub['title']

        file_versions = coub['file_versions']

        # Rank media quality (low < med < high) and delivery source
        # (mobile < iphone < html5) for format sorting.
        quality_rank = qualities(('low', 'med', 'high'))
        source_rank = qualities(('mobile', 'iphone', 'html5'))

        formats = []

        # html5 entry maps kind ('video'/'audio') -> quality -> item dict.
        for kind, variants in file_versions.get('html5', {}).items():
            if kind not in ('video', 'audio'):
                continue
            if not isinstance(variants, dict):
                continue
            for quality, variant in variants.items():
                if not isinstance(variant, dict):
                    continue
                variant_url = variant.get('url')
                if not variant_url:
                    continue
                formats.append({
                    'url': variant_url,
                    'format_id': 'html5-%s-%s' % (kind, quality),
                    'filesize': int_or_none(variant.get('size')),
                    # Audio-only entries carry no video stream.
                    'vcodec': 'none' if kind == 'audio' else None,
                    'quality': quality_rank(quality),
                    'preference': source_rank('html5'),
                })

        iphone_url = file_versions.get('iphone', {}).get('url')
        if iphone_url:
            formats.append({
                'url': iphone_url,
                'format_id': 'iphone',
                'preference': source_rank('iphone'),
            })

        mobile_audio_url = file_versions.get('mobile', {}).get('audio_url')
        if mobile_audio_url:
            formats.append({
                'url': mobile_audio_url,
                'format_id': 'mobile-audio',
                'preference': source_rank('mobile'),
            })

        self._sort_formats(formats)

        channel = coub.get('channel', {})

        # Age restriction may be set by the uploader or by an admin;
        # None means the API exposed neither flag.
        age_restricted = coub.get(
            'age_restricted', coub.get('age_restricted_by_admin'))
        if age_restricted is None:
            age_limit = None
        else:
            age_limit = 18 if age_restricted is True else 0

        return {
            'id': video_id,
            'title': title,
            'thumbnail': coub.get('picture'),
            'duration': float_or_none(coub.get('duration')),
            'timestamp': parse_iso8601(
                coub.get('published_at') or coub.get('created_at')),
            'uploader': channel.get('title'),
            'uploader_id': channel.get('permalink'),
            'view_count': int_or_none(
                coub.get('views_count') or coub.get('views_increase_count')),
            'like_count': int_or_none(coub.get('likes_count')),
            'repost_count': int_or_none(coub.get('recoubs_count')),
            'comment_count': int_or_none(coub.get('comments_count')),
            'age_limit': age_limit,
            'formats': formats,
        }

View File

@ -2,13 +2,16 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none from ..utils import (
int_or_none,
unified_strdate,
)
from ..compat import compat_urlparse from ..compat import compat_urlparse
class DWIE(InfoExtractor): class DWIE(InfoExtractor):
IE_NAME = 'dw' IE_NAME = 'dw'
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+av-(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+(?:av|e)-(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
# video # video
'url': 'http://www.dw.com/en/intelligent-light/av-19112290', 'url': 'http://www.dw.com/en/intelligent-light/av-19112290',
@ -31,6 +34,16 @@ class DWIE(InfoExtractor):
'description': 'md5:bc9ca6e4e063361e21c920c53af12405', 'description': 'md5:bc9ca6e4e063361e21c920c53af12405',
'upload_date': '20160311', 'upload_date': '20160311',
} }
}, {
'url': 'http://www.dw.com/en/documentaries-welcome-to-the-90s-2016-05-21/e-19220158-9798',
'md5': '56b6214ef463bfb9a3b71aeb886f3cf1',
'info_dict': {
'id': '19274438',
'ext': 'mp4',
'title': 'Welcome to the 90s Hip Hop',
'description': 'Welcome to the 90s - The Golden Decade of Hip Hop',
'upload_date': '20160521',
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -38,6 +51,7 @@ class DWIE(InfoExtractor):
webpage = self._download_webpage(url, media_id) webpage = self._download_webpage(url, media_id)
hidden_inputs = self._hidden_inputs(webpage) hidden_inputs = self._hidden_inputs(webpage)
title = hidden_inputs['media_title'] title = hidden_inputs['media_title']
media_id = hidden_inputs.get('media_id') or media_id
if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1': if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1':
formats = self._extract_smil_formats( formats = self._extract_smil_formats(
@ -49,13 +63,20 @@ class DWIE(InfoExtractor):
else: else:
formats = [{'url': hidden_inputs['file_name']}] formats = [{'url': hidden_inputs['file_name']}]
upload_date = hidden_inputs.get('display_date')
if not upload_date:
upload_date = self._html_search_regex(
r'<span[^>]+class="date">([0-9.]+)\s*\|', webpage,
'upload date', default=None)
upload_date = unified_strdate(upload_date)
return { return {
'id': media_id, 'id': media_id,
'title': title, 'title': title,
'description': self._og_search_description(webpage), 'description': self._og_search_description(webpage),
'thumbnail': hidden_inputs.get('preview_image'), 'thumbnail': hidden_inputs.get('preview_image'),
'duration': int_or_none(hidden_inputs.get('file_duration')), 'duration': int_or_none(hidden_inputs.get('file_duration')),
'upload_date': hidden_inputs.get('display_date'), 'upload_date': upload_date,
'formats': formats, 'formats': formats,
} }

View File

@ -143,6 +143,7 @@ from .cnn import (
CNNBlogsIE, CNNBlogsIE,
CNNArticleIE, CNNArticleIE,
) )
from .coub import CoubIE
from .collegerama import CollegeRamaIE from .collegerama import CollegeRamaIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .comcarcoff import ComCarCoffIE from .comcarcoff import ComCarCoffIE
@ -833,7 +834,10 @@ from .tvc import (
) )
from .tvigle import TvigleIE from .tvigle import TvigleIE
from .tvland import TVLandIE from .tvland import TVLandIE
from .tvp import TvpIE, TvpSeriesIE from .tvp import (
TVPIE,
TVPSeriesIE,
)
from .tvplay import TVPlayIE from .tvplay import TVPlayIE
from .tweakers import TweakersIE from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE from .twentyfourvideo import TwentyFourVideoIE

View File

@ -4,9 +4,8 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
xpath_text, dict_get,
float_or_none, float_or_none,
int_or_none,
) )
@ -23,6 +22,19 @@ class PlaywireIE(InfoExtractor):
'duration': 145.94, 'duration': 145.94,
}, },
}, { }, {
# m3u8 in f4m
'url': 'http://config.playwire.com/21772/videos/v2/4840492/zeus.json',
'info_dict': {
'id': '4840492',
'ext': 'mp4',
'title': 'ITV EL SHOW FULL',
},
'params': {
# m3u8 download
'skip_download': True,
},
}, {
# Multiple resolutions while bitrates missing
'url': 'http://cdn.playwire.com/11625/embed/85228.html', 'url': 'http://cdn.playwire.com/11625/embed/85228.html',
'only_matching': True, 'only_matching': True,
}, { }, {
@ -48,25 +60,10 @@ class PlaywireIE(InfoExtractor):
thumbnail = content.get('poster') thumbnail = content.get('poster')
src = content['media']['f4m'] src = content['media']['f4m']
f4m = self._download_xml(src, video_id) formats = self._extract_f4m_formats(src, video_id, m3u8_id='hls')
base_url = xpath_text(f4m, './{http://ns.adobe.com/f4m/1.0}baseURL', 'base url', fatal=True) for a_format in formats:
formats = [] if not dict_get(a_format, ['tbr', 'width', 'height']):
for media in f4m.findall('./{http://ns.adobe.com/f4m/1.0}media'): a_format['quality'] = 1 if '-hd.' in a_format['url'] else 0
media_url = media.get('url')
if not media_url:
continue
tbr = int_or_none(media.get('bitrate'))
width = int_or_none(media.get('width'))
height = int_or_none(media.get('height'))
f = {
'url': '%s/%s' % (base_url, media.attrib['url']),
'tbr': tbr,
'width': width,
'height': height,
}
if not (tbr or width or height):
f['quality'] = 1 if '-hd.' in media_url else 0
formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
return { return {

View File

@ -1,4 +1,4 @@
# -*- coding: utf-8 -*- # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
@ -6,20 +6,13 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
class TvpIE(InfoExtractor): class TVPIE(InfoExtractor):
IE_NAME = 'tvp.pl' IE_NAME = 'tvp'
_VALID_URL = r'https?://(?:vod|www)\.tvp\.pl/.*/(?P<id>\d+)$' IE_DESC = 'Telewizja Polska'
_VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem/wideo/odc-2/4278035', 'url': 'http://vod.tvp.pl/194536/i-seria-odc-13',
'md5': 'cdd98303338b8a7f7abab5cd14092bf2',
'info_dict': {
'id': '4278035',
'ext': 'wmv',
'title': 'Ogniem i mieczem, odc. 2',
},
}, {
'url': 'http://vod.tvp.pl/seriale/obyczajowe/czas-honoru/sezon-1-1-13/i-seria-odc-13/194536',
'md5': '8aa518c15e5cc32dfe8db400dc921fbb', 'md5': '8aa518c15e5cc32dfe8db400dc921fbb',
'info_dict': { 'info_dict': {
'id': '194536', 'id': '194536',
@ -36,12 +29,22 @@ class TvpIE(InfoExtractor):
}, },
}, { }, {
'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272', 'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
'md5': 'c3b15ed1af288131115ff17a17c19dda', 'only_matching': True,
'info_dict': { }, {
'id': '17834272', 'url': 'http://wiadomosci.tvp.pl/25169746/24052016-1200',
'ext': 'mp4', 'only_matching': True,
'title': 'Na sygnale, odc. 39', }, {
}, 'url': 'http://krakow.tvp.pl/25511623/25lecie-mck-wyjatkowe-miejsce-na-mapie-krakowa',
'only_matching': True,
}, {
'url': 'http://teleexpress.tvp.pl/25522307/wierni-wzieli-udzial-w-procesjach',
'only_matching': True,
}, {
'url': 'http://sport.tvp.pl/25522165/krychowiak-uspokaja-w-sprawie-kontuzji-dwa-tygodnie-to-maksimum',
'only_matching': True,
}, {
'url': 'http://www.tvp.info/25511919/trwa-rewolucja-wladza-zdecydowala-sie-na-pogwalcenie-konstytucji',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -92,8 +95,8 @@ class TvpIE(InfoExtractor):
} }
class TvpSeriesIE(InfoExtractor): class TVPSeriesIE(InfoExtractor):
IE_NAME = 'tvp.pl:Series' IE_NAME = 'tvp:series'
_VALID_URL = r'https?://vod\.tvp\.pl/(?:[^/]+/){2}(?P<id>[^/]+)/?$' _VALID_URL = r'https?://vod\.tvp\.pl/(?:[^/]+/){2}(?P<id>[^/]+)/?$'
_TESTS = [{ _TESTS = [{
@ -127,7 +130,7 @@ class TvpSeriesIE(InfoExtractor):
videos_paths = re.findall( videos_paths = re.findall(
'(?s)class="shortTitle">.*?href="(/[^"]+)', playlist) '(?s)class="shortTitle">.*?href="(/[^"]+)', playlist)
entries = [ entries = [
self.url_result('http://vod.tvp.pl%s' % v_path, ie=TvpIE.ie_key()) self.url_result('http://vod.tvp.pl%s' % v_path, ie=TVPIE.ie_key())
for v_path in videos_paths] for v_path in videos_paths]
return { return {

View File

@ -142,7 +142,9 @@ class UdemyIE(InfoExtractor):
self._LOGIN_URL, None, 'Downloading login popup') self._LOGIN_URL, None, 'Downloading login popup')
def is_logged(webpage): def is_logged(webpage):
return any(p in webpage for p in ['href="https://www.udemy.com/user/logout/', '>Logout<']) return any(re.search(p, webpage) for p in (
r'href=["\'](?:https://www\.udemy\.com)?/user/logout/',
r'>Logout<'))
# already logged in # already logged in
if is_logged(login_popup): if is_logged(login_popup):

View File

@ -217,7 +217,6 @@ class VKIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid') video_id = mobj.group('videoid')
info_url = url
if video_id: if video_id:
info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
# Some videos (removed?) can only be downloaded with list id specified # Some videos (removed?) can only be downloaded with list id specified

View File

@ -20,18 +20,24 @@ class YandexMusicBaseIE(InfoExtractor):
error = response.get('error') error = response.get('error')
if error: if error:
raise ExtractorError(error, expected=True) raise ExtractorError(error, expected=True)
if response.get('type') == 'captcha' or 'captcha' in response:
YandexMusicBaseIE._raise_captcha()
@staticmethod
def _raise_captcha():
raise ExtractorError(
'YandexMusic has considered youtube-dl requests automated and '
'asks you to solve a CAPTCHA. You can either wait for some '
'time until unblocked and optionally use --sleep-interval '
'in future or alternatively you can go to https://music.yandex.ru/ '
'solve CAPTCHA, then export cookies and pass cookie file to '
'youtube-dl with --cookies',
expected=True)
def _download_webpage(self, *args, **kwargs): def _download_webpage(self, *args, **kwargs):
webpage = super(YandexMusicBaseIE, self)._download_webpage(*args, **kwargs) webpage = super(YandexMusicBaseIE, self)._download_webpage(*args, **kwargs)
if 'Нам очень жаль, но&nbsp;запросы, поступившие с&nbsp;вашего IP-адреса, похожи на&nbsp;автоматические.' in webpage: if 'Нам очень жаль, но&nbsp;запросы, поступившие с&nbsp;вашего IP-адреса, похожи на&nbsp;автоматические.' in webpage:
raise ExtractorError( self._raise_captcha()
'YandexMusic has considered youtube-dl requests automated and '
'asks you to solve a CAPTCHA. You can either wait for some '
'time until unblocked and optionally use --sleep-interval '
'in future or alternatively you can go to https://music.yandex.ru/ '
'solve CAPTCHA, then export cookies and pass cookie file to '
'youtube-dl with --cookies',
expected=True)
return webpage return webpage
def _download_json(self, *args, **kwargs): def _download_json(self, *args, **kwargs):

View File

@ -275,6 +275,8 @@ class YoukuIE(InfoExtractor):
'format_id': self.get_format_name(fm), 'format_id': self.get_format_name(fm),
'ext': self.parse_ext_l(fm), 'ext': self.parse_ext_l(fm),
'filesize': int(seg['size']), 'filesize': int(seg['size']),
'width': stream.get('width'),
'height': stream.get('height'),
}) })
return { return {

View File

@ -1035,6 +1035,7 @@ def unified_strdate(date_str, day_first=True):
format_expressions.extend([ format_expressions.extend([
'%d-%m-%Y', '%d-%m-%Y',
'%d.%m.%Y', '%d.%m.%Y',
'%d.%m.%y',
'%d/%m/%Y', '%d/%m/%Y',
'%d/%m/%y', '%d/%m/%y',
'%d/%m/%Y %H:%M:%S', '%d/%m/%Y %H:%M:%S',
@ -1910,7 +1911,7 @@ def parse_age_limit(s):
def strip_jsonp(code): def strip_jsonp(code):
return re.sub( return re.sub(
r'(?s)^[a-zA-Z0-9_.]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code) r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
def js_to_json(code): def js_to_json(code):

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2016.05.21.2' __version__ = '2016.05.30.2'