mirror of
https://github.com/l1ving/youtube-dl
synced 2025-03-13 13:47:16 +08:00
Merge remote-tracking branch 'upstream/master' into multipart_videos
This commit is contained in:
commit
01c0a5f619
4
AUTHORS
4
AUTHORS
@ -163,3 +163,7 @@ Patrick Griffis
|
||||
Aidan Rowe
|
||||
mutantmonkey
|
||||
Ben Congdon
|
||||
Kacper Michajłow
|
||||
José Joaquín Atria
|
||||
Viťas Strádal
|
||||
Kagami Hiiragi
|
||||
|
10
Makefile
10
Makefile
@ -12,15 +12,7 @@ SHAREDIR ?= $(PREFIX)/share
|
||||
PYTHON ?= /usr/bin/env python
|
||||
|
||||
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
|
||||
ifeq ($(PREFIX),/usr)
|
||||
SYSCONFDIR=/etc
|
||||
else
|
||||
ifeq ($(PREFIX),/usr/local)
|
||||
SYSCONFDIR=/etc
|
||||
else
|
||||
SYSCONFDIR=$(PREFIX)/etc
|
||||
endif
|
||||
endif
|
||||
SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi
|
||||
|
||||
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
|
||||
install -d $(DESTDIR)$(BINDIR)
|
||||
|
@ -831,7 +831,7 @@ To run the test, simply invoke your favorite test runner, or execute a test file
|
||||
If you want to create a build of youtube-dl yourself, you'll need
|
||||
|
||||
* python
|
||||
* make
|
||||
* make (both GNU make and BSD make are supported)
|
||||
* pandoc
|
||||
* zip
|
||||
* nosetests
|
||||
|
@ -81,6 +81,7 @@
|
||||
- **BokeCC**
|
||||
- **Bpb**: Bundeszentrale für politische Bildung
|
||||
- **BR**: Bayerischer Rundfunk Mediathek
|
||||
- **BravoTV**
|
||||
- **Break**
|
||||
- **brightcove:legacy**
|
||||
- **brightcove:new**
|
||||
@ -499,6 +500,7 @@
|
||||
- **Restudy**
|
||||
- **ReverbNation**
|
||||
- **Revision3**
|
||||
- **RICE**
|
||||
- **RingTV**
|
||||
- **RottenTomatoes**
|
||||
- **Roxwel**
|
||||
@ -617,6 +619,7 @@
|
||||
- **ThePlatform**
|
||||
- **ThePlatformFeed**
|
||||
- **TheSixtyOne**
|
||||
- **TheStar**
|
||||
- **ThisAmericanLife**
|
||||
- **ThisAV**
|
||||
- **THVideo**
|
||||
@ -650,6 +653,7 @@
|
||||
- **tv.dfb.de**
|
||||
- **TV2**
|
||||
- **TV2Article**
|
||||
- **TV3**
|
||||
- **TV4**: tv4.se and tv4play.se
|
||||
- **TVC**
|
||||
- **TVCArticle**
|
||||
|
@ -222,6 +222,11 @@ class TestFormatSelection(unittest.TestCase):
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'dash-video-low')
|
||||
|
||||
ydl = YDL({'format': 'bestvideo[format_id^=dash][format_id$=low]'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'dash-video-low')
|
||||
|
||||
formats = [
|
||||
{'format_id': 'vid-vcodec-dot', 'ext': 'mp4', 'preference': 1, 'vcodec': 'avc1.123456', 'acodec': 'none', 'url': TEST_URL},
|
||||
]
|
||||
|
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
@ -120,5 +121,14 @@ class TestProxy(unittest.TestCase):
|
||||
response = ydl.urlopen(req).read().decode('utf-8')
|
||||
self.assertEqual(response, 'cn: {0}'.format(url))
|
||||
|
||||
def test_proxy_with_idn(self):
|
||||
ydl = YoutubeDL({
|
||||
'proxy': 'localhost:{0}'.format(self.port),
|
||||
})
|
||||
url = 'http://中文.tw/'
|
||||
response = ydl.urlopen(url).read().decode('utf-8')
|
||||
# b'xn--fiq228c' is '中文'.encode('idna')
|
||||
self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -28,6 +28,7 @@ from youtube_dl.utils import (
|
||||
encodeFilename,
|
||||
escape_rfc3986,
|
||||
escape_url,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
@ -77,6 +78,7 @@ from youtube_dl.utils import (
|
||||
cli_bool_option,
|
||||
)
|
||||
from youtube_dl.compat import (
|
||||
compat_chr,
|
||||
compat_etree_fromstring,
|
||||
compat_urlparse,
|
||||
compat_parse_qs,
|
||||
@ -575,11 +577,11 @@ class TestUtil(unittest.TestCase):
|
||||
)
|
||||
self.assertEqual(
|
||||
escape_url('http://тест.рф/фрагмент'),
|
||||
'http://тест.рф/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82'
|
||||
'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82'
|
||||
)
|
||||
self.assertEqual(
|
||||
escape_url('http://тест.рф/абв?абв=абв#абв'),
|
||||
'http://тест.рф/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2'
|
||||
'http://xn--e1aybc.xn--p1ai/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2'
|
||||
)
|
||||
self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
|
||||
|
||||
@ -629,6 +631,44 @@ class TestUtil(unittest.TestCase):
|
||||
on = js_to_json('{"abc": "def",}')
|
||||
self.assertEqual(json.loads(on), {'abc': 'def'})
|
||||
|
||||
def test_extract_attributes(self):
|
||||
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes('<e x=y>'), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes('<e x="a \'b\' c">'), {'x': "a 'b' c"})
|
||||
self.assertEqual(extract_attributes('<e x=\'a "b" c\'>'), {'x': 'a "b" c'})
|
||||
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes('<e x="&">'), {'x': '&'}) # XML
|
||||
self.assertEqual(extract_attributes('<e x=""">'), {'x': '"'})
|
||||
self.assertEqual(extract_attributes('<e x="£">'), {'x': '£'}) # HTML 3.2
|
||||
self.assertEqual(extract_attributes('<e x="λ">'), {'x': 'λ'}) # HTML 4.0
|
||||
self.assertEqual(extract_attributes('<e x="&foo">'), {'x': '&foo'})
|
||||
self.assertEqual(extract_attributes('<e x="\'">'), {'x': "'"})
|
||||
self.assertEqual(extract_attributes('<e x=\'"\'>'), {'x': '"'})
|
||||
self.assertEqual(extract_attributes('<e x >'), {'x': None})
|
||||
self.assertEqual(extract_attributes('<e x=y a>'), {'x': 'y', 'a': None})
|
||||
self.assertEqual(extract_attributes('<e x= y>'), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes('<e x=1 y=2 x=3>'), {'y': '2', 'x': '3'})
|
||||
self.assertEqual(extract_attributes('<e \nx=\ny\n>'), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes('<e \nx=\n"y"\n>'), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes("<e \nx=\n'y'\n>"), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes('<e \nx="\ny\n">'), {'x': '\ny\n'})
|
||||
self.assertEqual(extract_attributes('<e CAPS=x>'), {'caps': 'x'}) # Names lowercased
|
||||
self.assertEqual(extract_attributes('<e x=1 X=2>'), {'x': '2'})
|
||||
self.assertEqual(extract_attributes('<e X=1 x=2>'), {'x': '2'})
|
||||
self.assertEqual(extract_attributes('<e _:funny-name1=1>'), {'_:funny-name1': '1'})
|
||||
self.assertEqual(extract_attributes('<e x="Fáilte 世界 \U0001f600">'), {'x': 'Fáilte 世界 \U0001f600'})
|
||||
self.assertEqual(extract_attributes('<e x="décomposé">'), {'x': 'décompose\u0301'})
|
||||
# "Narrow" Python builds don't support unicode code points outside BMP.
|
||||
try:
|
||||
compat_chr(0x10000)
|
||||
supports_outside_bmp = True
|
||||
except ValueError:
|
||||
supports_outside_bmp = False
|
||||
if supports_outside_bmp:
|
||||
self.assertEqual(extract_attributes('<e x="Smile 😀!">'), {'x': 'Smile \U0001f600!'})
|
||||
|
||||
def test_clean_html(self):
|
||||
self.assertEqual(clean_html('a:\nb'), 'a: b')
|
||||
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
|
||||
@ -662,6 +702,8 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(parse_count('1.000'), 1000)
|
||||
self.assertEqual(parse_count('1.1k'), 1100)
|
||||
self.assertEqual(parse_count('1.1kk'), 1100000)
|
||||
self.assertEqual(parse_count('1.1kk '), 1100000)
|
||||
self.assertEqual(parse_count('1.1kk views'), 1100000)
|
||||
|
||||
def test_version_tuple(self):
|
||||
self.assertEqual(version_tuple('1'), (1,))
|
||||
|
2
tox.ini
2
tox.ini
@ -8,6 +8,6 @@ deps =
|
||||
passenv = HOME
|
||||
defaultargs = test --exclude test_download.py --exclude test_age_restriction.py
|
||||
--exclude test_subtitles.py --exclude test_write_annotations.py
|
||||
--exclude test_youtube_lists.py
|
||||
--exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py
|
||||
commands = nosetests --verbose {posargs:{[testenv]defaultargs}} # --with-coverage --cover-package=youtube_dl --cover-html
|
||||
# test.test_download:TestDownload.test_NowVideo
|
||||
|
@ -906,7 +906,7 @@ class YoutubeDL(object):
|
||||
'*=': lambda attr, value: value in attr,
|
||||
}
|
||||
str_operator_rex = re.compile(r'''(?x)
|
||||
\s*(?P<key>ext|acodec|vcodec|container|protocol)
|
||||
\s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
|
||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
||||
\s*(?P<value>[a-zA-Z0-9._-]+)
|
||||
\s*$
|
||||
@ -1865,7 +1865,7 @@ class YoutubeDL(object):
|
||||
if fdict.get('language'):
|
||||
if res:
|
||||
res += ' '
|
||||
res += '[%s]' % fdict['language']
|
||||
res += '[%s] ' % fdict['language']
|
||||
if fdict.get('format_note') is not None:
|
||||
res += fdict['format_note'] + ' '
|
||||
if fdict.get('tbr') is not None:
|
||||
|
@ -144,14 +144,20 @@ def _real_main(argv=None):
|
||||
if numeric_limit is None:
|
||||
parser.error('invalid max_filesize specified')
|
||||
opts.max_filesize = numeric_limit
|
||||
if opts.retries is not None:
|
||||
if opts.retries in ('inf', 'infinite'):
|
||||
opts_retries = float('inf')
|
||||
|
||||
def parse_retries(retries):
|
||||
if retries in ('inf', 'infinite'):
|
||||
parsed_retries = float('inf')
|
||||
else:
|
||||
try:
|
||||
opts_retries = int(opts.retries)
|
||||
parsed_retries = int(retries)
|
||||
except (TypeError, ValueError):
|
||||
parser.error('invalid retry count specified')
|
||||
return parsed_retries
|
||||
if opts.retries is not None:
|
||||
opts.retries = parse_retries(opts.retries)
|
||||
if opts.fragment_retries is not None:
|
||||
opts.fragment_retries = parse_retries(opts.fragment_retries)
|
||||
if opts.buffersize is not None:
|
||||
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
|
||||
if numeric_buffersize is None:
|
||||
@ -301,7 +307,8 @@ def _real_main(argv=None):
|
||||
'force_generic_extractor': opts.force_generic_extractor,
|
||||
'ratelimit': opts.ratelimit,
|
||||
'nooverwrites': opts.nooverwrites,
|
||||
'retries': opts_retries,
|
||||
'retries': opts.retries,
|
||||
'fragment_retries': opts.fragment_retries,
|
||||
'buffersize': opts.buffersize,
|
||||
'noresizebuffer': opts.noresizebuffer,
|
||||
'continuedl': opts.continue_dl,
|
||||
|
@ -77,6 +77,11 @@ try:
|
||||
except ImportError: # Python 2
|
||||
from urllib import urlretrieve as compat_urlretrieve
|
||||
|
||||
try:
|
||||
from html.parser import HTMLParser as compat_HTMLParser
|
||||
except ImportError: # Python 2
|
||||
from HTMLParser import HTMLParser as compat_HTMLParser
|
||||
|
||||
|
||||
try:
|
||||
from subprocess import DEVNULL
|
||||
@ -251,6 +256,16 @@ else:
|
||||
el.text = el.text.decode('utf-8')
|
||||
return doc
|
||||
|
||||
if sys.version_info < (2, 7):
|
||||
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
|
||||
# .//node does not match if a node is a direct child of . !
|
||||
def compat_xpath(xpath):
|
||||
if isinstance(xpath, compat_str):
|
||||
xpath = xpath.encode('ascii')
|
||||
return xpath
|
||||
else:
|
||||
compat_xpath = lambda xpath: xpath
|
||||
|
||||
try:
|
||||
from urllib.parse import parse_qs as compat_parse_qs
|
||||
except ImportError: # Python 2
|
||||
@ -543,6 +558,7 @@ else:
|
||||
from tokenize import generate_tokens as compat_tokenize_tokenize
|
||||
|
||||
__all__ = [
|
||||
'compat_HTMLParser',
|
||||
'compat_HTTPError',
|
||||
'compat_basestring',
|
||||
'compat_chr',
|
||||
@ -579,6 +595,7 @@ __all__ = [
|
||||
'compat_urlparse',
|
||||
'compat_urlretrieve',
|
||||
'compat_xml_parse_error',
|
||||
'compat_xpath',
|
||||
'shlex_quote',
|
||||
'subprocess_check_output',
|
||||
'workaround_optparse_bug9161',
|
||||
|
@ -115,6 +115,10 @@ class FileDownloader(object):
|
||||
return '%10s' % '---b/s'
|
||||
return '%10s' % ('%s/s' % format_bytes(speed))
|
||||
|
||||
@staticmethod
|
||||
def format_retries(retries):
|
||||
return 'inf' if retries == float('inf') else '%.0f' % retries
|
||||
|
||||
@staticmethod
|
||||
def best_block_size(elapsed_time, bytes):
|
||||
new_min = max(bytes / 2.0, 1.0)
|
||||
@ -297,7 +301,9 @@ class FileDownloader(object):
|
||||
|
||||
def report_retry(self, count, retries):
|
||||
"""Report retry in case of HTTP error 5xx"""
|
||||
self.to_screen('[download] Got server HTTP error. Retrying (attempt %d of %.0f)...' % (count, retries))
|
||||
self.to_screen(
|
||||
'[download] Got server HTTP error. Retrying (attempt %d of %s)...'
|
||||
% (count, self.format_retries(retries)))
|
||||
|
||||
def report_file_already_downloaded(self, file_name):
|
||||
"""Report file has already been fully downloaded."""
|
||||
|
@ -4,6 +4,7 @@ import os
|
||||
import re
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import compat_urllib_error
|
||||
from ..utils import (
|
||||
sanitize_open,
|
||||
encodeFilename,
|
||||
@ -36,20 +37,41 @@ class DashSegmentsFD(FragmentFD):
|
||||
|
||||
segments_filenames = []
|
||||
|
||||
def append_url_to_file(target_url, target_filename):
|
||||
success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)})
|
||||
if not success:
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
|
||||
def append_url_to_file(target_url, tmp_filename, segment_name):
|
||||
target_filename = '%s-%s' % (tmp_filename, segment_name)
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)})
|
||||
if not success:
|
||||
return False
|
||||
down, target_sanitized = sanitize_open(target_filename, 'rb')
|
||||
ctx['dest_stream'].write(down.read())
|
||||
down.close()
|
||||
segments_filenames.append(target_sanitized)
|
||||
break
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
# YouTube may often return 404 HTTP error for a fragment causing the
|
||||
# whole download to fail. However if the same fragment is immediately
|
||||
# retried with the same request data this usually succeeds (1-2 attemps
|
||||
# is usually enough) thus allowing to download the whole file successfully.
|
||||
# So, we will retry all fragments that fail with 404 HTTP error for now.
|
||||
if err.code != 404:
|
||||
raise
|
||||
# Retry fragment
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(segment_name, count, fragment_retries)
|
||||
if count > fragment_retries:
|
||||
self.report_error('giving up after %s fragment retries' % fragment_retries)
|
||||
return False
|
||||
down, target_sanitized = sanitize_open(target_filename, 'rb')
|
||||
ctx['dest_stream'].write(down.read())
|
||||
down.close()
|
||||
segments_filenames.append(target_sanitized)
|
||||
|
||||
if initialization_url:
|
||||
append_url_to_file(initialization_url, ctx['tmpfilename'] + '-Init')
|
||||
append_url_to_file(initialization_url, ctx['tmpfilename'], 'Init')
|
||||
for i, segment_url in enumerate(segment_urls):
|
||||
segment_filename = '%s-Seg%d' % (ctx['tmpfilename'], i)
|
||||
append_url_to_file(segment_url, segment_filename)
|
||||
append_url_to_file(segment_url, ctx['tmpfilename'], 'Seg%d' % i)
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
|
@ -19,8 +19,17 @@ class HttpQuietDownloader(HttpFD):
|
||||
class FragmentFD(FileDownloader):
|
||||
"""
|
||||
A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests).
|
||||
|
||||
Available options:
|
||||
|
||||
fragment_retries: Number of times to retry a fragment for HTTP error (DASH only)
|
||||
"""
|
||||
|
||||
def report_retry_fragment(self, fragment_name, count, retries):
|
||||
self.to_screen(
|
||||
'[download] Got server HTTP error. Retrying fragment %s (attempt %d of %s)...'
|
||||
% (fragment_name, count, self.format_retries(retries)))
|
||||
|
||||
def _prepare_and_start_frag_download(self, ctx):
|
||||
self._prepare_frag_download(ctx)
|
||||
self._start_frag_download(ctx)
|
||||
|
@ -72,6 +72,7 @@ from .bet import BetIE
|
||||
from .bigflix import BigflixIE
|
||||
from .bild import BildIE
|
||||
from .bilibili import BiliBiliIE
|
||||
from .biobiochiletv import BioBioChileTVIE
|
||||
from .bleacherreport import (
|
||||
BleacherReportIE,
|
||||
BleacherReportCMSIE,
|
||||
@ -81,6 +82,7 @@ from .bloomberg import BloombergIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bpb import BpbIE
|
||||
from .br import BRIE
|
||||
from .bravotv import BravoTVIE
|
||||
from .breakcom import BreakIE
|
||||
from .brightcove import (
|
||||
BrightcoveLegacyIE,
|
||||
@ -107,6 +109,7 @@ from .cbsnews import (
|
||||
)
|
||||
from .cbssports import CBSSportsIE
|
||||
from .ccc import CCCIE
|
||||
from .cda import CDAIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
from .channel9 import Channel9IE
|
||||
from .chaturbate import ChaturbateIE
|
||||
@ -135,6 +138,7 @@ from .collegerama import CollegeRamaIE
|
||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||
from .comcarcoff import ComCarCoffIE
|
||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||
from .commonprotocols import RtmpIE
|
||||
from .condenast import CondeNastIE
|
||||
from .cracked import CrackedIE
|
||||
from .crackle import CrackleIE
|
||||
@ -282,6 +286,7 @@ from .goshgay import GoshgayIE
|
||||
from .gputechconf import GPUTechConfIE
|
||||
from .groupon import GrouponIE
|
||||
from .hark import HarkIE
|
||||
from .hbo import HBOIE
|
||||
from .hearthisat import HearThisAtIE
|
||||
from .heise import HeiseIE
|
||||
from .hellporno import HellPornoIE
|
||||
@ -405,6 +410,7 @@ from .mit import TechTVMITIE, MITIE, OCWMITIE
|
||||
from .mitele import MiTeleIE
|
||||
from .mixcloud import MixcloudIE
|
||||
from .mlb import MLBIE
|
||||
from .mnet import MnetIE
|
||||
from .mpora import MporaIE
|
||||
from .moevideo import MoeVideoIE
|
||||
from .mofosex import MofosexIE
|
||||
@ -530,6 +536,7 @@ from .ooyala import (
|
||||
OoyalaIE,
|
||||
OoyalaExternalIE,
|
||||
)
|
||||
from .openload import OpenloadIE
|
||||
from .ora import OraTVIE
|
||||
from .orf import (
|
||||
ORFTVthekIE,
|
||||
@ -625,6 +632,7 @@ from .ruutu import RuutuIE
|
||||
from .sandia import SandiaIE
|
||||
from .safari import (
|
||||
SafariIE,
|
||||
SafariApiIE,
|
||||
SafariCourseIE,
|
||||
)
|
||||
from .sapo import SapoIE
|
||||
@ -736,7 +744,9 @@ from .theplatform import (
|
||||
ThePlatformIE,
|
||||
ThePlatformFeedIE,
|
||||
)
|
||||
from .thescene import TheSceneIE
|
||||
from .thesixtyone import TheSixtyOneIE
|
||||
from .thestar import TheStarIE
|
||||
from .thisamericanlife import ThisAmericanLifeIE
|
||||
from .thisav import ThisAVIE
|
||||
from .tinypic import TinyPicIE
|
||||
@ -783,6 +793,7 @@ from .tv2 import (
|
||||
TV2IE,
|
||||
TV2ArticleIE,
|
||||
)
|
||||
from .tv3 import TV3IE
|
||||
from .tv4 import TV4IE
|
||||
from .tvc import (
|
||||
TVCIE,
|
||||
@ -950,7 +961,9 @@ from .youtube import (
|
||||
YoutubeChannelIE,
|
||||
YoutubeFavouritesIE,
|
||||
YoutubeHistoryIE,
|
||||
YoutubeLiveIE,
|
||||
YoutubePlaylistIE,
|
||||
YoutubePlaylistsIE,
|
||||
YoutubeRecommendedIE,
|
||||
YoutubeSearchDateIE,
|
||||
YoutubeSearchIE,
|
||||
@ -960,7 +973,6 @@ from .youtube import (
|
||||
YoutubeTruncatedIDIE,
|
||||
YoutubeTruncatedURLIE,
|
||||
YoutubeUserIE,
|
||||
YoutubePlaylistsIE,
|
||||
YoutubeWatchLaterIE,
|
||||
)
|
||||
from .zapiks import ZapiksIE
|
||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
class ABCIE(InfoExtractor):
|
||||
IE_NAME = 'abc.net.au'
|
||||
_VALID_URL = r'http://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
|
||||
|
@ -16,7 +16,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class AddAnimeIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
|
||||
'md5': '72954ea10bc979ab5e2eb288b21425a0',
|
||||
|
@ -6,7 +6,7 @@ from ..utils import int_or_none
|
||||
|
||||
|
||||
class AftonbladetIE(InfoExtractor):
|
||||
_VALID_URL = r'http://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
|
||||
'info_dict': {
|
||||
|
@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class AlJazeeraIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
|
||||
_VALID_URL = r'https?://www\.aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
|
||||
@ -13,24 +13,18 @@ class AlJazeeraIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'The Slum - Episode 1: Deliverance',
|
||||
'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.',
|
||||
'uploader': 'Al Jazeera English',
|
||||
'uploader_id': '665003303001',
|
||||
'timestamp': 1411116829,
|
||||
'upload_date': '20140919',
|
||||
},
|
||||
'add_ie': ['BrightcoveLegacy'],
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
}
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
program_name = self._match_id(url)
|
||||
webpage = self._download_webpage(url, program_name)
|
||||
brightcove_id = self._search_regex(
|
||||
r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
|
||||
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': (
|
||||
'brightcove:'
|
||||
'playerKey=AQ~~%2CAAAAmtVJIFk~%2CTVGOQ5ZTwJbeMWnq5d_H4MOM57xfzApc'
|
||||
'&%40videoPlayer={0}'.format(brightcove_id)
|
||||
),
|
||||
'ie_key': 'BrightcoveLegacy',
|
||||
}
|
||||
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
||||
|
@ -3,10 +3,14 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
encode_dict,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
@ -18,7 +22,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
|
||||
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
|
||||
_NETRC_MACHINE = 'animeondemand'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://www.anime-on-demand.de/anime/161',
|
||||
'info_dict': {
|
||||
'id': '161',
|
||||
@ -26,7 +30,19 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
'description': 'md5:6681ce3c07c7189d255ac6ab23812d31',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}
|
||||
}, {
|
||||
# Film wording is used instead of Episode
|
||||
'url': 'https://www.anime-on-demand.de/anime/39',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Episodes without titles
|
||||
'url': 'https://www.anime-on-demand.de/anime/162',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# ger/jap, Dub/OmU, account required
|
||||
'url': 'https://www.anime-on-demand.de/anime/169',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
@ -36,6 +52,10 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
|
||||
if '>Our licensing terms allow the distribution of animes only to German-speaking countries of Europe' in login_page:
|
||||
self.raise_geo_restricted(
|
||||
'%s is only available in German-speaking countries of Europe' % self.IE_NAME)
|
||||
|
||||
login_form = self._form_hidden_inputs('new_user', login_page)
|
||||
|
||||
login_form.update({
|
||||
@ -91,14 +111,22 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
|
||||
entries = []
|
||||
|
||||
for episode_html in re.findall(r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage):
|
||||
m = re.search(
|
||||
r'class="episodebox-title"[^>]+title="Episode (?P<number>\d+) - (?P<title>.+?)"', episode_html)
|
||||
if not m:
|
||||
for num, episode_html in enumerate(re.findall(
|
||||
r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage), 1):
|
||||
episodebox_title = self._search_regex(
|
||||
(r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||
r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
|
||||
episode_html, 'episodebox title', default=None, group='title')
|
||||
if not episodebox_title:
|
||||
continue
|
||||
|
||||
episode_number = int(m.group('number'))
|
||||
episode_title = m.group('title')
|
||||
episode_number = int(self._search_regex(
|
||||
r'(?:Episode|Film)\s*(\d+)',
|
||||
episodebox_title, 'episode number', default=num))
|
||||
episode_title = self._search_regex(
|
||||
r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
|
||||
episodebox_title, 'episode title', default=None)
|
||||
|
||||
video_id = 'episode-%d' % episode_number
|
||||
|
||||
common_info = {
|
||||
@ -110,33 +138,86 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
|
||||
playlist_url = self._search_regex(
|
||||
r'data-playlist=(["\'])(?P<url>.+?)\1',
|
||||
episode_html, 'data playlist', default=None, group='url')
|
||||
if playlist_url:
|
||||
request = sanitized_Request(
|
||||
compat_urlparse.urljoin(url, playlist_url),
|
||||
headers={
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'X-CSRF-Token': csrf_token,
|
||||
'Referer': url,
|
||||
'Accept': 'application/json, text/javascript, */*; q=0.01',
|
||||
})
|
||||
for input_ in re.findall(
|
||||
r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', episode_html):
|
||||
attributes = extract_attributes(input_)
|
||||
playlist_urls = []
|
||||
for playlist_key in ('data-playlist', 'data-otherplaylist'):
|
||||
playlist_url = attributes.get(playlist_key)
|
||||
if isinstance(playlist_url, compat_str) and re.match(
|
||||
r'/?[\da-zA-Z]+', playlist_url):
|
||||
playlist_urls.append(attributes[playlist_key])
|
||||
if not playlist_urls:
|
||||
continue
|
||||
|
||||
playlist = self._download_json(
|
||||
request, video_id, 'Downloading playlist JSON', fatal=False)
|
||||
if playlist:
|
||||
playlist = playlist['playlist'][0]
|
||||
title = playlist['title']
|
||||
lang = attributes.get('data-lang')
|
||||
lang_note = attributes.get('value')
|
||||
|
||||
for playlist_url in playlist_urls:
|
||||
kind = self._search_regex(
|
||||
r'videomaterialurl/\d+/([^/]+)/',
|
||||
playlist_url, 'media kind', default=None)
|
||||
format_id_list = []
|
||||
if lang:
|
||||
format_id_list.append(lang)
|
||||
if kind:
|
||||
format_id_list.append(kind)
|
||||
if not format_id_list:
|
||||
format_id_list.append(compat_str(num))
|
||||
format_id = '-'.join(format_id_list)
|
||||
format_note = ', '.join(filter(None, (kind, lang_note)))
|
||||
request = sanitized_Request(
|
||||
compat_urlparse.urljoin(url, playlist_url),
|
||||
headers={
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'X-CSRF-Token': csrf_token,
|
||||
'Referer': url,
|
||||
'Accept': 'application/json, text/javascript, */*; q=0.01',
|
||||
})
|
||||
playlist = self._download_json(
|
||||
request, video_id, 'Downloading %s playlist JSON' % format_id,
|
||||
fatal=False)
|
||||
if not playlist:
|
||||
continue
|
||||
start_video = playlist.get('startvideo', 0)
|
||||
playlist = playlist.get('playlist')
|
||||
if not playlist or not isinstance(playlist, list):
|
||||
continue
|
||||
playlist = playlist[start_video]
|
||||
title = playlist.get('title')
|
||||
if not title:
|
||||
continue
|
||||
description = playlist.get('description')
|
||||
for source in playlist.get('sources', []):
|
||||
file_ = source.get('file')
|
||||
if file_ and determine_ext(file_) == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
if not file_:
|
||||
continue
|
||||
ext = determine_ext(file_)
|
||||
format_id_list = [lang, kind]
|
||||
if ext == 'm3u8':
|
||||
format_id_list.append('hls')
|
||||
elif source.get('type') == 'video/dash' or ext == 'mpd':
|
||||
format_id_list.append('dash')
|
||||
format_id = '-'.join(filter(None, format_id_list))
|
||||
if ext == 'm3u8':
|
||||
file_formats = self._extract_m3u8_formats(
|
||||
file_, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False)
|
||||
elif source.get('type') == 'video/dash' or ext == 'mpd':
|
||||
continue
|
||||
file_formats = self._extract_mpd_formats(
|
||||
file_, video_id, mpd_id=format_id, fatal=False)
|
||||
else:
|
||||
continue
|
||||
for f in file_formats:
|
||||
f.update({
|
||||
'language': lang,
|
||||
'format_note': format_note,
|
||||
})
|
||||
formats.extend(file_formats)
|
||||
|
||||
if formats:
|
||||
self._sort_formats(formats)
|
||||
f = common_info.copy()
|
||||
f.update({
|
||||
'title': title,
|
||||
@ -145,16 +226,18 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
})
|
||||
entries.append(f)
|
||||
|
||||
m = re.search(
|
||||
r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<',
|
||||
episode_html)
|
||||
if m:
|
||||
f = common_info.copy()
|
||||
f.update({
|
||||
'id': '%s-teaser' % f['id'],
|
||||
'title': m.group('title'),
|
||||
'url': compat_urlparse.urljoin(url, m.group('href')),
|
||||
})
|
||||
entries.append(f)
|
||||
# Extract teaser only when full episode is not available
|
||||
if not formats:
|
||||
m = re.search(
|
||||
r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<',
|
||||
episode_html)
|
||||
if m:
|
||||
f = common_info.copy()
|
||||
f.update({
|
||||
'id': '%s-teaser' % f['id'],
|
||||
'title': m.group('title'),
|
||||
'url': compat_urlparse.urljoin(url, m.group('href')),
|
||||
})
|
||||
entries.append(f)
|
||||
|
||||
return self.playlist_result(entries, anime_id, anime_title, anime_description)
|
||||
|
@ -5,7 +5,7 @@ from .common import InfoExtractor
|
||||
|
||||
class AolIE(InfoExtractor):
|
||||
IE_NAME = 'on.aol.com'
|
||||
_VALID_URL = r'(?:aol-video:|http://on\.aol\.com/video/.*-)(?P<id>[0-9]+)(?:$|\?)'
|
||||
_VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/video/.*-)(?P<id>[0-9]+)(?:$|\?)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
|
||||
@ -25,7 +25,7 @@ class AolIE(InfoExtractor):
|
||||
|
||||
class AolFeaturesIE(InfoExtractor):
|
||||
IE_NAME = 'features.aol.com'
|
||||
_VALID_URL = r'http://features\.aol\.com/video/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://features\.aol\.com/video/(?P<id>[^/?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://features.aol.com/video/behind-secret-second-careers-late-night-talk-show-hosts',
|
||||
|
@ -23,7 +23,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ArteTvIE(InfoExtractor):
|
||||
_VALID_URL = r'http://videos\.arte\.tv/(?P<lang>fr|de|en|es)/.*-(?P<id>.*?)\.html'
|
||||
_VALID_URL = r'https?://videos\.arte\.tv/(?P<lang>fr|de|en|es)/.*-(?P<id>.*?)\.html'
|
||||
IE_NAME = 'arte.tv'
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -98,7 +98,7 @@ class AzubuIE(InfoExtractor):
|
||||
|
||||
|
||||
class AzubuLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www.azubu.tv/(?P<id>[^/]+)$'
|
||||
_VALID_URL = r'https?://www.azubu.tv/(?P<id>[^/]+)$'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.azubu.tv/MarsTVMDLen',
|
||||
|
@ -9,7 +9,7 @@ from ..utils import unescapeHTML
|
||||
|
||||
class BaiduVideoIE(InfoExtractor):
|
||||
IE_DESC = '百度视频'
|
||||
_VALID_URL = r'http://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
|
||||
_VALID_URL = r'https?://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
|
||||
_TESTS = [{
|
||||
'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',
|
||||
'info_dict': {
|
||||
|
@ -942,7 +942,7 @@ class BBCIE(BBCCoUkIE):
|
||||
|
||||
|
||||
class BBCCoUkArticleIE(InfoExtractor):
|
||||
_VALID_URL = 'http://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
|
||||
IE_NAME = 'bbc.co.uk:article'
|
||||
IE_DESC = 'BBC articles'
|
||||
|
||||
|
@ -8,7 +8,7 @@ from ..utils import url_basename
|
||||
|
||||
|
||||
class BehindKinkIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',
|
||||
'md5': '507b57d8fdcd75a41a9a7bdb7989c762',
|
||||
|
@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class BiliBiliIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?'
|
||||
_VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||
|
86
youtube_dl/extractor/biobiochiletv.py
Normal file
86
youtube_dl/extractor/biobiochiletv.py
Normal file
@ -0,0 +1,86 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_end
|
||||
|
||||
|
||||
class BioBioChileTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv\.biobiochile\.cl/notas/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml',
|
||||
'md5': '26f51f03cf580265defefb4518faec09',
|
||||
'info_dict': {
|
||||
'id': 'sobre-camaras-y-camarillas-parlamentarias',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sobre Cámaras y camarillas parlamentarias',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'Fernando Atria',
|
||||
},
|
||||
}, {
|
||||
# different uploader layout
|
||||
'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml',
|
||||
'md5': 'edc2e6b58974c46d5b047dea3c539ff3',
|
||||
'info_dict': {
|
||||
'id': 'natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades',
|
||||
'ext': 'mp4',
|
||||
'title': 'Natalia Valdebenito repasa a diputado Hasbún: Pasó a la categoría de hablar brutalidades',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'Piangella Obrador',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://tv.biobiochile.cl/notas/2015/10/21/exclusivo-hector-pinto-formador-de-chupete-revela-version-del-ex-delantero-albo.shtml',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV')
|
||||
|
||||
file_url = self._search_regex(
|
||||
r'loadFWPlayerVideo\([^,]+,\s*(["\'])(?P<url>.+?)\1',
|
||||
webpage, 'file url', group='url')
|
||||
|
||||
base_url = self._search_regex(
|
||||
r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*fileURL', webpage,
|
||||
'base url', default='http://unlimited2-cl.digitalproserver.com/bbtv/',
|
||||
group='url')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
'%s%s/playlist.m3u8' % (base_url, file_url), video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
|
||||
f = {
|
||||
'url': '%s%s' % (base_url, file_url),
|
||||
'format_id': 'http',
|
||||
'protocol': 'http',
|
||||
'preference': 1,
|
||||
}
|
||||
if formats:
|
||||
f_copy = formats[-1].copy()
|
||||
f_copy.update(f)
|
||||
f = f_copy
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
uploader = self._html_search_regex(
|
||||
r'<a[^>]+href=["\']https?://busca\.biobiochile\.cl/author[^>]+>(.+?)</a>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'formats': formats,
|
||||
}
|
@ -33,7 +33,7 @@ class BokeCCBaseIE(InfoExtractor):
|
||||
|
||||
class BokeCCIE(BokeCCBaseIE):
|
||||
_IE_DESC = 'CC视频'
|
||||
_VALID_URL = r'http://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
|
||||
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://union.bokecc.com/playvideo.bo?vid=E44D40C15E65EA30&uid=CD0C5D3C8614B28B',
|
||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
class BpbIE(InfoExtractor):
|
||||
IE_DESC = 'Bundeszentrale für politische Bildung'
|
||||
_VALID_URL = r'http://www\.bpb\.de/mediathek/(?P<id>[0-9]+)/'
|
||||
_VALID_URL = r'https?://www\.bpb\.de/mediathek/(?P<id>[0-9]+)/'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
|
||||
|
28
youtube_dl/extractor/bravotv.py
Normal file
28
youtube_dl/extractor/bravotv.py
Normal file
@ -0,0 +1,28 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
|
||||
|
||||
class BravoTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+videos/(?P<id>[^/?]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.bravotv.com/last-chance-kitchen/season-5/videos/lck-ep-12-fishy-finale',
|
||||
'md5': 'd60cdf68904e854fac669bd26cccf801',
|
||||
'info_dict': {
|
||||
'id': 'LitrBdX64qLn',
|
||||
'ext': 'mp4',
|
||||
'title': 'Last Chance Kitchen Returns',
|
||||
'description': 'S13: Last Chance Kitchen Returns for Top Chef Season 13',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
account_pid = self._search_regex(r'"account_pid"\s*:\s*"([^"]+)"', webpage, 'account pid')
|
||||
release_pid = self._search_regex(r'"release_pid"\s*:\s*"([^"]+)"', webpage, 'release pid')
|
||||
return self.url_result(smuggle_url(
|
||||
'http://link.theplatform.com/s/%s/%s?mbr=true&switch=progressive' % (account_pid, release_pid),
|
||||
{'force_smil_url': True}), 'ThePlatform', release_pid)
|
@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class BreakIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
|
||||
'info_dict': {
|
||||
|
@ -9,7 +9,6 @@ from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
compat_xml_parse_error,
|
||||
@ -24,16 +23,16 @@ from ..utils import (
|
||||
js_to_json,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class BrightcoveLegacyIE(InfoExtractor):
|
||||
IE_NAME = 'brightcove:legacy'
|
||||
_VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
|
||||
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
|
||||
_FEDERATED_URL = 'http://c.brightcove.com/services/viewer/htmlFederated'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@ -156,8 +155,8 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
# Not all pages define this value
|
||||
if playerKey is not None:
|
||||
params['playerKey'] = playerKey
|
||||
# The three fields hold the id of the video
|
||||
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
|
||||
# These fields hold the id of the video
|
||||
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
|
||||
if videoPlayer is not None:
|
||||
params['@videoPlayer'] = videoPlayer
|
||||
linkBase = find_param('linkBaseURL')
|
||||
@ -185,8 +184,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
|
||||
@classmethod
|
||||
def _make_brightcove_url(cls, params):
|
||||
data = compat_urllib_parse.urlencode(params)
|
||||
return cls._FEDERATED_URL_TEMPLATE % data
|
||||
return update_url_query(cls._FEDERATED_URL, params)
|
||||
|
||||
@classmethod
|
||||
def _extract_brightcove_url(cls, webpage):
|
||||
@ -240,7 +238,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
# We set the original url as the default 'Referer' header
|
||||
referer = smuggled_data.get('Referer', url)
|
||||
return self._get_video_info(
|
||||
videoPlayer[0], query_str, query, referer=referer)
|
||||
videoPlayer[0], query, referer=referer)
|
||||
elif 'playerKey' in query:
|
||||
player_key = query['playerKey']
|
||||
return self._get_playlist_info(player_key[0])
|
||||
@ -249,15 +247,14 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
|
||||
expected=True)
|
||||
|
||||
def _get_video_info(self, video_id, query_str, query, referer=None):
|
||||
request_url = self._FEDERATED_URL_TEMPLATE % query_str
|
||||
req = sanitized_Request(request_url)
|
||||
def _get_video_info(self, video_id, query, referer=None):
|
||||
headers = {}
|
||||
linkBase = query.get('linkBaseURL')
|
||||
if linkBase is not None:
|
||||
referer = linkBase[0]
|
||||
if referer is not None:
|
||||
req.add_header('Referer', referer)
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
headers['Referer'] = referer
|
||||
webpage = self._download_webpage(self._FEDERATED_URL, video_id, headers=headers, query=query)
|
||||
|
||||
error_msg = self._html_search_regex(
|
||||
r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
|
||||
@ -415,8 +412,8 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
|
||||
# Look for iframe embeds [1]
|
||||
for _, url in re.findall(
|
||||
r'<iframe[^>]+src=(["\'])((?:https?:)//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
|
||||
entries.append(url)
|
||||
r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
|
||||
entries.append(url if url.startswith('http') else 'http:' + url)
|
||||
|
||||
# Look for embed_in_page embeds [2]
|
||||
for video_id, account_id, player_id, embed in re.findall(
|
||||
@ -425,7 +422,7 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
# According to [4] data-video-id may be prefixed with ref:
|
||||
r'''(?sx)
|
||||
<video[^>]+
|
||||
data-video-id=["\']((?:ref:)?\d+)["\'][^>]*>.*?
|
||||
data-video-id=["\'](\d+|ref:[^"\']+)["\'][^>]*>.*?
|
||||
</video>.*?
|
||||
<script[^>]+
|
||||
src=["\'](?:https?:)?//players\.brightcove\.net/
|
||||
@ -459,12 +456,11 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
|
||||
webpage, 'policy key', group='pk')
|
||||
|
||||
req = sanitized_Request(
|
||||
'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s'
|
||||
% (account_id, video_id),
|
||||
headers={'Accept': 'application/json;pk=%s' % policy_key})
|
||||
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
|
||||
try:
|
||||
json_data = self._download_json(req, video_id)
|
||||
json_data = self._download_json(api_url, video_id, headers={
|
||||
'Accept': 'application/json;pk=%s' % policy_key
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
json_data = self._parse_json(e.cause.read().decode(), video_id)
|
||||
@ -482,8 +478,11 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
if not src:
|
||||
continue
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
src, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
elif source_type == 'application/dash+xml':
|
||||
if not src:
|
||||
continue
|
||||
formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False))
|
||||
else:
|
||||
streaming_src = source.get('streaming_src')
|
||||
stream_name, app_name = source.get('stream_name'), source.get('app_name')
|
||||
@ -491,15 +490,23 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
continue
|
||||
tbr = float_or_none(source.get('avg_bitrate'), 1000)
|
||||
height = int_or_none(source.get('height'))
|
||||
width = int_or_none(source.get('width'))
|
||||
f = {
|
||||
'tbr': tbr,
|
||||
'width': int_or_none(source.get('width')),
|
||||
'height': height,
|
||||
'filesize': int_or_none(source.get('size')),
|
||||
'container': container,
|
||||
'vcodec': source.get('codec'),
|
||||
'ext': source.get('container').lower(),
|
||||
'ext': container.lower(),
|
||||
}
|
||||
if width == 0 and height == 0:
|
||||
f.update({
|
||||
'vcodec': 'none',
|
||||
})
|
||||
else:
|
||||
f.update({
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vcodec': source.get('codec'),
|
||||
})
|
||||
|
||||
def build_format_id(kind):
|
||||
format_id = kind
|
||||
|
@ -16,7 +16,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class CamdemyIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# single file
|
||||
'url': 'http://www.camdemy.com/media/5181/',
|
||||
@ -104,7 +104,7 @@ class CamdemyIE(InfoExtractor):
|
||||
|
||||
|
||||
class CamdemyFolderIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www.camdemy.com/folder/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://www.camdemy.com/folder/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# links with trailing slash
|
||||
'url': 'http://www.camdemy.com/folder/450',
|
||||
|
@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
class CBSNewsIE(ThePlatformIE):
|
||||
IE_DESC = 'CBS News'
|
||||
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@ -78,7 +78,7 @@ class CBSNewsIE(ThePlatformIE):
|
||||
pid = item.get('media' + format_id)
|
||||
if not pid:
|
||||
continue
|
||||
release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?format=SMIL&mbr=true' % pid
|
||||
release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' % pid
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % pid)
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
@ -96,7 +96,7 @@ class CBSNewsIE(ThePlatformIE):
|
||||
|
||||
class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
IE_DESC = 'CBS News Live Videos'
|
||||
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
|
||||
|
@ -6,7 +6,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class CBSSportsIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s',
|
||||
|
96
youtube_dl/extractor/cda.py
Executable file
96
youtube_dl/extractor/cda.py
Executable file
@ -0,0 +1,96 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
decode_packed_codes,
|
||||
ExtractorError,
|
||||
parse_duration
|
||||
)
|
||||
|
||||
|
||||
class CDAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cda.pl/video/5749950c',
|
||||
'md5': '6f844bf51b15f31fae165365707ae970',
|
||||
'info_dict': {
|
||||
'id': '5749950c',
|
||||
'ext': 'mp4',
|
||||
'height': 720,
|
||||
'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
|
||||
'duration': 39
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.cda.pl/video/57413289',
|
||||
'md5': 'a88828770a8310fc00be6c95faf7f4d5',
|
||||
'info_dict': {
|
||||
'id': '57413289',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lądowanie na lotnisku na Maderze',
|
||||
'duration': 137
|
||||
}
|
||||
}, {
|
||||
'url': 'http://ebd.cda.pl/0x0/5749950c',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage('http://ebd.cda.pl/0x0/' + video_id, video_id)
|
||||
|
||||
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
|
||||
raise ExtractorError('This video is only available for premium users.', expected=True)
|
||||
|
||||
title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
|
||||
|
||||
formats = []
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'duration': None,
|
||||
}
|
||||
|
||||
def extract_format(page, version):
|
||||
unpacked = decode_packed_codes(page)
|
||||
format_url = self._search_regex(
|
||||
r"url:\\'(.+?)\\'", unpacked, '%s url' % version, fatal=False)
|
||||
if not format_url:
|
||||
return
|
||||
f = {
|
||||
'url': format_url,
|
||||
}
|
||||
m = re.search(
|
||||
r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p',
|
||||
page)
|
||||
if m:
|
||||
f.update({
|
||||
'format_id': m.group('format_id'),
|
||||
'height': int(m.group('height')),
|
||||
})
|
||||
info_dict['formats'].append(f)
|
||||
if not info_dict['duration']:
|
||||
info_dict['duration'] = parse_duration(self._search_regex(
|
||||
r"duration:\\'(.+?)\\'", unpacked, 'duration', fatal=False))
|
||||
|
||||
extract_format(webpage, 'default')
|
||||
|
||||
for href, resolution in re.findall(
|
||||
r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
|
||||
webpage):
|
||||
webpage = self._download_webpage(
|
||||
href, video_id, 'Downloading %s version information' % resolution, fatal=False)
|
||||
if not webpage:
|
||||
# Manually report warning because empty page is returned when
|
||||
# invalid version is requested.
|
||||
self.report_warning('Unable to download %s version information' % resolution)
|
||||
continue
|
||||
extract_format(webpage, resolution)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return info_dict
|
@ -129,7 +129,8 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
formats = []
|
||||
for format_id, stream_url in item['streamUrls'].items():
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream_url, playlist_id, 'mp4', entry_protocol='m3u8_native'))
|
||||
stream_url, playlist_id, 'mp4',
|
||||
entry_protocol='m3u8_native', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
item_id = item.get('id') or item['assetId']
|
||||
|
@ -19,7 +19,7 @@ def _decode(s):
|
||||
class CliphunterIE(InfoExtractor):
|
||||
IE_NAME = 'cliphunter'
|
||||
|
||||
_VALID_URL = r'''(?x)http://(?:www\.)?cliphunter\.com/w/
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?cliphunter\.com/w/
|
||||
(?P<id>[0-9]+)/
|
||||
(?P<seo>.+?)(?:$|[#\?])
|
||||
'''
|
||||
|
@ -8,7 +8,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ClipsyndicateIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
|
||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ClubicIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html',
|
||||
|
@ -60,7 +60,7 @@ class CNETIE(ThePlatformIE):
|
||||
for (fkey, vid) in vdata['files'].items():
|
||||
if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
|
||||
continue
|
||||
release_url = 'http://link.theplatform.com/s/kYEXFC/%s?format=SMIL&mbr=true' % vid
|
||||
release_url = 'http://link.theplatform.com/s/kYEXFC/%s?mbr=true' % vid
|
||||
if fkey == 'hds':
|
||||
release_url += '&manifest=f4m'
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)
|
||||
|
@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ComCarCoffIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)'
|
||||
_VALID_URL = r'https?://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)'
|
||||
_TESTS = [{
|
||||
'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
|
||||
'info_dict': {
|
||||
|
@ -866,6 +866,7 @@ class InfoExtractor(object):
|
||||
proto_preference = 0 if 'url' in f and determine_protocol(f) in ['http', 'https'] else -0.1
|
||||
|
||||
if f.get('vcodec') == 'none': # audio only
|
||||
preference -= 50
|
||||
if self._downloader.params.get('prefer_free_formats'):
|
||||
ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
|
||||
else:
|
||||
@ -876,6 +877,8 @@ class InfoExtractor(object):
|
||||
except ValueError:
|
||||
audio_ext_preference = -1
|
||||
else:
|
||||
if f.get('acodec') == 'none': # video only
|
||||
preference -= 40
|
||||
if self._downloader.params.get('prefer_free_formats'):
|
||||
ORDER = ['flv', 'mp4', 'webm']
|
||||
else:
|
||||
|
36
youtube_dl/extractor/commonprotocols.py
Normal file
36
youtube_dl/extractor/commonprotocols.py
Normal file
@ -0,0 +1,36 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import url_basename
|
||||
|
||||
|
||||
class RtmpIE(InfoExtractor):
|
||||
IE_DESC = False # Do not list
|
||||
_VALID_URL = r'(?i)rtmp[est]?://.+'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'rtmp://cp44293.edgefcs.net/ondemand?auth=daEcTdydfdqcsb8cZcDbAaCbhamacbbawaS-bw7dBb-bWG-GqpGFqCpNCnGoyL&aifp=v001&slist=public/unsecure/audio/2c97899446428e4301471a8cb72b4b97--audio--pmg-20110908-0900a_flv_aac_med_int.mp4',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'rtmp://edge.live.hitbox.tv/live/dimak',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
|
||||
title = compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': [{
|
||||
'url': url,
|
||||
'ext': 'flv',
|
||||
'format_id': compat_urlparse.urlparse(url).scheme,
|
||||
}],
|
||||
}
|
@ -45,7 +45,7 @@ class CondeNastIE(InfoExtractor):
|
||||
'wmagazine': 'W Magazine',
|
||||
}
|
||||
|
||||
_VALID_URL = r'http://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed(?:js)?)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
|
||||
_VALID_URL = r'https?://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed(?:js)?)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
|
||||
IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
|
||||
|
||||
EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed(?:js)?)/.+?' % '|'.join(_SITES.keys())
|
||||
|
@ -54,7 +54,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
|
||||
def _download_webpage(self, url_or_request, *args, **kwargs):
|
||||
request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
|
||||
else sanitized_Request(url_or_request))
|
||||
# Accept-Language must be set explicitly to accept any language to avoid issues
|
||||
@ -65,8 +65,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
# Crunchyroll to not work in georestriction cases in some browsers that don't place
|
||||
# the locale lang first in header. However allowing any language seems to workaround the issue.
|
||||
request.add_header('Accept-Language', '*')
|
||||
return super(CrunchyrollBaseIE, self)._download_webpage(
|
||||
request, video_id, note, errnote, fatal, tries, timeout, encoding)
|
||||
return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
|
||||
|
||||
@staticmethod
|
||||
def _add_skip_wall(url):
|
||||
|
@ -15,7 +15,7 @@ from .senateisvp import SenateISVPIE
|
||||
|
||||
|
||||
class CSpanIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
|
||||
IE_DESC = 'C-SPAN'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
|
||||
|
@ -8,7 +8,7 @@ from ..utils import parse_iso8601, ExtractorError
|
||||
class CtsNewsIE(InfoExtractor):
|
||||
IE_DESC = '華視新聞'
|
||||
# https connection failed (Connection reset)
|
||||
_VALID_URL = r'http://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
|
||||
_VALID_URL = r'https?://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html',
|
||||
'md5': 'a9875cb790252b08431186d741beaabe',
|
||||
|
@ -6,7 +6,7 @@ from ..compat import compat_str
|
||||
|
||||
|
||||
class DctpTvIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www.dctp.tv/(#/)?filme/(?P<id>.+?)/$'
|
||||
_VALID_URL = r'https?://www.dctp.tv/(#/)?filme/(?P<id>.+?)/$'
|
||||
_TEST = {
|
||||
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
|
||||
'info_dict': {
|
||||
|
@ -5,7 +5,7 @@ from .common import InfoExtractor
|
||||
|
||||
class DefenseGouvFrIE(InfoExtractor):
|
||||
IE_NAME = 'defense.gouv.fr'
|
||||
_VALID_URL = r'http://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)'
|
||||
_VALID_URL = r'https?://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1',
|
||||
|
@ -9,7 +9,7 @@ from ..compat import compat_str
|
||||
|
||||
|
||||
class DiscoveryIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)http://(?:www\.)?(?:
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?:
|
||||
discovery|
|
||||
investigationdiscovery|
|
||||
discoverylife|
|
||||
|
@ -10,7 +10,7 @@ from ..compat import (compat_str, compat_basestring)
|
||||
|
||||
class DouyuTVIE(InfoExtractor):
|
||||
IE_DESC = '斗鱼'
|
||||
_VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.douyutv.com/iseven',
|
||||
'info_dict': {
|
||||
|
@ -10,7 +10,7 @@ from ..utils import int_or_none
|
||||
|
||||
|
||||
class DPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?P<domain>it\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?P<domain>it\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://it.dplay.com/take-me-out/stagione-1-episodio-25/',
|
||||
|
@ -7,7 +7,7 @@ from .zdf import ZDFIE
|
||||
|
||||
class DreiSatIE(ZDFIE):
|
||||
IE_NAME = '3sat'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
|
||||
|
@ -15,7 +15,7 @@ class DVTVIE(InfoExtractor):
|
||||
IE_NAME = 'dvtv'
|
||||
IE_DESC = 'http://video.aktualne.cz/'
|
||||
|
||||
_VALID_URL = r'http://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})'
|
||||
_VALID_URL = r'https?://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/',
|
||||
|
@ -7,7 +7,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class EchoMskIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.echo.msk.ru/sounds/1464134.html',
|
||||
'md5': '2e44b3b78daff5b458e4dbc37f191f7c',
|
||||
|
@ -8,7 +8,7 @@ from .common import InfoExtractor
|
||||
class ExfmIE(InfoExtractor):
|
||||
IE_NAME = 'exfm'
|
||||
IE_DESC = 'ex.fm'
|
||||
_VALID_URL = r'http://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)'
|
||||
_SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
|
||||
_TESTS = [
|
||||
{
|
||||
|
@ -17,7 +17,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class FC2IE(InfoExtractor):
|
||||
_VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'^https?://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)'
|
||||
IE_NAME = 'fc2'
|
||||
_NETRC_MACHINE = 'fc2'
|
||||
_TESTS = [{
|
||||
|
@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class FirstpostIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html',
|
||||
|
@ -8,7 +8,7 @@ from ..utils import int_or_none
|
||||
class FirstTVIE(InfoExtractor):
|
||||
IE_NAME = '1tv'
|
||||
IE_DESC = 'Первый канал'
|
||||
_VALID_URL = r'http://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.1tv.ru/videoarchive/73390',
|
||||
|
@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
class FKTVIE(InfoExtractor):
|
||||
IE_NAME = 'fernsehkritik.tv'
|
||||
_VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'
|
||||
_VALID_URL = r'https?://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://fernsehkritik.tv/folge-1',
|
||||
|
@ -5,7 +5,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class FootyRoomIE(InfoExtractor):
|
||||
_VALID_URL = r'http://footyroom\.com/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://footyroom\.com/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/',
|
||||
'info_dict': {
|
||||
|
@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class FoxgayIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml'
|
||||
_VALID_URL = r'https?://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml'
|
||||
_TEST = {
|
||||
'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml',
|
||||
'md5': '80d72beab5d04e1655a56ad37afe6841',
|
||||
|
@ -6,7 +6,7 @@ from ..utils import int_or_none
|
||||
|
||||
|
||||
class FranceInterIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.franceinter.fr/player/reecouter?play=793962',
|
||||
'md5': '4764932e466e6f6c79c317d2e74f6884',
|
||||
|
@ -60,28 +60,31 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
video_id, 'Downloading f4m manifest token', fatal=False)
|
||||
if f4m_url:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, 1, format_id))
|
||||
f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
|
||||
video_id, f4m_id=format_id, fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
elif video_url.startswith('rtmp'):
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': 'rtmp-%s' % format_id,
|
||||
'ext': 'flv',
|
||||
'preference': 1,
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'preference': -1,
|
||||
})
|
||||
if self._is_valid_url(video_url, video_id, format_id):
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = info['titre']
|
||||
subtitle = info.get('sous_titre')
|
||||
if subtitle:
|
||||
title += ' - %s' % subtitle
|
||||
title = title.strip()
|
||||
|
||||
subtitles = {}
|
||||
subtitles_list = [{
|
||||
@ -125,13 +128,13 @@ class PluzzIE(FranceTVBaseInfoExtractor):
|
||||
|
||||
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = 'francetvinfo.fr'
|
||||
_VALID_URL = r'https?://(?:www|mobile)\.francetvinfo\.fr/.*/(?P<title>.+)\.html'
|
||||
_VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/.*/(?P<title>.+)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
|
||||
'info_dict': {
|
||||
'id': '84981923',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Soir 3',
|
||||
'upload_date': '20130826',
|
||||
'timestamp': 1377548400,
|
||||
@ -139,6 +142,10 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
'fr': 'mincount:2',
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
|
||||
'info_dict': {
|
||||
@ -155,11 +162,32 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
|
||||
'md5': 'f485bda6e185e7d15dbc69b72bae993e',
|
||||
'info_dict': {
|
||||
'id': '556e03339473995ee145930c',
|
||||
'id': 'NI_173343',
|
||||
'ext': 'mp4',
|
||||
'title': 'Les entreprises familiales : le secret de la réussite',
|
||||
'thumbnail': 're:^https?://.*\.jpe?g$',
|
||||
}
|
||||
'timestamp': 1433273139,
|
||||
'upload_date': '20150602',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html',
|
||||
'md5': 'f485bda6e185e7d15dbc69b72bae993e',
|
||||
'info_dict': {
|
||||
'id': 'NI_657393',
|
||||
'ext': 'mp4',
|
||||
'title': 'Olivier Monthus, réalisateur de "Bretagne, le choix de l’Armor"',
|
||||
'description': 'md5:a3264114c9d29aeca11ced113c37b16c',
|
||||
'thumbnail': 're:^https?://.*\.jpe?g$',
|
||||
'timestamp': 1458300695,
|
||||
'upload_date': '20160318',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -172,7 +200,9 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
return self.url_result(dmcloud_url, 'DailymotionCloud')
|
||||
|
||||
video_id, catalogue = self._search_regex(
|
||||
r'id-video=([^@]+@[^"]+)', webpage, 'video id').split('@')
|
||||
(r'id-video=([^@]+@[^"]+)',
|
||||
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
|
||||
webpage, 'video id').split('@')
|
||||
return self._extract_video(video_id, catalogue)
|
||||
|
||||
|
||||
|
@ -5,7 +5,7 @@ from ..utils import ExtractorError
|
||||
|
||||
|
||||
class FreeVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'^http://www.freevideo.cz/vase-videa/(?P<id>[^.]+)\.html(?:$|[?#])'
|
||||
_VALID_URL = r'^https?://www.freevideo.cz/vase-videa/(?P<id>[^.]+)\.html(?:$|[?#])'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.freevideo.cz/vase-videa/vysukany-zadecek-22033.html',
|
||||
|
@ -10,7 +10,7 @@ from .youtube import YoutubeIE
|
||||
|
||||
|
||||
class GamekingsIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
# YouTube embed video
|
||||
'url': 'http://www.gamekings.nl/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
|
||||
|
@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class GameSpotIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'
|
||||
_VALID_URL = r'https?://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
|
||||
'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
|
||||
|
@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class GameStarIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
|
||||
_VALID_URL = r'https?://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html',
|
||||
'md5': '96974ecbb7fd8d0d20fca5a00810cea7',
|
||||
|
@ -9,7 +9,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class GametrailersIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)'
|
||||
_VALID_URL = r'https?://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.gametrailers.com/videos/view/gametrailers-com/116437-Just-Cause-3-Review',
|
||||
|
@ -59,6 +59,7 @@ from .videomore import VideomoreIE
|
||||
from .googledrive import GoogleDriveIE
|
||||
from .jwplatform import JWPlatformIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .instagram import InstagramIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@ -239,6 +240,35 @@ class GenericIE(InfoExtractor):
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
},
|
||||
# m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
|
||||
{
|
||||
'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
|
||||
'info_dict': {
|
||||
'id': 'content',
|
||||
'ext': 'mp4',
|
||||
'title': 'content',
|
||||
'formats': 'mincount:8',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# m3u8 served with Content-Type: text/plain
|
||||
{
|
||||
'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
|
||||
'info_dict': {
|
||||
'id': 'index',
|
||||
'ext': 'mp4',
|
||||
'title': 'index',
|
||||
'upload_date': '20140720',
|
||||
'formats': 'mincount:11',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# google redirect
|
||||
{
|
||||
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
||||
@ -1245,14 +1275,13 @@ class GenericIE(InfoExtractor):
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
||||
'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
|
||||
}
|
||||
|
||||
# Check for direct link to a video
|
||||
content_type = head_response.headers.get('Content-Type', '')
|
||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>.+)$', content_type)
|
||||
content_type = head_response.headers.get('Content-Type', '').lower()
|
||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||
if m:
|
||||
upload_date = unified_strdate(
|
||||
head_response.headers.get('Last-Modified'))
|
||||
format_id = m.group('format_id')
|
||||
if format_id.endswith('mpegurl'):
|
||||
formats = self._extract_m3u8_formats(url, video_id, 'mp4')
|
||||
@ -1264,11 +1293,8 @@ class GenericIE(InfoExtractor):
|
||||
'url': url,
|
||||
'vcodec': 'none' if m.group('type') == 'audio' else None
|
||||
}]
|
||||
info_dict.update({
|
||||
'direct': True,
|
||||
'formats': formats,
|
||||
'upload_date': upload_date,
|
||||
})
|
||||
info_dict['direct'] = True
|
||||
info_dict['formats'] = formats
|
||||
return info_dict
|
||||
|
||||
if not self._downloader.params.get('test', False) and not is_intentional:
|
||||
@ -1289,18 +1315,21 @@ class GenericIE(InfoExtractor):
|
||||
request.add_header('Accept-Encoding', '*')
|
||||
full_response = self._request_webpage(request, video_id)
|
||||
|
||||
first_bytes = full_response.read(512)
|
||||
|
||||
# Is it an M3U playlist?
|
||||
if first_bytes.startswith(b'#EXTM3U'):
|
||||
info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
|
||||
return info_dict
|
||||
|
||||
# Maybe it's a direct link to a video?
|
||||
# Be careful not to download the whole thing!
|
||||
first_bytes = full_response.read(512)
|
||||
if not is_html(first_bytes):
|
||||
self._downloader.report_warning(
|
||||
'URL could be a direct video link, returning it as such.')
|
||||
upload_date = unified_strdate(
|
||||
head_response.headers.get('Last-Modified'))
|
||||
info_dict.update({
|
||||
'direct': True,
|
||||
'url': url,
|
||||
'upload_date': upload_date,
|
||||
})
|
||||
return info_dict
|
||||
|
||||
@ -1881,6 +1910,19 @@ class GenericIE(InfoExtractor):
|
||||
self._proto_relative_url(unescapeHTML(mobj.group(1))),
|
||||
'AdobeTVVideo')
|
||||
|
||||
# Look for Vine embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(
|
||||
self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
|
||||
|
||||
# Look for Instagram embeds
|
||||
instagram_embed_url = InstagramIE._extract_embed_url(webpage)
|
||||
if instagram_embed_url is not None:
|
||||
return self.url_result(instagram_embed_url, InstagramIE.ie_key())
|
||||
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
return True
|
||||
|
122
youtube_dl/extractor/hbo.py
Normal file
122
youtube_dl/extractor/hbo.py
Normal file
@ -0,0 +1,122 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
xpath_element,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class HBOIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
|
||||
'md5': '1c33253f0c7782142c993c0ba62a8753',
|
||||
'info_dict': {
|
||||
'id': '1437839',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ep. 64 Clip: Encryption',
|
||||
}
|
||||
}
|
||||
_FORMATS_INFO = {
|
||||
'1920': {
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
},
|
||||
'640': {
|
||||
'width': 768,
|
||||
'height': 432,
|
||||
},
|
||||
'highwifi': {
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
},
|
||||
'high3g': {
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
},
|
||||
'medwifi': {
|
||||
'width': 400,
|
||||
'height': 224,
|
||||
},
|
||||
'med3g': {
|
||||
'width': 400,
|
||||
'height': 224,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_xml(
|
||||
'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id)
|
||||
title = xpath_text(video_data, 'title', 'title', True)
|
||||
|
||||
formats = []
|
||||
for source in xpath_element(video_data, 'videos', 'sources', True):
|
||||
if source.tag == 'size':
|
||||
path = xpath_text(source, './/path')
|
||||
if not path:
|
||||
continue
|
||||
width = source.attrib.get('width')
|
||||
format_info = self._FORMATS_INFO.get(width, {})
|
||||
height = format_info.get('height')
|
||||
fmt = {
|
||||
'url': path,
|
||||
'format_id': 'http%s' % ('-%dp' % height if height else ''),
|
||||
'width': format_info.get('width'),
|
||||
'height': height,
|
||||
}
|
||||
rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', path)
|
||||
if rtmp:
|
||||
fmt.update({
|
||||
'url': rtmp.group('url'),
|
||||
'play_path': rtmp.group('playpath'),
|
||||
'app': rtmp.group('app'),
|
||||
'ext': 'flv',
|
||||
'format_id': fmt['format_id'].replace('http', 'rtmp'),
|
||||
})
|
||||
formats.append(fmt)
|
||||
else:
|
||||
video_url = source.text
|
||||
if not video_url:
|
||||
continue
|
||||
if source.tag == 'tarball':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url.replace('.tar', '/base_index_w8.m3u8'),
|
||||
video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
format_info = self._FORMATS_INFO.get(source.tag, {})
|
||||
formats.append({
|
||||
'format_id': 'http-%s' % source.tag,
|
||||
'url': video_url,
|
||||
'width': format_info.get('width'),
|
||||
'height': format_info.get('height'),
|
||||
})
|
||||
self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
|
||||
|
||||
thumbnails = []
|
||||
card_sizes = xpath_element(video_data, 'titleCardSizes')
|
||||
if card_sizes is not None:
|
||||
for size in card_sizes:
|
||||
path = xpath_text(size, 'path')
|
||||
if not path:
|
||||
continue
|
||||
width = int_or_none(size.get('width'))
|
||||
thumbnails.append({
|
||||
'id': width,
|
||||
'url': path,
|
||||
'width': width,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': parse_duration(xpath_element(video_data, 'duration/tv14')),
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
}
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class HotNewHipHopIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
|
||||
_VALID_URL = r'https?://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',
|
||||
'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96',
|
||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class HypemIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/'
|
||||
_VALID_URL = r'https?://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/'
|
||||
_TEST = {
|
||||
'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
|
||||
'md5': 'b9cc91b5af8995e9f0c1cee04c575828',
|
||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||
class ImdbIE(InfoExtractor):
|
||||
IE_NAME = 'imdb'
|
||||
IE_DESC = 'Internet Movie Database trailers'
|
||||
_VALID_URL = r'http://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
|
||||
@ -70,7 +70,7 @@ class ImdbIE(InfoExtractor):
|
||||
class ImdbListIE(InfoExtractor):
|
||||
IE_NAME = 'imdb:list'
|
||||
IE_DESC = 'Internet Movie Database lists'
|
||||
_VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
|
||||
_VALID_URL = r'https?://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
|
||||
_TEST = {
|
||||
'url': 'http://www.imdb.com/list/JFs9NWw6XI0',
|
||||
'info_dict': {
|
||||
|
@ -4,8 +4,10 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_element_by_attribute,
|
||||
int_or_none,
|
||||
limit_length,
|
||||
lowercase_escape,
|
||||
)
|
||||
|
||||
|
||||
@ -38,6 +40,18 @@ class InstagramIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_embed_url(webpage):
|
||||
blockquote_el = get_element_by_attribute(
|
||||
'class', 'instagram-media', webpage)
|
||||
if blockquote_el is None:
|
||||
return
|
||||
|
||||
mobj = re.search(
|
||||
r'<a[^>]+href=([\'"])(?P<link>[^\'"]+)\1', blockquote_el)
|
||||
if mobj:
|
||||
return mobj.group('link')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@ -46,6 +60,8 @@ class InstagramIE(InfoExtractor):
|
||||
webpage, 'uploader id', fatal=False)
|
||||
desc = self._search_regex(
|
||||
r'"caption":"(.+?)"', webpage, 'description', default=None)
|
||||
if desc is not None:
|
||||
desc = lowercase_escape(desc)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@ -6,6 +6,8 @@ import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
@ -30,8 +32,7 @@ class IPrimaIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
@ -43,9 +44,42 @@ class IPrimaIE(InfoExtractor):
|
||||
req.add_header('Referer', url)
|
||||
playerpage = self._download_webpage(req, video_id, note='Downloading player')
|
||||
|
||||
m3u8_url = self._search_regex(r"'src': '([^']+\.m3u8)'", playerpage, 'm3u8 url')
|
||||
formats = []
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
|
||||
def extract_formats(format_url, format_key=None, lang=None):
|
||||
ext = determine_ext(format_url)
|
||||
new_formats = []
|
||||
if format_key == 'hls' or ext == 'm3u8':
|
||||
new_formats = self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
elif format_key == 'dash' or ext == 'mpd':
|
||||
return
|
||||
new_formats = self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash', fatal=False)
|
||||
if lang:
|
||||
for f in new_formats:
|
||||
if not f.get('language'):
|
||||
f['language'] = lang
|
||||
formats.extend(new_formats)
|
||||
|
||||
options = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)var\s+playerOptions\s*=\s*({.+?});',
|
||||
playerpage, 'player options', default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False)
|
||||
if options:
|
||||
for key, tracks in options.get('tracks', {}).items():
|
||||
if not isinstance(tracks, list):
|
||||
continue
|
||||
for track in tracks:
|
||||
src = track.get('src')
|
||||
if src:
|
||||
extract_formats(src, key.lower(), track.get('lang'))
|
||||
|
||||
if not formats:
|
||||
for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage):
|
||||
extract_formats(src)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@ -165,7 +165,7 @@ class IqiyiIE(InfoExtractor):
|
||||
IE_NAME = 'iqiyi'
|
||||
IE_DESC = '爱奇艺'
|
||||
|
||||
_VALID_URL = r'http://(?:[^.]+\.)?iqiyi\.com/.+\.html'
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?iqiyi\.com/.+\.html'
|
||||
|
||||
_NETRC_MACHINE = 'iqiyi'
|
||||
|
||||
|
@ -9,7 +9,7 @@ from .youtube import YoutubeIE
|
||||
|
||||
|
||||
class JadoreCettePubIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.jadorecettepub.com/2010/12/star-wars-massacre-par-les-japonais.html',
|
||||
|
@ -8,7 +8,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class JeuxVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)\.htm'
|
||||
_VALID_URL = r'https?://.*?\.jeuxvideo\.com/.*/(.*?)\.htm'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
||||
|
@ -9,7 +9,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class KaraoketvIE(InfoExtractor):
|
||||
_VALID_URL = r'http://karaoketv\.co\.il/\?container=songs&id=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://karaoketv\.co\.il/\?container=songs&id=(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://karaoketv.co.il/?container=songs&id=171568',
|
||||
'info_dict': {
|
||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class KarriereVideosIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?karrierevideos\.at(?:/[^/]+)+/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?karrierevideos\.at(?:/[^/]+)+/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.karrierevideos.at/berufsvideos/mittlere-hoehere-schulen/altenpflegerin',
|
||||
'info_dict': {
|
||||
|
@ -13,7 +13,7 @@ from ..utils import (
|
||||
class KontrTubeIE(InfoExtractor):
|
||||
IE_NAME = 'kontrtube'
|
||||
IE_DESC = 'KontrTube.ru - Труба зовёт'
|
||||
_VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/'
|
||||
_VALID_URL = r'https?://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
|
||||
|
@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class Ku6IE(InfoExtractor):
|
||||
_VALID_URL = r'http://v\.ku6\.com/show/(?P<id>[a-zA-Z0-9\-\_]+)(?:\.)*html'
|
||||
_VALID_URL = r'https?://v\.ku6\.com/show/(?P<id>[a-zA-Z0-9\-\_]+)(?:\.)*html'
|
||||
_TEST = {
|
||||
'url': 'http://v.ku6.com/show/JG-8yS14xzBr4bCn1pu0xw...html',
|
||||
'md5': '01203549b9efbb45f4b87d55bdea1ed1',
|
||||
|
@ -16,7 +16,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class KUSIIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'
|
||||
_VALID_URL = r'https?://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.kusi.com/story/31183873/turko-files-case-closed-put-on-hold',
|
||||
'md5': 'f926e7684294cf8cb7bdf8858e1b3988',
|
||||
|
@ -2,13 +2,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_element_by_id,
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
InAdvancePagedList,
|
||||
remove_start,
|
||||
)
|
||||
|
||||
@ -23,7 +23,7 @@ class KuwoBaseIE(InfoExtractor):
|
||||
{'format': 'aac', 'ext': 'aac', 'abr': 48, 'preference': 10}
|
||||
]
|
||||
|
||||
def _get_formats(self, song_id):
|
||||
def _get_formats(self, song_id, tolerate_ip_deny=False):
|
||||
formats = []
|
||||
for file_format in self._FORMATS:
|
||||
song_url = self._download_webpage(
|
||||
@ -32,7 +32,7 @@ class KuwoBaseIE(InfoExtractor):
|
||||
song_id, note='Download %s url info' % file_format['format'],
|
||||
)
|
||||
|
||||
if song_url == 'IPDeny':
|
||||
if song_url == 'IPDeny' and not tolerate_ip_deny:
|
||||
raise ExtractorError('This song is blocked in this region', expected=True)
|
||||
|
||||
if song_url.startswith('http://') or song_url.startswith('https://'):
|
||||
@ -43,14 +43,19 @@ class KuwoBaseIE(InfoExtractor):
|
||||
'preference': file_format['preference'],
|
||||
'abr': file_format.get('abr'),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
# XXX _sort_formats fails if there are not formats, while it's not the
|
||||
# desired behavior if 'IPDeny' is ignored
|
||||
# This check can be removed if https://github.com/rg3/youtube-dl/pull/8051 is merged
|
||||
if not tolerate_ip_deny:
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
|
||||
class KuwoIE(KuwoBaseIE):
|
||||
IE_NAME = 'kuwo:song'
|
||||
IE_DESC = '酷我音乐'
|
||||
_VALID_URL = r'http://www\.kuwo\.cn/yinyue/(?P<id>\d+?)/'
|
||||
_VALID_URL = r'https?://www\.kuwo\.cn/yinyue/(?P<id>\d+?)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.kuwo.cn/yinyue/635632/',
|
||||
'info_dict': {
|
||||
@ -75,6 +80,9 @@ class KuwoIE(KuwoBaseIE):
|
||||
'params': {
|
||||
'format': 'mp3-320'
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.kuwo.cn/yinyue/3197154?catalog=yueku2016',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -126,7 +134,7 @@ class KuwoIE(KuwoBaseIE):
|
||||
class KuwoAlbumIE(InfoExtractor):
|
||||
IE_NAME = 'kuwo:album'
|
||||
IE_DESC = '酷我音乐 - 专辑'
|
||||
_VALID_URL = r'http://www\.kuwo\.cn/album/(?P<id>\d+?)/'
|
||||
_VALID_URL = r'https?://www\.kuwo\.cn/album/(?P<id>\d+?)/'
|
||||
_TEST = {
|
||||
'url': 'http://www.kuwo.cn/album/502294/',
|
||||
'info_dict': {
|
||||
@ -162,13 +170,11 @@ class KuwoAlbumIE(InfoExtractor):
|
||||
class KuwoChartIE(InfoExtractor):
|
||||
IE_NAME = 'kuwo:chart'
|
||||
IE_DESC = '酷我音乐 - 排行榜'
|
||||
_VALID_URL = r'http://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+).htm'
|
||||
_VALID_URL = r'https?://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+).htm'
|
||||
_TEST = {
|
||||
'url': 'http://yinyue.kuwo.cn/billboard_香港中文龙虎榜.htm',
|
||||
'info_dict': {
|
||||
'id': '香港中文龙虎榜',
|
||||
'title': '香港中文龙虎榜',
|
||||
'description': 're:\d{4}第\d{2}期',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
}
|
||||
@ -179,30 +185,24 @@ class KuwoChartIE(InfoExtractor):
|
||||
url, chart_id, note='Download chart info',
|
||||
errnote='Unable to get chart info')
|
||||
|
||||
chart_name = self._html_search_regex(
|
||||
r'<h1[^>]+class="unDis">([^<]+)</h1>', webpage, 'chart name')
|
||||
|
||||
chart_desc = self._html_search_regex(
|
||||
r'<p[^>]+class="tabDef">(\d{4}第\d{2}期)</p>', webpage, 'chart desc')
|
||||
|
||||
entries = [
|
||||
self.url_result(song_url, 'Kuwo') for song_url in re.findall(
|
||||
r'<a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)/"', webpage)
|
||||
r'<a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)', webpage)
|
||||
]
|
||||
return self.playlist_result(entries, chart_id, chart_name, chart_desc)
|
||||
return self.playlist_result(entries, chart_id)
|
||||
|
||||
|
||||
class KuwoSingerIE(InfoExtractor):
|
||||
IE_NAME = 'kuwo:singer'
|
||||
IE_DESC = '酷我音乐 - 歌手'
|
||||
_VALID_URL = r'http://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.kuwo.cn/mingxing/bruno+mars/',
|
||||
'info_dict': {
|
||||
'id': 'bruno+mars',
|
||||
'title': 'Bruno Mars',
|
||||
},
|
||||
'playlist_count': 10,
|
||||
'playlist_mincount': 329,
|
||||
}, {
|
||||
'url': 'http://www.kuwo.cn/mingxing/Ali/music.htm',
|
||||
'info_dict': {
|
||||
@ -213,6 +213,8 @@ class KuwoSingerIE(InfoExtractor):
|
||||
'skip': 'Regularly stalls travis build', # See https://travis-ci.org/rg3/youtube-dl/jobs/78878540
|
||||
}]
|
||||
|
||||
PAGE_SIZE = 15
|
||||
|
||||
def _real_extract(self, url):
|
||||
singer_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
@ -220,25 +222,28 @@ class KuwoSingerIE(InfoExtractor):
|
||||
errnote='Unable to get singer info')
|
||||
|
||||
singer_name = self._html_search_regex(
|
||||
r'<div class="title clearfix">\s*<h1>([^<]+)<span', webpage, 'singer name'
|
||||
)
|
||||
r'<h1>([^<]+)</h1>', webpage, 'singer name')
|
||||
|
||||
entries = []
|
||||
first_page_only = False if re.search(r'/music(?:_\d+)?\.htm', url) else True
|
||||
for page_num in itertools.count(1):
|
||||
artist_id = self._html_search_regex(
|
||||
r'data-artistid="(\d+)"', webpage, 'artist id')
|
||||
|
||||
page_count = int(self._html_search_regex(
|
||||
r'data-page="(\d+)"', webpage, 'page count'))
|
||||
|
||||
def page_func(page_num):
|
||||
webpage = self._download_webpage(
|
||||
'http://www.kuwo.cn/mingxing/%s/music_%d.htm' % (singer_id, page_num),
|
||||
singer_id, note='Download song list page #%d' % page_num,
|
||||
errnote='Unable to get song list page #%d' % page_num)
|
||||
'http://www.kuwo.cn/artist/contentMusicsAjax',
|
||||
singer_id, note='Download song list page #%d' % (page_num + 1),
|
||||
errnote='Unable to get song list page #%d' % (page_num + 1),
|
||||
query={'artistId': artist_id, 'pn': page_num, 'rn': self.PAGE_SIZE})
|
||||
|
||||
entries.extend([
|
||||
return [
|
||||
self.url_result(song_url, 'Kuwo') for song_url in re.findall(
|
||||
r'<p[^>]+class="m_name"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)/',
|
||||
r'<div[^>]+class="name"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)',
|
||||
webpage)
|
||||
][:10 if first_page_only else None])
|
||||
]
|
||||
|
||||
if first_page_only or not re.search(r'<a[^>]+href="[^"]+">下一页</a>', webpage):
|
||||
break
|
||||
entries = InAdvancePagedList(page_func, page_count, self.PAGE_SIZE)
|
||||
|
||||
return self.playlist_result(entries, singer_id, singer_name)
|
||||
|
||||
@ -246,7 +251,7 @@ class KuwoSingerIE(InfoExtractor):
|
||||
class KuwoCategoryIE(InfoExtractor):
|
||||
IE_NAME = 'kuwo:category'
|
||||
IE_DESC = '酷我音乐 - 分类'
|
||||
_VALID_URL = r'http://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?).htm'
|
||||
_VALID_URL = r'https?://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?).htm'
|
||||
_TEST = {
|
||||
'url': 'http://yinyue.kuwo.cn/yy/cinfo_86375.htm',
|
||||
'info_dict': {
|
||||
@ -283,15 +288,21 @@ class KuwoCategoryIE(InfoExtractor):
|
||||
class KuwoMvIE(KuwoBaseIE):
|
||||
IE_NAME = 'kuwo:mv'
|
||||
IE_DESC = '酷我音乐 - MV'
|
||||
_VALID_URL = r'http://www\.kuwo\.cn/mv/(?P<id>\d+?)/'
|
||||
_VALID_URL = r'https?://www\.kuwo\.cn/mv/(?P<id>\d+?)/'
|
||||
_TEST = {
|
||||
'url': 'http://www.kuwo.cn/mv/6480076/',
|
||||
'info_dict': {
|
||||
'id': '6480076',
|
||||
'ext': 'mkv',
|
||||
'title': '我们家MV',
|
||||
'ext': 'mp4',
|
||||
'title': 'My HouseMV',
|
||||
'creator': '2PM',
|
||||
},
|
||||
# In this video, music URLs (anti.s) are blocked outside China and
|
||||
# USA, while the MV URL (mvurl) is available globally, so force the MV
|
||||
# URL for consistent results in different countries
|
||||
'params': {
|
||||
'format': 'mv',
|
||||
},
|
||||
}
|
||||
_FORMATS = KuwoBaseIE._FORMATS + [
|
||||
{'format': 'mkv', 'ext': 'mkv', 'preference': 250},
|
||||
@ -313,7 +324,17 @@ class KuwoMvIE(KuwoBaseIE):
|
||||
else:
|
||||
raise ExtractorError('Unable to find song or singer names')
|
||||
|
||||
formats = self._get_formats(song_id)
|
||||
formats = self._get_formats(song_id, tolerate_ip_deny=True)
|
||||
|
||||
mv_url = self._download_webpage(
|
||||
'http://www.kuwo.cn/yy/st/mvurl?rid=MUSIC_%s' % song_id,
|
||||
song_id, note='Download %s MV URL' % song_id)
|
||||
formats.append({
|
||||
'url': mv_url,
|
||||
'format_id': 'mv',
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': song_id,
|
||||
|
@ -19,7 +19,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class Laola1TvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/[^/]+/(?P<slug>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/(?P<kind>[^/]+)/(?P<slug>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
|
||||
'info_dict': {
|
||||
@ -33,7 +33,7 @@ class Laola1TvIE(InfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie',
|
||||
'info_dict': {
|
||||
@ -47,12 +47,28 @@ class Laola1TvIE(InfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.laola1.tv/de-de/livestream/2016-03-22-belogorie-belgorod-trentino-diatec-lde',
|
||||
'info_dict': {
|
||||
'id': '487850',
|
||||
'display_id': '2016-03-22-belogorie-belgorod-trentino-diatec-lde',
|
||||
'ext': 'flv',
|
||||
'title': 'Belogorie BELGOROD - TRENTINO Diatec',
|
||||
'upload_date': '20160322',
|
||||
'uploader': 'CEV - Europäischer Volleyball Verband',
|
||||
'is_live': True,
|
||||
'categories': ['Volleyball'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('slug')
|
||||
kind = mobj.group('kind')
|
||||
lang = mobj.group('lang')
|
||||
portal = mobj.group('portal')
|
||||
|
||||
@ -85,12 +101,17 @@ class Laola1TvIE(InfoExtractor):
|
||||
_v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k)
|
||||
title = _v('title', fatal=True)
|
||||
|
||||
VS_TARGETS = {
|
||||
'video': '2',
|
||||
'livestream': '17',
|
||||
}
|
||||
|
||||
req = sanitized_Request(
|
||||
'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access?%s' %
|
||||
compat_urllib_parse.urlencode({
|
||||
'videoId': video_id,
|
||||
'target': '2',
|
||||
'label': 'laola1tv',
|
||||
'target': VS_TARGETS.get(kind, '2'),
|
||||
'label': _v('label'),
|
||||
'area': _v('area'),
|
||||
}),
|
||||
urlencode_postdata(
|
||||
|
@ -28,7 +28,7 @@ from ..utils import (
|
||||
|
||||
class LeIE(InfoExtractor):
|
||||
IE_DESC = '乐视网'
|
||||
_VALID_URL = r'http://www\.le\.com/ptv/vplay/(?P<id>\d+)\.html'
|
||||
_VALID_URL = r'https?://www\.le\.com/ptv/vplay/(?P<id>\d+)\.html'
|
||||
|
||||
_URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html'
|
||||
|
||||
@ -196,7 +196,7 @@ class LeIE(InfoExtractor):
|
||||
|
||||
|
||||
class LePlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'http://[a-z]+\.le\.com/[a-z]+/(?P<id>[a-z0-9_]+)'
|
||||
_VALID_URL = r'https?://[a-z]+\.le\.com/[a-z]+/(?P<id>[a-z0-9_]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.le.com/tv/46177.html',
|
||||
|
@ -17,7 +17,7 @@ from ..utils import (
|
||||
class LifeNewsIE(InfoExtractor):
|
||||
IE_NAME = 'lifenews'
|
||||
IE_DESC = 'LIFE | NEWS'
|
||||
_VALID_URL = r'http://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
# single video embedded via video/source
|
||||
@ -159,7 +159,7 @@ class LifeNewsIE(InfoExtractor):
|
||||
|
||||
class LifeEmbedIE(InfoExtractor):
|
||||
IE_NAME = 'life:embed'
|
||||
_VALID_URL = r'http://embed\.life\.ru/embed/(?P<id>[\da-f]{32})'
|
||||
_VALID_URL = r'https?://embed\.life\.ru/embed/(?P<id>[\da-f]{32})'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://embed.life.ru/embed/e50c2dec2867350528e2574c899b8291',
|
||||
|
@ -123,7 +123,7 @@ class LimelightBaseIE(InfoExtractor):
|
||||
|
||||
class LimelightMediaIE(LimelightBaseIE):
|
||||
IE_NAME = 'limelight'
|
||||
_VALID_URL = r'(?:limelight:media:|http://link\.videoplatform\.limelight\.com/media/\??\bmediaId=)(?P<id>[a-z0-9]{32})'
|
||||
_VALID_URL = r'(?:limelight:media:|https?://link\.videoplatform\.limelight\.com/media/\??\bmediaId=)(?P<id>[a-z0-9]{32})'
|
||||
_TESTS = [{
|
||||
'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
|
||||
'info_dict': {
|
||||
@ -176,7 +176,7 @@ class LimelightMediaIE(LimelightBaseIE):
|
||||
|
||||
class LimelightChannelIE(LimelightBaseIE):
|
||||
IE_NAME = 'limelight:channel'
|
||||
_VALID_URL = r'(?:limelight:channel:|http://link\.videoplatform\.limelight\.com/media/\??\bchannelId=)(?P<id>[a-z0-9]{32})'
|
||||
_VALID_URL = r'(?:limelight:channel:|https?://link\.videoplatform\.limelight\.com/media/\??\bchannelId=)(?P<id>[a-z0-9]{32})'
|
||||
_TEST = {
|
||||
'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',
|
||||
'info_dict': {
|
||||
@ -207,7 +207,7 @@ class LimelightChannelIE(LimelightBaseIE):
|
||||
|
||||
class LimelightChannelListIE(LimelightBaseIE):
|
||||
IE_NAME = 'limelight:channel_list'
|
||||
_VALID_URL = r'(?:limelight:channel_list:|http://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelListId=)(?P<id>[a-z0-9]{32})'
|
||||
_VALID_URL = r'(?:limelight:channel_list:|https?://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelListId=)(?P<id>[a-z0-9]{32})'
|
||||
_TEST = {
|
||||
'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',
|
||||
'info_dict': {
|
||||
|
@ -8,7 +8,7 @@ from .common import InfoExtractor
|
||||
|
||||
class M6IE(InfoExtractor):
|
||||
IE_NAME = 'm6'
|
||||
_VALID_URL = r'http://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.m6.fr/emission-les_reines_du_shopping/videos/11323908-emeline_est_la_reine_du_shopping_sur_le_theme_ma_fete_d_8217_anniversaire.html',
|
||||
|
@ -13,7 +13,7 @@ from ..utils import (
|
||||
class MailRuIE(InfoExtractor):
|
||||
IE_NAME = 'mailru'
|
||||
IE_DESC = 'Видео@Mail.Ru'
|
||||
_VALID_URL = r'http://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)'
|
||||
_VALID_URL = r'https?://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
|
@ -17,7 +17,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class MetacafeIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
|
||||
_VALID_URL = r'https?://(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
|
||||
_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
|
||||
_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
|
||||
IE_NAME = 'metacafe'
|
||||
|
@ -91,7 +91,7 @@ class MITIE(TechTVMITIE):
|
||||
|
||||
class OCWMITIE(InfoExtractor):
|
||||
IE_NAME = 'ocw.mit.edu'
|
||||
_VALID_URL = r'^http://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
|
||||
_VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
|
||||
_BASE_URL = 'http://ocw.mit.edu/'
|
||||
|
||||
_TESTS = [
|
||||
|
@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
class MiTeleIE(InfoExtractor):
|
||||
IE_DESC = 'mitele.es'
|
||||
_VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
|
||||
_VALID_URL = r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
|
||||
|
81
youtube_dl/extractor/mnet.py
Normal file
81
youtube_dl/extractor/mnet.py
Normal file
@ -0,0 +1,81 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class MnetIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?mnet\.(?:com|interest\.me)/tv/vod/(?:.*?\bclip_id=)?(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mnet.com/tv/vod/171008',
|
||||
'info_dict': {
|
||||
'id': '171008',
|
||||
'title': 'SS_이해인@히든박스',
|
||||
'description': 'md5:b9efa592c3918b615ba69fe9f8a05c55',
|
||||
'duration': 88,
|
||||
'upload_date': '20151231',
|
||||
'timestamp': 1451564040,
|
||||
'age_limit': 0,
|
||||
'thumbnails': 'mincount:5',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'ext': 'flv',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://mnet.interest.me/tv/vod/172790',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.mnet.com/tv/vod/vod_view.asp?clip_id=172790&tabMenu=',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
info = self._download_json(
|
||||
'http://content.api.mnet.com/player/vodConfig?id=%s&ctype=CLIP' % video_id,
|
||||
video_id, 'Downloading vod config JSON')['data']['info']
|
||||
|
||||
title = info['title']
|
||||
|
||||
rtmp_info = self._download_json(
|
||||
info['cdn'], video_id, 'Downloading vod cdn JSON')
|
||||
|
||||
formats = [{
|
||||
'url': rtmp_info['serverurl'] + rtmp_info['fileurl'],
|
||||
'ext': 'flv',
|
||||
'page_url': url,
|
||||
'player_url': 'http://flvfile.mnet.com/service/player/201602/cjem_player_tv.swf?v=201602191318',
|
||||
}]
|
||||
|
||||
description = info.get('ment')
|
||||
duration = parse_duration(info.get('time'))
|
||||
timestamp = parse_iso8601(info.get('date'), delimiter=' ')
|
||||
age_limit = info.get('adult')
|
||||
if age_limit is not None:
|
||||
age_limit = 0 if age_limit == 'N' else 18
|
||||
thumbnails = [{
|
||||
'id': thumb_format,
|
||||
'url': thumb['url'],
|
||||
'width': int_or_none(thumb.get('width')),
|
||||
'height': int_or_none(thumb.get('height')),
|
||||
} for thumb_format, thumb in info.get('cover', {}).items() if thumb.get('url')]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'age_limit': age_limit,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
@ -13,7 +13,7 @@ from ..utils import (
|
||||
class MooshareIE(InfoExtractor):
|
||||
IE_NAME = 'mooshare'
|
||||
IE_DESC = 'Mooshare.biz'
|
||||
_VALID_URL = r'http://(?:www\.)?mooshare\.biz/(?P<id>[\da-z]{12})'
|
||||
_VALID_URL = r'https?://(?:www\.)?mooshare\.biz/(?P<id>[\da-z]{12})'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class MotherlessIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://motherless.com/AC3FFE1',
|
||||
'md5': '310f62e325a9fafe64f68c0bccb6e75f',
|
||||
@ -69,6 +69,9 @@ class MotherlessIE(InfoExtractor):
|
||||
">The page you're looking for cannot be found.<")):
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
if '>The content you are trying to view is for friends only.' in webpage:
|
||||
raise ExtractorError('Video %s is for friends only' % video_id, expected=True)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
|
||||
video_url = self._html_search_regex(
|
||||
|
@ -9,7 +9,7 @@ from ..compat import (
|
||||
|
||||
class MotorsportIE(InfoExtractor):
|
||||
IE_DESC = 'motorsport.com'
|
||||
_VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'
|
||||
_VALID_URL = r'https?://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'
|
||||
_TEST = {
|
||||
'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/',
|
||||
'info_dict': {
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user