1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-01-26 17:45:35 +08:00

Merge remote-tracking branch 'refs/remotes/rg3/master'

This commit is contained in:
forDream 2015-12-22 10:14:47 +08:00
commit d575ded6a7
50 changed files with 1477 additions and 931 deletions

View File

@ -757,7 +757,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```
Most likely, you'll want to use various options. For a list of what can be done, have a look at [youtube_dl/YoutubeDL.py](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L117-L265). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
Most likely, you'll want to use various options. For a list of what can be done, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L121-L269). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:

View File

@ -65,9 +65,9 @@
- **Bet**
- **Bild**: Bild.de
- **BiliBili**
- **BleacherReport**
- **BleacherReportCMS**
- **blinkx**
- **blip.tv:user**
- **BlipTV**
- **Bloomberg**
- **Bpb**: Bundeszentrale für politische Bildung
- **BR**: Bayerischer Rundfunk Mediathek
@ -80,7 +80,6 @@
- **BYUtv**
- **Camdemy**
- **CamdemyFolder**
- **Canal13cl**
- **canalc2.tv**
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
- **CBS**
@ -210,7 +209,9 @@
- **GodTube**
- **GoldenMoustache**
- **Golem**
- **GoogleDrive**
- **Goshgay**
- **GPUTechConf**
- **Groupon**
- **Hark**
- **HearThisAt**
@ -252,6 +253,7 @@
- **Jove**
- **jpopsuki.tv**
- **Jukebox**
- **JWPlatform**
- **Kaltura**
- **KanalPlay**: Kanal 5/9/11 Play
- **Kankan**
@ -292,6 +294,7 @@
- **m6**
- **macgamestore**: MacGameStore trailers
- **mailru**: Видео@Mail.Ru
- **MakerTV**
- **Malemotion**
- **MDR**: MDR.DE and KiKA
- **media.ccc.de**
@ -551,6 +554,7 @@
- **TechTalks**
- **techtv.mit.edu**
- **ted**
- **Tele13**
- **TeleBruxelles**
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es
- **Telegraaf**
@ -573,6 +577,7 @@
- **TMZ**
- **TMZArticle**
- **TNAFlix**
- **toggle**
- **tou.tv**
- **Toypics**: Toypics user profile
- **ToypicsUser**: Toypics user profile
@ -711,6 +716,7 @@
- **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
- **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
- **youtube:playlist**: YouTube.com playlists
- **youtube:playlists**: YouTube.com user/channel playlists
- **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
- **youtube:search**: YouTube.com searches
- **youtube:search:date**: YouTube.com searches, newest videos first
@ -718,7 +724,6 @@
- **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
- **youtube:user:playlists**: YouTube.com user playlists
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
- **Zapiks**
- **ZDF**

View File

@ -11,7 +11,6 @@ from test.helper import FakeYDL, md5
from youtube_dl.extractor import (
BlipTVIE,
YoutubeIE,
DailymotionIE,
TEDIE,
@ -145,18 +144,6 @@ class TestTedSubtitles(BaseTestSubtitles):
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
class TestBlipTVSubtitles(BaseTestSubtitles):
url = 'http://blip.tv/a/a-6603250'
IE = BlipTVIE
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['en']))
self.assertEqual(md5(subtitles['en']), '5b75c300af65fe4476dff79478bb93e4')
class TestVimeoSubtitles(BaseTestSubtitles):
url = 'http://vimeo.com/76979871'
IE = VimeoIE

View File

@ -22,6 +22,7 @@ from youtube_dl.utils import (
DateRange,
detect_exe_version,
determine_ext,
encode_compat_str,
encodeFilename,
escape_rfc3986,
escape_url,
@ -43,6 +44,7 @@ from youtube_dl.utils import (
sanitize_path,
prepend_extension,
replace_extension,
remove_quotes,
shell_quote,
smuggle_url,
str_to_int,
@ -200,6 +202,15 @@ class TestUtil(unittest.TestCase):
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
def test_remove_quotes(self):
self.assertEqual(remove_quotes(None), None)
self.assertEqual(remove_quotes('"'), '"')
self.assertEqual(remove_quotes("'"), "'")
self.assertEqual(remove_quotes(';'), ';')
self.assertEqual(remove_quotes('";'), '";')
self.assertEqual(remove_quotes('""'), '')
self.assertEqual(remove_quotes('";"'), ';')
def test_ordered_set(self):
self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
self.assertEqual(orderedSet([]), [])
@ -439,6 +450,10 @@ class TestUtil(unittest.TestCase):
data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
self.assertTrue(isinstance(data, bytes))
def test_encode_compat_str(self):
self.assertEqual(encode_compat_str(b'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'тест')
self.assertEqual(encode_compat_str('тест', 'utf-8'), 'тест')
def test_parse_iso8601(self):
self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266)
self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266)
@ -651,12 +666,13 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
{'like_count': 190, 'dislike_count': 10}))
def test_parse_dfxp_time_expr(self):
self.assertEqual(parse_dfxp_time_expr(None), 0.0)
self.assertEqual(parse_dfxp_time_expr(''), 0.0)
self.assertEqual(parse_dfxp_time_expr(None), None)
self.assertEqual(parse_dfxp_time_expr(''), None)
self.assertEqual(parse_dfxp_time_expr('0.1'), 0.1)
self.assertEqual(parse_dfxp_time_expr('0.1s'), 0.1)
self.assertEqual(parse_dfxp_time_expr('00:00:01'), 1.0)
self.assertEqual(parse_dfxp_time_expr('00:00:01.100'), 1.1)
self.assertEqual(parse_dfxp_time_expr('00:00:01:100'), 1.1)
def test_dfxp2srt(self):
dfxp_data = '''<?xml version="1.0" encoding="UTF-8"?>
@ -666,6 +682,9 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
<p begin="0" end="1">The following line contains Chinese characters and special symbols</p>
<p begin="1" end="2">第二行<br/></p>
<p begin="2" dur="1"><span>Third<br/>Line</span></p>
<p begin="3" end="-1">Lines with invalid timestamps are ignored</p>
<p begin="-1" end="-1">Ignore, two</p>
<p begin="3" dur="-1">Ignored, three</p>
</div>
</body>
</tt>'''

View File

@ -47,7 +47,9 @@ from .utils import (
DEFAULT_OUTTMPL,
determine_ext,
DownloadError,
encode_compat_str,
encodeFilename,
error_to_compat_str,
ExtractorError,
format_bytes,
formatSeconds,
@ -495,7 +497,7 @@ class YoutubeDL(object):
tb = ''
if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
tb += compat_str(traceback.format_exc())
tb += encode_compat_str(traceback.format_exc())
else:
tb_data = traceback.format_list(traceback.extract_stack())
tb = ''.join(tb_data)
@ -674,14 +676,14 @@ class YoutubeDL(object):
return self.process_ie_result(ie_result, download, extra_info)
else:
return ie_result
except ExtractorError as de: # An error we somewhat expected
self.report_error(compat_str(de), de.format_traceback())
except ExtractorError as e: # An error we somewhat expected
self.report_error(compat_str(e), e.format_traceback())
break
except MaxDownloadsReached:
raise
except Exception as e:
if self.params.get('ignoreerrors', False):
self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
break
else:
raise
@ -1459,7 +1461,7 @@ class YoutubeDL(object):
if dn and not os.path.exists(dn):
os.makedirs(dn)
except (OSError, IOError) as err:
self.report_error('unable to create directory ' + compat_str(err))
self.report_error('unable to create directory ' + error_to_compat_str(err))
return
if self.params.get('writedescription', False):
@ -1510,7 +1512,7 @@ class YoutubeDL(object):
sub_info['url'], info_dict['id'], note=False)
except ExtractorError as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, compat_str(err.cause)))
(sub_lang, error_to_compat_str(err.cause)))
continue
try:
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
@ -2039,4 +2041,4 @@ class YoutubeDL(object):
(info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning('Unable to download thumbnail "%s": %s' %
(t['url'], compat_str(err)))
(t['url'], error_to_compat_str(err)))

View File

@ -5,9 +5,9 @@ import re
import sys
import time
from ..compat import compat_str
from ..utils import (
encodeFilename,
error_to_compat_str,
decodeArgument,
format_bytes,
timeconvert,
@ -186,7 +186,7 @@ class FileDownloader(object):
return
os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
except (IOError, OSError) as err:
self.report_error('unable to rename file: %s' % compat_str(err))
self.report_error('unable to rename file: %s' % error_to_compat_str(err))
def try_utime(self, filename, last_modified_hdr):
"""Try to set the last-modified time of the given file."""

View File

@ -61,8 +61,11 @@ from .beatportpro import BeatportProIE
from .bet import BetIE
from .bild import BildIE
from .bilibili import BiliBiliIE
from .bleacherreport import (
BleacherReportIE,
BleacherReportCMSIE,
)
from .blinkx import BlinkxIE
from .bliptv import BlipTVIE, BlipTVUserIE
from .bloomberg import BloombergIE
from .bpb import BpbIE
from .br import BRIE
@ -78,7 +81,6 @@ from .camdemy import (
CamdemyIE,
CamdemyFolderIE
)
from .canal13cl import Canal13clIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cbs import CBSIE
@ -232,9 +234,11 @@ from .globo import (
from .godtube import GodTubeIE
from .goldenmoustache import GoldenMoustacheIE
from .golem import GolemIE
from .googledrive import GoogleDriveIE
from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
from .goshgay import GoshgayIE
from .gputechconf import GPUTechConfIE
from .groupon import GrouponIE
from .hark import HarkIE
from .hearthisat import HearThisAtIE
@ -281,6 +285,7 @@ from .jadorecettepub import JadoreCettePubIE
from .jeuxvideo import JeuxVideoIE
from .jove import JoveIE
from .jukebox import JukeboxIE
from .jwplatform import JWPlatformIE
from .jpopsukitv import JpopsukiIE
from .kaltura import KalturaIE
from .kanalplay import KanalPlayIE
@ -335,6 +340,7 @@ from .lynda import (
from .m6 import M6IE
from .macgamestore import MacGameStoreIE
from .mailru import MailRuIE
from .makertv import MakerTVIE
from .malemotion import MalemotionIE
from .mdr import MDRIE
from .metacafe import MetacafeIE
@ -647,6 +653,7 @@ from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE
from .ted import TEDIE
from .tele13 import Tele13IE
from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE
@ -675,6 +682,7 @@ from .tnaflix import (
EMPFlixIE,
MovieFapIE,
)
from .toggle import ToggleIE
from .thvideo import (
THVideoIE,
THVideoPlaylistIE
@ -850,7 +858,7 @@ from .youtube import (
YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE,
YoutubeUserIE,
YoutubeUserPlaylistsIE,
YoutubePlaylistsIE,
YoutubeWatchLaterIE,
)
from .zapiks import ZapiksIE

View File

@ -23,6 +23,7 @@ class ABCIE(InfoExtractor):
'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
},
'skip': 'this video has expired',
}, {
'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326',
'md5': 'db2a5369238b51f9811ad815b69dc086',
@ -36,6 +37,7 @@ class ABCIE(InfoExtractor):
'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill',
},
'add_ie': ['Youtube'],
'skip': 'Not accessible from Travis CI server',
}, {
'url': 'http://www.abc.net.au/news/2015-10-23/nab-lifts-interest-rates-following-westpac-and-cba/6880080',
'md5': 'b96eee7c9edf4fc5a358a0252881cc1f',
@ -58,6 +60,9 @@ class ABCIE(InfoExtractor):
r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
webpage)
if mobj is None:
expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None)
if expired:
raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True)
raise ExtractorError('Unable to extract video urls')
urls_info = self._parse_json(

View File

@ -68,7 +68,7 @@ class AdultSwimIE(InfoExtractor):
'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
'info_dict': {
'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
'ext': 'flv',
'ext': 'mp4',
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
},
@ -79,6 +79,10 @@ class AdultSwimIE(InfoExtractor):
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
},
'params': {
# m3u8 download
'skip_download': True,
}
}]
@staticmethod

View File

@ -0,0 +1,84 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
)
class AMPIE(InfoExtractor):
# parse Akamai Adaptive Media Player feed
def _extract_feed_info(self, url):
item = self._download_json(
url, None, 'Downloading Akamai AMP feed',
'Unable to download Akamai AMP feed')['channel']['item']
video_id = item['guid']
def get_media_node(name, default=None):
media_name = 'media-%s' % name
media_group = item.get('media-group') or item
return media_group.get(media_name) or item.get(media_name) or item.get(name, default)
thumbnails = []
media_thumbnail = get_media_node('thumbnail')
if media_thumbnail:
if isinstance(media_thumbnail, dict):
media_thumbnail = [media_thumbnail]
for thumbnail_data in media_thumbnail:
thumbnail = thumbnail_data['@attributes']
thumbnails.append({
'url': self._proto_relative_url(thumbnail['url'], 'http:'),
'width': int_or_none(thumbnail.get('width')),
'height': int_or_none(thumbnail.get('height')),
})
subtitles = {}
media_subtitle = get_media_node('subTitle')
if media_subtitle:
if isinstance(media_subtitle, dict):
media_subtitle = [media_subtitle]
for subtitle_data in media_subtitle:
subtitle = subtitle_data['@attributes']
lang = subtitle.get('lang') or 'en'
subtitles[lang] = [{'url': subtitle['href']}]
formats = []
media_content = get_media_node('content')
if isinstance(media_content, dict):
media_content = [media_content]
for media_data in media_content:
media = media_data['@attributes']
media_type = media['type']
if media_type == 'video/f4m':
f4m_formats = self._extract_f4m_formats(
media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
video_id, f4m_id='hds', fatal=False)
if f4m_formats:
formats.extend(f4m_formats)
elif media_type == 'application/x-mpegURL':
m3u8_formats = self._extract_m3u8_formats(
media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
else:
formats.append({
'format_id': media_data['media-category']['@attributes']['label'],
'url': media['url'],
'tbr': int_or_none(media.get('bitrate')),
'filesize': int_or_none(media.get('fileSize')),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': get_media_node('title'),
'description': get_media_node('description'),
'thumbnails': thumbnails,
'timestamp': parse_iso8601(item.get('pubDate'), ' '),
'duration': int_or_none(media_content[0].get('@attributes', {}).get('duration')),
'formats': formats,
}

View File

@ -2,6 +2,8 @@ from __future__ import unicode_literals
import time
import hmac
import hashlib
import re
from .common import InfoExtractor
from ..compat import (
@ -32,6 +34,19 @@ class AtresPlayerIE(InfoExtractor):
'duration': 5527.6,
'thumbnail': 're:^https?://.*\.jpg$',
},
'skip': 'This video is only available for registered users'
},
{
'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html',
'md5': '0d0e918533bbd4b263f2de4d197d4aac',
'info_dict': {
'id': 'capitulo-112-david-bustamante',
'ext': 'flv',
'title': 'David Bustamante',
'description': 'md5:f33f1c0a05be57f6708d4dd83a3b81c6',
'duration': 1439.0,
'thumbnail': 're:^https?://.*\.jpg$',
},
},
{
'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html',
@ -50,6 +65,13 @@ class AtresPlayerIE(InfoExtractor):
_LOGIN_URL = 'https://servicios.atresplayer.com/j_spring_security_check'
_ERRORS = {
'UNPUBLISHED': 'We\'re sorry, but this video is not yet available.',
'DELETED': 'This video has expired and is no longer available for online streaming.',
'GEOUNPUBLISHED': 'We\'re sorry, but this video is not available in your region due to right restrictions.',
# 'PREMIUM': 'PREMIUM',
}
def _real_initialize(self):
self._login()
@ -83,58 +105,81 @@ class AtresPlayerIE(InfoExtractor):
episode_id = self._search_regex(
r'episode="([^"]+)"', webpage, 'episode id')
request = sanitized_Request(
self._PLAYER_URL_TEMPLATE % episode_id,
headers={'User-Agent': self._USER_AGENT})
player = self._download_json(request, episode_id, 'Downloading player JSON')
episode_type = player.get('typeOfEpisode')
error_message = self._ERRORS.get(episode_type)
if error_message:
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
formats = []
video_url = player.get('urlVideo')
if video_url:
format_info = {
'url': video_url,
'format_id': 'http',
}
mobj = re.search(r'(?P<bitrate>\d+)K_(?P<width>\d+)x(?P<height>\d+)', video_url)
if mobj:
format_info.update({
'width': int_or_none(mobj.group('width')),
'height': int_or_none(mobj.group('height')),
'tbr': int_or_none(mobj.group('bitrate')),
})
formats.append(format_info)
m3u8_url = player.get('urlVideoHls')
if m3u8_url:
m3u8_formats = self._extract_m3u8_formats(
m3u8_url, episode_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
timestamp = int_or_none(self._download_webpage(
self._TIME_API_URL,
video_id, 'Downloading timestamp', fatal=False), 1000, time.time())
timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT)
token = hmac.new(
self._MAGIC.encode('ascii'),
(episode_id + timestamp_shifted).encode('utf-8')
(episode_id + timestamp_shifted).encode('utf-8'), hashlib.md5
).hexdigest()
formats = []
for fmt in ['windows', 'android_tablet']:
request = sanitized_Request(
self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
request.add_header('User-Agent', self._USER_AGENT)
request = sanitized_Request(
self._URL_VIDEO_TEMPLATE.format('windows', episode_id, timestamp_shifted, token),
headers={'User-Agent': self._USER_AGENT})
fmt_json = self._download_json(
request, video_id, 'Downloading %s video JSON' % fmt)
fmt_json = self._download_json(
request, video_id, 'Downloading windows video JSON')
result = fmt_json.get('resultDes')
if result.lower() != 'ok':
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, result), expected=True)
result = fmt_json.get('resultDes')
if result.lower() != 'ok':
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, result), expected=True)
for format_id, video_url in fmt_json['resultObject'].items():
if format_id == 'token' or not video_url.startswith('http'):
continue
if video_url.endswith('/Manifest'):
if 'geodeswowsmpra3player' in video_url:
f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
# this videos are protected by DRM, the f4m downloader doesn't support them
continue
else:
f4m_url = video_url[:-9] + '/manifest.f4m'
formats.extend(self._extract_f4m_formats(f4m_url, video_id))
else:
formats.append({
'url': video_url,
'format_id': 'android-%s' % format_id,
'preference': 1,
})
for format_id, video_url in fmt_json['resultObject'].items():
if format_id == 'token' or not video_url.startswith('http'):
continue
if 'geodeswowsmpra3player' in video_url:
f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
# this videos are protected by DRM, the f4m downloader doesn't support them
continue
else:
f4m_url = video_url[:-9] + '/manifest.f4m'
f4m_formats = self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False)
if f4m_formats:
formats.extend(f4m_formats)
self._sort_formats(formats)
player = self._download_json(
self._PLAYER_URL_TEMPLATE % episode_id,
episode_id)
path_data = player.get('pathData')
episode = self._download_xml(
self._EPISODE_URL_TEMPLATE % path_data,
video_id, 'Downloading episode XML')
self._EPISODE_URL_TEMPLATE % path_data, video_id,
'Downloading episode XML')
duration = float_or_none(xpath_text(
episode, './media/asset/info/technical/contentDuration', 'duration'))

View File

@ -15,7 +15,7 @@ class AudiMediaIE(InfoExtractor):
'url': 'https://audimedia.tv/en/vid/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test',
'md5': '79a8b71c46d49042609795ab59779b66',
'info_dict': {
'id': '1564',
'id': '1565',
'ext': 'mp4',
'title': '60 Seconds of Audi Sport 104/2015 - WEC Bahrain, Rookie Test',
'description': 'md5:60e5d30a78ced725f7b8d34370762941',

View File

@ -56,7 +56,7 @@ class AudiomackIE(InfoExtractor):
# API is inconsistent with errors
if 'url' not in api_response or not api_response['url'] or 'error' in api_response:
raise ExtractorError('Invalid url %s', url)
raise ExtractorError('Invalid url %s' % url)
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper
# if so, pass the work off to the soundcloud extractor

View File

@ -0,0 +1,106 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .amp import AMPIE
from ..utils import (
ExtractorError,
int_or_none,
parse_iso8601,
)
class BleacherReportIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)'
_TESTS = [{
'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football',
'md5': 'a3ffc3dc73afdbc2010f02d98f990f20',
'info_dict': {
'id': '2496438',
'ext': 'mp4',
'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?',
'uploader_id': 3992341,
'description': 'CFB, ACC, Florida State',
'timestamp': 1434380212,
'upload_date': '20150615',
'uploader': 'Team Stream Now ',
},
'add_ie': ['Ooyala'],
}, {
'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
'md5': 'af5f90dc9c7ba1c19d0a3eac806bbf50',
'info_dict': {
'id': '2586817',
'ext': 'mp4',
'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
'timestamp': 1446839961,
'uploader': 'Sean Fay',
'description': 'md5:825e94e0f3521df52fa83b2ed198fa20',
'uploader_id': 6466954,
'upload_date': '20151011',
},
'add_ie': ['Youtube'],
}]
def _real_extract(self, url):
article_id = self._match_id(url)
article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article']
thumbnails = []
primary_photo = article_data.get('primaryPhoto')
if primary_photo:
thumbnails = [{
'url': primary_photo['url'],
'width': primary_photo.get('width'),
'height': primary_photo.get('height'),
}]
info = {
'_type': 'url_transparent',
'id': article_id,
'title': article_data['title'],
'uploader': article_data.get('author', {}).get('name'),
'uploader_id': article_data.get('authorId'),
'timestamp': parse_iso8601(article_data.get('createdAt')),
'thumbnails': thumbnails,
'comment_count': int_or_none(article_data.get('commentsCount')),
'view_count': int_or_none(article_data.get('hitCount')),
}
video = article_data.get('video')
if video:
video_type = video['type']
if video_type == 'cms.bleacherreport.com':
info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
elif video_type == 'ooyala.com':
info['url'] = 'ooyala:%s' % video['id']
elif video_type == 'youtube.com':
info['url'] = video['id']
elif video_type == 'vine.co':
info['url'] = 'https://vine.co/v/%s' % video['id']
else:
info['url'] = video_type + video['id']
return info
else:
raise ExtractorError('no video in the article', expected=True)
class BleacherReportCMSIE(AMPIE):
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
_TESTS = [{
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
'md5': 'f0ca220af012d4df857b54f792c586bb',
'info_dict': {
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
'ext': 'flv',
'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
info = self._extract_feed_info('http://cms.bleacherreport.com/media/items/%s/akamai.json' % video_id)
info['id'] = video_id
return info

View File

@ -1,290 +0,0 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
clean_html,
int_or_none,
parse_iso8601,
sanitized_Request,
unescapeHTML,
xpath_text,
xpath_with_ns,
)
class BlipTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'
_TESTS = [
{
'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
'md5': '80baf1ec5c3d2019037c1c707d676b9f',
'info_dict': {
'id': '5779306',
'ext': 'm4v',
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
'timestamp': 1323138843,
'upload_date': '20111206',
'uploader': 'cbr',
'uploader_id': '679425',
'duration': 81,
}
},
{
# https://github.com/rg3/youtube-dl/pull/2274
'note': 'Video with subtitles',
'url': 'http://blip.tv/play/h6Uag5OEVgI.html',
'md5': '309f9d25b820b086ca163ffac8031806',
'info_dict': {
'id': '6586561',
'ext': 'mp4',
'title': 'Red vs. Blue Season 11 Episode 1',
'description': 'One-Zero-One',
'timestamp': 1371261608,
'upload_date': '20130615',
'uploader': 'redvsblue',
'uploader_id': '792887',
'duration': 279,
}
},
{
# https://bugzilla.redhat.com/show_bug.cgi?id=967465
'url': 'http://a.blip.tv/api.swf#h6Uag5KbVwI',
'md5': '314e87b1ebe7a48fcbfdd51b791ce5a6',
'info_dict': {
'id': '6573122',
'ext': 'mov',
'upload_date': '20130520',
'description': 'Two hapless space marines argue over what to do when they realize they have an astronomically huge problem on their hands.',
'title': 'Red vs. Blue Season 11 Trailer',
'timestamp': 1369029609,
'uploader': 'redvsblue',
'uploader_id': '792887',
}
},
{
'url': 'http://blip.tv/play/gbk766dkj4Yn',
'md5': 'fe0a33f022d49399a241e84a8ea8b8e3',
'info_dict': {
'id': '1749452',
'ext': 'mp4',
'upload_date': '20090208',
'description': 'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.',
'title': 'Nostalgia Critic: Transformers',
'timestamp': 1234068723,
'uploader': 'NostalgiaCritic',
'uploader_id': '246467',
}
},
{
# https://github.com/rg3/youtube-dl/pull/4404
'note': 'Audio only',
'url': 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982',
'md5': '76c0a56f24e769ceaab21fbb6416a351',
'info_dict': {
'id': '7103299',
'ext': 'flv',
'title': 'Weekly Manga Recap: Kingdom',
'description': 'And then Shin breaks the enemy line, and he&apos;s all like HWAH! And then he slices a guy and it&apos;s all like FWASHING! And... it&apos;s really hard to describe the best parts of this series without breaking down into sound effects, okay?',
'timestamp': 1417660321,
'upload_date': '20141204',
'uploader': 'The Rollo T',
'uploader_id': '407429',
'duration': 7251,
'vcodec': 'none',
}
},
{
# missing duration
'url': 'http://blip.tv/rss/flash/6700880',
'info_dict': {
'id': '6684191',
'ext': 'm4v',
'title': 'Cowboy Bebop: Gateway Shuffle Review',
'description': 'md5:3acc480c0f9ae157f5fe88547ecaf3f8',
'timestamp': 1386639757,
'upload_date': '20131210',
'uploader': 'sfdebris',
'uploader_id': '706520',
}
}
]
@staticmethod
def _extract_url(webpage):
mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
if mobj:
return 'http://blip.tv/a/a-' + mobj.group(1)
mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
if mobj:
return mobj.group(1)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
lookup_id = mobj.group('lookup_id')
# See https://github.com/rg3/youtube-dl/issues/857 and
# https://github.com/rg3/youtube-dl/issues/4197
if lookup_id:
urlh = self._request_webpage(
'http://blip.tv/play/%s' % lookup_id, lookup_id, 'Resolving lookup id')
url = compat_urlparse.urlparse(urlh.geturl())
qs = compat_urlparse.parse_qs(url.query)
mobj = re.match(self._VALID_URL, qs['file'][0])
video_id = mobj.group('id')
rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')
def _x(p):
return xpath_with_ns(p, {
'blip': 'http://blip.tv/dtd/blip/1.0',
'media': 'http://search.yahoo.com/mrss/',
'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
})
item = rss.find('channel/item')
video_id = xpath_text(item, _x('blip:item_id'), 'video id') or lookup_id
title = xpath_text(item, 'title', 'title', fatal=True)
description = clean_html(xpath_text(item, _x('blip:puredescription'), 'description'))
timestamp = parse_iso8601(xpath_text(item, _x('blip:datestamp'), 'timestamp'))
uploader = xpath_text(item, _x('blip:user'), 'uploader')
uploader_id = xpath_text(item, _x('blip:userid'), 'uploader id')
duration = int_or_none(xpath_text(item, _x('blip:runtime'), 'duration'))
media_thumbnail = item.find(_x('media:thumbnail'))
thumbnail = (media_thumbnail.get('url') if media_thumbnail is not None
else xpath_text(item, 'image', 'thumbnail'))
categories = [category.text for category in item.findall('category') if category is not None]
formats = []
subtitles_urls = {}
media_group = item.find(_x('media:group'))
for media_content in media_group.findall(_x('media:content')):
url = media_content.get('url')
role = media_content.get(_x('blip:role'))
msg = self._download_webpage(
url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
video_id, 'Resolving URL for %s' % role)
real_url = compat_urlparse.parse_qs(msg.strip())['message'][0]
media_type = media_content.get('type')
if media_type == 'text/srt' or url.endswith('.srt'):
LANGS = {
'english': 'en',
}
lang = role.rpartition('-')[-1].strip().lower()
langcode = LANGS.get(lang, lang)
subtitles_urls[langcode] = url
elif media_type.startswith('video/'):
formats.append({
'url': real_url,
'format_id': role,
'format_note': media_type,
'vcodec': media_content.get(_x('blip:vcodec')) or 'none',
'acodec': media_content.get(_x('blip:acodec')),
'filesize': media_content.get('filesize'),
'width': int_or_none(media_content.get('width')),
'height': int_or_none(media_content.get('height')),
})
self._check_formats(formats, video_id)
self._sort_formats(formats)
subtitles = self.extract_subtitles(video_id, subtitles_urls)
return {
'id': video_id,
'title': title,
'description': description,
'timestamp': timestamp,
'uploader': uploader,
'uploader_id': uploader_id,
'duration': duration,
'thumbnail': thumbnail,
'categories': categories,
'formats': formats,
'subtitles': subtitles,
}
def _get_subtitles(self, video_id, subtitles_urls):
subtitles = {}
for lang, url in subtitles_urls.items():
# For some weird reason, blip.tv serves a video instead of subtitles
# when we request with a common UA
req = sanitized_Request(url)
req.add_header('User-Agent', 'youtube-dl')
subtitles[lang] = [{
# The extension is 'srt' but it's actually an 'ass' file
'ext': 'ass',
'data': self._download_webpage(req, None, note=False),
}]
return subtitles
class BlipTVUserIE(InfoExtractor):
_VALID_URL = r'(?:(?:https?://(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
_PAGE_SIZE = 12
IE_NAME = 'blip.tv:user'
_TEST = {
'url': 'http://blip.tv/actone',
'info_dict': {
'id': 'actone',
'title': 'Act One: The Series',
},
'playlist_count': 5,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
username = mobj.group(1)
page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'
page = self._download_webpage(url, username, 'Downloading user page')
mobj = re.search(r'data-users-id="([^"]+)"', page)
page_base = page_base % mobj.group(1)
title = self._og_search_title(page)
# Download video ids using BlipTV Ajax calls. Result size per
# query is limited (currently to 12 videos) so we need to query
# page by page until there are no video ids - it means we got
# all of them.
video_ids = []
pagenum = 1
while True:
url = page_base + "&page=" + str(pagenum)
page = self._download_webpage(
url, username, 'Downloading video ids from page %d' % pagenum)
# Extract video identifiers
ids_in_page = []
for mobj in re.finditer(r'href="/([^"]+)"', page):
if mobj.group(1) not in ids_in_page:
ids_in_page.append(unescapeHTML(mobj.group(1)))
video_ids.extend(ids_in_page)
# A little optimization - if current page is not
# "full", ie. does not contain PAGE_SIZE video ids then
# we can assume that this page is the last one - there
# are no more ids on further pages - no need to query
# again.
if len(ids_in_page) < self._PAGE_SIZE:
break
pagenum += 1
urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
return self.playlist_result(
url_entries, playlist_title=title, playlist_id=username)

View File

@ -1,18 +1,21 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
parse_duration,
xpath_element,
xpath_text,
)
class BRIE(InfoExtractor):
IE_DESC = 'Bayerischer Rundfunk Mediathek'
_VALID_URL = r'https?://(?:www\.)?br\.de/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
_BASE_URL = 'http://www.br.de'
_VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
_TESTS = [
{
@ -22,7 +25,7 @@ class BRIE(InfoExtractor):
'id': '48f656ef-287e-486f-be86-459122db22cc',
'ext': 'mp4',
'title': 'Die böse Überraschung',
'description': 'Betriebliche Altersvorsorge: Die böse Überraschung',
'description': 'md5:ce9ac81b466ce775b8018f6801b48ac9',
'duration': 180,
'uploader': 'Reinhard Weber',
'upload_date': '20150422',
@ -30,23 +33,23 @@ class BRIE(InfoExtractor):
},
{
'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html',
'md5': 'a44396d73ab6a68a69a568fae10705bb',
'md5': 'af3a3a4aa43ff0ce6a89504c67f427ef',
'info_dict': {
'id': 'a4b83e34-123d-4b81-9f4e-c0d3121a4e05',
'ext': 'mp4',
'ext': 'flv',
'title': 'Manfred Schreiber ist tot',
'description': 'Abendschau kompakt: Manfred Schreiber ist tot',
'description': 'md5:b454d867f2a9fc524ebe88c3f5092d97',
'duration': 26,
}
},
{
'url': 'http://www.br.de/radio/br-klassik/sendungen/allegro/premiere-urauffuehrung-the-land-2015-dance-festival-muenchen-100.html',
'url': 'https://www.br-klassik.de/audio/peeping-tom-premierenkritik-dance-festival-muenchen-100.html',
'md5': '8b5b27c0b090f3b35eac4ab3f7a73d3d',
'info_dict': {
'id': '74c603c9-26d3-48bb-b85b-079aeed66e0b',
'ext': 'aac',
'title': 'Kurzweilig und sehr bewegend',
'description': '"The Land" von Peeping Tom: Kurzweilig und sehr bewegend',
'description': 'md5:0351996e3283d64adeb38ede91fac54e',
'duration': 296,
}
},
@ -57,7 +60,7 @@ class BRIE(InfoExtractor):
'id': '6ba73750-d405-45d3-861d-1ce8c524e059',
'ext': 'mp4',
'title': 'Umweltbewusster Häuslebauer',
'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer',
'description': 'md5:d52dae9792d00226348c1dbb13c9bae2',
'duration': 116,
}
},
@ -68,7 +71,7 @@ class BRIE(InfoExtractor):
'id': 'd982c9ce-8648-4753-b358-98abb8aec43d',
'ext': 'mp4',
'title': 'Folge 1 - Metaphysik',
'description': 'Kant für Anfänger: Folge 1 - Metaphysik',
'description': 'md5:bb659990e9e59905c3d41e369db1fbe3',
'duration': 893,
'uploader': 'Eva Maria Steimle',
'upload_date': '20140117',
@ -77,28 +80,31 @@ class BRIE(InfoExtractor):
]
def _real_extract(self, url):
display_id = self._match_id(url)
base_url, display_id = re.search(self._VALID_URL, url).groups()
page = self._download_webpage(url, display_id)
xml_url = self._search_regex(
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL')
xml = self._download_xml(self._BASE_URL + xml_url, None)
xml = self._download_xml(base_url + xml_url, display_id)
medias = []
for xml_media in xml.findall('video') + xml.findall('audio'):
media_id = xml_media.get('externalId')
media = {
'id': xml_media.get('externalId'),
'title': xml_media.find('title').text,
'duration': parse_duration(xml_media.find('duration').text),
'formats': self._extract_formats(xml_media.find('assets')),
'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')),
'description': ' '.join(xml_media.find('shareTitle').text.splitlines()),
'webpage_url': xml_media.find('permalink').text
'id': media_id,
'title': xpath_text(xml_media, 'title', 'title', True),
'duration': parse_duration(xpath_text(xml_media, 'duration')),
'formats': self._extract_formats(xpath_element(
xml_media, 'assets'), media_id),
'thumbnails': self._extract_thumbnails(xpath_element(
xml_media, 'teaserImage/variants'), base_url),
'description': xpath_text(xml_media, 'desc'),
'webpage_url': xpath_text(xml_media, 'permalink'),
'uploader': xpath_text(xml_media, 'author'),
}
if xml_media.find('author').text:
media['uploader'] = xml_media.find('author').text
if xml_media.find('broadcastDate').text:
media['upload_date'] = ''.join(reversed(xml_media.find('broadcastDate').text.split('.')))
broadcast_date = xpath_text(xml_media, 'broadcastDate')
if broadcast_date:
media['upload_date'] = ''.join(reversed(broadcast_date.split('.')))
medias.append(media)
if len(medias) > 1:
@ -109,35 +115,58 @@ class BRIE(InfoExtractor):
raise ExtractorError('No media entries found')
return medias[0]
def _extract_formats(self, assets):
def text_or_none(asset, tag):
elem = asset.find(tag)
return None if elem is None else elem.text
formats = [{
'url': text_or_none(asset, 'downloadUrl'),
'ext': text_or_none(asset, 'mediaType'),
'format_id': asset.get('type'),
'width': int_or_none(text_or_none(asset, 'frameWidth')),
'height': int_or_none(text_or_none(asset, 'frameHeight')),
'tbr': int_or_none(text_or_none(asset, 'bitrateVideo')),
'abr': int_or_none(text_or_none(asset, 'bitrateAudio')),
'vcodec': text_or_none(asset, 'codecVideo'),
'acodec': text_or_none(asset, 'codecAudio'),
'container': text_or_none(asset, 'mediaType'),
'filesize': int_or_none(text_or_none(asset, 'size')),
} for asset in assets.findall('asset')
if asset.find('downloadUrl') is not None]
def _extract_formats(self, assets, media_id):
formats = []
for asset in assets.findall('asset'):
format_url = xpath_text(asset, ['downloadUrl', 'url'])
asset_type = asset.get('type')
if asset_type == 'HDS':
f4m_formats = self._extract_f4m_formats(
format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False)
if f4m_formats:
formats.extend(f4m_formats)
elif asset_type == 'HLS':
m3u8_formats = self._extract_m3u8_formats(
format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hds', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
else:
format_info = {
'ext': xpath_text(asset, 'mediaType'),
'width': int_or_none(xpath_text(asset, 'frameWidth')),
'height': int_or_none(xpath_text(asset, 'frameHeight')),
'tbr': int_or_none(xpath_text(asset, 'bitrateVideo')),
'abr': int_or_none(xpath_text(asset, 'bitrateAudio')),
'vcodec': xpath_text(asset, 'codecVideo'),
'acodec': xpath_text(asset, 'codecAudio'),
'container': xpath_text(asset, 'mediaType'),
'filesize': int_or_none(xpath_text(asset, 'size')),
}
format_url = self._proto_relative_url(format_url)
if format_url:
http_format_info = format_info.copy()
http_format_info.update({
'url': format_url,
'format_id': 'http-%s' % asset_type,
})
formats.append(http_format_info)
server_prefix = xpath_text(asset, 'serverPrefix')
if server_prefix:
rtmp_format_info = format_info.copy()
rtmp_format_info.update({
'url': server_prefix,
'play_path': xpath_text(asset, 'fileName'),
'format_id': 'rtmp-%s' % asset_type,
})
formats.append(rtmp_format_info)
self._sort_formats(formats)
return formats
def _extract_thumbnails(self, variants):
def _extract_thumbnails(self, variants, base_url):
thumbnails = [{
'url': self._BASE_URL + variant.find('url').text,
'width': int_or_none(variant.find('width').text),
'height': int_or_none(variant.find('height').text),
} for variant in variants.findall('variant')]
'url': base_url + xpath_text(variant, 'url'),
'width': int_or_none(xpath_text(variant, 'width')),
'height': int_or_none(xpath_text(variant, 'height')),
} for variant in variants.findall('variant') if xpath_text(variant, 'url')]
thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True)
return thumbnails

View File

@ -355,7 +355,7 @@ class BrightcoveLegacyIE(InfoExtractor):
class BrightcoveNewIE(InfoExtractor):
IE_NAME = 'brightcove:new'
_VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+)'
_VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>(?:ref:)?\d+)'
_TESTS = [{
'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
'md5': 'c8100925723840d4b0d243f7025703be',
@ -387,14 +387,24 @@ class BrightcoveNewIE(InfoExtractor):
'params': {
'skip_download': True,
}
}, {
# ref: prefixed video id
'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442',
'only_matching': True,
}]
@staticmethod
def _extract_url(webpage):
urls = BrightcoveNewIE._extract_urls(webpage)
return urls[0] if urls else None
@staticmethod
def _extract_urls(webpage):
# Reference:
# 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
# 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript)
# 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
# 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/embed-in-page.html
# 4. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player
entries = []
@ -407,9 +417,10 @@ class BrightcoveNewIE(InfoExtractor):
for video_id, account_id, player_id, embed in re.findall(
# According to examples from [3] it's unclear whether video id
# may be optional and what to do when it is
# According to [4] data-video-id may be prefixed with ref:
r'''(?sx)
<video[^>]+
data-video-id=["\'](\d+)["\'][^>]*>.*?
data-video-id=["\']((?:ref:)?\d+)["\'][^>]*>.*?
</video>.*?
<script[^>]+
src=["\'](?:https?:)?//players\.brightcove\.net/

View File

@ -1,48 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class Canal13clIE(InfoExtractor):
_VALID_URL = r'^http://(?:www\.)?13\.cl/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
_TEST = {
'url': 'http://www.13.cl/t13/nacional/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
'md5': '4cb1fa38adcad8fea88487a078831755',
'info_dict': {
'id': '1403022125',
'display_id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
'ext': 'mp4',
'title': 'El "círculo de hierro" de Michelle Bachelet en su regreso a La Moneda',
'description': '(Foto: Agencia Uno) En nueve días más, Michelle Bachelet va a asumir por segunda vez como presidenta de la República. Entre aquellos que la acompañarán hay caras que se repiten y otras que se consolidan en su entorno de colaboradores más cercanos.',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('id')
webpage = self._download_webpage(url, display_id)
title = self._html_search_meta(
'twitter:title', webpage, 'title', fatal=True)
description = self._html_search_meta(
'twitter:description', webpage, 'description')
url = self._html_search_regex(
r'articuloVideo = \"(.*?)\"', webpage, 'url')
real_id = self._search_regex(
r'[^0-9]([0-9]{7,})[^0-9]', url, 'id', default=display_id)
thumbnail = self._html_search_regex(
r'articuloImagen = \"(.*?)\"', webpage, 'thumbnail')
return {
'id': real_id,
'display_id': display_id,
'url': url,
'title': title,
'description': description,
'ext': 'mp4',
'thumbnail': thumbnail,
}

View File

@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from ..utils import ExtractorError
from .bliptv import BlipTVIE
from .screenwavemedia import ScreenwaveMediaIE
@ -34,18 +33,17 @@ class CinemassacreIE(InfoExtractor):
},
},
{
# blip.tv embedded video
# Youtube embedded video
'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/',
'md5': 'ca9b3c8dd5a66f9375daeb5135f5a3de',
'md5': 'df4cf8a1dcedaec79a73d96d83b99023',
'info_dict': {
'id': '4065369',
'ext': 'flv',
'id': 'OEVzPCY2T-g',
'ext': 'mp4',
'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles',
'upload_date': '20061207',
'uploader': 'cinemassacre',
'uploader_id': '250778',
'timestamp': 1283233867,
'description': 'md5:0a108c78d130676b207d0f6d029ecffd',
'uploader': 'Cinemassacre',
'uploader_id': 'JamesNintendoNerd',
'description': 'md5:784734696c2b8b7f4b8625cc799e07f6',
}
},
{
@ -88,8 +86,6 @@ class CinemassacreIE(InfoExtractor):
r'<iframe[^>]+src="(?P<url>(?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
],
webpage, 'player data URL', default=None, group='url')
if not playerdata_url:
playerdata_url = BlipTVIE._extract_url(webpage)
if not playerdata_url:
raise ExtractorError('Unable to find player data')

View File

@ -1,15 +1,11 @@
# coding: utf-8
from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
from .theplatform import ThePlatformIE
from ..utils import int_or_none
class CNETIE(InfoExtractor):
class CNETIE(ThePlatformIE):
_VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
_TESTS = [{
'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
@ -18,25 +14,20 @@ class CNETIE(InfoExtractor):
'ext': 'flv',
'title': 'Hands-on with Microsoft Windows 8.1 Update',
'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
'thumbnail': 're:^http://.*/flmswindows8.jpg$',
'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
'uploader': 'Sarah Mitroff',
'duration': 70,
},
'params': {
'skip_download': 'requires rtmpdump',
}
}, {
'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
'info_dict': {
'id': '56527b93-d25d-44e3-b738-f989ce2e49ba',
'ext': 'flv',
'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
'description': 'Khail and Ashley wonder what other civic woes can be solved by self-tweeting objects, investigate a new kind of VR camera and watch an origami robot self-assemble, walk, climb, dig and dissolve. #TDPothole',
'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
'uploader': 'Ashley Esqueda',
'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
},
'params': {
'skip_download': True, # requires rtmpdump
'duration': 1482,
},
}]
@ -45,26 +36,13 @@ class CNETIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
data_json = self._html_search_regex(
r"<div class=\"cnetVideoPlayer\"\s+.*?data-cnet-video-options='([^']+)'",
r"data-cnet-video(?:-uvp)?-options='([^']+)'",
webpage, 'data json')
data = json.loads(data_json)
vdata = data['video']
if not vdata:
vdata = data['videos'][0]
if not vdata:
raise ExtractorError('Cannot find video data')
mpx_account = data['config']['players']['default']['mpx_account']
vid = vdata['files'].get('rtmp', vdata['files']['hds'])
tp_link = 'http://link.theplatform.com/s/%s/%s' % (mpx_account, vid)
data = self._parse_json(data_json, display_id)
vdata = data.get('video') or data['videos'][0]
video_id = vdata['id']
title = vdata.get('headline')
if title is None:
title = vdata.get('title')
if title is None:
raise ExtractorError('Cannot find title!')
thumbnail = vdata.get('image', {}).get('path')
title = vdata['title']
author = vdata.get('author')
if author:
uploader = '%s %s' % (author['firstName'], author['lastName'])
@ -73,13 +51,34 @@ class CNETIE(InfoExtractor):
uploader = None
uploader_id = None
mpx_account = data['config']['uvpConfig']['default']['mpx_account']
metadata = self.get_metadata('%s/%s' % (mpx_account, list(vdata['files'].values())[0]), video_id)
description = vdata.get('description') or metadata.get('description')
duration = int_or_none(vdata.get('duration')) or metadata.get('duration')
formats = []
subtitles = {}
for (fkey, vid) in vdata['files'].items():
if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
continue
release_url = 'http://link.theplatform.com/s/%s/%s?format=SMIL&mbr=true' % (mpx_account, vid)
if fkey == 'hds':
release_url += '&manifest=f4m'
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)
formats.extend(tp_formats)
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
self._sort_formats(formats)
return {
'_type': 'url_transparent',
'url': tp_link,
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': metadata.get('thumbnail'),
'duration': duration,
'uploader': uploader,
'uploader_id': uploader_id,
'thumbnail': thumbnail,
'subtitles': subtitles,
'formats': formats,
}

View File

@ -30,6 +30,7 @@ from ..utils import (
clean_html,
compiled_regex_type,
determine_ext,
error_to_compat_str,
ExtractorError,
fix_xml_ampersands,
float_or_none,
@ -332,7 +333,8 @@ class InfoExtractor(object):
return False
if errnote is None:
errnote = 'Unable to download webpage'
errmsg = '%s: %s' % (errnote, compat_str(err))
errmsg = '%s: %s' % (errnote, error_to_compat_str(err))
if fatal:
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
else:
@ -622,7 +624,7 @@ class InfoExtractor(object):
else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError) as err:
self._downloader.report_warning('parsing .netrc: %s' % compat_str(err))
self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err))
return (username, password)

View File

@ -7,10 +7,10 @@ import itertools
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
determine_ext,
error_to_compat_str,
ExtractorError,
int_or_none,
parse_iso8601,
sanitized_Request,
@ -278,7 +278,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
video_id, note=False)
except ExtractorError as err:
self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
return {}
info = json.loads(sub_list)
if (info['total'] > 0):

View File

@ -3,7 +3,7 @@ from __future__ import unicode_literals
import itertools
from .common import InfoExtractor
from .amp import AMPIE
from ..compat import (
compat_HTTPError,
compat_urllib_parse,
@ -12,14 +12,11 @@ from ..compat import (
from ..utils import (
ExtractorError,
clean_html,
determine_ext,
int_or_none,
parse_iso8601,
sanitized_Request,
)
class DramaFeverBaseIE(InfoExtractor):
class DramaFeverBaseIE(AMPIE):
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
_NETRC_MACHINE = 'dramafever'
@ -80,60 +77,25 @@ class DramaFeverIE(DramaFeverBaseIE):
'timestamp': 1404336058,
'upload_date': '20140702',
'duration': 343,
}
},
'params': {
# m3u8 download
'skip_download': True,
},
}
def _real_extract(self, url):
video_id = self._match_id(url).replace('/', '.')
try:
feed = self._download_json(
'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id,
video_id, 'Downloading episode JSON')['channel']['item']
info = self._extract_feed_info(
'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
raise ExtractorError(
'Currently unavailable in your country.', expected=True)
raise
media_group = feed.get('media-group', {})
formats = []
for media_content in media_group['media-content']:
src = media_content.get('@attributes', {}).get('url')
if not src:
continue
ext = determine_ext(src)
if ext == 'f4m':
formats.extend(self._extract_f4m_formats(
src, video_id, f4m_id='hds'))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
src, video_id, 'mp4', m3u8_id='hls'))
else:
formats.append({
'url': src,
})
self._sort_formats(formats)
title = media_group.get('media-title')
description = media_group.get('media-description')
duration = int_or_none(media_group['media-content'][0].get('@attributes', {}).get('duration'))
thumbnail = self._proto_relative_url(
media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url'))
timestamp = parse_iso8601(feed.get('pubDate'), ' ')
subtitles = {}
for media_subtitle in media_group.get('media-subTitle', []):
lang = media_subtitle.get('@attributes', {}).get('lang')
href = media_subtitle.get('@attributes', {}).get('href')
if not lang or not href:
continue
subtitles[lang] = [{
'ext': 'ttml',
'url': href,
}]
series_id, episode_number = video_id.split('.')
episode_info = self._download_json(
# We only need a single episode info, so restricting page size to one episode
@ -146,21 +108,12 @@ class DramaFeverIE(DramaFeverBaseIE):
if value:
subfile = value[0].get('subfile') or value[0].get('new_subfile')
if subfile and subfile != 'http://www.dramafever.com/st/':
subtitles.setdefault('English', []).append({
info['subtitiles'].setdefault('English', []).append({
'ext': 'srt',
'url': subfile,
})
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'duration': duration,
'formats': formats,
'subtitles': subtitles,
}
return info
class DramaFeverSeriesIE(DramaFeverBaseIE):

View File

@ -7,11 +7,11 @@ import socket
from .common import InfoExtractor
from ..compat import (
compat_http_client,
compat_str,
compat_urllib_error,
compat_urllib_parse_unquote,
)
from ..utils import (
error_to_compat_str,
ExtractorError,
limit_length,
sanitized_Request,
@ -116,7 +116,7 @@ class FacebookIE(InfoExtractor):
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning('unable to log in: %s' % compat_str(err))
self._downloader.report_warning('unable to log in: %s' % error_to_compat_str(err))
return
def _real_initialize(self):

View File

@ -2,6 +2,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
xpath_element,
xpath_text,
int_or_none,
)
class FazIE(InfoExtractor):
@ -37,31 +42,32 @@ class FazIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
description = self._og_search_description(webpage)
config_xml_url = self._search_regex(
r'writeFLV\(\'(.+?)\',', webpage, 'config xml url')
r'videoXMLURL\s*=\s*"([^"]+)', webpage, 'config xml url')
config = self._download_xml(
config_xml_url, video_id, 'Downloading config xml')
encodings = config.find('ENCODINGS')
encodings = xpath_element(config, 'ENCODINGS', 'encodings', True)
formats = []
for pref, code in enumerate(['LOW', 'HIGH', 'HQ']):
encoding = encodings.find(code)
if encoding is None:
continue
encoding_url = encoding.find('FILENAME').text
formats.append({
'url': encoding_url,
'format_id': code.lower(),
'quality': pref,
})
encoding = xpath_element(encodings, code)
if encoding:
encoding_url = xpath_text(encoding, 'FILENAME')
if encoding_url:
formats.append({
'url': encoding_url,
'format_id': code.lower(),
'quality': pref,
'tbr': int_or_none(xpath_text(encoding, 'AVERAGEBITRATE')),
})
self._sort_formats(formats)
descr = self._html_search_regex(
r'<p class="Content Copy">(.*?)</p>', webpage, 'description', fatal=False)
return {
'id': video_id,
'title': self._og_search_title(webpage),
'formats': formats,
'description': descr,
'thumbnail': config.find('STILL/STILL_BIG').text,
'description': description.strip() if description else None,
'thumbnail': xpath_text(config, 'STILL/STILL_BIG'),
'duration': int_or_none(xpath_text(config, 'DURATION')),
}

View File

@ -1,12 +1,10 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
determine_ext,
ExtractorError,
js_to_json,
)
@ -32,24 +30,22 @@ class FKTVIE(InfoExtractor):
'http://fernsehkritik.tv/folge-%s/play' % episode, episode)
title = clean_html(self._html_search_regex(
'<h3>([^<]+)</h3>', webpage, 'title'))
matches = re.search(
r'(?s)<video(?:(?!poster)[^>])+(?:poster="([^"]+)")?[^>]*>(.*)</video>',
webpage)
if matches is None:
raise ExtractorError('Unable to extract the video')
thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json)
poster, sources = matches.groups()
if poster is None:
self.report_warning('unable to extract thumbnail')
formats = []
for source in sources:
furl = source.get('src')
if furl:
formats.append({
'url': furl,
'format_id': determine_ext(furl),
})
self._sort_formats(formats)
urls = re.findall(r'<source[^>]+src="([^"]+)"', sources)
formats = [{
'url': furl,
'format_id': determine_ext(furl),
} for furl in urls]
return {
'id': episode,
'title': title,
'formats': formats,
'thumbnail': poster,
'thumbnail': thumbnail,
}

View File

@ -1,67 +1,93 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
find_xpath_attr,
sanitized_Request,
int_or_none,
qualities,
)
class FlickrIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
_VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/[\w\-_@]+/(?P<id>\d+)'
_TEST = {
'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b',
'md5': '164fe3fa6c22e18d448d4d5af2330f31',
'info_dict': {
'id': '5645318632',
'ext': 'mp4',
"description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",
"uploader_id": "forestwander-nature-pictures",
"title": "Dark Hollow Waterfalls"
'ext': 'mpg',
'description': 'Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.',
'title': 'Dark Hollow Waterfalls',
'duration': 19,
'timestamp': 1303528740,
'upload_date': '20110423',
'uploader_id': '10922353@N03',
'uploader': 'Forest Wander',
'comment_count': int,
'view_count': int,
'tags': list,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
_API_BASE_URL = 'https://api.flickr.com/services/rest?'
video_id = mobj.group('id')
video_uploader_id = mobj.group('uploader_id')
webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
req = sanitized_Request(webpage_url)
req.add_header(
'User-Agent',
# it needs a more recent version
'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20150101 Firefox/38.0 (Chrome)')
webpage = self._download_webpage(req, video_id)
secret = self._search_regex(r'secret"\s*:\s*"(\w+)"', webpage, 'secret')
first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
first_xml = self._download_xml(first_url, video_id, 'Downloading first data webpage')
node_id = find_xpath_attr(
first_xml, './/{http://video.yahoo.com/YEP/1.0/}Item', 'id',
'id').text
second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
second_xml = self._download_xml(second_url, video_id, 'Downloading second data webpage')
self.report_extraction(video_id)
stream = second_xml.find('.//STREAM')
if stream is None:
raise ExtractorError('Unable to extract video url')
video_url = stream.attrib['APP'] + stream.attrib['FULLPATH']
return {
'id': video_id,
'url': video_url,
'ext': 'mp4',
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'uploader_id': video_uploader_id,
def _call_api(self, method, video_id, api_key, note, secret=None):
query = {
'photo_id': video_id,
'method': 'flickr.%s' % method,
'api_key': api_key,
'format': 'json',
'nojsoncallback': 1,
}
if secret:
query['secret'] = secret
data = self._download_json(self._API_BASE_URL + compat_urllib_parse.urlencode(query), video_id, note)
if data['stat'] != 'ok':
raise ExtractorError(data['message'])
return data
def _real_extract(self, url):
video_id = self._match_id(url)
api_key = self._download_json(
'https://www.flickr.com/hermes_error_beacon.gne', video_id,
'Downloading api key')['site_key']
video_info = self._call_api(
'photos.getInfo', video_id, api_key, 'Downloading video info')['photo']
if video_info['media'] == 'video':
streams = self._call_api(
'video.getStreamInfo', video_id, api_key,
'Downloading streams info', video_info['secret'])['streams']
preference = qualities(
['288p', 'iphone_wifi', '100', '300', '700', '360p', 'appletv', '720p', '1080p', 'orig'])
formats = []
for stream in streams['stream']:
stream_type = str(stream.get('type'))
formats.append({
'format_id': stream_type,
'url': stream['_content'],
'preference': preference(stream_type),
})
self._sort_formats(formats)
owner = video_info.get('owner', {})
return {
'id': video_id,
'title': video_info['title']['_content'],
'description': video_info.get('description', {}).get('_content'),
'formats': formats,
'timestamp': int_or_none(video_info.get('dateuploaded')),
'duration': int_or_none(video_info.get('video', {}).get('duration')),
'uploader_id': owner.get('nsid'),
'uploader': owner.get('realname'),
'comment_count': int_or_none(video_info.get('comments', {}).get('_content')),
'view_count': int_or_none(video_info.get('views')),
'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])]
}
else:
raise ExtractorError('not a video', expected=True)

View File

@ -2,14 +2,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
int_or_none,
)
from .amp import AMPIE
class FoxNewsIE(InfoExtractor):
class FoxNewsIE(AMPIE):
IE_DESC = 'Fox News and Fox Business Video'
_VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
_TESTS = [
@ -20,10 +16,10 @@ class FoxNewsIE(InfoExtractor):
'id': '3937480',
'ext': 'flv',
'title': 'Frozen in Time',
'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler',
'description': '16-year-old girl is size of toddler',
'duration': 265,
'timestamp': 1304411491,
'upload_date': '20110503',
# 'timestamp': 1304411491,
# 'upload_date': '20110503',
'thumbnail': 're:^https?://.*\.jpg$',
},
},
@ -34,10 +30,10 @@ class FoxNewsIE(InfoExtractor):
'id': '3922535568001',
'ext': 'mp4',
'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
'description': "Congressman discusses the president's executive action",
'description': "Congressman discusses president's plan",
'duration': 292,
'timestamp': 1417662047,
'upload_date': '20141204',
# 'timestamp': 1417662047,
# 'upload_date': '20141204',
'thumbnail': 're:^https?://.*\.jpg$',
},
},
@ -52,52 +48,9 @@ class FoxNewsIE(InfoExtractor):
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
host = mobj.group('host')
host, video_id = re.match(self._VALID_URL, url).groups()
video = self._download_json(
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id), video_id)
item = video['channel']['item']
title = item['title']
description = item['description']
timestamp = parse_iso8601(item['dc-date'])
media_group = item['media-group']
duration = None
formats = []
for media in media_group['media-content']:
attributes = media['@attributes']
video_url = attributes['url']
if video_url.endswith('.f4m'):
formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id))
elif video_url.endswith('.m3u8'):
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv'))
elif not video_url.endswith('.smil'):
duration = int_or_none(attributes.get('duration'))
formats.append({
'url': video_url,
'format_id': media['media-category']['@attributes']['label'],
'preference': 1,
'vbr': int_or_none(attributes.get('bitrate')),
'filesize': int_or_none(attributes.get('fileSize'))
})
self._sort_formats(formats)
media_thumbnail = media_group['media-thumbnail']['@attributes']
thumbnails = [{
'url': media_thumbnail['url'],
'width': int_or_none(media_thumbnail.get('width')),
'height': int_or_none(media_thumbnail.get('height')),
}] if media_thumbnail else []
return {
'id': video_id,
'title': title,
'description': description,
'duration': duration,
'timestamp': timestamp,
'formats': formats,
'thumbnails': thumbnails,
}
info = self._extract_feed_info(
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))
info['id'] = video_id
return info

View File

@ -44,7 +44,6 @@ from .myvi import MyviIE
from .condenast import CondeNastIE
from .udn import UDNEmbedIE
from .senateisvp import SenateISVPIE
from .bliptv import BlipTVIE
from .svt import SVTIE
from .pornhub import PornHubIE
from .xhamster import XHamsterEmbedIE
@ -55,6 +54,8 @@ from .snagfilms import SnagFilmsEmbedIE
from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE
from .pladform import PladformIE
from .googledrive import GoogleDriveIE
from .jwplatform import JWPlatformIE
class GenericIE(InfoExtractor):
@ -1440,11 +1441,6 @@ class GenericIE(InfoExtractor):
'id': match.group('id')
}
# Look for embedded blip.tv player
bliptv_url = BlipTVIE._extract_url(webpage)
if bliptv_url:
return self.url_result(bliptv_url, 'BlipTV')
# Look for SVT player
svt_url = SVTIE._extract_url(webpage)
if svt_url:
@ -1769,6 +1765,11 @@ class GenericIE(InfoExtractor):
if nbc_sports_url:
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
# Look for Google Drive embeds
google_drive_url = GoogleDriveIE._extract_url(webpage)
if google_drive_url:
return self.url_result(google_drive_url, 'GoogleDrive')
# Look for UDN embeds
mobj = re.search(
r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
@ -1796,6 +1797,11 @@ class GenericIE(InfoExtractor):
if snagfilms_url:
return self.url_result(snagfilms_url)
# Look for JWPlatform embeds
jwplatform_url = JWPlatformIE._extract_url(webpage)
if jwplatform_url:
return self.url_result(jwplatform_url, 'JWPlatform')
# Look for ScreenwaveMedia embeds
mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
if mobj is not None:

View File

@ -0,0 +1,88 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
)
class GoogleDriveIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28})'
_TEST = {
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
'md5': '881f7700aec4f538571fa1e0eed4a7b6',
'info_dict': {
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
'ext': 'mp4',
'title': 'Big Buck Bunny.mp4',
'duration': 46,
}
}
_FORMATS_EXT = {
'5': 'flv',
'6': 'flv',
'13': '3gp',
'17': '3gp',
'18': 'mp4',
'22': 'mp4',
'34': 'flv',
'35': 'flv',
'36': '3gp',
'37': 'mp4',
'38': 'mp4',
'43': 'webm',
'44': 'webm',
'45': 'webm',
'46': 'webm',
'59': 'mp4',
}
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
webpage)
if mobj:
return 'https://drive.google.com/file/d/%s' % mobj.group('id')
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape')
reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
if reason:
raise ExtractorError(reason)
title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title')
duration = int_or_none(self._search_regex(
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None))
fmt_stream_map = self._search_regex(
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
formats = []
for fmt, fmt_stream in zip(fmt_list, fmt_stream_map):
fmt_id, fmt_url = fmt_stream.split('|')
resolution = fmt.split('/')[1]
width, height = resolution.split('x')
formats.append({
'url': fmt_url,
'format_id': fmt_id,
'resolution': resolution,
'width': int_or_none(width),
'height': int_or_none(height),
'ext': self._FORMATS_EXT[fmt_id],
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'thumbnail': self._og_search_thumbnail(webpage),
'duration': duration,
'formats': formats,
}

View File

@ -0,0 +1,55 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
xpath_element,
xpath_text,
int_or_none,
parse_duration,
)
class GPUTechConfIE(InfoExtractor):
_VALID_URL = r'https?://on-demand\.gputechconf\.com/gtc/2015/video/S(?P<id>\d+)\.html'
_TEST = {
'url': 'http://on-demand.gputechconf.com/gtc/2015/video/S5156.html',
'md5': 'a8862a00a0fd65b8b43acc5b8e33f798',
'info_dict': {
'id': '5156',
'ext': 'mp4',
'title': 'Coordinating More Than 3 Million CUDA Threads for Social Network Analysis',
'duration': 1219,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
root_path = self._search_regex(r'var\s+rootPath\s*=\s*"([^"]+)', webpage, 'root path', 'http://evt.dispeak.com/nvidia/events/gtc15/')
xml_file_id = self._search_regex(r'var\s+xmlFileId\s*=\s*"([^"]+)', webpage, 'xml file id')
doc = self._download_xml('%sxml/%s.xml' % (root_path, xml_file_id), video_id)
metadata = xpath_element(doc, 'metadata')
http_host = xpath_text(metadata, 'httpHost', 'http host', True)
mbr_videos = xpath_element(metadata, 'MBRVideos')
formats = []
for mbr_video in mbr_videos.findall('MBRVideo'):
stream_name = xpath_text(mbr_video, 'streamName')
if stream_name:
formats.append({
'url': 'http://%s/%s' % (http_host, stream_name.replace('mp4:', '')),
'tbr': int_or_none(xpath_text(mbr_video, 'bitrate')),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': xpath_text(metadata, 'title'),
'duration': parse_duration(xpath_text(metadata, 'endTime')),
'creator': xpath_text(metadata, 'speaker'),
'formats': formats,
}

View File

@ -205,8 +205,8 @@ class IqiyiIE(InfoExtractor):
def get_enc_key(self, swf_url, video_id):
# TODO: automatic key extraction
# last update at 2015-12-06 for Zombie::bite
enc_key = '3719f6a1da83ee0aee3488d8802d7696'[::-1]
# last update at 2015-12-18 for Zombie::bite
enc_key = '8b6b683780897eb8d9a48a02ccc4817d'[::-1]
return enc_key
def _real_extract(self, url):

View File

@ -0,0 +1,71 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
class JWPlatformIE(InfoExtractor):
_VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
_TEST = {
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
'info_dict': {
'id': 'nPripu9l',
'ext': 'mov',
'title': 'Big Buck Bunny Trailer',
'description': 'Big Buck Bunny is a short animated film by the Blender Institute. It is made using free and open source software.',
'upload_date': '20081127',
'timestamp': 1227796140,
}
}
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<script[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})',
webpage)
if mobj:
return mobj.group('url')
def _real_extract(self, url):
video_id = self._match_id(url)
json_data = self._download_json('http://content.jwplatform.com/feeds/%s.json' % video_id, video_id)
video_data = json_data['playlist'][0]
subtitles = {}
for track in video_data['tracks']:
if track['kind'] == 'captions':
subtitles[track['label']] = [{'url': self._proto_relative_url(track['file'])}]
formats = []
for source in video_data['sources']:
source_url = self._proto_relative_url(source['file'])
source_type = source.get('type') or ''
if source_type == 'application/vnd.apple.mpegurl':
m3u8_formats = self._extract_m3u8_formats(source_url, video_id, 'mp4', 'm3u8_native', fatal=None)
if m3u8_formats:
formats.extend(m3u8_formats)
elif source_type.startswith('audio'):
formats.append({
'url': source_url,
'vcodec': 'none',
})
else:
formats.append({
'url': source_url,
'width': int_or_none(source.get('width')),
'height': int_or_none(source.get('height')),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': video_data['title'],
'description': video_data.get('description'),
'thumbnail': self._proto_relative_url(video_data.get('image')),
'timestamp': int_or_none(video_data.get('pubdate')),
'subtitles': subtitles,
'formats': formats,
}

View File

@ -0,0 +1,32 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class MakerTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
_TEST = {
'url': 'http://www.maker.tv/video/Fh3QgymL9gsc',
'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
'info_dict': {
'id': 'Fh3QgymL9gsc',
'ext': 'mp4',
'title': 'Maze Runner: The Scorch Trials Official Movie Review',
'description': 'md5:11ff3362d7ef1d679fdb649f6413975a',
'upload_date': '20150918',
'timestamp': 1442549540,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
jwplatform_id = self._search_regex(r'jw_?id="([^"]+)"', webpage, 'jwplatform id')
return {
'_type': 'url_transparent',
'id': video_id,
'url': 'jwplatform:%s' % jwplatform_id,
'ie_key': 'JWPlatform',
}

View File

@ -3,10 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_HTTPError,
)
from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
find_xpath_attr,
@ -189,7 +186,7 @@ class NBCNewsIE(InfoExtractor):
'title': info.find('headline').text,
'ext': 'flv',
'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
'description': compat_str(info.find('caption').text),
'description': info.find('caption').text,
'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
}
else:

View File

@ -88,10 +88,10 @@ class NDRIE(NDRBaseIE):
'embedURL', webpage, 'embed URL', fatal=True)
description = self._search_regex(
r'<p[^>]+itemprop="description">([^<]+)</p>',
webpage, 'description', fatal=False)
webpage, 'description', default=None) or self._og_search_description(webpage)
timestamp = parse_iso8601(
self._search_regex(
r'<span itemprop="datePublished" content="([^"]+)">',
r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="([^"]+)"',
webpage, 'upload date', fatal=False))
return {
'_type': 'url_transparent',

View File

@ -9,6 +9,7 @@ from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse,
compat_urlparse,
)
from ..utils import (
clean_html,
@ -82,14 +83,21 @@ class NocoIE(InfoExtractor):
if 'erreur' in login:
raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
@staticmethod
def _ts():
return int(time.time() * 1000)
def _call_api(self, path, video_id, note, sub_lang=None):
ts = compat_str(int(time.time() * 1000))
ts = compat_str(self._ts() + self._ts_offset)
tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
url = self._API_URL_TEMPLATE % (path, ts, tk)
if sub_lang:
url += self._SUB_LANG_TEMPLATE % sub_lang
resp = self._download_json(url, video_id, note)
request = sanitized_Request(url)
request.add_header('Referer', self._referer)
resp = self._download_json(request, video_id, note)
if isinstance(resp, dict) and resp.get('error'):
self._raise_error(resp['error'], resp['description'])
@ -102,8 +110,22 @@ class NocoIE(InfoExtractor):
expected=True)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
# Timestamp adjustment offset between server time and local time
# must be calculated in order to use timestamps closest to server's
# in all API requests (see https://github.com/rg3/youtube-dl/issues/7864)
webpage = self._download_webpage(url, video_id)
player_url = self._search_regex(
r'(["\'])(?P<player>https?://noco\.tv/(?:[^/]+/)+NocoPlayer.+?\.swf.*?)\1',
webpage, 'noco player', group='player',
default='http://noco.tv/cdata/js/player/NocoPlayer-v1.2.40.swf')
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(player_url).query)
ts = int_or_none(qs.get('ts', [None])[0])
self._ts_offset = ts - self._ts() if ts else 0
self._referer = player_url
medias = self._call_api(
'shows/%s/medias' % video_id,
@ -155,8 +177,8 @@ class NocoIE(InfoExtractor):
'format_id': format_id_extended,
'width': int_or_none(fmt.get('res_width')),
'height': int_or_none(fmt.get('res_lines')),
'abr': int_or_none(fmt.get('audiobitrate')),
'vbr': int_or_none(fmt.get('videobitrate')),
'abr': int_or_none(fmt.get('audiobitrate'), 1000),
'vbr': int_or_none(fmt.get('videobitrate'), 1000),
'filesize': int_or_none(fmt.get('filesize')),
'format_note': qualities[format_id].get('quality_name'),
'quality': qualities[format_id].get('priority'),

View File

@ -1,7 +1,10 @@
# encoding: utf-8
from __future__ import unicode_literals
from .brightcove import BrightcoveLegacyIE
from .brightcove import (
BrightcoveLegacyIE,
BrightcoveNewIE,
)
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@ -23,9 +26,12 @@ class NownessBaseIE(InfoExtractor):
note='Downloading player JavaScript',
errnote='Unable to download player JavaScript')
bc_url = BrightcoveLegacyIE._extract_brightcove_url(player_code)
if bc_url is None:
raise ExtractorError('Could not find player definition')
return self.url_result(bc_url, 'BrightcoveLegacy')
if bc_url:
return self.url_result(bc_url, BrightcoveLegacyIE.ie_key())
bc_url = BrightcoveNewIE._extract_url(player_code)
if bc_url:
return self.url_result(bc_url, BrightcoveNewIE.ie_key())
raise ExtractorError('Could not find player definition')
elif source == 'vimeo':
return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
elif source == 'youtube':

View File

@ -16,165 +16,165 @@ from ..utils import (
class PBSIE(InfoExtractor):
_STATIONS = (
('video.pbs.org', 'PBS: Public Broadcasting Service'), # http://www.pbs.org/
('video.aptv.org', 'APT - Alabama Public Television (WBIQ)'), # http://aptv.org/
('video.gpb.org', 'GPB/Georgia Public Broadcasting (WGTV)'), # http://www.gpb.org/
('video.mpbonline.org', 'Mississippi Public Broadcasting (WMPN)'), # http://www.mpbonline.org
('video.wnpt.org', 'Nashville Public Television (WNPT)'), # http://www.wnpt.org
('video.wfsu.org', 'WFSU-TV (WFSU)'), # http://wfsu.org/
('video.wsre.org', 'WSRE (WSRE)'), # http://www.wsre.org
('video.wtcitv.org', 'WTCI (WTCI)'), # http://www.wtcitv.org
('video.pba.org', 'WPBA/Channel 30 (WPBA)'), # http://pba.org/
('video.alaskapublic.org', 'Alaska Public Media (KAKM)'), # http://alaskapublic.org/kakm
# ('kuac.org', 'KUAC (KUAC)'), # http://kuac.org/kuac-tv/
# ('ktoo.org', '360 North (KTOO)'), # http://www.ktoo.org/
# ('azpm.org', 'KUAT 6 (KUAT)'), # http://www.azpm.org/
('video.azpbs.org', 'Arizona PBS (KAET)'), # http://www.azpbs.org
('portal.knme.org', 'KNME-TV/Channel 5 (KNME)'), # http://www.newmexicopbs.org/
('video.vegaspbs.org', 'Vegas PBS (KLVX)'), # http://vegaspbs.org/
('watch.aetn.org', 'AETN/ARKANSAS ETV NETWORK (KETS)'), # http://www.aetn.org/
('video.ket.org', 'KET (WKLE)'), # http://www.ket.org/
('video.wkno.org', 'WKNO/Channel 10 (WKNO)'), # http://www.wkno.org/
('video.lpb.org', 'LPB/LOUISIANA PUBLIC BROADCASTING (WLPB)'), # http://www.lpb.org/
('videos.oeta.tv', 'OETA (KETA)'), # http://www.oeta.tv
('video.optv.org', 'Ozarks Public Television (KOZK)'), # http://www.optv.org/
('watch.wsiu.org', 'WSIU Public Broadcasting (WSIU)'), # http://www.wsiu.org/
('video.keet.org', 'KEET TV (KEET)'), # http://www.keet.org
('pbs.kixe.org', 'KIXE/Channel 9 (KIXE)'), # http://kixe.org/
('video.kpbs.org', 'KPBS San Diego (KPBS)'), # http://www.kpbs.org/
('video.kqed.org', 'KQED (KQED)'), # http://www.kqed.org
('vids.kvie.org', 'KVIE Public Television (KVIE)'), # http://www.kvie.org
('video.pbssocal.org', 'PBS SoCal/KOCE (KOCE)'), # http://www.pbssocal.org/
('video.valleypbs.org', 'ValleyPBS (KVPT)'), # http://www.valleypbs.org/
('video.cptv.org', 'CONNECTICUT PUBLIC TELEVISION (WEDH)'), # http://cptv.org
('watch.knpb.org', 'KNPB Channel 5 (KNPB)'), # http://www.knpb.org/
('video.soptv.org', 'SOPTV (KSYS)'), # http://www.soptv.org
# ('klcs.org', 'KLCS/Channel 58 (KLCS)'), # http://www.klcs.org
# ('krcb.org', 'KRCB Television & Radio (KRCB)'), # http://www.krcb.org
# ('kvcr.org', 'KVCR TV/DT/FM :: Vision for the Future (KVCR)'), # http://kvcr.org
('video.rmpbs.org', 'Rocky Mountain PBS (KRMA)'), # http://www.rmpbs.org
('video.kenw.org', 'KENW-TV3 (KENW)'), # http://www.kenw.org
('video.kued.org', 'KUED Channel 7 (KUED)'), # http://www.kued.org
('video.wyomingpbs.org', 'Wyoming PBS (KCWC)'), # http://www.wyomingpbs.org
('video.cpt12.org', 'Colorado Public Television / KBDI 12 (KBDI)'), # http://www.cpt12.org/
('video.kbyueleven.org', 'KBYU-TV (KBYU)'), # http://www.kbyutv.org/
('video.thirteen.org', 'Thirteen/WNET New York (WNET)'), # http://www.thirteen.org
('video.wgbh.org', 'WGBH/Channel 2 (WGBH)'), # http://wgbh.org
('video.wgby.org', 'WGBY (WGBY)'), # http://www.wgby.org
('watch.njtvonline.org', 'NJTV Public Media NJ (WNJT)'), # http://www.njtvonline.org/
# ('ripbs.org', 'Rhode Island PBS (WSBE)'), # http://www.ripbs.org/home/
('watch.wliw.org', 'WLIW21 (WLIW)'), # http://www.wliw.org/
('video.mpt.tv', 'mpt/Maryland Public Television (WMPB)'), # http://www.mpt.org
('watch.weta.org', 'WETA Television and Radio (WETA)'), # http://www.weta.org
('video.whyy.org', 'WHYY (WHYY)'), # http://www.whyy.org
('video.wlvt.org', 'PBS 39 (WLVT)'), # http://www.wlvt.org/
('video.wvpt.net', 'WVPT - Your Source for PBS and More! (WVPT)'), # http://www.wvpt.net
('video.whut.org', 'Howard University Television (WHUT)'), # http://www.whut.org
('video.wedu.org', 'WEDU PBS (WEDU)'), # http://www.wedu.org
('video.wgcu.org', 'WGCU Public Media (WGCU)'), # http://www.wgcu.org/
# ('wjct.org', 'WJCT Public Broadcasting (WJCT)'), # http://www.wjct.org
('video.wpbt2.org', 'WPBT2 (WPBT)'), # http://www.wpbt2.org
('video.wucftv.org', 'WUCF TV (WUCF)'), # http://wucftv.org
('video.wuft.org', 'WUFT/Channel 5 (WUFT)'), # http://www.wuft.org
('watch.wxel.org', 'WXEL/Channel 42 (WXEL)'), # http://www.wxel.org/home/
('video.wlrn.org', 'WLRN/Channel 17 (WLRN)'), # http://www.wlrn.org/
('video.wusf.usf.edu', 'WUSF Public Broadcasting (WUSF)'), # http://wusf.org/
('video.scetv.org', 'ETV (WRLK)'), # http://www.scetv.org
('video.unctv.org', 'UNC-TV (WUNC)'), # http://www.unctv.org/
# ('pbsguam.org', 'PBS Guam (KGTF)'), # http://www.pbsguam.org/
('video.pbshawaii.org', 'PBS Hawaii - Oceanic Cable Channel 10 (KHET)'), # http://www.pbshawaii.org/
('video.idahoptv.org', 'Idaho Public Television (KAID)'), # http://idahoptv.org
('video.ksps.org', 'KSPS (KSPS)'), # http://www.ksps.org/home/
('watch.opb.org', 'OPB (KOPB)'), # http://www.opb.org
('watch.nwptv.org', 'KWSU/Channel 10 & KTNW/Channel 31 (KWSU)'), # http://www.kwsu.org
('video.will.illinois.edu', 'WILL-TV (WILL)'), # http://will.illinois.edu/
('video.networkknowledge.tv', 'Network Knowledge - WSEC/Springfield (WSEC)'), # http://www.wsec.tv
('video.wttw.com', 'WTTW11 (WTTW)'), # http://www.wttw.com/
# ('wtvp.org', 'WTVP & WTVP.org, Public Media for Central Illinois (WTVP)'), # http://www.wtvp.org/
('video.iptv.org', 'Iowa Public Television/IPTV (KDIN)'), # http://www.iptv.org/
('video.ninenet.org', 'Nine Network (KETC)'), # http://www.ninenet.org
('video.wfwa.org', 'PBS39 Fort Wayne (WFWA)'), # http://wfwa.org/
('video.wfyi.org', 'WFYI Indianapolis (WFYI)'), # http://www.wfyi.org
('video.mptv.org', 'Milwaukee Public Television (WMVS)'), # http://www.mptv.org
('video.wnin.org', 'WNIN (WNIN)'), # http://www.wnin.org/
('video.wnit.org', 'WNIT Public Television (WNIT)'), # http://www.wnit.org/
('video.wpt.org', 'WPT (WPNE)'), # http://www.wpt.org/
('video.wvut.org', 'WVUT/Channel 22 (WVUT)'), # http://wvut.org/
('video.weiu.net', 'WEIU/Channel 51 (WEIU)'), # http://www.weiu.net
('video.wqpt.org', 'WQPT-TV (WQPT)'), # http://www.wqpt.org
('video.wycc.org', 'WYCC PBS Chicago (WYCC)'), # http://www.wycc.org
# ('lakeshorepublicmedia.org', 'Lakeshore Public Television (WYIN)'), # http://lakeshorepublicmedia.org/
('video.wipb.org', 'WIPB-TV (WIPB)'), # http://wipb.org
('video.indianapublicmedia.org', 'WTIU (WTIU)'), # http://indianapublicmedia.org/tv/
('watch.cetconnect.org', 'CET (WCET)'), # http://www.cetconnect.org
('video.thinktv.org', 'ThinkTVNetwork (WPTD)'), # http://www.thinktv.org
('video.wbgu.org', 'WBGU-TV (WBGU)'), # http://wbgu.org
('video.wgvu.org', 'WGVU TV (WGVU)'), # http://www.wgvu.org/
('video.netnebraska.org', 'NET1 (KUON)'), # http://netnebraska.org
('video.pioneer.org', 'Pioneer Public Television (KWCM)'), # http://www.pioneer.org
('watch.sdpb.org', 'SDPB Television (KUSD)'), # http://www.sdpb.org
('video.tpt.org', 'TPT (KTCA)'), # http://www.tpt.org
('watch.ksmq.org', 'KSMQ (KSMQ)'), # http://www.ksmq.org/
('watch.kpts.org', 'KPTS/Channel 8 (KPTS)'), # http://www.kpts.org/
('watch.ktwu.org', 'KTWU/Channel 11 (KTWU)'), # http://ktwu.org
# ('shptv.org', 'Smoky Hills Public Television (KOOD)'), # http://www.shptv.org
# ('kcpt.org', 'KCPT Kansas City Public Television (KCPT)'), # http://kcpt.org/
# ('blueridgepbs.org', 'Blue Ridge PBS (WBRA)'), # http://www.blueridgepbs.org/
('watch.easttennesseepbs.org', 'East Tennessee PBS (WSJK)'), # http://easttennesseepbs.org
('video.wcte.tv', 'WCTE-TV (WCTE)'), # http://www.wcte.org
('video.wljt.org', 'WLJT, Channel 11 (WLJT)'), # http://wljt.org/
('video.wosu.org', 'WOSU TV (WOSU)'), # http://wosu.org/
('video.woub.org', 'WOUB/WOUC (WOUB)'), # http://woub.org/tv/index.php?section=5
('video.wvpublic.org', 'WVPB (WVPB)'), # http://wvpublic.org/
('video.wkyupbs.org', 'WKYU-PBS (WKYU)'), # http://www.wkyupbs.org
# ('wyes.org', 'WYES-TV/New Orleans (WYES)'), # http://www.wyes.org
('video.kera.org', 'KERA 13 (KERA)'), # http://www.kera.org/
('video.mpbn.net', 'MPBN (WCBB)'), # http://www.mpbn.net/
('video.mountainlake.org', 'Mountain Lake PBS (WCFE)'), # http://www.mountainlake.org/
('video.nhptv.org', 'NHPTV (WENH)'), # http://nhptv.org/
('video.vpt.org', 'Vermont PBS (WETK)'), # http://www.vpt.org
('video.witf.org', 'witf (WITF)'), # http://www.witf.org
('watch.wqed.org', 'WQED Multimedia (WQED)'), # http://www.wqed.org/
('video.wmht.org', 'WMHT Educational Telecommunications (WMHT)'), # http://www.wmht.org/home/
('video.deltabroadcasting.org', 'Q-TV (WDCQ)'), # http://www.deltabroadcasting.org
('video.dptv.org', 'WTVS Detroit Public TV (WTVS)'), # http://www.dptv.org/
('video.wcmu.org', 'CMU Public Television (WCMU)'), # http://www.wcmu.org
('video.wkar.org', 'WKAR-TV (WKAR)'), # http://wkar.org/
('wnmuvideo.nmu.edu', 'WNMU-TV Public TV 13 (WNMU)'), # http://wnmutv.nmu.edu
('video.wdse.org', 'WDSE - WRPT (WDSE)'), # http://www.wdse.org/
('video.wgte.org', 'WGTE TV (WGTE)'), # http://www.wgte.org
('video.lptv.org', 'Lakeland Public Television (KAWE)'), # http://www.lakelandptv.org
# ('prairiepublic.org', 'PRAIRIE PUBLIC (KFME)'), # http://www.prairiepublic.org/
('video.kmos.org', 'KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS)'), # http://www.kmos.org/
('watch.montanapbs.org', 'MontanaPBS (KUSM)'), # http://montanapbs.org
('video.krwg.org', 'KRWG/Channel 22 (KRWG)'), # http://www.krwg.org
('video.kacvtv.org', 'KACV (KACV)'), # http://www.panhandlepbs.org/home/
('video.kcostv.org', 'KCOS/Channel 13 (KCOS)'), # www.kcostv.org
('video.wcny.org', 'WCNY/Channel 24 (WCNY)'), # http://www.wcny.org
('video.wned.org', 'WNED (WNED)'), # http://www.wned.org/
('watch.wpbstv.org', 'WPBS (WPBS)'), # http://www.wpbstv.org
('video.wskg.org', 'WSKG Public TV (WSKG)'), # http://wskg.org
('video.wxxi.org', 'WXXI (WXXI)'), # http://wxxi.org
('video.wpsu.org', 'WPSU (WPSU)'), # http://www.wpsu.org
# ('wqln.org', 'WQLN/Channel 54 (WQLN)'), # http://www.wqln.org
('on-demand.wvia.org', 'WVIA Public Media Studios (WVIA)'), # http://www.wvia.org/
('video.wtvi.org', 'WTVI (WTVI)'), # http://www.wtvi.org/
# ('whro.org', 'WHRO (WHRO)'), # http://whro.org
('video.westernreservepublicmedia.org', 'Western Reserve PBS (WNEO)'), # http://www.WesternReservePublicMedia.org/
('video.ideastream.org', 'WVIZ/PBS ideastream (WVIZ)'), # http://www.wviz.org/
('video.kcts9.org', 'KCTS 9 (KCTS)'), # http://kcts9.org/
('video.basinpbs.org', 'Basin PBS (KPBT)'), # http://www.basinpbs.org
('video.houstonpbs.org', 'KUHT / Channel 8 (KUHT)'), # http://www.houstonpublicmedia.org/
# ('tamu.edu', 'KAMU - TV (KAMU)'), # http://KAMU.tamu.edu
# ('kedt.org', 'KEDT/Channel 16 (KEDT)'), # http://www.kedt.org
('video.klrn.org', 'KLRN (KLRN)'), # http://www.klrn.org
('video.klru.tv', 'KLRU (KLRU)'), # http://www.klru.org
# ('kmbh.org', 'KMBH-TV (KMBH)'), # http://www.kmbh.org
# ('knct.org', 'KNCT (KNCT)'), # http://www.knct.org
# ('ktxt.org', 'KTTZ-TV (KTXT)'), # http://www.ktxt.org
('video.wtjx.org', 'WTJX Channel 12 (WTJX)'), # http://www.wtjx.org/
('video.ideastations.org', 'WCVE PBS (WCVE)'), # http://ideastations.org/
('video.kbtc.org', 'KBTC Public Television (KBTC)'), # http://kbtc.org
(r'(?:video|www)\.pbs\.org', 'PBS: Public Broadcasting Service'), # http://www.pbs.org/
(r'video\.aptv\.org', 'APT - Alabama Public Television (WBIQ)'), # http://aptv.org/
(r'video\.gpb\.org', 'GPB/Georgia Public Broadcasting (WGTV)'), # http://www.gpb.org/
(r'video\.mpbonline\.org', 'Mississippi Public Broadcasting (WMPN)'), # http://www.mpbonline.org
(r'video\.wnpt\.org', 'Nashville Public Television (WNPT)'), # http://www.wnpt.org
(r'video\.wfsu\.org', 'WFSU-TV (WFSU)'), # http://wfsu.org/
(r'video\.wsre\.org', 'WSRE (WSRE)'), # http://www.wsre.org
(r'video\.wtcitv\.org', 'WTCI (WTCI)'), # http://www.wtcitv.org
(r'video\.pba\.org', 'WPBA/Channel 30 (WPBA)'), # http://pba.org/
(r'video\.alaskapublic\.org', 'Alaska Public Media (KAKM)'), # http://alaskapublic.org/kakm
# (r'kuac\.org', 'KUAC (KUAC)'), # http://kuac.org/kuac-tv/
# (r'ktoo\.org', '360 North (KTOO)'), # http://www.ktoo.org/
# (r'azpm\.org', 'KUAT 6 (KUAT)'), # http://www.azpm.org/
(r'video\.azpbs\.org', 'Arizona PBS (KAET)'), # http://www.azpbs.org
(r'portal\.knme\.org', 'KNME-TV/Channel 5 (KNME)'), # http://www.newmexicopbs.org/
(r'video\.vegaspbs\.org', 'Vegas PBS (KLVX)'), # http://vegaspbs.org/
(r'watch\.aetn\.org', 'AETN/ARKANSAS ETV NETWORK (KETS)'), # http://www.aetn.org/
(r'video\.ket\.org', 'KET (WKLE)'), # http://www.ket.org/
(r'video\.wkno\.org', 'WKNO/Channel 10 (WKNO)'), # http://www.wkno.org/
(r'video\.lpb\.org', 'LPB/LOUISIANA PUBLIC BROADCASTING (WLPB)'), # http://www.lpb.org/
(r'videos\.oeta\.tv', 'OETA (KETA)'), # http://www.oeta.tv
(r'video\.optv\.org', 'Ozarks Public Television (KOZK)'), # http://www.optv.org/
(r'watch\.wsiu\.org', 'WSIU Public Broadcasting (WSIU)'), # http://www.wsiu.org/
(r'video\.keet\.org', 'KEET TV (KEET)'), # http://www.keet.org
(r'pbs\.kixe\.org', 'KIXE/Channel 9 (KIXE)'), # http://kixe.org/
(r'video\.kpbs\.org', 'KPBS San Diego (KPBS)'), # http://www.kpbs.org/
(r'video\.kqed\.org', 'KQED (KQED)'), # http://www.kqed.org
(r'vids\.kvie\.org', 'KVIE Public Television (KVIE)'), # http://www.kvie.org
(r'video\.pbssocal\.org', 'PBS SoCal/KOCE (KOCE)'), # http://www.pbssocal.org/
(r'video\.valleypbs\.org', 'ValleyPBS (KVPT)'), # http://www.valleypbs.org/
(r'video\.cptv\.org', 'CONNECTICUT PUBLIC TELEVISION (WEDH)'), # http://cptv.org
(r'watch\.knpb\.org', 'KNPB Channel 5 (KNPB)'), # http://www.knpb.org/
(r'video\.soptv\.org', 'SOPTV (KSYS)'), # http://www.soptv.org
# (r'klcs\.org', 'KLCS/Channel 58 (KLCS)'), # http://www.klcs.org
# (r'krcb\.org', 'KRCB Television & Radio (KRCB)'), # http://www.krcb.org
# (r'kvcr\.org', 'KVCR TV/DT/FM :: Vision for the Future (KVCR)'), # http://kvcr.org
(r'video\.rmpbs\.org', 'Rocky Mountain PBS (KRMA)'), # http://www.rmpbs.org
(r'video\.kenw\.org', 'KENW-TV3 (KENW)'), # http://www.kenw.org
(r'video\.kued\.org', 'KUED Channel 7 (KUED)'), # http://www.kued.org
(r'video\.wyomingpbs\.org', 'Wyoming PBS (KCWC)'), # http://www.wyomingpbs.org
(r'video\.cpt12\.org', 'Colorado Public Television / KBDI 12 (KBDI)'), # http://www.cpt12.org/
(r'video\.kbyueleven\.org', 'KBYU-TV (KBYU)'), # http://www.kbyutv.org/
(r'video\.thirteen\.org', 'Thirteen/WNET New York (WNET)'), # http://www.thirteen.org
(r'video\.wgbh\.org', 'WGBH/Channel 2 (WGBH)'), # http://wgbh.org
(r'video\.wgby\.org', 'WGBY (WGBY)'), # http://www.wgby.org
(r'watch\.njtvonline\.org', 'NJTV Public Media NJ (WNJT)'), # http://www.njtvonline.org/
# (r'ripbs\.org', 'Rhode Island PBS (WSBE)'), # http://www.ripbs.org/home/
(r'watch\.wliw\.org', 'WLIW21 (WLIW)'), # http://www.wliw.org/
(r'video\.mpt\.tv', 'mpt/Maryland Public Television (WMPB)'), # http://www.mpt.org
(r'watch\.weta\.org', 'WETA Television and Radio (WETA)'), # http://www.weta.org
(r'video\.whyy\.org', 'WHYY (WHYY)'), # http://www.whyy.org
(r'video\.wlvt\.org', 'PBS 39 (WLVT)'), # http://www.wlvt.org/
(r'video\.wvpt\.net', 'WVPT - Your Source for PBS and More! (WVPT)'), # http://www.wvpt.net
(r'video\.whut\.org', 'Howard University Television (WHUT)'), # http://www.whut.org
(r'video\.wedu\.org', 'WEDU PBS (WEDU)'), # http://www.wedu.org
(r'video\.wgcu\.org', 'WGCU Public Media (WGCU)'), # http://www.wgcu.org/
# (r'wjct\.org', 'WJCT Public Broadcasting (WJCT)'), # http://www.wjct.org
(r'video\.wpbt2\.org', 'WPBT2 (WPBT)'), # http://www.wpbt2.org
(r'video\.wucftv\.org', 'WUCF TV (WUCF)'), # http://wucftv.org
(r'video\.wuft\.org', 'WUFT/Channel 5 (WUFT)'), # http://www.wuft.org
(r'watch\.wxel\.org', 'WXEL/Channel 42 (WXEL)'), # http://www.wxel.org/home/
(r'video\.wlrn\.org', 'WLRN/Channel 17 (WLRN)'), # http://www.wlrn.org/
(r'video\.wusf\.usf\.edu', 'WUSF Public Broadcasting (WUSF)'), # http://wusf.org/
(r'video\.scetv\.org', 'ETV (WRLK)'), # http://www.scetv.org
(r'video\.unctv\.org', 'UNC-TV (WUNC)'), # http://www.unctv.org/
# (r'pbsguam\.org', 'PBS Guam (KGTF)'), # http://www.pbsguam.org/
(r'video\.pbshawaii\.org', 'PBS Hawaii - Oceanic Cable Channel 10 (KHET)'), # http://www.pbshawaii.org/
(r'video\.idahoptv\.org', 'Idaho Public Television (KAID)'), # http://idahoptv.org
(r'video\.ksps\.org', 'KSPS (KSPS)'), # http://www.ksps.org/home/
(r'watch\.opb\.org', 'OPB (KOPB)'), # http://www.opb.org
(r'watch\.nwptv\.org', 'KWSU/Channel 10 & KTNW/Channel 31 (KWSU)'), # http://www.kwsu.org
(r'video\.will\.illinois\.edu', 'WILL-TV (WILL)'), # http://will.illinois.edu/
(r'video\.networkknowledge\.tv', 'Network Knowledge - WSEC/Springfield (WSEC)'), # http://www.wsec.tv
(r'video\.wttw\.com', 'WTTW11 (WTTW)'), # http://www.wttw.com/
# (r'wtvp\.org', 'WTVP & WTVP.org, Public Media for Central Illinois (WTVP)'), # http://www.wtvp.org/
(r'video\.iptv\.org', 'Iowa Public Television/IPTV (KDIN)'), # http://www.iptv.org/
(r'video\.ninenet\.org', 'Nine Network (KETC)'), # http://www.ninenet.org
(r'video\.wfwa\.org', 'PBS39 Fort Wayne (WFWA)'), # http://wfwa.org/
(r'video\.wfyi\.org', 'WFYI Indianapolis (WFYI)'), # http://www.wfyi.org
(r'video\.mptv\.org', 'Milwaukee Public Television (WMVS)'), # http://www.mptv.org
(r'video\.wnin\.org', 'WNIN (WNIN)'), # http://www.wnin.org/
(r'video\.wnit\.org', 'WNIT Public Television (WNIT)'), # http://www.wnit.org/
(r'video\.wpt\.org', 'WPT (WPNE)'), # http://www.wpt.org/
(r'video\.wvut\.org', 'WVUT/Channel 22 (WVUT)'), # http://wvut.org/
(r'video\.weiu\.net', 'WEIU/Channel 51 (WEIU)'), # http://www.weiu.net
(r'video\.wqpt\.org', 'WQPT-TV (WQPT)'), # http://www.wqpt.org
(r'video\.wycc\.org', 'WYCC PBS Chicago (WYCC)'), # http://www.wycc.org
# (r'lakeshorepublicmedia\.org', 'Lakeshore Public Television (WYIN)'), # http://lakeshorepublicmedia.org/
(r'video\.wipb\.org', 'WIPB-TV (WIPB)'), # http://wipb.org
(r'video\.indianapublicmedia\.org', 'WTIU (WTIU)'), # http://indianapublicmedia.org/tv/
(r'watch\.cetconnect\.org', 'CET (WCET)'), # http://www.cetconnect.org
(r'video\.thinktv\.org', 'ThinkTVNetwork (WPTD)'), # http://www.thinktv.org
(r'video\.wbgu\.org', 'WBGU-TV (WBGU)'), # http://wbgu.org
(r'video\.wgvu\.org', 'WGVU TV (WGVU)'), # http://www.wgvu.org/
(r'video\.netnebraska\.org', 'NET1 (KUON)'), # http://netnebraska.org
(r'video\.pioneer\.org', 'Pioneer Public Television (KWCM)'), # http://www.pioneer.org
(r'watch\.sdpb\.org', 'SDPB Television (KUSD)'), # http://www.sdpb.org
(r'video\.tpt\.org', 'TPT (KTCA)'), # http://www.tpt.org
(r'watch\.ksmq\.org', 'KSMQ (KSMQ)'), # http://www.ksmq.org/
(r'watch\.kpts\.org', 'KPTS/Channel 8 (KPTS)'), # http://www.kpts.org/
(r'watch\.ktwu\.org', 'KTWU/Channel 11 (KTWU)'), # http://ktwu.org
# (r'shptv\.org', 'Smoky Hills Public Television (KOOD)'), # http://www.shptv.org
# (r'kcpt\.org', 'KCPT Kansas City Public Television (KCPT)'), # http://kcpt.org/
# (r'blueridgepbs\.org', 'Blue Ridge PBS (WBRA)'), # http://www.blueridgepbs.org/
(r'watch\.easttennesseepbs\.org', 'East Tennessee PBS (WSJK)'), # http://easttennesseepbs.org
(r'video\.wcte\.tv', 'WCTE-TV (WCTE)'), # http://www.wcte.org
(r'video\.wljt\.org', 'WLJT, Channel 11 (WLJT)'), # http://wljt.org/
(r'video\.wosu\.org', 'WOSU TV (WOSU)'), # http://wosu.org/
(r'video\.woub\.org', 'WOUB/WOUC (WOUB)'), # http://woub.org/tv/index.php?section=5
(r'video\.wvpublic\.org', 'WVPB (WVPB)'), # http://wvpublic.org/
(r'video\.wkyupbs\.org', 'WKYU-PBS (WKYU)'), # http://www.wkyupbs.org
# (r'wyes\.org', 'WYES-TV/New Orleans (WYES)'), # http://www.wyes.org
(r'video\.kera\.org', 'KERA 13 (KERA)'), # http://www.kera.org/
(r'video\.mpbn\.net', 'MPBN (WCBB)'), # http://www.mpbn.net/
(r'video\.mountainlake\.org', 'Mountain Lake PBS (WCFE)'), # http://www.mountainlake.org/
(r'video\.nhptv\.org', 'NHPTV (WENH)'), # http://nhptv.org/
(r'video\.vpt\.org', 'Vermont PBS (WETK)'), # http://www.vpt.org
(r'video\.witf\.org', 'witf (WITF)'), # http://www.witf.org
(r'watch\.wqed\.org', 'WQED Multimedia (WQED)'), # http://www.wqed.org/
(r'video\.wmht\.org', 'WMHT Educational Telecommunications (WMHT)'), # http://www.wmht.org/home/
(r'video\.deltabroadcasting\.org', 'Q-TV (WDCQ)'), # http://www.deltabroadcasting.org
(r'video\.dptv\.org', 'WTVS Detroit Public TV (WTVS)'), # http://www.dptv.org/
(r'video\.wcmu\.org', 'CMU Public Television (WCMU)'), # http://www.wcmu.org
(r'video\.wkar\.org', 'WKAR-TV (WKAR)'), # http://wkar.org/
(r'wnmuvideo\.nmu\.edu', 'WNMU-TV Public TV 13 (WNMU)'), # http://wnmutv.nmu.edu
(r'video\.wdse\.org', 'WDSE - WRPT (WDSE)'), # http://www.wdse.org/
(r'video\.wgte\.org', 'WGTE TV (WGTE)'), # http://www.wgte.org
(r'video\.lptv\.org', 'Lakeland Public Television (KAWE)'), # http://www.lakelandptv.org
# (r'prairiepublic\.org', 'PRAIRIE PUBLIC (KFME)'), # http://www.prairiepublic.org/
(r'video\.kmos\.org', 'KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS)'), # http://www.kmos.org/
(r'watch\.montanapbs\.org', 'MontanaPBS (KUSM)'), # http://montanapbs.org
(r'video\.krwg\.org', 'KRWG/Channel 22 (KRWG)'), # http://www.krwg.org
(r'video\.kacvtv\.org', 'KACV (KACV)'), # http://www.panhandlepbs.org/home/
(r'video\.kcostv\.org', 'KCOS/Channel 13 (KCOS)'), # www.kcostv.org
(r'video\.wcny\.org', 'WCNY/Channel 24 (WCNY)'), # http://www.wcny.org
(r'video\.wned\.org', 'WNED (WNED)'), # http://www.wned.org/
(r'watch\.wpbstv\.org', 'WPBS (WPBS)'), # http://www.wpbstv.org
(r'video\.wskg\.org', 'WSKG Public TV (WSKG)'), # http://wskg.org
(r'video\.wxxi\.org', 'WXXI (WXXI)'), # http://wxxi.org
(r'video\.wpsu\.org', 'WPSU (WPSU)'), # http://www.wpsu.org
# (r'wqln\.org', 'WQLN/Channel 54 (WQLN)'), # http://www.wqln.org
(r'on-demand\.wvia\.org', 'WVIA Public Media Studios (WVIA)'), # http://www.wvia.org/
(r'video\.wtvi\.org', 'WTVI (WTVI)'), # http://www.wtvi.org/
# (r'whro\.org', 'WHRO (WHRO)'), # http://whro.org
(r'video\.westernreservepublicmedia\.org', 'Western Reserve PBS (WNEO)'), # http://www.WesternReservePublicMedia.org/
(r'video\.ideastream\.org', 'WVIZ/PBS ideastream (WVIZ)'), # http://www.wviz.org/
(r'video\.kcts9\.org', 'KCTS 9 (KCTS)'), # http://kcts9.org/
(r'video\.basinpbs\.org', 'Basin PBS (KPBT)'), # http://www.basinpbs.org
(r'video\.houstonpbs\.org', 'KUHT / Channel 8 (KUHT)'), # http://www.houstonpublicmedia.org/
# (r'tamu\.edu', 'KAMU - TV (KAMU)'), # http://KAMU.tamu.edu
# (r'kedt\.org', 'KEDT/Channel 16 (KEDT)'), # http://www.kedt.org
(r'video\.klrn\.org', 'KLRN (KLRN)'), # http://www.klrn.org
(r'video\.klru\.tv', 'KLRU (KLRU)'), # http://www.klru.org
# (r'kmbh\.org', 'KMBH-TV (KMBH)'), # http://www.kmbh.org
# (r'knct\.org', 'KNCT (KNCT)'), # http://www.knct.org
# (r'ktxt\.org', 'KTTZ-TV (KTXT)'), # http://www.ktxt.org
(r'video\.wtjx\.org', 'WTJX Channel 12 (WTJX)'), # http://www.wtjx.org/
(r'video\.ideastations\.org', 'WCVE PBS (WCVE)'), # http://ideastations.org/
(r'video\.kbtc\.org', 'KBTC Public Television (KBTC)'), # http://kbtc.org
)
IE_NAME = 'pbs'
@ -189,7 +189,7 @@ class PBSIE(InfoExtractor):
# Player
(?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
)
''' % '|'.join(re.escape(p) for p in list(zip(*_STATIONS))[0])
''' % '|'.join(list(zip(*_STATIONS))[0])
_TESTS = [
{

View File

@ -17,9 +17,9 @@ from ..utils import (
class RutubeIE(InfoExtractor):
IE_NAME = 'rutube'
IE_DESC = 'Rutube videos'
_VALID_URL = r'https?://rutube\.ru/video/(?P<id>[\da-z]{32})'
_VALID_URL = r'https?://rutube\.ru/(?:video|play/embed)/(?P<id>[\da-z]{32})'
_TEST = {
_TESTS = [{
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
'info_dict': {
'id': '3eac3b4561676c17df9132a9a1e62e3e',
@ -36,7 +36,10 @@ class RutubeIE(InfoExtractor):
# It requires ffmpeg (m3u8 download)
'skip_download': True,
},
}
}, {
'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)

View File

@ -0,0 +1,81 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
js_to_json,
qualities,
determine_ext,
)
class Tele13IE(InfoExtractor):
_VALID_URL = r'^http://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
_TESTS = [
{
'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
'md5': '4cb1fa38adcad8fea88487a078831755',
'info_dict': {
'id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
'ext': 'mp4',
'title': 'El círculo de hierro de Michelle Bachelet en su regreso a La Moneda',
},
'params': {
# HTTP Error 404: Not Found
'skip_download': True,
},
},
{
'url': 'http://www.t13.cl/videos/mundo/tendencias/video-captan-misteriosa-bola-fuego-cielos-bangkok',
'md5': '867adf6a3b3fef932c68a71d70b70946',
'info_dict': {
'id': 'rOoKv2OMpOw',
'ext': 'mp4',
'title': 'Shooting star seen on 7-Sep-2015',
'description': 'md5:7292ff2a34b2f673da77da222ae77e1e',
'uploader': 'Porjai Jaturongkhakun',
'upload_date': '20150906',
'uploader_id': 'UCnLY_3ezwNcDSC_Wc6suZxw',
},
'add_ie': ['Youtube'],
}
]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
setup_js = self._search_regex(r"(?s)jwplayer\('player-vivo'\).setup\((\{.*?\})\)", webpage, 'setup code')
sources = self._parse_json(self._search_regex(r'sources\s*:\s*(\[[^\]]+\])', setup_js, 'sources'), display_id, js_to_json)
preference = qualities(['Móvil', 'SD', 'HD'])
formats = []
urls = []
for f in sources:
format_url = f['file']
if format_url and format_url not in urls:
ext = determine_ext(format_url)
if ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(format_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
elif YoutubeIE.suitable(format_url):
return self.url_result(format_url, 'Youtube')
else:
formats.append({
'url': format_url,
'format_id': f.get('label'),
'preference': preference(f.get('label')),
'ext': ext,
})
urls.append(format_url)
self._sort_formats(formats)
return {
'id': display_id,
'title': self._search_regex(r'title\s*:\s*"([^"]+)"', setup_js, 'title'),
'description': self._html_search_meta('description', webpage, 'description'),
'thumbnail': self._search_regex(r'image\s*:\s*"([^"]+)"', setup_js, 'thumbnail', default=None),
'formats': formats,
}

View File

@ -6,7 +6,7 @@ from .common import InfoExtractor
class TF1IE(InfoExtractor):
"""TF1 uses the wat.tv player."""
_VALID_URL = r'http://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html'
_VALID_URL = r'http://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/(?:[^/]+/)*(?P<id>.+?)\.html'
_TESTS = [{
'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
'info_dict': {
@ -22,7 +22,7 @@ class TF1IE(InfoExtractor):
}, {
'url': 'http://www.tfou.fr/chuggington/videos/le-grand-mysterioso-chuggington-7085291-739.html',
'info_dict': {
'id': '12043945',
'id': 'le-grand-mysterioso-chuggington-7085291-739',
'ext': 'mp4',
'title': 'Le grand Mystérioso - Chuggington',
'description': 'Le grand Mystérioso - Emery rêve qu\'un article lui soit consacré dans le journal.',
@ -32,22 +32,24 @@ class TF1IE(InfoExtractor):
# Sometimes wat serves the whole file with the --test option
'skip_download': True,
},
'skip': 'HTTP Error 410: Gone',
}, {
'url': 'http://www.tf1.fr/tf1/koh-lanta/videos/replay-koh-lanta-22-mai-2015.html',
'only_matching': True,
}, {
'url': 'http://lci.tf1.fr/sept-a-huit/videos/sept-a-huit-du-24-mai-2015-8611550.html',
'only_matching': True,
}, {
'url': 'http://www.tf1.fr/hd1/documentaire/videos/mylene-farmer-d-une-icone.html',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
embed_url = self._html_search_regex(
r'["\'](https?://www.wat.tv/embedframe/.*?)["\']', webpage, 'embed url')
embed_page = self._download_webpage(embed_url, video_id,
'Downloading embed player page')
wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
wat_id = self._html_search_regex(
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})\1',
webpage, 'wat id', group='id')
wat_info = self._download_json(
'http://www.wat.tv/interface/contentv3/%s' % wat_id, video_id)
return self.url_result(wat_info['media']['url'], 'Wat')

View File

@ -0,0 +1,194 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
ExtractorError,
float_or_none,
int_or_none,
parse_iso8601,
sanitized_Request,
)
class ToggleIE(InfoExtractor):
IE_NAME = 'toggle'
_VALID_URL = r'https?://video\.toggle\.sg/(?:en|zh)/(?:series|clips|movies)/(?:[^/]+/)+(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://video.toggle.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
'info_dict': {
'id': '343115',
'ext': 'mp4',
'title': 'Lion Moms Premiere',
'description': 'md5:aea1149404bff4d7f7b6da11fafd8e6b',
'upload_date': '20150910',
'timestamp': 1441858274,
},
'params': {
'skip_download': 'm3u8 download',
}
}, {
'note': 'DRM-protected video',
'url': 'http://video.toggle.sg/en/movies/dug-s-special-mission/341413',
'info_dict': {
'id': '341413',
'ext': 'wvm',
'title': 'Dug\'s Special Mission',
'description': 'md5:e86c6f4458214905c1772398fabc93e0',
'upload_date': '20150827',
'timestamp': 1440644006,
},
'params': {
'skip_download': 'DRM-protected wvm download',
}
}, {
# this also tests correct video id extraction
'note': 'm3u8 links are geo-restricted, but Android/mp4 is okay',
'url': 'http://video.toggle.sg/en/series/28th-sea-games-5-show/28th-sea-games-5-show-ep11/332861',
'info_dict': {
'id': '332861',
'ext': 'mp4',
'title': '28th SEA Games (5 Show) - Episode 11',
'description': 'md5:3cd4f5f56c7c3b1340c50a863f896faa',
'upload_date': '20150605',
'timestamp': 1433480166,
},
'params': {
'skip_download': 'DRM-protected wvm download',
},
'skip': 'm3u8 links are geo-restricted'
}, {
'url': 'http://video.toggle.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
'only_matching': True,
}, {
'url': 'http://video.toggle.sg/zh/series/zero-calling-s2-hd/ep13/336367',
'only_matching': True,
}, {
'url': 'http://video.toggle.sg/en/series/vetri-s2/webisodes/jeeva-is-an-orphan-vetri-s2-webisode-7/342302',
'only_matching': True,
}, {
'url': 'http://video.toggle.sg/en/movies/seven-days/321936',
'only_matching': True,
}]
_FORMAT_PREFERENCES = {
'wvm-STBMain': -10,
'wvm-iPadMain': -20,
'wvm-iPhoneMain': -30,
'wvm-Android': -40,
}
_API_USER = 'tvpapi_147'
_API_PASS = '11111'
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
url, video_id, note='Downloading video page')
api_user = self._search_regex(
r'apiUser\s*:\s*(["\'])(?P<user>.+?)\1', webpage, 'apiUser',
default=self._API_USER, group='user')
api_pass = self._search_regex(
r'apiPass\s*:\s*(["\'])(?P<pass>.+?)\1', webpage, 'apiPass',
default=self._API_PASS, group='pass')
params = {
'initObj': {
'Locale': {
'LocaleLanguage': '',
'LocaleCountry': '',
'LocaleDevice': '',
'LocaleUserState': 0
},
'Platform': 0,
'SiteGuid': 0,
'DomainID': '0',
'UDID': '',
'ApiUser': api_user,
'ApiPass': api_pass
},
'MediaID': video_id,
'mediaType': 0,
}
req = sanitized_Request(
'http://tvpapi.as.tvinci.com/v2_9/gateways/jsonpostgw.aspx?m=GetMediaInfo',
json.dumps(params).encode('utf-8'))
info = self._download_json(req, video_id, 'Downloading video info json')
title = info['MediaName']
formats = []
for video_file in info.get('Files', []):
video_url, vid_format = video_file.get('URL'), video_file.get('Format')
if not video_url or not vid_format:
continue
ext = determine_ext(video_url)
vid_format = vid_format.replace(' ', '')
# if geo-restricted, m3u8 is inaccessible, but mp4 is okay
if ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
video_url, video_id, ext='mp4', m3u8_id=vid_format,
note='Downloading %s m3u8 information' % vid_format,
errnote='Failed to download %s m3u8 information' % vid_format,
fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
elif ext in ('mp4', 'wvm'):
# wvm are drm-protected files
formats.append({
'ext': ext,
'url': video_url,
'format_id': vid_format,
'preference': self._FORMAT_PREFERENCES.get(ext + '-' + vid_format) or -1,
'format_note': 'DRM-protected video' if ext == 'wvm' else None
})
if not formats:
# Most likely because geo-blocked
raise ExtractorError('No downloadable videos found', expected=True)
self._sort_formats(formats)
duration = int_or_none(info.get('Duration'))
description = info.get('Description')
created_at = parse_iso8601(info.get('CreationDate') or None)
average_rating = float_or_none(info.get('Rating'))
view_count = int_or_none(info.get('ViewCounter') or info.get('view_counter'))
like_count = int_or_none(info.get('LikeCounter') or info.get('like_counter'))
thumbnails = []
for picture in info.get('Pictures', []):
if not isinstance(picture, dict):
continue
pic_url = picture.get('URL')
if not pic_url:
continue
thumbnail = {
'url': pic_url,
}
pic_size = picture.get('PicSize', '')
m = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', pic_size)
if m:
thumbnail.update({
'width': int(m.group('width')),
'height': int(m.group('height')),
})
thumbnails.append(thumbnail)
return {
'id': video_id,
'title': title,
'description': description,
'duration': duration,
'timestamp': created_at,
'average_rating': average_rating,
'view_count': view_count,
'like_count': like_count,
'thumbnails': thumbnails,
'formats': formats,
}

View File

@ -23,6 +23,7 @@ from ..utils import (
unsmuggle_url,
urlencode_postdata,
unescapeHTML,
parse_filesize,
)
@ -184,6 +185,20 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader_id': 'user28849593',
},
},
{
# contains original format
'url': 'https://vimeo.com/33951933',
'md5': '53c688fa95a55bf4b7293d37a89c5c53',
'info_dict': {
'id': '33951933',
'ext': 'mp4',
'title': 'FOX CLASSICS - Forever Classic ID - A Full Minute',
'uploader': 'The DMCI',
'uploader_id': 'dmci',
'upload_date': '20111220',
'description': 'md5:ae23671e82d05415868f7ad1aec21147',
},
},
{
'url': 'https://vimeo.com/109815029',
'note': 'Video not completely processed, "failed" seed status',
@ -392,6 +407,21 @@ class VimeoIE(VimeoBaseInfoExtractor):
comment_count = None
formats = []
download_request = sanitized_Request('https://vimeo.com/%s?action=load_download_config' % video_id, headers={
'X-Requested-With': 'XMLHttpRequest'})
download_data = self._download_json(download_request, video_id, fatal=False)
if download_data:
source_file = download_data.get('source_file')
if source_file and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
formats.append({
'url': source_file['download_url'],
'ext': source_file['extension'].lower(),
'width': int_or_none(source_file.get('width')),
'height': int_or_none(source_file.get('height')),
'filesize': parse_filesize(source_file.get('size')),
'format_id': source_file.get('public_name', 'Original'),
'preference': 1,
})
config_files = config['video'].get('files') or config['request'].get('files', {})
for f in config_files.get('progressive', []):
video_url = f.get('url')
@ -408,12 +438,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
m3u8_url = config_files.get('hls', {}).get('url')
if m3u8_url:
m3u8_formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native', 0, 'hls', fatal=False)
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
# Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
# at the same time without actual units specified. This lead to wrong sorting.
self._sort_formats(formats, field_preference=('height', 'width', 'fps', 'format_id'))
self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'format_id'))
subtitles = {}
text_tracks = config['request'].get('text_tracks')

View File

@ -221,7 +221,7 @@ class YoukuIE(InfoExtractor):
'Youku said: Sorry, this video is available in China only', expected=True)
else:
msg = 'Youku server reported error %i' % error.get('code')
if error is not None:
if error_note is not None:
msg += ': ' + error_note
raise ExtractorError(msg)

View File

@ -26,6 +26,7 @@ from ..compat import (
from ..utils import (
clean_html,
encode_dict,
error_to_compat_str,
ExtractorError,
float_or_none,
get_element_by_attribute,
@ -33,6 +34,7 @@ from ..utils import (
int_or_none,
orderedSet,
parse_duration,
remove_quotes,
remove_start,
sanitized_Request,
smuggle_url,
@ -395,12 +397,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4',
'upload_date': '20120506',
'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
'alt_title': 'I Love It (feat. Charli XCX)',
'description': 'md5:782e8651347686cba06e58f71ab51773',
'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
'iconic ep', 'iconic', 'love', 'it'],
'uploader': 'Icona Pop',
'uploader_id': 'IconaPop',
'creator': 'Icona Pop',
}
},
{
@ -411,9 +415,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4',
'upload_date': '20130703',
'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
'alt_title': 'Tunnel Vision',
'description': 'md5:64249768eec3bc4276236606ea996373',
'uploader': 'justintimberlakeVEVO',
'uploader_id': 'justintimberlakeVEVO',
'creator': 'Justin Timberlake',
'age_limit': 18,
}
},
@ -492,10 +498,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'id': 'nfWlot6h_JM',
'ext': 'm4a',
'title': 'Taylor Swift - Shake It Off',
'alt_title': 'Shake It Off',
'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
'uploader': 'TaylorSwiftVEVO',
'uploader_id': 'TaylorSwiftVEVO',
'upload_date': '20140818',
'creator': 'Taylor Swift',
},
'params': {
'youtube_include_dash_manifest': True,
@ -551,9 +559,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4',
'upload_date': '20100430',
'uploader_id': 'deadmau5',
'creator': 'deadmau5',
'description': 'md5:12c56784b8032162bb936a5f76d55360',
'uploader': 'deadmau5',
'title': 'Deadmau5 - Some Chords (HD)',
'alt_title': 'Some Chords',
},
'expected_warnings': [
'DASH manifest missing',
@ -701,10 +711,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'id': 'lsguqyKfVQg',
'ext': 'mp4',
'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
'alt_title': 'Dark Walk',
'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
'upload_date': '20151119',
'uploader_id': 'IronSoulElf',
'uploader': 'IronSoulElf',
'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
},
'params': {
'skip_download': True,
@ -892,7 +904,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
video_id, note=False)
except ExtractorError as err:
self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
return {}
sub_lang_list = {}
@ -1308,6 +1320,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
upload_date = unified_strdate(upload_date)
m_music = re.search(
r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
video_webpage)
if m_music:
video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
video_creator = clean_html(m_music.group('creator'))
else:
video_alt_title = video_creator = None
m_cat_container = self._search_regex(
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
video_webpage, 'categories', default=None)
@ -1537,7 +1558,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': video_uploader,
'uploader_id': video_uploader_id,
'upload_date': upload_date,
'creator': video_creator,
'title': video_title,
'alt_title': video_alt_title,
'thumbnail': video_thumbnail,
'description': video_description,
'categories': video_categories,
@ -1752,6 +1775,10 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
},
}]
@classmethod
def suitable(cls, url):
return False if YoutubePlaylistsIE.suitable(url) else super(YoutubeChannelIE, cls).suitable(url)
def _real_extract(self, url):
channel_id = self._match_id(url)
@ -1825,10 +1852,10 @@ class YoutubeUserIE(YoutubeChannelIE):
return super(YoutubeUserIE, cls).suitable(url)
class YoutubeUserPlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
IE_DESC = 'YouTube.com user playlists'
_VALID_URL = r'https?://(?:\w+\.)?youtube\.com/user/(?P<id>[^/]+)/playlists'
IE_NAME = 'youtube:user:playlists'
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
IE_DESC = 'YouTube.com user/channel playlists'
_VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
IE_NAME = 'youtube:playlists'
_TESTS = [{
'url': 'http://www.youtube.com/user/ThirstForScience/playlists',
@ -1845,6 +1872,13 @@ class YoutubeUserPlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
'id': 'igorkle1',
'title': 'Игорь Клейнер',
},
}, {
'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
'playlist_mincount': 17,
'info_dict': {
'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
'title': 'Chem Player',
},
}]

View File

@ -232,7 +232,7 @@ class JSInterpreter(object):
def extract_function(self, funcname):
func_m = re.search(
r'''(?x)
(?:function\s+%s|[{;]%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
(?:function\s+%s|[{;,]%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
\((?P<args>[^)]*)\)\s*
\{(?P<code>[^}]+)\}''' % (
re.escape(funcname), re.escape(funcname), re.escape(funcname)),

View File

@ -9,7 +9,7 @@ import subprocess
import sys
from zipimport import zipimporter
from .compat import compat_str
from .utils import encode_compat_str
from .version import __version__
@ -61,7 +61,7 @@ def update_self(to_screen, verbose, opener):
newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()
except Exception:
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t find the current version. Please try again later.')
return
if newversion == __version__:
@ -74,7 +74,7 @@ def update_self(to_screen, verbose, opener):
versions_info = json.loads(versions_info)
except Exception:
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t obtain versions info. Please try again later.')
return
if 'signature' not in versions_info:
@ -123,7 +123,7 @@ def update_self(to_screen, verbose, opener):
urlh.close()
except (IOError, OSError):
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to download latest version')
return
@ -137,7 +137,7 @@ def update_self(to_screen, verbose, opener):
outf.write(newcontent)
except (IOError, OSError):
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to write the new version')
return
@ -157,7 +157,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
return # Do not show premature success messages
except (IOError, OSError):
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to overwrite current version')
return
@ -169,7 +169,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
urlh.close()
except (IOError, OSError):
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to download latest version')
return
@ -183,7 +183,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
outf.write(newcontent)
except (IOError, OSError):
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to overwrite current version')
return

View File

@ -1406,6 +1406,15 @@ def remove_end(s, end):
return s
def remove_quotes(s):
if s is None or len(s) < 2:
return s
for quote in ('"', "'", ):
if s[0] == quote and s[-1] == quote:
return s[1:-1]
return s
def url_basename(url):
path = compat_urlparse.urlparse(url).path
return path.strip('/').split('/')[-1]
@ -1703,6 +1712,10 @@ def encode_dict(d, encoding='utf-8'):
return dict((encode(k), encode(v)) for k, v in d.items())
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
US_RATINGS = {
'G': 0,
'PG': 10,
@ -1797,6 +1810,15 @@ def args_to_str(args):
return ' '.join(shlex_quote(a) for a in args)
def error_to_compat_str(err):
err_str = str(err)
# On python 2 error byte string must be decoded with proper
# encoding rather than ascii
if sys.version_info[0] < 3:
err_str = err_str.decode(preferredencoding())
return err_str
def mimetype2ext(mt):
_, _, res = mt.rpartition('/')
@ -1967,15 +1989,15 @@ def match_filter_func(filter_str):
def parse_dfxp_time_expr(time_expr):
if not time_expr:
return 0.0
return
mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
if mobj:
return float(mobj.group('time_offset'))
mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr)
mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
if mobj:
return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3))
return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
def srt_subtitles_timecode(seconds):
@ -2011,10 +2033,15 @@ def dfxp2srt(dfxp_data):
raise ValueError('Invalid dfxp/TTML subtitle')
for para, index in zip(paras, itertools.count(1)):
begin_time = parse_dfxp_time_expr(para.attrib['begin'])
begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
end_time = parse_dfxp_time_expr(para.attrib.get('end'))
dur = parse_dfxp_time_expr(para.attrib.get('dur'))
if begin_time is None:
continue
if not end_time:
end_time = begin_time + parse_dfxp_time_expr(para.attrib['dur'])
if not dur:
continue
end_time = begin_time + dur
out.append('%d\n%s --> %s\n%s\n\n' % (
index,
srt_subtitles_timecode(begin_time),

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2015.12.13'
__version__ = '2015.12.21'