1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-01-27 14:43:01 +08:00

Merge remote-tracking branch 'refs/remotes/rg3/master'

This commit is contained in:
forDream 2015-12-22 10:14:47 +08:00
commit d575ded6a7
50 changed files with 1477 additions and 931 deletions

View File

@ -757,7 +757,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc']) ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
``` ```
Most likely, you'll want to use various options. For a list of what can be done, have a look at [youtube_dl/YoutubeDL.py](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L117-L265). For a start, if you want to intercept youtube-dl's output, set a `logger` object. Most likely, you'll want to use various options. For a list of what can be done, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L121-L269). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file: Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:

View File

@ -65,9 +65,9 @@
- **Bet** - **Bet**
- **Bild**: Bild.de - **Bild**: Bild.de
- **BiliBili** - **BiliBili**
- **BleacherReport**
- **BleacherReportCMS**
- **blinkx** - **blinkx**
- **blip.tv:user**
- **BlipTV**
- **Bloomberg** - **Bloomberg**
- **Bpb**: Bundeszentrale für politische Bildung - **Bpb**: Bundeszentrale für politische Bildung
- **BR**: Bayerischer Rundfunk Mediathek - **BR**: Bayerischer Rundfunk Mediathek
@ -80,7 +80,6 @@
- **BYUtv** - **BYUtv**
- **Camdemy** - **Camdemy**
- **CamdemyFolder** - **CamdemyFolder**
- **Canal13cl**
- **canalc2.tv** - **canalc2.tv**
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
- **CBS** - **CBS**
@ -210,7 +209,9 @@
- **GodTube** - **GodTube**
- **GoldenMoustache** - **GoldenMoustache**
- **Golem** - **Golem**
- **GoogleDrive**
- **Goshgay** - **Goshgay**
- **GPUTechConf**
- **Groupon** - **Groupon**
- **Hark** - **Hark**
- **HearThisAt** - **HearThisAt**
@ -252,6 +253,7 @@
- **Jove** - **Jove**
- **jpopsuki.tv** - **jpopsuki.tv**
- **Jukebox** - **Jukebox**
- **JWPlatform**
- **Kaltura** - **Kaltura**
- **KanalPlay**: Kanal 5/9/11 Play - **KanalPlay**: Kanal 5/9/11 Play
- **Kankan** - **Kankan**
@ -292,6 +294,7 @@
- **m6** - **m6**
- **macgamestore**: MacGameStore trailers - **macgamestore**: MacGameStore trailers
- **mailru**: Видео@Mail.Ru - **mailru**: Видео@Mail.Ru
- **MakerTV**
- **Malemotion** - **Malemotion**
- **MDR**: MDR.DE and KiKA - **MDR**: MDR.DE and KiKA
- **media.ccc.de** - **media.ccc.de**
@ -551,6 +554,7 @@
- **TechTalks** - **TechTalks**
- **techtv.mit.edu** - **techtv.mit.edu**
- **ted** - **ted**
- **Tele13**
- **TeleBruxelles** - **TeleBruxelles**
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es - **Telecinco**: telecinco.es, cuatro.com and mediaset.es
- **Telegraaf** - **Telegraaf**
@ -573,6 +577,7 @@
- **TMZ** - **TMZ**
- **TMZArticle** - **TMZArticle**
- **TNAFlix** - **TNAFlix**
- **toggle**
- **tou.tv** - **tou.tv**
- **Toypics**: Toypics user profile - **Toypics**: Toypics user profile
- **ToypicsUser**: Toypics user profile - **ToypicsUser**: Toypics user profile
@ -711,6 +716,7 @@
- **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication) - **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
- **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication) - **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
- **youtube:playlist**: YouTube.com playlists - **youtube:playlist**: YouTube.com playlists
- **youtube:playlists**: YouTube.com user/channel playlists
- **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication) - **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
- **youtube:search**: YouTube.com searches - **youtube:search**: YouTube.com searches
- **youtube:search:date**: YouTube.com searches, newest videos first - **youtube:search:date**: YouTube.com searches, newest videos first
@ -718,7 +724,6 @@
- **youtube:show**: YouTube.com (multi-season) shows - **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication) - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword) - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
- **youtube:user:playlists**: YouTube.com user playlists
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication) - **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
- **Zapiks** - **Zapiks**
- **ZDF** - **ZDF**

View File

@ -11,7 +11,6 @@ from test.helper import FakeYDL, md5
from youtube_dl.extractor import ( from youtube_dl.extractor import (
BlipTVIE,
YoutubeIE, YoutubeIE,
DailymotionIE, DailymotionIE,
TEDIE, TEDIE,
@ -145,18 +144,6 @@ class TestTedSubtitles(BaseTestSubtitles):
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
class TestBlipTVSubtitles(BaseTestSubtitles):
url = 'http://blip.tv/a/a-6603250'
IE = BlipTVIE
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['en']))
self.assertEqual(md5(subtitles['en']), '5b75c300af65fe4476dff79478bb93e4')
class TestVimeoSubtitles(BaseTestSubtitles): class TestVimeoSubtitles(BaseTestSubtitles):
url = 'http://vimeo.com/76979871' url = 'http://vimeo.com/76979871'
IE = VimeoIE IE = VimeoIE

View File

@ -22,6 +22,7 @@ from youtube_dl.utils import (
DateRange, DateRange,
detect_exe_version, detect_exe_version,
determine_ext, determine_ext,
encode_compat_str,
encodeFilename, encodeFilename,
escape_rfc3986, escape_rfc3986,
escape_url, escape_url,
@ -43,6 +44,7 @@ from youtube_dl.utils import (
sanitize_path, sanitize_path,
prepend_extension, prepend_extension,
replace_extension, replace_extension,
remove_quotes,
shell_quote, shell_quote,
smuggle_url, smuggle_url,
str_to_int, str_to_int,
@ -200,6 +202,15 @@ class TestUtil(unittest.TestCase):
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp') self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp') self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
def test_remove_quotes(self):
self.assertEqual(remove_quotes(None), None)
self.assertEqual(remove_quotes('"'), '"')
self.assertEqual(remove_quotes("'"), "'")
self.assertEqual(remove_quotes(';'), ';')
self.assertEqual(remove_quotes('";'), '";')
self.assertEqual(remove_quotes('""'), '')
self.assertEqual(remove_quotes('";"'), ';')
def test_ordered_set(self): def test_ordered_set(self):
self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7]) self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
self.assertEqual(orderedSet([]), []) self.assertEqual(orderedSet([]), [])
@ -439,6 +450,10 @@ class TestUtil(unittest.TestCase):
data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'}) data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
self.assertTrue(isinstance(data, bytes)) self.assertTrue(isinstance(data, bytes))
def test_encode_compat_str(self):
self.assertEqual(encode_compat_str(b'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'тест')
self.assertEqual(encode_compat_str('тест', 'utf-8'), 'тест')
def test_parse_iso8601(self): def test_parse_iso8601(self):
self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266) self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266)
self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266) self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266)
@ -651,12 +666,13 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
{'like_count': 190, 'dislike_count': 10})) {'like_count': 190, 'dislike_count': 10}))
def test_parse_dfxp_time_expr(self): def test_parse_dfxp_time_expr(self):
self.assertEqual(parse_dfxp_time_expr(None), 0.0) self.assertEqual(parse_dfxp_time_expr(None), None)
self.assertEqual(parse_dfxp_time_expr(''), 0.0) self.assertEqual(parse_dfxp_time_expr(''), None)
self.assertEqual(parse_dfxp_time_expr('0.1'), 0.1) self.assertEqual(parse_dfxp_time_expr('0.1'), 0.1)
self.assertEqual(parse_dfxp_time_expr('0.1s'), 0.1) self.assertEqual(parse_dfxp_time_expr('0.1s'), 0.1)
self.assertEqual(parse_dfxp_time_expr('00:00:01'), 1.0) self.assertEqual(parse_dfxp_time_expr('00:00:01'), 1.0)
self.assertEqual(parse_dfxp_time_expr('00:00:01.100'), 1.1) self.assertEqual(parse_dfxp_time_expr('00:00:01.100'), 1.1)
self.assertEqual(parse_dfxp_time_expr('00:00:01:100'), 1.1)
def test_dfxp2srt(self): def test_dfxp2srt(self):
dfxp_data = '''<?xml version="1.0" encoding="UTF-8"?> dfxp_data = '''<?xml version="1.0" encoding="UTF-8"?>
@ -666,6 +682,9 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
<p begin="0" end="1">The following line contains Chinese characters and special symbols</p> <p begin="0" end="1">The following line contains Chinese characters and special symbols</p>
<p begin="1" end="2">第二行<br/></p> <p begin="1" end="2">第二行<br/></p>
<p begin="2" dur="1"><span>Third<br/>Line</span></p> <p begin="2" dur="1"><span>Third<br/>Line</span></p>
<p begin="3" end="-1">Lines with invalid timestamps are ignored</p>
<p begin="-1" end="-1">Ignore, two</p>
<p begin="3" dur="-1">Ignored, three</p>
</div> </div>
</body> </body>
</tt>''' </tt>'''

View File

@ -47,7 +47,9 @@ from .utils import (
DEFAULT_OUTTMPL, DEFAULT_OUTTMPL,
determine_ext, determine_ext,
DownloadError, DownloadError,
encode_compat_str,
encodeFilename, encodeFilename,
error_to_compat_str,
ExtractorError, ExtractorError,
format_bytes, format_bytes,
formatSeconds, formatSeconds,
@ -495,7 +497,7 @@ class YoutubeDL(object):
tb = '' tb = ''
if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info)) tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
tb += compat_str(traceback.format_exc()) tb += encode_compat_str(traceback.format_exc())
else: else:
tb_data = traceback.format_list(traceback.extract_stack()) tb_data = traceback.format_list(traceback.extract_stack())
tb = ''.join(tb_data) tb = ''.join(tb_data)
@ -674,14 +676,14 @@ class YoutubeDL(object):
return self.process_ie_result(ie_result, download, extra_info) return self.process_ie_result(ie_result, download, extra_info)
else: else:
return ie_result return ie_result
except ExtractorError as de: # An error we somewhat expected except ExtractorError as e: # An error we somewhat expected
self.report_error(compat_str(de), de.format_traceback()) self.report_error(compat_str(e), e.format_traceback())
break break
except MaxDownloadsReached: except MaxDownloadsReached:
raise raise
except Exception as e: except Exception as e:
if self.params.get('ignoreerrors', False): if self.params.get('ignoreerrors', False):
self.report_error(compat_str(e), tb=compat_str(traceback.format_exc())) self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
break break
else: else:
raise raise
@ -1459,7 +1461,7 @@ class YoutubeDL(object):
if dn and not os.path.exists(dn): if dn and not os.path.exists(dn):
os.makedirs(dn) os.makedirs(dn)
except (OSError, IOError) as err: except (OSError, IOError) as err:
self.report_error('unable to create directory ' + compat_str(err)) self.report_error('unable to create directory ' + error_to_compat_str(err))
return return
if self.params.get('writedescription', False): if self.params.get('writedescription', False):
@ -1510,7 +1512,7 @@ class YoutubeDL(object):
sub_info['url'], info_dict['id'], note=False) sub_info['url'], info_dict['id'], note=False)
except ExtractorError as err: except ExtractorError as err:
self.report_warning('Unable to download subtitle for "%s": %s' % self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, compat_str(err.cause))) (sub_lang, error_to_compat_str(err.cause)))
continue continue
try: try:
sub_filename = subtitles_filename(filename, sub_lang, sub_format) sub_filename = subtitles_filename(filename, sub_lang, sub_format)
@ -2039,4 +2041,4 @@ class YoutubeDL(object):
(info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename)) (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning('Unable to download thumbnail "%s": %s' % self.report_warning('Unable to download thumbnail "%s": %s' %
(t['url'], compat_str(err))) (t['url'], error_to_compat_str(err)))

View File

@ -5,9 +5,9 @@ import re
import sys import sys
import time import time
from ..compat import compat_str
from ..utils import ( from ..utils import (
encodeFilename, encodeFilename,
error_to_compat_str,
decodeArgument, decodeArgument,
format_bytes, format_bytes,
timeconvert, timeconvert,
@ -186,7 +186,7 @@ class FileDownloader(object):
return return
os.rename(encodeFilename(old_filename), encodeFilename(new_filename)) os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
except (IOError, OSError) as err: except (IOError, OSError) as err:
self.report_error('unable to rename file: %s' % compat_str(err)) self.report_error('unable to rename file: %s' % error_to_compat_str(err))
def try_utime(self, filename, last_modified_hdr): def try_utime(self, filename, last_modified_hdr):
"""Try to set the last-modified time of the given file.""" """Try to set the last-modified time of the given file."""

View File

@ -61,8 +61,11 @@ from .beatportpro import BeatportProIE
from .bet import BetIE from .bet import BetIE
from .bild import BildIE from .bild import BildIE
from .bilibili import BiliBiliIE from .bilibili import BiliBiliIE
from .bleacherreport import (
BleacherReportIE,
BleacherReportCMSIE,
)
from .blinkx import BlinkxIE from .blinkx import BlinkxIE
from .bliptv import BlipTVIE, BlipTVUserIE
from .bloomberg import BloombergIE from .bloomberg import BloombergIE
from .bpb import BpbIE from .bpb import BpbIE
from .br import BRIE from .br import BRIE
@ -78,7 +81,6 @@ from .camdemy import (
CamdemyIE, CamdemyIE,
CamdemyFolderIE CamdemyFolderIE
) )
from .canal13cl import Canal13clIE
from .canalplus import CanalplusIE from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE from .canalc2 import Canalc2IE
from .cbs import CBSIE from .cbs import CBSIE
@ -232,9 +234,11 @@ from .globo import (
from .godtube import GodTubeIE from .godtube import GodTubeIE
from .goldenmoustache import GoldenMoustacheIE from .goldenmoustache import GoldenMoustacheIE
from .golem import GolemIE from .golem import GolemIE
from .googledrive import GoogleDriveIE
from .googleplus import GooglePlusIE from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE from .googlesearch import GoogleSearchIE
from .goshgay import GoshgayIE from .goshgay import GoshgayIE
from .gputechconf import GPUTechConfIE
from .groupon import GrouponIE from .groupon import GrouponIE
from .hark import HarkIE from .hark import HarkIE
from .hearthisat import HearThisAtIE from .hearthisat import HearThisAtIE
@ -281,6 +285,7 @@ from .jadorecettepub import JadoreCettePubIE
from .jeuxvideo import JeuxVideoIE from .jeuxvideo import JeuxVideoIE
from .jove import JoveIE from .jove import JoveIE
from .jukebox import JukeboxIE from .jukebox import JukeboxIE
from .jwplatform import JWPlatformIE
from .jpopsukitv import JpopsukiIE from .jpopsukitv import JpopsukiIE
from .kaltura import KalturaIE from .kaltura import KalturaIE
from .kanalplay import KanalPlayIE from .kanalplay import KanalPlayIE
@ -335,6 +340,7 @@ from .lynda import (
from .m6 import M6IE from .m6 import M6IE
from .macgamestore import MacGameStoreIE from .macgamestore import MacGameStoreIE
from .mailru import MailRuIE from .mailru import MailRuIE
from .makertv import MakerTVIE
from .malemotion import MalemotionIE from .malemotion import MalemotionIE
from .mdr import MDRIE from .mdr import MDRIE
from .metacafe import MetacafeIE from .metacafe import MetacafeIE
@ -647,6 +653,7 @@ from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE from .techtalks import TechTalksIE
from .ted import TEDIE from .ted import TEDIE
from .tele13 import Tele13IE
from .telebruxelles import TeleBruxellesIE from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE from .telegraaf import TelegraafIE
@ -675,6 +682,7 @@ from .tnaflix import (
EMPFlixIE, EMPFlixIE,
MovieFapIE, MovieFapIE,
) )
from .toggle import ToggleIE
from .thvideo import ( from .thvideo import (
THVideoIE, THVideoIE,
THVideoPlaylistIE THVideoPlaylistIE
@ -850,7 +858,7 @@ from .youtube import (
YoutubeTruncatedIDIE, YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE, YoutubeTruncatedURLIE,
YoutubeUserIE, YoutubeUserIE,
YoutubeUserPlaylistsIE, YoutubePlaylistsIE,
YoutubeWatchLaterIE, YoutubeWatchLaterIE,
) )
from .zapiks import ZapiksIE from .zapiks import ZapiksIE

View File

@ -23,6 +23,7 @@ class ABCIE(InfoExtractor):
'title': 'Australia to help staff Ebola treatment centre in Sierra Leone', 'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
'description': 'md5:809ad29c67a05f54eb41f2a105693a67', 'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
}, },
'skip': 'this video has expired',
}, { }, {
'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326', 'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326',
'md5': 'db2a5369238b51f9811ad815b69dc086', 'md5': 'db2a5369238b51f9811ad815b69dc086',
@ -36,6 +37,7 @@ class ABCIE(InfoExtractor):
'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill', 'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill',
}, },
'add_ie': ['Youtube'], 'add_ie': ['Youtube'],
'skip': 'Not accessible from Travis CI server',
}, { }, {
'url': 'http://www.abc.net.au/news/2015-10-23/nab-lifts-interest-rates-following-westpac-and-cba/6880080', 'url': 'http://www.abc.net.au/news/2015-10-23/nab-lifts-interest-rates-following-westpac-and-cba/6880080',
'md5': 'b96eee7c9edf4fc5a358a0252881cc1f', 'md5': 'b96eee7c9edf4fc5a358a0252881cc1f',
@ -58,6 +60,9 @@ class ABCIE(InfoExtractor):
r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);', r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
webpage) webpage)
if mobj is None: if mobj is None:
expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None)
if expired:
raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True)
raise ExtractorError('Unable to extract video urls') raise ExtractorError('Unable to extract video urls')
urls_info = self._parse_json( urls_info = self._parse_json(

View File

@ -68,7 +68,7 @@ class AdultSwimIE(InfoExtractor):
'md5': '3e346a2ab0087d687a05e1e7f3b3e529', 'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
'info_dict': { 'info_dict': {
'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0', 'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
'ext': 'flv', 'ext': 'mp4',
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine', 'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n', 'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
}, },
@ -79,6 +79,10 @@ class AdultSwimIE(InfoExtractor):
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine', 'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n', 'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
}, },
'params': {
# m3u8 download
'skip_download': True,
}
}] }]
@staticmethod @staticmethod

View File

@ -0,0 +1,84 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
)
class AMPIE(InfoExtractor):
# parse Akamai Adaptive Media Player feed
def _extract_feed_info(self, url):
item = self._download_json(
url, None, 'Downloading Akamai AMP feed',
'Unable to download Akamai AMP feed')['channel']['item']
video_id = item['guid']
def get_media_node(name, default=None):
media_name = 'media-%s' % name
media_group = item.get('media-group') or item
return media_group.get(media_name) or item.get(media_name) or item.get(name, default)
thumbnails = []
media_thumbnail = get_media_node('thumbnail')
if media_thumbnail:
if isinstance(media_thumbnail, dict):
media_thumbnail = [media_thumbnail]
for thumbnail_data in media_thumbnail:
thumbnail = thumbnail_data['@attributes']
thumbnails.append({
'url': self._proto_relative_url(thumbnail['url'], 'http:'),
'width': int_or_none(thumbnail.get('width')),
'height': int_or_none(thumbnail.get('height')),
})
subtitles = {}
media_subtitle = get_media_node('subTitle')
if media_subtitle:
if isinstance(media_subtitle, dict):
media_subtitle = [media_subtitle]
for subtitle_data in media_subtitle:
subtitle = subtitle_data['@attributes']
lang = subtitle.get('lang') or 'en'
subtitles[lang] = [{'url': subtitle['href']}]
formats = []
media_content = get_media_node('content')
if isinstance(media_content, dict):
media_content = [media_content]
for media_data in media_content:
media = media_data['@attributes']
media_type = media['type']
if media_type == 'video/f4m':
f4m_formats = self._extract_f4m_formats(
media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
video_id, f4m_id='hds', fatal=False)
if f4m_formats:
formats.extend(f4m_formats)
elif media_type == 'application/x-mpegURL':
m3u8_formats = self._extract_m3u8_formats(
media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
else:
formats.append({
'format_id': media_data['media-category']['@attributes']['label'],
'url': media['url'],
'tbr': int_or_none(media.get('bitrate')),
'filesize': int_or_none(media.get('fileSize')),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': get_media_node('title'),
'description': get_media_node('description'),
'thumbnails': thumbnails,
'timestamp': parse_iso8601(item.get('pubDate'), ' '),
'duration': int_or_none(media_content[0].get('@attributes', {}).get('duration')),
'formats': formats,
}

View File

@ -2,6 +2,8 @@ from __future__ import unicode_literals
import time import time
import hmac import hmac
import hashlib
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
@ -32,6 +34,19 @@ class AtresPlayerIE(InfoExtractor):
'duration': 5527.6, 'duration': 5527.6,
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
}, },
'skip': 'This video is only available for registered users'
},
{
'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html',
'md5': '0d0e918533bbd4b263f2de4d197d4aac',
'info_dict': {
'id': 'capitulo-112-david-bustamante',
'ext': 'flv',
'title': 'David Bustamante',
'description': 'md5:f33f1c0a05be57f6708d4dd83a3b81c6',
'duration': 1439.0,
'thumbnail': 're:^https?://.*\.jpg$',
},
}, },
{ {
'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html', 'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html',
@ -50,6 +65,13 @@ class AtresPlayerIE(InfoExtractor):
_LOGIN_URL = 'https://servicios.atresplayer.com/j_spring_security_check' _LOGIN_URL = 'https://servicios.atresplayer.com/j_spring_security_check'
_ERRORS = {
'UNPUBLISHED': 'We\'re sorry, but this video is not yet available.',
'DELETED': 'This video has expired and is no longer available for online streaming.',
'GEOUNPUBLISHED': 'We\'re sorry, but this video is not available in your region due to right restrictions.',
# 'PREMIUM': 'PREMIUM',
}
def _real_initialize(self): def _real_initialize(self):
self._login() self._login()
@ -83,58 +105,81 @@ class AtresPlayerIE(InfoExtractor):
episode_id = self._search_regex( episode_id = self._search_regex(
r'episode="([^"]+)"', webpage, 'episode id') r'episode="([^"]+)"', webpage, 'episode id')
request = sanitized_Request(
self._PLAYER_URL_TEMPLATE % episode_id,
headers={'User-Agent': self._USER_AGENT})
player = self._download_json(request, episode_id, 'Downloading player JSON')
episode_type = player.get('typeOfEpisode')
error_message = self._ERRORS.get(episode_type)
if error_message:
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
formats = []
video_url = player.get('urlVideo')
if video_url:
format_info = {
'url': video_url,
'format_id': 'http',
}
mobj = re.search(r'(?P<bitrate>\d+)K_(?P<width>\d+)x(?P<height>\d+)', video_url)
if mobj:
format_info.update({
'width': int_or_none(mobj.group('width')),
'height': int_or_none(mobj.group('height')),
'tbr': int_or_none(mobj.group('bitrate')),
})
formats.append(format_info)
m3u8_url = player.get('urlVideoHls')
if m3u8_url:
m3u8_formats = self._extract_m3u8_formats(
m3u8_url, episode_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
timestamp = int_or_none(self._download_webpage( timestamp = int_or_none(self._download_webpage(
self._TIME_API_URL, self._TIME_API_URL,
video_id, 'Downloading timestamp', fatal=False), 1000, time.time()) video_id, 'Downloading timestamp', fatal=False), 1000, time.time())
timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT) timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT)
token = hmac.new( token = hmac.new(
self._MAGIC.encode('ascii'), self._MAGIC.encode('ascii'),
(episode_id + timestamp_shifted).encode('utf-8') (episode_id + timestamp_shifted).encode('utf-8'), hashlib.md5
).hexdigest() ).hexdigest()
formats = [] request = sanitized_Request(
for fmt in ['windows', 'android_tablet']: self._URL_VIDEO_TEMPLATE.format('windows', episode_id, timestamp_shifted, token),
request = sanitized_Request( headers={'User-Agent': self._USER_AGENT})
self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
request.add_header('User-Agent', self._USER_AGENT)
fmt_json = self._download_json( fmt_json = self._download_json(
request, video_id, 'Downloading %s video JSON' % fmt) request, video_id, 'Downloading windows video JSON')
result = fmt_json.get('resultDes') result = fmt_json.get('resultDes')
if result.lower() != 'ok': if result.lower() != 'ok':
raise ExtractorError( raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, result), expected=True) '%s returned error: %s' % (self.IE_NAME, result), expected=True)
for format_id, video_url in fmt_json['resultObject'].items(): for format_id, video_url in fmt_json['resultObject'].items():
if format_id == 'token' or not video_url.startswith('http'): if format_id == 'token' or not video_url.startswith('http'):
continue continue
if video_url.endswith('/Manifest'): if 'geodeswowsmpra3player' in video_url:
if 'geodeswowsmpra3player' in video_url: f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0] f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path) # this videos are protected by DRM, the f4m downloader doesn't support them
# this videos are protected by DRM, the f4m downloader doesn't support them continue
continue else:
else: f4m_url = video_url[:-9] + '/manifest.f4m'
f4m_url = video_url[:-9] + '/manifest.f4m' f4m_formats = self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False)
formats.extend(self._extract_f4m_formats(f4m_url, video_id)) if f4m_formats:
else: formats.extend(f4m_formats)
formats.append({
'url': video_url,
'format_id': 'android-%s' % format_id,
'preference': 1,
})
self._sort_formats(formats) self._sort_formats(formats)
player = self._download_json(
self._PLAYER_URL_TEMPLATE % episode_id,
episode_id)
path_data = player.get('pathData') path_data = player.get('pathData')
episode = self._download_xml( episode = self._download_xml(
self._EPISODE_URL_TEMPLATE % path_data, self._EPISODE_URL_TEMPLATE % path_data, video_id,
video_id, 'Downloading episode XML') 'Downloading episode XML')
duration = float_or_none(xpath_text( duration = float_or_none(xpath_text(
episode, './media/asset/info/technical/contentDuration', 'duration')) episode, './media/asset/info/technical/contentDuration', 'duration'))

View File

@ -15,7 +15,7 @@ class AudiMediaIE(InfoExtractor):
'url': 'https://audimedia.tv/en/vid/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test', 'url': 'https://audimedia.tv/en/vid/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test',
'md5': '79a8b71c46d49042609795ab59779b66', 'md5': '79a8b71c46d49042609795ab59779b66',
'info_dict': { 'info_dict': {
'id': '1564', 'id': '1565',
'ext': 'mp4', 'ext': 'mp4',
'title': '60 Seconds of Audi Sport 104/2015 - WEC Bahrain, Rookie Test', 'title': '60 Seconds of Audi Sport 104/2015 - WEC Bahrain, Rookie Test',
'description': 'md5:60e5d30a78ced725f7b8d34370762941', 'description': 'md5:60e5d30a78ced725f7b8d34370762941',

View File

@ -56,7 +56,7 @@ class AudiomackIE(InfoExtractor):
# API is inconsistent with errors # API is inconsistent with errors
if 'url' not in api_response or not api_response['url'] or 'error' in api_response: if 'url' not in api_response or not api_response['url'] or 'error' in api_response:
raise ExtractorError('Invalid url %s', url) raise ExtractorError('Invalid url %s' % url)
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper # Audiomack wraps a lot of soundcloud tracks in their branded wrapper
# if so, pass the work off to the soundcloud extractor # if so, pass the work off to the soundcloud extractor

View File

@ -0,0 +1,106 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .amp import AMPIE
from ..utils import (
ExtractorError,
int_or_none,
parse_iso8601,
)
class BleacherReportIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)'
_TESTS = [{
'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football',
'md5': 'a3ffc3dc73afdbc2010f02d98f990f20',
'info_dict': {
'id': '2496438',
'ext': 'mp4',
'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?',
'uploader_id': 3992341,
'description': 'CFB, ACC, Florida State',
'timestamp': 1434380212,
'upload_date': '20150615',
'uploader': 'Team Stream Now ',
},
'add_ie': ['Ooyala'],
}, {
'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
'md5': 'af5f90dc9c7ba1c19d0a3eac806bbf50',
'info_dict': {
'id': '2586817',
'ext': 'mp4',
'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
'timestamp': 1446839961,
'uploader': 'Sean Fay',
'description': 'md5:825e94e0f3521df52fa83b2ed198fa20',
'uploader_id': 6466954,
'upload_date': '20151011',
},
'add_ie': ['Youtube'],
}]
def _real_extract(self, url):
article_id = self._match_id(url)
article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article']
thumbnails = []
primary_photo = article_data.get('primaryPhoto')
if primary_photo:
thumbnails = [{
'url': primary_photo['url'],
'width': primary_photo.get('width'),
'height': primary_photo.get('height'),
}]
info = {
'_type': 'url_transparent',
'id': article_id,
'title': article_data['title'],
'uploader': article_data.get('author', {}).get('name'),
'uploader_id': article_data.get('authorId'),
'timestamp': parse_iso8601(article_data.get('createdAt')),
'thumbnails': thumbnails,
'comment_count': int_or_none(article_data.get('commentsCount')),
'view_count': int_or_none(article_data.get('hitCount')),
}
video = article_data.get('video')
if video:
video_type = video['type']
if video_type == 'cms.bleacherreport.com':
info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
elif video_type == 'ooyala.com':
info['url'] = 'ooyala:%s' % video['id']
elif video_type == 'youtube.com':
info['url'] = video['id']
elif video_type == 'vine.co':
info['url'] = 'https://vine.co/v/%s' % video['id']
else:
info['url'] = video_type + video['id']
return info
else:
raise ExtractorError('no video in the article', expected=True)
class BleacherReportCMSIE(AMPIE):
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
_TESTS = [{
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
'md5': 'f0ca220af012d4df857b54f792c586bb',
'info_dict': {
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
'ext': 'flv',
'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
info = self._extract_feed_info('http://cms.bleacherreport.com/media/items/%s/akamai.json' % video_id)
info['id'] = video_id
return info

View File

@ -1,290 +0,0 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
clean_html,
int_or_none,
parse_iso8601,
sanitized_Request,
unescapeHTML,
xpath_text,
xpath_with_ns,
)
class BlipTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'
_TESTS = [
{
'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
'md5': '80baf1ec5c3d2019037c1c707d676b9f',
'info_dict': {
'id': '5779306',
'ext': 'm4v',
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
'timestamp': 1323138843,
'upload_date': '20111206',
'uploader': 'cbr',
'uploader_id': '679425',
'duration': 81,
}
},
{
# https://github.com/rg3/youtube-dl/pull/2274
'note': 'Video with subtitles',
'url': 'http://blip.tv/play/h6Uag5OEVgI.html',
'md5': '309f9d25b820b086ca163ffac8031806',
'info_dict': {
'id': '6586561',
'ext': 'mp4',
'title': 'Red vs. Blue Season 11 Episode 1',
'description': 'One-Zero-One',
'timestamp': 1371261608,
'upload_date': '20130615',
'uploader': 'redvsblue',
'uploader_id': '792887',
'duration': 279,
}
},
{
# https://bugzilla.redhat.com/show_bug.cgi?id=967465
'url': 'http://a.blip.tv/api.swf#h6Uag5KbVwI',
'md5': '314e87b1ebe7a48fcbfdd51b791ce5a6',
'info_dict': {
'id': '6573122',
'ext': 'mov',
'upload_date': '20130520',
'description': 'Two hapless space marines argue over what to do when they realize they have an astronomically huge problem on their hands.',
'title': 'Red vs. Blue Season 11 Trailer',
'timestamp': 1369029609,
'uploader': 'redvsblue',
'uploader_id': '792887',
}
},
{
'url': 'http://blip.tv/play/gbk766dkj4Yn',
'md5': 'fe0a33f022d49399a241e84a8ea8b8e3',
'info_dict': {
'id': '1749452',
'ext': 'mp4',
'upload_date': '20090208',
'description': 'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.',
'title': 'Nostalgia Critic: Transformers',
'timestamp': 1234068723,
'uploader': 'NostalgiaCritic',
'uploader_id': '246467',
}
},
{
# https://github.com/rg3/youtube-dl/pull/4404
'note': 'Audio only',
'url': 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982',
'md5': '76c0a56f24e769ceaab21fbb6416a351',
'info_dict': {
'id': '7103299',
'ext': 'flv',
'title': 'Weekly Manga Recap: Kingdom',
'description': 'And then Shin breaks the enemy line, and he&apos;s all like HWAH! And then he slices a guy and it&apos;s all like FWASHING! And... it&apos;s really hard to describe the best parts of this series without breaking down into sound effects, okay?',
'timestamp': 1417660321,
'upload_date': '20141204',
'uploader': 'The Rollo T',
'uploader_id': '407429',
'duration': 7251,
'vcodec': 'none',
}
},
{
# missing duration
'url': 'http://blip.tv/rss/flash/6700880',
'info_dict': {
'id': '6684191',
'ext': 'm4v',
'title': 'Cowboy Bebop: Gateway Shuffle Review',
'description': 'md5:3acc480c0f9ae157f5fe88547ecaf3f8',
'timestamp': 1386639757,
'upload_date': '20131210',
'uploader': 'sfdebris',
'uploader_id': '706520',
}
}
]
@staticmethod
def _extract_url(webpage):
mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
if mobj:
return 'http://blip.tv/a/a-' + mobj.group(1)
mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
if mobj:
return mobj.group(1)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
lookup_id = mobj.group('lookup_id')
# See https://github.com/rg3/youtube-dl/issues/857 and
# https://github.com/rg3/youtube-dl/issues/4197
if lookup_id:
urlh = self._request_webpage(
'http://blip.tv/play/%s' % lookup_id, lookup_id, 'Resolving lookup id')
url = compat_urlparse.urlparse(urlh.geturl())
qs = compat_urlparse.parse_qs(url.query)
mobj = re.match(self._VALID_URL, qs['file'][0])
video_id = mobj.group('id')
rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')
def _x(p):
return xpath_with_ns(p, {
'blip': 'http://blip.tv/dtd/blip/1.0',
'media': 'http://search.yahoo.com/mrss/',
'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
})
item = rss.find('channel/item')
video_id = xpath_text(item, _x('blip:item_id'), 'video id') or lookup_id
title = xpath_text(item, 'title', 'title', fatal=True)
description = clean_html(xpath_text(item, _x('blip:puredescription'), 'description'))
timestamp = parse_iso8601(xpath_text(item, _x('blip:datestamp'), 'timestamp'))
uploader = xpath_text(item, _x('blip:user'), 'uploader')
uploader_id = xpath_text(item, _x('blip:userid'), 'uploader id')
duration = int_or_none(xpath_text(item, _x('blip:runtime'), 'duration'))
media_thumbnail = item.find(_x('media:thumbnail'))
thumbnail = (media_thumbnail.get('url') if media_thumbnail is not None
else xpath_text(item, 'image', 'thumbnail'))
categories = [category.text for category in item.findall('category') if category is not None]
formats = []
subtitles_urls = {}
media_group = item.find(_x('media:group'))
for media_content in media_group.findall(_x('media:content')):
url = media_content.get('url')
role = media_content.get(_x('blip:role'))
msg = self._download_webpage(
url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
video_id, 'Resolving URL for %s' % role)
real_url = compat_urlparse.parse_qs(msg.strip())['message'][0]
media_type = media_content.get('type')
if media_type == 'text/srt' or url.endswith('.srt'):
LANGS = {
'english': 'en',
}
lang = role.rpartition('-')[-1].strip().lower()
langcode = LANGS.get(lang, lang)
subtitles_urls[langcode] = url
elif media_type.startswith('video/'):
formats.append({
'url': real_url,
'format_id': role,
'format_note': media_type,
'vcodec': media_content.get(_x('blip:vcodec')) or 'none',
'acodec': media_content.get(_x('blip:acodec')),
'filesize': media_content.get('filesize'),
'width': int_or_none(media_content.get('width')),
'height': int_or_none(media_content.get('height')),
})
self._check_formats(formats, video_id)
self._sort_formats(formats)
subtitles = self.extract_subtitles(video_id, subtitles_urls)
return {
'id': video_id,
'title': title,
'description': description,
'timestamp': timestamp,
'uploader': uploader,
'uploader_id': uploader_id,
'duration': duration,
'thumbnail': thumbnail,
'categories': categories,
'formats': formats,
'subtitles': subtitles,
}
def _get_subtitles(self, video_id, subtitles_urls):
subtitles = {}
for lang, url in subtitles_urls.items():
# For some weird reason, blip.tv serves a video instead of subtitles
# when we request with a common UA
req = sanitized_Request(url)
req.add_header('User-Agent', 'youtube-dl')
subtitles[lang] = [{
# The extension is 'srt' but it's actually an 'ass' file
'ext': 'ass',
'data': self._download_webpage(req, None, note=False),
}]
return subtitles
class BlipTVUserIE(InfoExtractor):
_VALID_URL = r'(?:(?:https?://(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
_PAGE_SIZE = 12
IE_NAME = 'blip.tv:user'
_TEST = {
'url': 'http://blip.tv/actone',
'info_dict': {
'id': 'actone',
'title': 'Act One: The Series',
},
'playlist_count': 5,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
username = mobj.group(1)
page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'
page = self._download_webpage(url, username, 'Downloading user page')
mobj = re.search(r'data-users-id="([^"]+)"', page)
page_base = page_base % mobj.group(1)
title = self._og_search_title(page)
# Download video ids using BlipTV Ajax calls. Result size per
# query is limited (currently to 12 videos) so we need to query
# page by page until there are no video ids - it means we got
# all of them.
video_ids = []
pagenum = 1
while True:
url = page_base + "&page=" + str(pagenum)
page = self._download_webpage(
url, username, 'Downloading video ids from page %d' % pagenum)
# Extract video identifiers
ids_in_page = []
for mobj in re.finditer(r'href="/([^"]+)"', page):
if mobj.group(1) not in ids_in_page:
ids_in_page.append(unescapeHTML(mobj.group(1)))
video_ids.extend(ids_in_page)
# A little optimization - if current page is not
# "full", ie. does not contain PAGE_SIZE video ids then
# we can assume that this page is the last one - there
# are no more ids on further pages - no need to query
# again.
if len(ids_in_page) < self._PAGE_SIZE:
break
pagenum += 1
urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
return self.playlist_result(
url_entries, playlist_title=title, playlist_id=username)

View File

@ -1,18 +1,21 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
parse_duration, parse_duration,
xpath_element,
xpath_text,
) )
class BRIE(InfoExtractor): class BRIE(InfoExtractor):
IE_DESC = 'Bayerischer Rundfunk Mediathek' IE_DESC = 'Bayerischer Rundfunk Mediathek'
_VALID_URL = r'https?://(?:www\.)?br\.de/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html' _VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
_BASE_URL = 'http://www.br.de'
_TESTS = [ _TESTS = [
{ {
@ -22,7 +25,7 @@ class BRIE(InfoExtractor):
'id': '48f656ef-287e-486f-be86-459122db22cc', 'id': '48f656ef-287e-486f-be86-459122db22cc',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Die böse Überraschung', 'title': 'Die böse Überraschung',
'description': 'Betriebliche Altersvorsorge: Die böse Überraschung', 'description': 'md5:ce9ac81b466ce775b8018f6801b48ac9',
'duration': 180, 'duration': 180,
'uploader': 'Reinhard Weber', 'uploader': 'Reinhard Weber',
'upload_date': '20150422', 'upload_date': '20150422',
@ -30,23 +33,23 @@ class BRIE(InfoExtractor):
}, },
{ {
'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html', 'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html',
'md5': 'a44396d73ab6a68a69a568fae10705bb', 'md5': 'af3a3a4aa43ff0ce6a89504c67f427ef',
'info_dict': { 'info_dict': {
'id': 'a4b83e34-123d-4b81-9f4e-c0d3121a4e05', 'id': 'a4b83e34-123d-4b81-9f4e-c0d3121a4e05',
'ext': 'mp4', 'ext': 'flv',
'title': 'Manfred Schreiber ist tot', 'title': 'Manfred Schreiber ist tot',
'description': 'Abendschau kompakt: Manfred Schreiber ist tot', 'description': 'md5:b454d867f2a9fc524ebe88c3f5092d97',
'duration': 26, 'duration': 26,
} }
}, },
{ {
'url': 'http://www.br.de/radio/br-klassik/sendungen/allegro/premiere-urauffuehrung-the-land-2015-dance-festival-muenchen-100.html', 'url': 'https://www.br-klassik.de/audio/peeping-tom-premierenkritik-dance-festival-muenchen-100.html',
'md5': '8b5b27c0b090f3b35eac4ab3f7a73d3d', 'md5': '8b5b27c0b090f3b35eac4ab3f7a73d3d',
'info_dict': { 'info_dict': {
'id': '74c603c9-26d3-48bb-b85b-079aeed66e0b', 'id': '74c603c9-26d3-48bb-b85b-079aeed66e0b',
'ext': 'aac', 'ext': 'aac',
'title': 'Kurzweilig und sehr bewegend', 'title': 'Kurzweilig und sehr bewegend',
'description': '"The Land" von Peeping Tom: Kurzweilig und sehr bewegend', 'description': 'md5:0351996e3283d64adeb38ede91fac54e',
'duration': 296, 'duration': 296,
} }
}, },
@ -57,7 +60,7 @@ class BRIE(InfoExtractor):
'id': '6ba73750-d405-45d3-861d-1ce8c524e059', 'id': '6ba73750-d405-45d3-861d-1ce8c524e059',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Umweltbewusster Häuslebauer', 'title': 'Umweltbewusster Häuslebauer',
'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer', 'description': 'md5:d52dae9792d00226348c1dbb13c9bae2',
'duration': 116, 'duration': 116,
} }
}, },
@ -68,7 +71,7 @@ class BRIE(InfoExtractor):
'id': 'd982c9ce-8648-4753-b358-98abb8aec43d', 'id': 'd982c9ce-8648-4753-b358-98abb8aec43d',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Folge 1 - Metaphysik', 'title': 'Folge 1 - Metaphysik',
'description': 'Kant für Anfänger: Folge 1 - Metaphysik', 'description': 'md5:bb659990e9e59905c3d41e369db1fbe3',
'duration': 893, 'duration': 893,
'uploader': 'Eva Maria Steimle', 'uploader': 'Eva Maria Steimle',
'upload_date': '20140117', 'upload_date': '20140117',
@ -77,28 +80,31 @@ class BRIE(InfoExtractor):
] ]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) base_url, display_id = re.search(self._VALID_URL, url).groups()
page = self._download_webpage(url, display_id) page = self._download_webpage(url, display_id)
xml_url = self._search_regex( xml_url = self._search_regex(
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL') r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL')
xml = self._download_xml(self._BASE_URL + xml_url, None) xml = self._download_xml(base_url + xml_url, display_id)
medias = [] medias = []
for xml_media in xml.findall('video') + xml.findall('audio'): for xml_media in xml.findall('video') + xml.findall('audio'):
media_id = xml_media.get('externalId')
media = { media = {
'id': xml_media.get('externalId'), 'id': media_id,
'title': xml_media.find('title').text, 'title': xpath_text(xml_media, 'title', 'title', True),
'duration': parse_duration(xml_media.find('duration').text), 'duration': parse_duration(xpath_text(xml_media, 'duration')),
'formats': self._extract_formats(xml_media.find('assets')), 'formats': self._extract_formats(xpath_element(
'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')), xml_media, 'assets'), media_id),
'description': ' '.join(xml_media.find('shareTitle').text.splitlines()), 'thumbnails': self._extract_thumbnails(xpath_element(
'webpage_url': xml_media.find('permalink').text xml_media, 'teaserImage/variants'), base_url),
'description': xpath_text(xml_media, 'desc'),
'webpage_url': xpath_text(xml_media, 'permalink'),
'uploader': xpath_text(xml_media, 'author'),
} }
if xml_media.find('author').text: broadcast_date = xpath_text(xml_media, 'broadcastDate')
media['uploader'] = xml_media.find('author').text if broadcast_date:
if xml_media.find('broadcastDate').text: media['upload_date'] = ''.join(reversed(broadcast_date.split('.')))
media['upload_date'] = ''.join(reversed(xml_media.find('broadcastDate').text.split('.')))
medias.append(media) medias.append(media)
if len(medias) > 1: if len(medias) > 1:
@ -109,35 +115,58 @@ class BRIE(InfoExtractor):
raise ExtractorError('No media entries found') raise ExtractorError('No media entries found')
return medias[0] return medias[0]
def _extract_formats(self, assets): def _extract_formats(self, assets, media_id):
formats = []
def text_or_none(asset, tag): for asset in assets.findall('asset'):
elem = asset.find(tag) format_url = xpath_text(asset, ['downloadUrl', 'url'])
return None if elem is None else elem.text asset_type = asset.get('type')
if asset_type == 'HDS':
formats = [{ f4m_formats = self._extract_f4m_formats(
'url': text_or_none(asset, 'downloadUrl'), format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False)
'ext': text_or_none(asset, 'mediaType'), if f4m_formats:
'format_id': asset.get('type'), formats.extend(f4m_formats)
'width': int_or_none(text_or_none(asset, 'frameWidth')), elif asset_type == 'HLS':
'height': int_or_none(text_or_none(asset, 'frameHeight')), m3u8_formats = self._extract_m3u8_formats(
'tbr': int_or_none(text_or_none(asset, 'bitrateVideo')), format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hds', fatal=False)
'abr': int_or_none(text_or_none(asset, 'bitrateAudio')), if m3u8_formats:
'vcodec': text_or_none(asset, 'codecVideo'), formats.extend(m3u8_formats)
'acodec': text_or_none(asset, 'codecAudio'), else:
'container': text_or_none(asset, 'mediaType'), format_info = {
'filesize': int_or_none(text_or_none(asset, 'size')), 'ext': xpath_text(asset, 'mediaType'),
} for asset in assets.findall('asset') 'width': int_or_none(xpath_text(asset, 'frameWidth')),
if asset.find('downloadUrl') is not None] 'height': int_or_none(xpath_text(asset, 'frameHeight')),
'tbr': int_or_none(xpath_text(asset, 'bitrateVideo')),
'abr': int_or_none(xpath_text(asset, 'bitrateAudio')),
'vcodec': xpath_text(asset, 'codecVideo'),
'acodec': xpath_text(asset, 'codecAudio'),
'container': xpath_text(asset, 'mediaType'),
'filesize': int_or_none(xpath_text(asset, 'size')),
}
format_url = self._proto_relative_url(format_url)
if format_url:
http_format_info = format_info.copy()
http_format_info.update({
'url': format_url,
'format_id': 'http-%s' % asset_type,
})
formats.append(http_format_info)
server_prefix = xpath_text(asset, 'serverPrefix')
if server_prefix:
rtmp_format_info = format_info.copy()
rtmp_format_info.update({
'url': server_prefix,
'play_path': xpath_text(asset, 'fileName'),
'format_id': 'rtmp-%s' % asset_type,
})
formats.append(rtmp_format_info)
self._sort_formats(formats) self._sort_formats(formats)
return formats return formats
def _extract_thumbnails(self, variants): def _extract_thumbnails(self, variants, base_url):
thumbnails = [{ thumbnails = [{
'url': self._BASE_URL + variant.find('url').text, 'url': base_url + xpath_text(variant, 'url'),
'width': int_or_none(variant.find('width').text), 'width': int_or_none(xpath_text(variant, 'width')),
'height': int_or_none(variant.find('height').text), 'height': int_or_none(xpath_text(variant, 'height')),
} for variant in variants.findall('variant')] } for variant in variants.findall('variant') if xpath_text(variant, 'url')]
thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True) thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True)
return thumbnails return thumbnails

View File

@ -355,7 +355,7 @@ class BrightcoveLegacyIE(InfoExtractor):
class BrightcoveNewIE(InfoExtractor): class BrightcoveNewIE(InfoExtractor):
IE_NAME = 'brightcove:new' IE_NAME = 'brightcove:new'
_VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+)' _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>(?:ref:)?\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001', 'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
'md5': 'c8100925723840d4b0d243f7025703be', 'md5': 'c8100925723840d4b0d243f7025703be',
@ -387,14 +387,24 @@ class BrightcoveNewIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
} }
}, {
# ref: prefixed video id
'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442',
'only_matching': True,
}] }]
@staticmethod
def _extract_url(webpage):
urls = BrightcoveNewIE._extract_urls(webpage)
return urls[0] if urls else None
@staticmethod @staticmethod
def _extract_urls(webpage): def _extract_urls(webpage):
# Reference: # Reference:
# 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe # 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
# 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript) # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
# 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/embed-in-page.html # 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/embed-in-page.html
# 4. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player
entries = [] entries = []
@ -407,9 +417,10 @@ class BrightcoveNewIE(InfoExtractor):
for video_id, account_id, player_id, embed in re.findall( for video_id, account_id, player_id, embed in re.findall(
# According to examples from [3] it's unclear whether video id # According to examples from [3] it's unclear whether video id
# may be optional and what to do when it is # may be optional and what to do when it is
# According to [4] data-video-id may be prefixed with ref:
r'''(?sx) r'''(?sx)
<video[^>]+ <video[^>]+
data-video-id=["\'](\d+)["\'][^>]*>.*? data-video-id=["\']((?:ref:)?\d+)["\'][^>]*>.*?
</video>.*? </video>.*?
<script[^>]+ <script[^>]+
src=["\'](?:https?:)?//players\.brightcove\.net/ src=["\'](?:https?:)?//players\.brightcove\.net/

View File

@ -1,48 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class Canal13clIE(InfoExtractor):
_VALID_URL = r'^http://(?:www\.)?13\.cl/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
_TEST = {
'url': 'http://www.13.cl/t13/nacional/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
'md5': '4cb1fa38adcad8fea88487a078831755',
'info_dict': {
'id': '1403022125',
'display_id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
'ext': 'mp4',
'title': 'El "círculo de hierro" de Michelle Bachelet en su regreso a La Moneda',
'description': '(Foto: Agencia Uno) En nueve días más, Michelle Bachelet va a asumir por segunda vez como presidenta de la República. Entre aquellos que la acompañarán hay caras que se repiten y otras que se consolidan en su entorno de colaboradores más cercanos.',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('id')
webpage = self._download_webpage(url, display_id)
title = self._html_search_meta(
'twitter:title', webpage, 'title', fatal=True)
description = self._html_search_meta(
'twitter:description', webpage, 'description')
url = self._html_search_regex(
r'articuloVideo = \"(.*?)\"', webpage, 'url')
real_id = self._search_regex(
r'[^0-9]([0-9]{7,})[^0-9]', url, 'id', default=display_id)
thumbnail = self._html_search_regex(
r'articuloImagen = \"(.*?)\"', webpage, 'thumbnail')
return {
'id': real_id,
'display_id': display_id,
'url': url,
'title': title,
'description': description,
'ext': 'mp4',
'thumbnail': thumbnail,
}

View File

@ -5,7 +5,6 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError from ..utils import ExtractorError
from .bliptv import BlipTVIE
from .screenwavemedia import ScreenwaveMediaIE from .screenwavemedia import ScreenwaveMediaIE
@ -34,18 +33,17 @@ class CinemassacreIE(InfoExtractor):
}, },
}, },
{ {
# blip.tv embedded video # Youtube embedded video
'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/', 'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/',
'md5': 'ca9b3c8dd5a66f9375daeb5135f5a3de', 'md5': 'df4cf8a1dcedaec79a73d96d83b99023',
'info_dict': { 'info_dict': {
'id': '4065369', 'id': 'OEVzPCY2T-g',
'ext': 'flv', 'ext': 'mp4',
'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles', 'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles',
'upload_date': '20061207', 'upload_date': '20061207',
'uploader': 'cinemassacre', 'uploader': 'Cinemassacre',
'uploader_id': '250778', 'uploader_id': 'JamesNintendoNerd',
'timestamp': 1283233867, 'description': 'md5:784734696c2b8b7f4b8625cc799e07f6',
'description': 'md5:0a108c78d130676b207d0f6d029ecffd',
} }
}, },
{ {
@ -88,8 +86,6 @@ class CinemassacreIE(InfoExtractor):
r'<iframe[^>]+src="(?P<url>(?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"', r'<iframe[^>]+src="(?P<url>(?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
], ],
webpage, 'player data URL', default=None, group='url') webpage, 'player data URL', default=None, group='url')
if not playerdata_url:
playerdata_url = BlipTVIE._extract_url(webpage)
if not playerdata_url: if not playerdata_url:
raise ExtractorError('Unable to find player data') raise ExtractorError('Unable to find player data')

View File

@ -1,15 +1,11 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import json from .theplatform import ThePlatformIE
from ..utils import int_or_none
from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
class CNETIE(InfoExtractor): class CNETIE(ThePlatformIE):
_VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/' _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
_TESTS = [{ _TESTS = [{
'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/', 'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
@ -18,25 +14,20 @@ class CNETIE(InfoExtractor):
'ext': 'flv', 'ext': 'flv',
'title': 'Hands-on with Microsoft Windows 8.1 Update', 'title': 'Hands-on with Microsoft Windows 8.1 Update',
'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.', 'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
'thumbnail': 're:^http://.*/flmswindows8.jpg$',
'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861', 'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
'uploader': 'Sarah Mitroff', 'uploader': 'Sarah Mitroff',
'duration': 70,
}, },
'params': {
'skip_download': 'requires rtmpdump',
}
}, { }, {
'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/', 'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
'info_dict': { 'info_dict': {
'id': '56527b93-d25d-44e3-b738-f989ce2e49ba', 'id': '56527b93-d25d-44e3-b738-f989ce2e49ba',
'ext': 'flv', 'ext': 'flv',
'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
'description': 'Khail and Ashley wonder what other civic woes can be solved by self-tweeting objects, investigate a new kind of VR camera and watch an origami robot self-assemble, walk, climb, dig and dissolve. #TDPothole', 'description': 'Khail and Ashley wonder what other civic woes can be solved by self-tweeting objects, investigate a new kind of VR camera and watch an origami robot self-assemble, walk, climb, dig and dissolve. #TDPothole',
'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40', 'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
'uploader': 'Ashley Esqueda', 'uploader': 'Ashley Esqueda',
'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)', 'duration': 1482,
},
'params': {
'skip_download': True, # requires rtmpdump
}, },
}] }]
@ -45,26 +36,13 @@ class CNETIE(InfoExtractor):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
data_json = self._html_search_regex( data_json = self._html_search_regex(
r"<div class=\"cnetVideoPlayer\"\s+.*?data-cnet-video-options='([^']+)'", r"data-cnet-video(?:-uvp)?-options='([^']+)'",
webpage, 'data json') webpage, 'data json')
data = json.loads(data_json) data = self._parse_json(data_json, display_id)
vdata = data['video'] vdata = data.get('video') or data['videos'][0]
if not vdata:
vdata = data['videos'][0]
if not vdata:
raise ExtractorError('Cannot find video data')
mpx_account = data['config']['players']['default']['mpx_account']
vid = vdata['files'].get('rtmp', vdata['files']['hds'])
tp_link = 'http://link.theplatform.com/s/%s/%s' % (mpx_account, vid)
video_id = vdata['id'] video_id = vdata['id']
title = vdata.get('headline') title = vdata['title']
if title is None:
title = vdata.get('title')
if title is None:
raise ExtractorError('Cannot find title!')
thumbnail = vdata.get('image', {}).get('path')
author = vdata.get('author') author = vdata.get('author')
if author: if author:
uploader = '%s %s' % (author['firstName'], author['lastName']) uploader = '%s %s' % (author['firstName'], author['lastName'])
@ -73,13 +51,34 @@ class CNETIE(InfoExtractor):
uploader = None uploader = None
uploader_id = None uploader_id = None
mpx_account = data['config']['uvpConfig']['default']['mpx_account']
metadata = self.get_metadata('%s/%s' % (mpx_account, list(vdata['files'].values())[0]), video_id)
description = vdata.get('description') or metadata.get('description')
duration = int_or_none(vdata.get('duration')) or metadata.get('duration')
formats = []
subtitles = {}
for (fkey, vid) in vdata['files'].items():
if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
continue
release_url = 'http://link.theplatform.com/s/%s/%s?format=SMIL&mbr=true' % (mpx_account, vid)
if fkey == 'hds':
release_url += '&manifest=f4m'
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)
formats.extend(tp_formats)
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
self._sort_formats(formats)
return { return {
'_type': 'url_transparent',
'url': tp_link,
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'title': title, 'title': title,
'description': description,
'thumbnail': metadata.get('thumbnail'),
'duration': duration,
'uploader': uploader, 'uploader': uploader,
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'thumbnail': thumbnail, 'subtitles': subtitles,
'formats': formats,
} }

View File

@ -30,6 +30,7 @@ from ..utils import (
clean_html, clean_html,
compiled_regex_type, compiled_regex_type,
determine_ext, determine_ext,
error_to_compat_str,
ExtractorError, ExtractorError,
fix_xml_ampersands, fix_xml_ampersands,
float_or_none, float_or_none,
@ -332,7 +333,8 @@ class InfoExtractor(object):
return False return False
if errnote is None: if errnote is None:
errnote = 'Unable to download webpage' errnote = 'Unable to download webpage'
errmsg = '%s: %s' % (errnote, compat_str(err))
errmsg = '%s: %s' % (errnote, error_to_compat_str(err))
if fatal: if fatal:
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err) raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
else: else:
@ -622,7 +624,7 @@ class InfoExtractor(object):
else: else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError) as err: except (IOError, netrc.NetrcParseError) as err:
self._downloader.report_warning('parsing .netrc: %s' % compat_str(err)) self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err))
return (username, password) return (username, password)

View File

@ -7,10 +7,10 @@ import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
ExtractorError,
determine_ext, determine_ext,
error_to_compat_str,
ExtractorError,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
sanitized_Request, sanitized_Request,
@ -278,7 +278,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id, 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
video_id, note=False) video_id, note=False)
except ExtractorError as err: except ExtractorError as err:
self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err)) self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
return {} return {}
info = json.loads(sub_list) info = json.loads(sub_list)
if (info['total'] > 0): if (info['total'] > 0):

View File

@ -3,7 +3,7 @@ from __future__ import unicode_literals
import itertools import itertools
from .common import InfoExtractor from .amp import AMPIE
from ..compat import ( from ..compat import (
compat_HTTPError, compat_HTTPError,
compat_urllib_parse, compat_urllib_parse,
@ -12,14 +12,11 @@ from ..compat import (
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
clean_html, clean_html,
determine_ext,
int_or_none,
parse_iso8601,
sanitized_Request, sanitized_Request,
) )
class DramaFeverBaseIE(InfoExtractor): class DramaFeverBaseIE(AMPIE):
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/' _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
_NETRC_MACHINE = 'dramafever' _NETRC_MACHINE = 'dramafever'
@ -80,60 +77,25 @@ class DramaFeverIE(DramaFeverBaseIE):
'timestamp': 1404336058, 'timestamp': 1404336058,
'upload_date': '20140702', 'upload_date': '20140702',
'duration': 343, 'duration': 343,
} },
'params': {
# m3u8 download
'skip_download': True,
},
} }
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url).replace('/', '.') video_id = self._match_id(url).replace('/', '.')
try: try:
feed = self._download_json( info = self._extract_feed_info(
'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id, 'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
video_id, 'Downloading episode JSON')['channel']['item']
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError): if isinstance(e.cause, compat_HTTPError):
raise ExtractorError( raise ExtractorError(
'Currently unavailable in your country.', expected=True) 'Currently unavailable in your country.', expected=True)
raise raise
media_group = feed.get('media-group', {})
formats = []
for media_content in media_group['media-content']:
src = media_content.get('@attributes', {}).get('url')
if not src:
continue
ext = determine_ext(src)
if ext == 'f4m':
formats.extend(self._extract_f4m_formats(
src, video_id, f4m_id='hds'))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
src, video_id, 'mp4', m3u8_id='hls'))
else:
formats.append({
'url': src,
})
self._sort_formats(formats)
title = media_group.get('media-title')
description = media_group.get('media-description')
duration = int_or_none(media_group['media-content'][0].get('@attributes', {}).get('duration'))
thumbnail = self._proto_relative_url(
media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url'))
timestamp = parse_iso8601(feed.get('pubDate'), ' ')
subtitles = {}
for media_subtitle in media_group.get('media-subTitle', []):
lang = media_subtitle.get('@attributes', {}).get('lang')
href = media_subtitle.get('@attributes', {}).get('href')
if not lang or not href:
continue
subtitles[lang] = [{
'ext': 'ttml',
'url': href,
}]
series_id, episode_number = video_id.split('.') series_id, episode_number = video_id.split('.')
episode_info = self._download_json( episode_info = self._download_json(
# We only need a single episode info, so restricting page size to one episode # We only need a single episode info, so restricting page size to one episode
@ -146,21 +108,12 @@ class DramaFeverIE(DramaFeverBaseIE):
if value: if value:
subfile = value[0].get('subfile') or value[0].get('new_subfile') subfile = value[0].get('subfile') or value[0].get('new_subfile')
if subfile and subfile != 'http://www.dramafever.com/st/': if subfile and subfile != 'http://www.dramafever.com/st/':
subtitles.setdefault('English', []).append({ info['subtitiles'].setdefault('English', []).append({
'ext': 'srt', 'ext': 'srt',
'url': subfile, 'url': subfile,
}) })
return { return info
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'duration': duration,
'formats': formats,
'subtitles': subtitles,
}
class DramaFeverSeriesIE(DramaFeverBaseIE): class DramaFeverSeriesIE(DramaFeverBaseIE):

View File

@ -7,11 +7,11 @@ import socket
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_http_client, compat_http_client,
compat_str,
compat_urllib_error, compat_urllib_error,
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
) )
from ..utils import ( from ..utils import (
error_to_compat_str,
ExtractorError, ExtractorError,
limit_length, limit_length,
sanitized_Request, sanitized_Request,
@ -116,7 +116,7 @@ class FacebookIE(InfoExtractor):
if re.search(r'id="checkpointSubmitButton"', check_response) is not None: if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.') self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning('unable to log in: %s' % compat_str(err)) self._downloader.report_warning('unable to log in: %s' % error_to_compat_str(err))
return return
def _real_initialize(self): def _real_initialize(self):

View File

@ -2,6 +2,11 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
xpath_element,
xpath_text,
int_or_none,
)
class FazIE(InfoExtractor): class FazIE(InfoExtractor):
@ -37,31 +42,32 @@ class FazIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
description = self._og_search_description(webpage)
config_xml_url = self._search_regex( config_xml_url = self._search_regex(
r'writeFLV\(\'(.+?)\',', webpage, 'config xml url') r'videoXMLURL\s*=\s*"([^"]+)', webpage, 'config xml url')
config = self._download_xml( config = self._download_xml(
config_xml_url, video_id, 'Downloading config xml') config_xml_url, video_id, 'Downloading config xml')
encodings = config.find('ENCODINGS') encodings = xpath_element(config, 'ENCODINGS', 'encodings', True)
formats = [] formats = []
for pref, code in enumerate(['LOW', 'HIGH', 'HQ']): for pref, code in enumerate(['LOW', 'HIGH', 'HQ']):
encoding = encodings.find(code) encoding = xpath_element(encodings, code)
if encoding is None: if encoding:
continue encoding_url = xpath_text(encoding, 'FILENAME')
encoding_url = encoding.find('FILENAME').text if encoding_url:
formats.append({ formats.append({
'url': encoding_url, 'url': encoding_url,
'format_id': code.lower(), 'format_id': code.lower(),
'quality': pref, 'quality': pref,
}) 'tbr': int_or_none(xpath_text(encoding, 'AVERAGEBITRATE')),
})
self._sort_formats(formats) self._sort_formats(formats)
descr = self._html_search_regex(
r'<p class="Content Copy">(.*?)</p>', webpage, 'description', fatal=False)
return { return {
'id': video_id, 'id': video_id,
'title': self._og_search_title(webpage), 'title': self._og_search_title(webpage),
'formats': formats, 'formats': formats,
'description': descr, 'description': description.strip() if description else None,
'thumbnail': config.find('STILL/STILL_BIG').text, 'thumbnail': xpath_text(config, 'STILL/STILL_BIG'),
'duration': int_or_none(xpath_text(config, 'DURATION')),
} }

View File

@ -1,12 +1,10 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, clean_html,
determine_ext, determine_ext,
ExtractorError, js_to_json,
) )
@ -32,24 +30,22 @@ class FKTVIE(InfoExtractor):
'http://fernsehkritik.tv/folge-%s/play' % episode, episode) 'http://fernsehkritik.tv/folge-%s/play' % episode, episode)
title = clean_html(self._html_search_regex( title = clean_html(self._html_search_regex(
'<h3>([^<]+)</h3>', webpage, 'title')) '<h3>([^<]+)</h3>', webpage, 'title'))
matches = re.search( thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
r'(?s)<video(?:(?!poster)[^>])+(?:poster="([^"]+)")?[^>]*>(.*)</video>', sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json)
webpage)
if matches is None:
raise ExtractorError('Unable to extract the video')
poster, sources = matches.groups() formats = []
if poster is None: for source in sources:
self.report_warning('unable to extract thumbnail') furl = source.get('src')
if furl:
formats.append({
'url': furl,
'format_id': determine_ext(furl),
})
self._sort_formats(formats)
urls = re.findall(r'<source[^>]+src="([^"]+)"', sources)
formats = [{
'url': furl,
'format_id': determine_ext(furl),
} for furl in urls]
return { return {
'id': episode, 'id': episode,
'title': title, 'title': title,
'formats': formats, 'formats': formats,
'thumbnail': poster, 'thumbnail': thumbnail,
} }

View File

@ -1,67 +1,93 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_parse
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
find_xpath_attr, int_or_none,
sanitized_Request, qualities,
) )
class FlickrIE(InfoExtractor): class FlickrIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*' _VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/[\w\-_@]+/(?P<id>\d+)'
_TEST = { _TEST = {
'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/', 'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b', 'md5': '164fe3fa6c22e18d448d4d5af2330f31',
'info_dict': { 'info_dict': {
'id': '5645318632', 'id': '5645318632',
'ext': 'mp4', 'ext': 'mpg',
"description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.", 'description': 'Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.',
"uploader_id": "forestwander-nature-pictures", 'title': 'Dark Hollow Waterfalls',
"title": "Dark Hollow Waterfalls" 'duration': 19,
'timestamp': 1303528740,
'upload_date': '20110423',
'uploader_id': '10922353@N03',
'uploader': 'Forest Wander',
'comment_count': int,
'view_count': int,
'tags': list,
} }
} }
def _real_extract(self, url): _API_BASE_URL = 'https://api.flickr.com/services/rest?'
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') def _call_api(self, method, video_id, api_key, note, secret=None):
video_uploader_id = mobj.group('uploader_id') query = {
webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id 'photo_id': video_id,
req = sanitized_Request(webpage_url) 'method': 'flickr.%s' % method,
req.add_header( 'api_key': api_key,
'User-Agent', 'format': 'json',
# it needs a more recent version 'nojsoncallback': 1,
'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20150101 Firefox/38.0 (Chrome)')
webpage = self._download_webpage(req, video_id)
secret = self._search_regex(r'secret"\s*:\s*"(\w+)"', webpage, 'secret')
first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
first_xml = self._download_xml(first_url, video_id, 'Downloading first data webpage')
node_id = find_xpath_attr(
first_xml, './/{http://video.yahoo.com/YEP/1.0/}Item', 'id',
'id').text
second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
second_xml = self._download_xml(second_url, video_id, 'Downloading second data webpage')
self.report_extraction(video_id)
stream = second_xml.find('.//STREAM')
if stream is None:
raise ExtractorError('Unable to extract video url')
video_url = stream.attrib['APP'] + stream.attrib['FULLPATH']
return {
'id': video_id,
'url': video_url,
'ext': 'mp4',
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'uploader_id': video_uploader_id,
} }
if secret:
query['secret'] = secret
data = self._download_json(self._API_BASE_URL + compat_urllib_parse.urlencode(query), video_id, note)
if data['stat'] != 'ok':
raise ExtractorError(data['message'])
return data
def _real_extract(self, url):
video_id = self._match_id(url)
api_key = self._download_json(
'https://www.flickr.com/hermes_error_beacon.gne', video_id,
'Downloading api key')['site_key']
video_info = self._call_api(
'photos.getInfo', video_id, api_key, 'Downloading video info')['photo']
if video_info['media'] == 'video':
streams = self._call_api(
'video.getStreamInfo', video_id, api_key,
'Downloading streams info', video_info['secret'])['streams']
preference = qualities(
['288p', 'iphone_wifi', '100', '300', '700', '360p', 'appletv', '720p', '1080p', 'orig'])
formats = []
for stream in streams['stream']:
stream_type = str(stream.get('type'))
formats.append({
'format_id': stream_type,
'url': stream['_content'],
'preference': preference(stream_type),
})
self._sort_formats(formats)
owner = video_info.get('owner', {})
return {
'id': video_id,
'title': video_info['title']['_content'],
'description': video_info.get('description', {}).get('_content'),
'formats': formats,
'timestamp': int_or_none(video_info.get('dateuploaded')),
'duration': int_or_none(video_info.get('video', {}).get('duration')),
'uploader_id': owner.get('nsid'),
'uploader': owner.get('realname'),
'comment_count': int_or_none(video_info.get('comments', {}).get('_content')),
'view_count': int_or_none(video_info.get('views')),
'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])]
}
else:
raise ExtractorError('not a video', expected=True)

View File

@ -2,14 +2,10 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .amp import AMPIE
from ..utils import (
parse_iso8601,
int_or_none,
)
class FoxNewsIE(InfoExtractor): class FoxNewsIE(AMPIE):
IE_DESC = 'Fox News and Fox Business Video' IE_DESC = 'Fox News and Fox Business Video'
_VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)' _VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
_TESTS = [ _TESTS = [
@ -20,10 +16,10 @@ class FoxNewsIE(InfoExtractor):
'id': '3937480', 'id': '3937480',
'ext': 'flv', 'ext': 'flv',
'title': 'Frozen in Time', 'title': 'Frozen in Time',
'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler', 'description': '16-year-old girl is size of toddler',
'duration': 265, 'duration': 265,
'timestamp': 1304411491, # 'timestamp': 1304411491,
'upload_date': '20110503', # 'upload_date': '20110503',
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
}, },
}, },
@ -34,10 +30,10 @@ class FoxNewsIE(InfoExtractor):
'id': '3922535568001', 'id': '3922535568001',
'ext': 'mp4', 'ext': 'mp4',
'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal", 'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
'description': "Congressman discusses the president's executive action", 'description': "Congressman discusses president's plan",
'duration': 292, 'duration': 292,
'timestamp': 1417662047, # 'timestamp': 1417662047,
'upload_date': '20141204', # 'upload_date': '20141204',
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
}, },
}, },
@ -52,52 +48,9 @@ class FoxNewsIE(InfoExtractor):
] ]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) host, video_id = re.match(self._VALID_URL, url).groups()
video_id = mobj.group('id')
host = mobj.group('host')
video = self._download_json( info = self._extract_feed_info(
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id), video_id) 'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))
info['id'] = video_id
item = video['channel']['item'] return info
title = item['title']
description = item['description']
timestamp = parse_iso8601(item['dc-date'])
media_group = item['media-group']
duration = None
formats = []
for media in media_group['media-content']:
attributes = media['@attributes']
video_url = attributes['url']
if video_url.endswith('.f4m'):
formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id))
elif video_url.endswith('.m3u8'):
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv'))
elif not video_url.endswith('.smil'):
duration = int_or_none(attributes.get('duration'))
formats.append({
'url': video_url,
'format_id': media['media-category']['@attributes']['label'],
'preference': 1,
'vbr': int_or_none(attributes.get('bitrate')),
'filesize': int_or_none(attributes.get('fileSize'))
})
self._sort_formats(formats)
media_thumbnail = media_group['media-thumbnail']['@attributes']
thumbnails = [{
'url': media_thumbnail['url'],
'width': int_or_none(media_thumbnail.get('width')),
'height': int_or_none(media_thumbnail.get('height')),
}] if media_thumbnail else []
return {
'id': video_id,
'title': title,
'description': description,
'duration': duration,
'timestamp': timestamp,
'formats': formats,
'thumbnails': thumbnails,
}

View File

@ -44,7 +44,6 @@ from .myvi import MyviIE
from .condenast import CondeNastIE from .condenast import CondeNastIE
from .udn import UDNEmbedIE from .udn import UDNEmbedIE
from .senateisvp import SenateISVPIE from .senateisvp import SenateISVPIE
from .bliptv import BlipTVIE
from .svt import SVTIE from .svt import SVTIE
from .pornhub import PornHubIE from .pornhub import PornHubIE
from .xhamster import XHamsterEmbedIE from .xhamster import XHamsterEmbedIE
@ -55,6 +54,8 @@ from .snagfilms import SnagFilmsEmbedIE
from .screenwavemedia import ScreenwaveMediaIE from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE from .mtv import MTVServicesEmbeddedIE
from .pladform import PladformIE from .pladform import PladformIE
from .googledrive import GoogleDriveIE
from .jwplatform import JWPlatformIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -1440,11 +1441,6 @@ class GenericIE(InfoExtractor):
'id': match.group('id') 'id': match.group('id')
} }
# Look for embedded blip.tv player
bliptv_url = BlipTVIE._extract_url(webpage)
if bliptv_url:
return self.url_result(bliptv_url, 'BlipTV')
# Look for SVT player # Look for SVT player
svt_url = SVTIE._extract_url(webpage) svt_url = SVTIE._extract_url(webpage)
if svt_url: if svt_url:
@ -1769,6 +1765,11 @@ class GenericIE(InfoExtractor):
if nbc_sports_url: if nbc_sports_url:
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
# Look for Google Drive embeds
google_drive_url = GoogleDriveIE._extract_url(webpage)
if google_drive_url:
return self.url_result(google_drive_url, 'GoogleDrive')
# Look for UDN embeds # Look for UDN embeds
mobj = re.search( mobj = re.search(
r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage) r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
@ -1796,6 +1797,11 @@ class GenericIE(InfoExtractor):
if snagfilms_url: if snagfilms_url:
return self.url_result(snagfilms_url) return self.url_result(snagfilms_url)
# Look for JWPlatform embeds
jwplatform_url = JWPlatformIE._extract_url(webpage)
if jwplatform_url:
return self.url_result(jwplatform_url, 'JWPlatform')
# Look for ScreenwaveMedia embeds # Look for ScreenwaveMedia embeds
mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage) mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
if mobj is not None: if mobj is not None:

View File

@ -0,0 +1,88 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
)
class GoogleDriveIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28})'
_TEST = {
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
'md5': '881f7700aec4f538571fa1e0eed4a7b6',
'info_dict': {
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
'ext': 'mp4',
'title': 'Big Buck Bunny.mp4',
'duration': 46,
}
}
_FORMATS_EXT = {
'5': 'flv',
'6': 'flv',
'13': '3gp',
'17': '3gp',
'18': 'mp4',
'22': 'mp4',
'34': 'flv',
'35': 'flv',
'36': '3gp',
'37': 'mp4',
'38': 'mp4',
'43': 'webm',
'44': 'webm',
'45': 'webm',
'46': 'webm',
'59': 'mp4',
}
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
webpage)
if mobj:
return 'https://drive.google.com/file/d/%s' % mobj.group('id')
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape')
reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
if reason:
raise ExtractorError(reason)
title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title')
duration = int_or_none(self._search_regex(
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None))
fmt_stream_map = self._search_regex(
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
formats = []
for fmt, fmt_stream in zip(fmt_list, fmt_stream_map):
fmt_id, fmt_url = fmt_stream.split('|')
resolution = fmt.split('/')[1]
width, height = resolution.split('x')
formats.append({
'url': fmt_url,
'format_id': fmt_id,
'resolution': resolution,
'width': int_or_none(width),
'height': int_or_none(height),
'ext': self._FORMATS_EXT[fmt_id],
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'thumbnail': self._og_search_thumbnail(webpage),
'duration': duration,
'formats': formats,
}

View File

@ -0,0 +1,55 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
xpath_element,
xpath_text,
int_or_none,
parse_duration,
)
class GPUTechConfIE(InfoExtractor):
_VALID_URL = r'https?://on-demand\.gputechconf\.com/gtc/2015/video/S(?P<id>\d+)\.html'
_TEST = {
'url': 'http://on-demand.gputechconf.com/gtc/2015/video/S5156.html',
'md5': 'a8862a00a0fd65b8b43acc5b8e33f798',
'info_dict': {
'id': '5156',
'ext': 'mp4',
'title': 'Coordinating More Than 3 Million CUDA Threads for Social Network Analysis',
'duration': 1219,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
root_path = self._search_regex(r'var\s+rootPath\s*=\s*"([^"]+)', webpage, 'root path', 'http://evt.dispeak.com/nvidia/events/gtc15/')
xml_file_id = self._search_regex(r'var\s+xmlFileId\s*=\s*"([^"]+)', webpage, 'xml file id')
doc = self._download_xml('%sxml/%s.xml' % (root_path, xml_file_id), video_id)
metadata = xpath_element(doc, 'metadata')
http_host = xpath_text(metadata, 'httpHost', 'http host', True)
mbr_videos = xpath_element(metadata, 'MBRVideos')
formats = []
for mbr_video in mbr_videos.findall('MBRVideo'):
stream_name = xpath_text(mbr_video, 'streamName')
if stream_name:
formats.append({
'url': 'http://%s/%s' % (http_host, stream_name.replace('mp4:', '')),
'tbr': int_or_none(xpath_text(mbr_video, 'bitrate')),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': xpath_text(metadata, 'title'),
'duration': parse_duration(xpath_text(metadata, 'endTime')),
'creator': xpath_text(metadata, 'speaker'),
'formats': formats,
}

View File

@ -205,8 +205,8 @@ class IqiyiIE(InfoExtractor):
def get_enc_key(self, swf_url, video_id): def get_enc_key(self, swf_url, video_id):
# TODO: automatic key extraction # TODO: automatic key extraction
# last update at 2015-12-06 for Zombie::bite # last update at 2015-12-18 for Zombie::bite
enc_key = '3719f6a1da83ee0aee3488d8802d7696'[::-1] enc_key = '8b6b683780897eb8d9a48a02ccc4817d'[::-1]
return enc_key return enc_key
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -0,0 +1,71 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
class JWPlatformIE(InfoExtractor):
_VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
_TEST = {
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
'info_dict': {
'id': 'nPripu9l',
'ext': 'mov',
'title': 'Big Buck Bunny Trailer',
'description': 'Big Buck Bunny is a short animated film by the Blender Institute. It is made using free and open source software.',
'upload_date': '20081127',
'timestamp': 1227796140,
}
}
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<script[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})',
webpage)
if mobj:
return mobj.group('url')
def _real_extract(self, url):
video_id = self._match_id(url)
json_data = self._download_json('http://content.jwplatform.com/feeds/%s.json' % video_id, video_id)
video_data = json_data['playlist'][0]
subtitles = {}
for track in video_data['tracks']:
if track['kind'] == 'captions':
subtitles[track['label']] = [{'url': self._proto_relative_url(track['file'])}]
formats = []
for source in video_data['sources']:
source_url = self._proto_relative_url(source['file'])
source_type = source.get('type') or ''
if source_type == 'application/vnd.apple.mpegurl':
m3u8_formats = self._extract_m3u8_formats(source_url, video_id, 'mp4', 'm3u8_native', fatal=None)
if m3u8_formats:
formats.extend(m3u8_formats)
elif source_type.startswith('audio'):
formats.append({
'url': source_url,
'vcodec': 'none',
})
else:
formats.append({
'url': source_url,
'width': int_or_none(source.get('width')),
'height': int_or_none(source.get('height')),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': video_data['title'],
'description': video_data.get('description'),
'thumbnail': self._proto_relative_url(video_data.get('image')),
'timestamp': int_or_none(video_data.get('pubdate')),
'subtitles': subtitles,
'formats': formats,
}

View File

@ -0,0 +1,32 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class MakerTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
_TEST = {
'url': 'http://www.maker.tv/video/Fh3QgymL9gsc',
'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
'info_dict': {
'id': 'Fh3QgymL9gsc',
'ext': 'mp4',
'title': 'Maze Runner: The Scorch Trials Official Movie Review',
'description': 'md5:11ff3362d7ef1d679fdb649f6413975a',
'upload_date': '20150918',
'timestamp': 1442549540,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
jwplatform_id = self._search_regex(r'jw_?id="([^"]+)"', webpage, 'jwplatform id')
return {
'_type': 'url_transparent',
'id': video_id,
'url': 'jwplatform:%s' % jwplatform_id,
'ie_key': 'JWPlatform',
}

View File

@ -3,10 +3,7 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_HTTPError
compat_str,
compat_HTTPError,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
find_xpath_attr, find_xpath_attr,
@ -189,7 +186,7 @@ class NBCNewsIE(InfoExtractor):
'title': info.find('headline').text, 'title': info.find('headline').text,
'ext': 'flv', 'ext': 'flv',
'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text, 'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
'description': compat_str(info.find('caption').text), 'description': info.find('caption').text,
'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text, 'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
} }
else: else:

View File

@ -88,10 +88,10 @@ class NDRIE(NDRBaseIE):
'embedURL', webpage, 'embed URL', fatal=True) 'embedURL', webpage, 'embed URL', fatal=True)
description = self._search_regex( description = self._search_regex(
r'<p[^>]+itemprop="description">([^<]+)</p>', r'<p[^>]+itemprop="description">([^<]+)</p>',
webpage, 'description', fatal=False) webpage, 'description', default=None) or self._og_search_description(webpage)
timestamp = parse_iso8601( timestamp = parse_iso8601(
self._search_regex( self._search_regex(
r'<span itemprop="datePublished" content="([^"]+)">', r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="([^"]+)"',
webpage, 'upload date', fatal=False)) webpage, 'upload date', fatal=False))
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',

View File

@ -9,6 +9,7 @@ from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_str, compat_str,
compat_urllib_parse, compat_urllib_parse,
compat_urlparse,
) )
from ..utils import ( from ..utils import (
clean_html, clean_html,
@ -82,14 +83,21 @@ class NocoIE(InfoExtractor):
if 'erreur' in login: if 'erreur' in login:
raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True) raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
@staticmethod
def _ts():
return int(time.time() * 1000)
def _call_api(self, path, video_id, note, sub_lang=None): def _call_api(self, path, video_id, note, sub_lang=None):
ts = compat_str(int(time.time() * 1000)) ts = compat_str(self._ts() + self._ts_offset)
tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest() tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
url = self._API_URL_TEMPLATE % (path, ts, tk) url = self._API_URL_TEMPLATE % (path, ts, tk)
if sub_lang: if sub_lang:
url += self._SUB_LANG_TEMPLATE % sub_lang url += self._SUB_LANG_TEMPLATE % sub_lang
resp = self._download_json(url, video_id, note) request = sanitized_Request(url)
request.add_header('Referer', self._referer)
resp = self._download_json(request, video_id, note)
if isinstance(resp, dict) and resp.get('error'): if isinstance(resp, dict) and resp.get('error'):
self._raise_error(resp['error'], resp['description']) self._raise_error(resp['error'], resp['description'])
@ -102,8 +110,22 @@ class NocoIE(InfoExtractor):
expected=True) expected=True)
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
# Timestamp adjustment offset between server time and local time
# must be calculated in order to use timestamps closest to server's
# in all API requests (see https://github.com/rg3/youtube-dl/issues/7864)
webpage = self._download_webpage(url, video_id)
player_url = self._search_regex(
r'(["\'])(?P<player>https?://noco\.tv/(?:[^/]+/)+NocoPlayer.+?\.swf.*?)\1',
webpage, 'noco player', group='player',
default='http://noco.tv/cdata/js/player/NocoPlayer-v1.2.40.swf')
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(player_url).query)
ts = int_or_none(qs.get('ts', [None])[0])
self._ts_offset = ts - self._ts() if ts else 0
self._referer = player_url
medias = self._call_api( medias = self._call_api(
'shows/%s/medias' % video_id, 'shows/%s/medias' % video_id,
@ -155,8 +177,8 @@ class NocoIE(InfoExtractor):
'format_id': format_id_extended, 'format_id': format_id_extended,
'width': int_or_none(fmt.get('res_width')), 'width': int_or_none(fmt.get('res_width')),
'height': int_or_none(fmt.get('res_lines')), 'height': int_or_none(fmt.get('res_lines')),
'abr': int_or_none(fmt.get('audiobitrate')), 'abr': int_or_none(fmt.get('audiobitrate'), 1000),
'vbr': int_or_none(fmt.get('videobitrate')), 'vbr': int_or_none(fmt.get('videobitrate'), 1000),
'filesize': int_or_none(fmt.get('filesize')), 'filesize': int_or_none(fmt.get('filesize')),
'format_note': qualities[format_id].get('quality_name'), 'format_note': qualities[format_id].get('quality_name'),
'quality': qualities[format_id].get('priority'), 'quality': qualities[format_id].get('priority'),

View File

@ -1,7 +1,10 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
from .brightcove import BrightcoveLegacyIE from .brightcove import (
BrightcoveLegacyIE,
BrightcoveNewIE,
)
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import compat_str
from ..utils import ( from ..utils import (
@ -23,9 +26,12 @@ class NownessBaseIE(InfoExtractor):
note='Downloading player JavaScript', note='Downloading player JavaScript',
errnote='Unable to download player JavaScript') errnote='Unable to download player JavaScript')
bc_url = BrightcoveLegacyIE._extract_brightcove_url(player_code) bc_url = BrightcoveLegacyIE._extract_brightcove_url(player_code)
if bc_url is None: if bc_url:
raise ExtractorError('Could not find player definition') return self.url_result(bc_url, BrightcoveLegacyIE.ie_key())
return self.url_result(bc_url, 'BrightcoveLegacy') bc_url = BrightcoveNewIE._extract_url(player_code)
if bc_url:
return self.url_result(bc_url, BrightcoveNewIE.ie_key())
raise ExtractorError('Could not find player definition')
elif source == 'vimeo': elif source == 'vimeo':
return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo') return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
elif source == 'youtube': elif source == 'youtube':

View File

@ -16,165 +16,165 @@ from ..utils import (
class PBSIE(InfoExtractor): class PBSIE(InfoExtractor):
_STATIONS = ( _STATIONS = (
('video.pbs.org', 'PBS: Public Broadcasting Service'), # http://www.pbs.org/ (r'(?:video|www)\.pbs\.org', 'PBS: Public Broadcasting Service'), # http://www.pbs.org/
('video.aptv.org', 'APT - Alabama Public Television (WBIQ)'), # http://aptv.org/ (r'video\.aptv\.org', 'APT - Alabama Public Television (WBIQ)'), # http://aptv.org/
('video.gpb.org', 'GPB/Georgia Public Broadcasting (WGTV)'), # http://www.gpb.org/ (r'video\.gpb\.org', 'GPB/Georgia Public Broadcasting (WGTV)'), # http://www.gpb.org/
('video.mpbonline.org', 'Mississippi Public Broadcasting (WMPN)'), # http://www.mpbonline.org (r'video\.mpbonline\.org', 'Mississippi Public Broadcasting (WMPN)'), # http://www.mpbonline.org
('video.wnpt.org', 'Nashville Public Television (WNPT)'), # http://www.wnpt.org (r'video\.wnpt\.org', 'Nashville Public Television (WNPT)'), # http://www.wnpt.org
('video.wfsu.org', 'WFSU-TV (WFSU)'), # http://wfsu.org/ (r'video\.wfsu\.org', 'WFSU-TV (WFSU)'), # http://wfsu.org/
('video.wsre.org', 'WSRE (WSRE)'), # http://www.wsre.org (r'video\.wsre\.org', 'WSRE (WSRE)'), # http://www.wsre.org
('video.wtcitv.org', 'WTCI (WTCI)'), # http://www.wtcitv.org (r'video\.wtcitv\.org', 'WTCI (WTCI)'), # http://www.wtcitv.org
('video.pba.org', 'WPBA/Channel 30 (WPBA)'), # http://pba.org/ (r'video\.pba\.org', 'WPBA/Channel 30 (WPBA)'), # http://pba.org/
('video.alaskapublic.org', 'Alaska Public Media (KAKM)'), # http://alaskapublic.org/kakm (r'video\.alaskapublic\.org', 'Alaska Public Media (KAKM)'), # http://alaskapublic.org/kakm
# ('kuac.org', 'KUAC (KUAC)'), # http://kuac.org/kuac-tv/ # (r'kuac\.org', 'KUAC (KUAC)'), # http://kuac.org/kuac-tv/
# ('ktoo.org', '360 North (KTOO)'), # http://www.ktoo.org/ # (r'ktoo\.org', '360 North (KTOO)'), # http://www.ktoo.org/
# ('azpm.org', 'KUAT 6 (KUAT)'), # http://www.azpm.org/ # (r'azpm\.org', 'KUAT 6 (KUAT)'), # http://www.azpm.org/
('video.azpbs.org', 'Arizona PBS (KAET)'), # http://www.azpbs.org (r'video\.azpbs\.org', 'Arizona PBS (KAET)'), # http://www.azpbs.org
('portal.knme.org', 'KNME-TV/Channel 5 (KNME)'), # http://www.newmexicopbs.org/ (r'portal\.knme\.org', 'KNME-TV/Channel 5 (KNME)'), # http://www.newmexicopbs.org/
('video.vegaspbs.org', 'Vegas PBS (KLVX)'), # http://vegaspbs.org/ (r'video\.vegaspbs\.org', 'Vegas PBS (KLVX)'), # http://vegaspbs.org/
('watch.aetn.org', 'AETN/ARKANSAS ETV NETWORK (KETS)'), # http://www.aetn.org/ (r'watch\.aetn\.org', 'AETN/ARKANSAS ETV NETWORK (KETS)'), # http://www.aetn.org/
('video.ket.org', 'KET (WKLE)'), # http://www.ket.org/ (r'video\.ket\.org', 'KET (WKLE)'), # http://www.ket.org/
('video.wkno.org', 'WKNO/Channel 10 (WKNO)'), # http://www.wkno.org/ (r'video\.wkno\.org', 'WKNO/Channel 10 (WKNO)'), # http://www.wkno.org/
('video.lpb.org', 'LPB/LOUISIANA PUBLIC BROADCASTING (WLPB)'), # http://www.lpb.org/ (r'video\.lpb\.org', 'LPB/LOUISIANA PUBLIC BROADCASTING (WLPB)'), # http://www.lpb.org/
('videos.oeta.tv', 'OETA (KETA)'), # http://www.oeta.tv (r'videos\.oeta\.tv', 'OETA (KETA)'), # http://www.oeta.tv
('video.optv.org', 'Ozarks Public Television (KOZK)'), # http://www.optv.org/ (r'video\.optv\.org', 'Ozarks Public Television (KOZK)'), # http://www.optv.org/
('watch.wsiu.org', 'WSIU Public Broadcasting (WSIU)'), # http://www.wsiu.org/ (r'watch\.wsiu\.org', 'WSIU Public Broadcasting (WSIU)'), # http://www.wsiu.org/
('video.keet.org', 'KEET TV (KEET)'), # http://www.keet.org (r'video\.keet\.org', 'KEET TV (KEET)'), # http://www.keet.org
('pbs.kixe.org', 'KIXE/Channel 9 (KIXE)'), # http://kixe.org/ (r'pbs\.kixe\.org', 'KIXE/Channel 9 (KIXE)'), # http://kixe.org/
('video.kpbs.org', 'KPBS San Diego (KPBS)'), # http://www.kpbs.org/ (r'video\.kpbs\.org', 'KPBS San Diego (KPBS)'), # http://www.kpbs.org/
('video.kqed.org', 'KQED (KQED)'), # http://www.kqed.org (r'video\.kqed\.org', 'KQED (KQED)'), # http://www.kqed.org
('vids.kvie.org', 'KVIE Public Television (KVIE)'), # http://www.kvie.org (r'vids\.kvie\.org', 'KVIE Public Television (KVIE)'), # http://www.kvie.org
('video.pbssocal.org', 'PBS SoCal/KOCE (KOCE)'), # http://www.pbssocal.org/ (r'video\.pbssocal\.org', 'PBS SoCal/KOCE (KOCE)'), # http://www.pbssocal.org/
('video.valleypbs.org', 'ValleyPBS (KVPT)'), # http://www.valleypbs.org/ (r'video\.valleypbs\.org', 'ValleyPBS (KVPT)'), # http://www.valleypbs.org/
('video.cptv.org', 'CONNECTICUT PUBLIC TELEVISION (WEDH)'), # http://cptv.org (r'video\.cptv\.org', 'CONNECTICUT PUBLIC TELEVISION (WEDH)'), # http://cptv.org
('watch.knpb.org', 'KNPB Channel 5 (KNPB)'), # http://www.knpb.org/ (r'watch\.knpb\.org', 'KNPB Channel 5 (KNPB)'), # http://www.knpb.org/
('video.soptv.org', 'SOPTV (KSYS)'), # http://www.soptv.org (r'video\.soptv\.org', 'SOPTV (KSYS)'), # http://www.soptv.org
# ('klcs.org', 'KLCS/Channel 58 (KLCS)'), # http://www.klcs.org # (r'klcs\.org', 'KLCS/Channel 58 (KLCS)'), # http://www.klcs.org
# ('krcb.org', 'KRCB Television & Radio (KRCB)'), # http://www.krcb.org # (r'krcb\.org', 'KRCB Television & Radio (KRCB)'), # http://www.krcb.org
# ('kvcr.org', 'KVCR TV/DT/FM :: Vision for the Future (KVCR)'), # http://kvcr.org # (r'kvcr\.org', 'KVCR TV/DT/FM :: Vision for the Future (KVCR)'), # http://kvcr.org
('video.rmpbs.org', 'Rocky Mountain PBS (KRMA)'), # http://www.rmpbs.org (r'video\.rmpbs\.org', 'Rocky Mountain PBS (KRMA)'), # http://www.rmpbs.org
('video.kenw.org', 'KENW-TV3 (KENW)'), # http://www.kenw.org (r'video\.kenw\.org', 'KENW-TV3 (KENW)'), # http://www.kenw.org
('video.kued.org', 'KUED Channel 7 (KUED)'), # http://www.kued.org (r'video\.kued\.org', 'KUED Channel 7 (KUED)'), # http://www.kued.org
('video.wyomingpbs.org', 'Wyoming PBS (KCWC)'), # http://www.wyomingpbs.org (r'video\.wyomingpbs\.org', 'Wyoming PBS (KCWC)'), # http://www.wyomingpbs.org
('video.cpt12.org', 'Colorado Public Television / KBDI 12 (KBDI)'), # http://www.cpt12.org/ (r'video\.cpt12\.org', 'Colorado Public Television / KBDI 12 (KBDI)'), # http://www.cpt12.org/
('video.kbyueleven.org', 'KBYU-TV (KBYU)'), # http://www.kbyutv.org/ (r'video\.kbyueleven\.org', 'KBYU-TV (KBYU)'), # http://www.kbyutv.org/
('video.thirteen.org', 'Thirteen/WNET New York (WNET)'), # http://www.thirteen.org (r'video\.thirteen\.org', 'Thirteen/WNET New York (WNET)'), # http://www.thirteen.org
('video.wgbh.org', 'WGBH/Channel 2 (WGBH)'), # http://wgbh.org (r'video\.wgbh\.org', 'WGBH/Channel 2 (WGBH)'), # http://wgbh.org
('video.wgby.org', 'WGBY (WGBY)'), # http://www.wgby.org (r'video\.wgby\.org', 'WGBY (WGBY)'), # http://www.wgby.org
('watch.njtvonline.org', 'NJTV Public Media NJ (WNJT)'), # http://www.njtvonline.org/ (r'watch\.njtvonline\.org', 'NJTV Public Media NJ (WNJT)'), # http://www.njtvonline.org/
# ('ripbs.org', 'Rhode Island PBS (WSBE)'), # http://www.ripbs.org/home/ # (r'ripbs\.org', 'Rhode Island PBS (WSBE)'), # http://www.ripbs.org/home/
('watch.wliw.org', 'WLIW21 (WLIW)'), # http://www.wliw.org/ (r'watch\.wliw\.org', 'WLIW21 (WLIW)'), # http://www.wliw.org/
('video.mpt.tv', 'mpt/Maryland Public Television (WMPB)'), # http://www.mpt.org (r'video\.mpt\.tv', 'mpt/Maryland Public Television (WMPB)'), # http://www.mpt.org
('watch.weta.org', 'WETA Television and Radio (WETA)'), # http://www.weta.org (r'watch\.weta\.org', 'WETA Television and Radio (WETA)'), # http://www.weta.org
('video.whyy.org', 'WHYY (WHYY)'), # http://www.whyy.org (r'video\.whyy\.org', 'WHYY (WHYY)'), # http://www.whyy.org
('video.wlvt.org', 'PBS 39 (WLVT)'), # http://www.wlvt.org/ (r'video\.wlvt\.org', 'PBS 39 (WLVT)'), # http://www.wlvt.org/
('video.wvpt.net', 'WVPT - Your Source for PBS and More! (WVPT)'), # http://www.wvpt.net (r'video\.wvpt\.net', 'WVPT - Your Source for PBS and More! (WVPT)'), # http://www.wvpt.net
('video.whut.org', 'Howard University Television (WHUT)'), # http://www.whut.org (r'video\.whut\.org', 'Howard University Television (WHUT)'), # http://www.whut.org
('video.wedu.org', 'WEDU PBS (WEDU)'), # http://www.wedu.org (r'video\.wedu\.org', 'WEDU PBS (WEDU)'), # http://www.wedu.org
('video.wgcu.org', 'WGCU Public Media (WGCU)'), # http://www.wgcu.org/ (r'video\.wgcu\.org', 'WGCU Public Media (WGCU)'), # http://www.wgcu.org/
# ('wjct.org', 'WJCT Public Broadcasting (WJCT)'), # http://www.wjct.org # (r'wjct\.org', 'WJCT Public Broadcasting (WJCT)'), # http://www.wjct.org
('video.wpbt2.org', 'WPBT2 (WPBT)'), # http://www.wpbt2.org (r'video\.wpbt2\.org', 'WPBT2 (WPBT)'), # http://www.wpbt2.org
('video.wucftv.org', 'WUCF TV (WUCF)'), # http://wucftv.org (r'video\.wucftv\.org', 'WUCF TV (WUCF)'), # http://wucftv.org
('video.wuft.org', 'WUFT/Channel 5 (WUFT)'), # http://www.wuft.org (r'video\.wuft\.org', 'WUFT/Channel 5 (WUFT)'), # http://www.wuft.org
('watch.wxel.org', 'WXEL/Channel 42 (WXEL)'), # http://www.wxel.org/home/ (r'watch\.wxel\.org', 'WXEL/Channel 42 (WXEL)'), # http://www.wxel.org/home/
('video.wlrn.org', 'WLRN/Channel 17 (WLRN)'), # http://www.wlrn.org/ (r'video\.wlrn\.org', 'WLRN/Channel 17 (WLRN)'), # http://www.wlrn.org/
('video.wusf.usf.edu', 'WUSF Public Broadcasting (WUSF)'), # http://wusf.org/ (r'video\.wusf\.usf\.edu', 'WUSF Public Broadcasting (WUSF)'), # http://wusf.org/
('video.scetv.org', 'ETV (WRLK)'), # http://www.scetv.org (r'video\.scetv\.org', 'ETV (WRLK)'), # http://www.scetv.org
('video.unctv.org', 'UNC-TV (WUNC)'), # http://www.unctv.org/ (r'video\.unctv\.org', 'UNC-TV (WUNC)'), # http://www.unctv.org/
# ('pbsguam.org', 'PBS Guam (KGTF)'), # http://www.pbsguam.org/ # (r'pbsguam\.org', 'PBS Guam (KGTF)'), # http://www.pbsguam.org/
('video.pbshawaii.org', 'PBS Hawaii - Oceanic Cable Channel 10 (KHET)'), # http://www.pbshawaii.org/ (r'video\.pbshawaii\.org', 'PBS Hawaii - Oceanic Cable Channel 10 (KHET)'), # http://www.pbshawaii.org/
('video.idahoptv.org', 'Idaho Public Television (KAID)'), # http://idahoptv.org (r'video\.idahoptv\.org', 'Idaho Public Television (KAID)'), # http://idahoptv.org
('video.ksps.org', 'KSPS (KSPS)'), # http://www.ksps.org/home/ (r'video\.ksps\.org', 'KSPS (KSPS)'), # http://www.ksps.org/home/
('watch.opb.org', 'OPB (KOPB)'), # http://www.opb.org (r'watch\.opb\.org', 'OPB (KOPB)'), # http://www.opb.org
('watch.nwptv.org', 'KWSU/Channel 10 & KTNW/Channel 31 (KWSU)'), # http://www.kwsu.org (r'watch\.nwptv\.org', 'KWSU/Channel 10 & KTNW/Channel 31 (KWSU)'), # http://www.kwsu.org
('video.will.illinois.edu', 'WILL-TV (WILL)'), # http://will.illinois.edu/ (r'video\.will\.illinois\.edu', 'WILL-TV (WILL)'), # http://will.illinois.edu/
('video.networkknowledge.tv', 'Network Knowledge - WSEC/Springfield (WSEC)'), # http://www.wsec.tv (r'video\.networkknowledge\.tv', 'Network Knowledge - WSEC/Springfield (WSEC)'), # http://www.wsec.tv
('video.wttw.com', 'WTTW11 (WTTW)'), # http://www.wttw.com/ (r'video\.wttw\.com', 'WTTW11 (WTTW)'), # http://www.wttw.com/
# ('wtvp.org', 'WTVP & WTVP.org, Public Media for Central Illinois (WTVP)'), # http://www.wtvp.org/ # (r'wtvp\.org', 'WTVP & WTVP.org, Public Media for Central Illinois (WTVP)'), # http://www.wtvp.org/
('video.iptv.org', 'Iowa Public Television/IPTV (KDIN)'), # http://www.iptv.org/ (r'video\.iptv\.org', 'Iowa Public Television/IPTV (KDIN)'), # http://www.iptv.org/
('video.ninenet.org', 'Nine Network (KETC)'), # http://www.ninenet.org (r'video\.ninenet\.org', 'Nine Network (KETC)'), # http://www.ninenet.org
('video.wfwa.org', 'PBS39 Fort Wayne (WFWA)'), # http://wfwa.org/ (r'video\.wfwa\.org', 'PBS39 Fort Wayne (WFWA)'), # http://wfwa.org/
('video.wfyi.org', 'WFYI Indianapolis (WFYI)'), # http://www.wfyi.org (r'video\.wfyi\.org', 'WFYI Indianapolis (WFYI)'), # http://www.wfyi.org
('video.mptv.org', 'Milwaukee Public Television (WMVS)'), # http://www.mptv.org (r'video\.mptv\.org', 'Milwaukee Public Television (WMVS)'), # http://www.mptv.org
('video.wnin.org', 'WNIN (WNIN)'), # http://www.wnin.org/ (r'video\.wnin\.org', 'WNIN (WNIN)'), # http://www.wnin.org/
('video.wnit.org', 'WNIT Public Television (WNIT)'), # http://www.wnit.org/ (r'video\.wnit\.org', 'WNIT Public Television (WNIT)'), # http://www.wnit.org/
('video.wpt.org', 'WPT (WPNE)'), # http://www.wpt.org/ (r'video\.wpt\.org', 'WPT (WPNE)'), # http://www.wpt.org/
('video.wvut.org', 'WVUT/Channel 22 (WVUT)'), # http://wvut.org/ (r'video\.wvut\.org', 'WVUT/Channel 22 (WVUT)'), # http://wvut.org/
('video.weiu.net', 'WEIU/Channel 51 (WEIU)'), # http://www.weiu.net (r'video\.weiu\.net', 'WEIU/Channel 51 (WEIU)'), # http://www.weiu.net
('video.wqpt.org', 'WQPT-TV (WQPT)'), # http://www.wqpt.org (r'video\.wqpt\.org', 'WQPT-TV (WQPT)'), # http://www.wqpt.org
('video.wycc.org', 'WYCC PBS Chicago (WYCC)'), # http://www.wycc.org (r'video\.wycc\.org', 'WYCC PBS Chicago (WYCC)'), # http://www.wycc.org
# ('lakeshorepublicmedia.org', 'Lakeshore Public Television (WYIN)'), # http://lakeshorepublicmedia.org/ # (r'lakeshorepublicmedia\.org', 'Lakeshore Public Television (WYIN)'), # http://lakeshorepublicmedia.org/
('video.wipb.org', 'WIPB-TV (WIPB)'), # http://wipb.org (r'video\.wipb\.org', 'WIPB-TV (WIPB)'), # http://wipb.org
('video.indianapublicmedia.org', 'WTIU (WTIU)'), # http://indianapublicmedia.org/tv/ (r'video\.indianapublicmedia\.org', 'WTIU (WTIU)'), # http://indianapublicmedia.org/tv/
('watch.cetconnect.org', 'CET (WCET)'), # http://www.cetconnect.org (r'watch\.cetconnect\.org', 'CET (WCET)'), # http://www.cetconnect.org
('video.thinktv.org', 'ThinkTVNetwork (WPTD)'), # http://www.thinktv.org (r'video\.thinktv\.org', 'ThinkTVNetwork (WPTD)'), # http://www.thinktv.org
('video.wbgu.org', 'WBGU-TV (WBGU)'), # http://wbgu.org (r'video\.wbgu\.org', 'WBGU-TV (WBGU)'), # http://wbgu.org
('video.wgvu.org', 'WGVU TV (WGVU)'), # http://www.wgvu.org/ (r'video\.wgvu\.org', 'WGVU TV (WGVU)'), # http://www.wgvu.org/
('video.netnebraska.org', 'NET1 (KUON)'), # http://netnebraska.org (r'video\.netnebraska\.org', 'NET1 (KUON)'), # http://netnebraska.org
('video.pioneer.org', 'Pioneer Public Television (KWCM)'), # http://www.pioneer.org (r'video\.pioneer\.org', 'Pioneer Public Television (KWCM)'), # http://www.pioneer.org
('watch.sdpb.org', 'SDPB Television (KUSD)'), # http://www.sdpb.org (r'watch\.sdpb\.org', 'SDPB Television (KUSD)'), # http://www.sdpb.org
('video.tpt.org', 'TPT (KTCA)'), # http://www.tpt.org (r'video\.tpt\.org', 'TPT (KTCA)'), # http://www.tpt.org
('watch.ksmq.org', 'KSMQ (KSMQ)'), # http://www.ksmq.org/ (r'watch\.ksmq\.org', 'KSMQ (KSMQ)'), # http://www.ksmq.org/
('watch.kpts.org', 'KPTS/Channel 8 (KPTS)'), # http://www.kpts.org/ (r'watch\.kpts\.org', 'KPTS/Channel 8 (KPTS)'), # http://www.kpts.org/
('watch.ktwu.org', 'KTWU/Channel 11 (KTWU)'), # http://ktwu.org (r'watch\.ktwu\.org', 'KTWU/Channel 11 (KTWU)'), # http://ktwu.org
# ('shptv.org', 'Smoky Hills Public Television (KOOD)'), # http://www.shptv.org # (r'shptv\.org', 'Smoky Hills Public Television (KOOD)'), # http://www.shptv.org
# ('kcpt.org', 'KCPT Kansas City Public Television (KCPT)'), # http://kcpt.org/ # (r'kcpt\.org', 'KCPT Kansas City Public Television (KCPT)'), # http://kcpt.org/
# ('blueridgepbs.org', 'Blue Ridge PBS (WBRA)'), # http://www.blueridgepbs.org/ # (r'blueridgepbs\.org', 'Blue Ridge PBS (WBRA)'), # http://www.blueridgepbs.org/
('watch.easttennesseepbs.org', 'East Tennessee PBS (WSJK)'), # http://easttennesseepbs.org (r'watch\.easttennesseepbs\.org', 'East Tennessee PBS (WSJK)'), # http://easttennesseepbs.org
('video.wcte.tv', 'WCTE-TV (WCTE)'), # http://www.wcte.org (r'video\.wcte\.tv', 'WCTE-TV (WCTE)'), # http://www.wcte.org
('video.wljt.org', 'WLJT, Channel 11 (WLJT)'), # http://wljt.org/ (r'video\.wljt\.org', 'WLJT, Channel 11 (WLJT)'), # http://wljt.org/
('video.wosu.org', 'WOSU TV (WOSU)'), # http://wosu.org/ (r'video\.wosu\.org', 'WOSU TV (WOSU)'), # http://wosu.org/
('video.woub.org', 'WOUB/WOUC (WOUB)'), # http://woub.org/tv/index.php?section=5 (r'video\.woub\.org', 'WOUB/WOUC (WOUB)'), # http://woub.org/tv/index.php?section=5
('video.wvpublic.org', 'WVPB (WVPB)'), # http://wvpublic.org/ (r'video\.wvpublic\.org', 'WVPB (WVPB)'), # http://wvpublic.org/
('video.wkyupbs.org', 'WKYU-PBS (WKYU)'), # http://www.wkyupbs.org (r'video\.wkyupbs\.org', 'WKYU-PBS (WKYU)'), # http://www.wkyupbs.org
# ('wyes.org', 'WYES-TV/New Orleans (WYES)'), # http://www.wyes.org # (r'wyes\.org', 'WYES-TV/New Orleans (WYES)'), # http://www.wyes.org
('video.kera.org', 'KERA 13 (KERA)'), # http://www.kera.org/ (r'video\.kera\.org', 'KERA 13 (KERA)'), # http://www.kera.org/
('video.mpbn.net', 'MPBN (WCBB)'), # http://www.mpbn.net/ (r'video\.mpbn\.net', 'MPBN (WCBB)'), # http://www.mpbn.net/
('video.mountainlake.org', 'Mountain Lake PBS (WCFE)'), # http://www.mountainlake.org/ (r'video\.mountainlake\.org', 'Mountain Lake PBS (WCFE)'), # http://www.mountainlake.org/
('video.nhptv.org', 'NHPTV (WENH)'), # http://nhptv.org/ (r'video\.nhptv\.org', 'NHPTV (WENH)'), # http://nhptv.org/
('video.vpt.org', 'Vermont PBS (WETK)'), # http://www.vpt.org (r'video\.vpt\.org', 'Vermont PBS (WETK)'), # http://www.vpt.org
('video.witf.org', 'witf (WITF)'), # http://www.witf.org (r'video\.witf\.org', 'witf (WITF)'), # http://www.witf.org
('watch.wqed.org', 'WQED Multimedia (WQED)'), # http://www.wqed.org/ (r'watch\.wqed\.org', 'WQED Multimedia (WQED)'), # http://www.wqed.org/
('video.wmht.org', 'WMHT Educational Telecommunications (WMHT)'), # http://www.wmht.org/home/ (r'video\.wmht\.org', 'WMHT Educational Telecommunications (WMHT)'), # http://www.wmht.org/home/
('video.deltabroadcasting.org', 'Q-TV (WDCQ)'), # http://www.deltabroadcasting.org (r'video\.deltabroadcasting\.org', 'Q-TV (WDCQ)'), # http://www.deltabroadcasting.org
('video.dptv.org', 'WTVS Detroit Public TV (WTVS)'), # http://www.dptv.org/ (r'video\.dptv\.org', 'WTVS Detroit Public TV (WTVS)'), # http://www.dptv.org/
('video.wcmu.org', 'CMU Public Television (WCMU)'), # http://www.wcmu.org (r'video\.wcmu\.org', 'CMU Public Television (WCMU)'), # http://www.wcmu.org
('video.wkar.org', 'WKAR-TV (WKAR)'), # http://wkar.org/ (r'video\.wkar\.org', 'WKAR-TV (WKAR)'), # http://wkar.org/
('wnmuvideo.nmu.edu', 'WNMU-TV Public TV 13 (WNMU)'), # http://wnmutv.nmu.edu (r'wnmuvideo\.nmu\.edu', 'WNMU-TV Public TV 13 (WNMU)'), # http://wnmutv.nmu.edu
('video.wdse.org', 'WDSE - WRPT (WDSE)'), # http://www.wdse.org/ (r'video\.wdse\.org', 'WDSE - WRPT (WDSE)'), # http://www.wdse.org/
('video.wgte.org', 'WGTE TV (WGTE)'), # http://www.wgte.org (r'video\.wgte\.org', 'WGTE TV (WGTE)'), # http://www.wgte.org
('video.lptv.org', 'Lakeland Public Television (KAWE)'), # http://www.lakelandptv.org (r'video\.lptv\.org', 'Lakeland Public Television (KAWE)'), # http://www.lakelandptv.org
# ('prairiepublic.org', 'PRAIRIE PUBLIC (KFME)'), # http://www.prairiepublic.org/ # (r'prairiepublic\.org', 'PRAIRIE PUBLIC (KFME)'), # http://www.prairiepublic.org/
('video.kmos.org', 'KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS)'), # http://www.kmos.org/ (r'video\.kmos\.org', 'KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS)'), # http://www.kmos.org/
('watch.montanapbs.org', 'MontanaPBS (KUSM)'), # http://montanapbs.org (r'watch\.montanapbs\.org', 'MontanaPBS (KUSM)'), # http://montanapbs.org
('video.krwg.org', 'KRWG/Channel 22 (KRWG)'), # http://www.krwg.org (r'video\.krwg\.org', 'KRWG/Channel 22 (KRWG)'), # http://www.krwg.org
('video.kacvtv.org', 'KACV (KACV)'), # http://www.panhandlepbs.org/home/ (r'video\.kacvtv\.org', 'KACV (KACV)'), # http://www.panhandlepbs.org/home/
('video.kcostv.org', 'KCOS/Channel 13 (KCOS)'), # www.kcostv.org (r'video\.kcostv\.org', 'KCOS/Channel 13 (KCOS)'), # www.kcostv.org
('video.wcny.org', 'WCNY/Channel 24 (WCNY)'), # http://www.wcny.org (r'video\.wcny\.org', 'WCNY/Channel 24 (WCNY)'), # http://www.wcny.org
('video.wned.org', 'WNED (WNED)'), # http://www.wned.org/ (r'video\.wned\.org', 'WNED (WNED)'), # http://www.wned.org/
('watch.wpbstv.org', 'WPBS (WPBS)'), # http://www.wpbstv.org (r'watch\.wpbstv\.org', 'WPBS (WPBS)'), # http://www.wpbstv.org
('video.wskg.org', 'WSKG Public TV (WSKG)'), # http://wskg.org (r'video\.wskg\.org', 'WSKG Public TV (WSKG)'), # http://wskg.org
('video.wxxi.org', 'WXXI (WXXI)'), # http://wxxi.org (r'video\.wxxi\.org', 'WXXI (WXXI)'), # http://wxxi.org
('video.wpsu.org', 'WPSU (WPSU)'), # http://www.wpsu.org (r'video\.wpsu\.org', 'WPSU (WPSU)'), # http://www.wpsu.org
# ('wqln.org', 'WQLN/Channel 54 (WQLN)'), # http://www.wqln.org # (r'wqln\.org', 'WQLN/Channel 54 (WQLN)'), # http://www.wqln.org
('on-demand.wvia.org', 'WVIA Public Media Studios (WVIA)'), # http://www.wvia.org/ (r'on-demand\.wvia\.org', 'WVIA Public Media Studios (WVIA)'), # http://www.wvia.org/
('video.wtvi.org', 'WTVI (WTVI)'), # http://www.wtvi.org/ (r'video\.wtvi\.org', 'WTVI (WTVI)'), # http://www.wtvi.org/
# ('whro.org', 'WHRO (WHRO)'), # http://whro.org # (r'whro\.org', 'WHRO (WHRO)'), # http://whro.org
('video.westernreservepublicmedia.org', 'Western Reserve PBS (WNEO)'), # http://www.WesternReservePublicMedia.org/ (r'video\.westernreservepublicmedia\.org', 'Western Reserve PBS (WNEO)'), # http://www.WesternReservePublicMedia.org/
('video.ideastream.org', 'WVIZ/PBS ideastream (WVIZ)'), # http://www.wviz.org/ (r'video\.ideastream\.org', 'WVIZ/PBS ideastream (WVIZ)'), # http://www.wviz.org/
('video.kcts9.org', 'KCTS 9 (KCTS)'), # http://kcts9.org/ (r'video\.kcts9\.org', 'KCTS 9 (KCTS)'), # http://kcts9.org/
('video.basinpbs.org', 'Basin PBS (KPBT)'), # http://www.basinpbs.org (r'video\.basinpbs\.org', 'Basin PBS (KPBT)'), # http://www.basinpbs.org
('video.houstonpbs.org', 'KUHT / Channel 8 (KUHT)'), # http://www.houstonpublicmedia.org/ (r'video\.houstonpbs\.org', 'KUHT / Channel 8 (KUHT)'), # http://www.houstonpublicmedia.org/
# ('tamu.edu', 'KAMU - TV (KAMU)'), # http://KAMU.tamu.edu # (r'tamu\.edu', 'KAMU - TV (KAMU)'), # http://KAMU.tamu.edu
# ('kedt.org', 'KEDT/Channel 16 (KEDT)'), # http://www.kedt.org # (r'kedt\.org', 'KEDT/Channel 16 (KEDT)'), # http://www.kedt.org
('video.klrn.org', 'KLRN (KLRN)'), # http://www.klrn.org (r'video\.klrn\.org', 'KLRN (KLRN)'), # http://www.klrn.org
('video.klru.tv', 'KLRU (KLRU)'), # http://www.klru.org (r'video\.klru\.tv', 'KLRU (KLRU)'), # http://www.klru.org
# ('kmbh.org', 'KMBH-TV (KMBH)'), # http://www.kmbh.org # (r'kmbh\.org', 'KMBH-TV (KMBH)'), # http://www.kmbh.org
# ('knct.org', 'KNCT (KNCT)'), # http://www.knct.org # (r'knct\.org', 'KNCT (KNCT)'), # http://www.knct.org
# ('ktxt.org', 'KTTZ-TV (KTXT)'), # http://www.ktxt.org # (r'ktxt\.org', 'KTTZ-TV (KTXT)'), # http://www.ktxt.org
('video.wtjx.org', 'WTJX Channel 12 (WTJX)'), # http://www.wtjx.org/ (r'video\.wtjx\.org', 'WTJX Channel 12 (WTJX)'), # http://www.wtjx.org/
('video.ideastations.org', 'WCVE PBS (WCVE)'), # http://ideastations.org/ (r'video\.ideastations\.org', 'WCVE PBS (WCVE)'), # http://ideastations.org/
('video.kbtc.org', 'KBTC Public Television (KBTC)'), # http://kbtc.org (r'video\.kbtc\.org', 'KBTC Public Television (KBTC)'), # http://kbtc.org
) )
IE_NAME = 'pbs' IE_NAME = 'pbs'
@ -189,7 +189,7 @@ class PBSIE(InfoExtractor):
# Player # Player
(?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/ (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
) )
''' % '|'.join(re.escape(p) for p in list(zip(*_STATIONS))[0]) ''' % '|'.join(list(zip(*_STATIONS))[0])
_TESTS = [ _TESTS = [
{ {

View File

@ -17,9 +17,9 @@ from ..utils import (
class RutubeIE(InfoExtractor): class RutubeIE(InfoExtractor):
IE_NAME = 'rutube' IE_NAME = 'rutube'
IE_DESC = 'Rutube videos' IE_DESC = 'Rutube videos'
_VALID_URL = r'https?://rutube\.ru/video/(?P<id>[\da-z]{32})' _VALID_URL = r'https?://rutube\.ru/(?:video|play/embed)/(?P<id>[\da-z]{32})'
_TEST = { _TESTS = [{
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
'info_dict': { 'info_dict': {
'id': '3eac3b4561676c17df9132a9a1e62e3e', 'id': '3eac3b4561676c17df9132a9a1e62e3e',
@ -36,7 +36,10 @@ class RutubeIE(InfoExtractor):
# It requires ffmpeg (m3u8 download) # It requires ffmpeg (m3u8 download)
'skip_download': True, 'skip_download': True,
}, },
} }, {
'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)

View File

@ -0,0 +1,81 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
js_to_json,
qualities,
determine_ext,
)
class Tele13IE(InfoExtractor):
_VALID_URL = r'^http://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
_TESTS = [
{
'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
'md5': '4cb1fa38adcad8fea88487a078831755',
'info_dict': {
'id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
'ext': 'mp4',
'title': 'El círculo de hierro de Michelle Bachelet en su regreso a La Moneda',
},
'params': {
# HTTP Error 404: Not Found
'skip_download': True,
},
},
{
'url': 'http://www.t13.cl/videos/mundo/tendencias/video-captan-misteriosa-bola-fuego-cielos-bangkok',
'md5': '867adf6a3b3fef932c68a71d70b70946',
'info_dict': {
'id': 'rOoKv2OMpOw',
'ext': 'mp4',
'title': 'Shooting star seen on 7-Sep-2015',
'description': 'md5:7292ff2a34b2f673da77da222ae77e1e',
'uploader': 'Porjai Jaturongkhakun',
'upload_date': '20150906',
'uploader_id': 'UCnLY_3ezwNcDSC_Wc6suZxw',
},
'add_ie': ['Youtube'],
}
]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
setup_js = self._search_regex(r"(?s)jwplayer\('player-vivo'\).setup\((\{.*?\})\)", webpage, 'setup code')
sources = self._parse_json(self._search_regex(r'sources\s*:\s*(\[[^\]]+\])', setup_js, 'sources'), display_id, js_to_json)
preference = qualities(['Móvil', 'SD', 'HD'])
formats = []
urls = []
for f in sources:
format_url = f['file']
if format_url and format_url not in urls:
ext = determine_ext(format_url)
if ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(format_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
elif YoutubeIE.suitable(format_url):
return self.url_result(format_url, 'Youtube')
else:
formats.append({
'url': format_url,
'format_id': f.get('label'),
'preference': preference(f.get('label')),
'ext': ext,
})
urls.append(format_url)
self._sort_formats(formats)
return {
'id': display_id,
'title': self._search_regex(r'title\s*:\s*"([^"]+)"', setup_js, 'title'),
'description': self._html_search_meta('description', webpage, 'description'),
'thumbnail': self._search_regex(r'image\s*:\s*"([^"]+)"', setup_js, 'thumbnail', default=None),
'formats': formats,
}

View File

@ -6,7 +6,7 @@ from .common import InfoExtractor
class TF1IE(InfoExtractor): class TF1IE(InfoExtractor):
"""TF1 uses the wat.tv player.""" """TF1 uses the wat.tv player."""
_VALID_URL = r'http://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html' _VALID_URL = r'http://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/(?:[^/]+/)*(?P<id>.+?)\.html'
_TESTS = [{ _TESTS = [{
'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html', 'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
'info_dict': { 'info_dict': {
@ -22,7 +22,7 @@ class TF1IE(InfoExtractor):
}, { }, {
'url': 'http://www.tfou.fr/chuggington/videos/le-grand-mysterioso-chuggington-7085291-739.html', 'url': 'http://www.tfou.fr/chuggington/videos/le-grand-mysterioso-chuggington-7085291-739.html',
'info_dict': { 'info_dict': {
'id': '12043945', 'id': 'le-grand-mysterioso-chuggington-7085291-739',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Le grand Mystérioso - Chuggington', 'title': 'Le grand Mystérioso - Chuggington',
'description': 'Le grand Mystérioso - Emery rêve qu\'un article lui soit consacré dans le journal.', 'description': 'Le grand Mystérioso - Emery rêve qu\'un article lui soit consacré dans le journal.',
@ -32,22 +32,24 @@ class TF1IE(InfoExtractor):
# Sometimes wat serves the whole file with the --test option # Sometimes wat serves the whole file with the --test option
'skip_download': True, 'skip_download': True,
}, },
'skip': 'HTTP Error 410: Gone',
}, { }, {
'url': 'http://www.tf1.fr/tf1/koh-lanta/videos/replay-koh-lanta-22-mai-2015.html', 'url': 'http://www.tf1.fr/tf1/koh-lanta/videos/replay-koh-lanta-22-mai-2015.html',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'http://lci.tf1.fr/sept-a-huit/videos/sept-a-huit-du-24-mai-2015-8611550.html', 'url': 'http://lci.tf1.fr/sept-a-huit/videos/sept-a-huit-du-24-mai-2015-8611550.html',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.tf1.fr/hd1/documentaire/videos/mylene-farmer-d-une-icone.html',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
embed_url = self._html_search_regex( wat_id = self._html_search_regex(
r'["\'](https?://www.wat.tv/embedframe/.*?)["\']', webpage, 'embed url') r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})\1',
embed_page = self._download_webpage(embed_url, video_id, webpage, 'wat id', group='id')
'Downloading embed player page')
wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
wat_info = self._download_json( wat_info = self._download_json(
'http://www.wat.tv/interface/contentv3/%s' % wat_id, video_id) 'http://www.wat.tv/interface/contentv3/%s' % wat_id, video_id)
return self.url_result(wat_info['media']['url'], 'Wat') return self.url_result(wat_info['media']['url'], 'Wat')

View File

@ -0,0 +1,194 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
ExtractorError,
float_or_none,
int_or_none,
parse_iso8601,
sanitized_Request,
)
class ToggleIE(InfoExtractor):
IE_NAME = 'toggle'
_VALID_URL = r'https?://video\.toggle\.sg/(?:en|zh)/(?:series|clips|movies)/(?:[^/]+/)+(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://video.toggle.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
'info_dict': {
'id': '343115',
'ext': 'mp4',
'title': 'Lion Moms Premiere',
'description': 'md5:aea1149404bff4d7f7b6da11fafd8e6b',
'upload_date': '20150910',
'timestamp': 1441858274,
},
'params': {
'skip_download': 'm3u8 download',
}
}, {
'note': 'DRM-protected video',
'url': 'http://video.toggle.sg/en/movies/dug-s-special-mission/341413',
'info_dict': {
'id': '341413',
'ext': 'wvm',
'title': 'Dug\'s Special Mission',
'description': 'md5:e86c6f4458214905c1772398fabc93e0',
'upload_date': '20150827',
'timestamp': 1440644006,
},
'params': {
'skip_download': 'DRM-protected wvm download',
}
}, {
# this also tests correct video id extraction
'note': 'm3u8 links are geo-restricted, but Android/mp4 is okay',
'url': 'http://video.toggle.sg/en/series/28th-sea-games-5-show/28th-sea-games-5-show-ep11/332861',
'info_dict': {
'id': '332861',
'ext': 'mp4',
'title': '28th SEA Games (5 Show) - Episode 11',
'description': 'md5:3cd4f5f56c7c3b1340c50a863f896faa',
'upload_date': '20150605',
'timestamp': 1433480166,
},
'params': {
'skip_download': 'DRM-protected wvm download',
},
'skip': 'm3u8 links are geo-restricted'
}, {
'url': 'http://video.toggle.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
'only_matching': True,
}, {
'url': 'http://video.toggle.sg/zh/series/zero-calling-s2-hd/ep13/336367',
'only_matching': True,
}, {
'url': 'http://video.toggle.sg/en/series/vetri-s2/webisodes/jeeva-is-an-orphan-vetri-s2-webisode-7/342302',
'only_matching': True,
}, {
'url': 'http://video.toggle.sg/en/movies/seven-days/321936',
'only_matching': True,
}]
_FORMAT_PREFERENCES = {
'wvm-STBMain': -10,
'wvm-iPadMain': -20,
'wvm-iPhoneMain': -30,
'wvm-Android': -40,
}
_API_USER = 'tvpapi_147'
_API_PASS = '11111'
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
url, video_id, note='Downloading video page')
api_user = self._search_regex(
r'apiUser\s*:\s*(["\'])(?P<user>.+?)\1', webpage, 'apiUser',
default=self._API_USER, group='user')
api_pass = self._search_regex(
r'apiPass\s*:\s*(["\'])(?P<pass>.+?)\1', webpage, 'apiPass',
default=self._API_PASS, group='pass')
params = {
'initObj': {
'Locale': {
'LocaleLanguage': '',
'LocaleCountry': '',
'LocaleDevice': '',
'LocaleUserState': 0
},
'Platform': 0,
'SiteGuid': 0,
'DomainID': '0',
'UDID': '',
'ApiUser': api_user,
'ApiPass': api_pass
},
'MediaID': video_id,
'mediaType': 0,
}
req = sanitized_Request(
'http://tvpapi.as.tvinci.com/v2_9/gateways/jsonpostgw.aspx?m=GetMediaInfo',
json.dumps(params).encode('utf-8'))
info = self._download_json(req, video_id, 'Downloading video info json')
title = info['MediaName']
formats = []
for video_file in info.get('Files', []):
video_url, vid_format = video_file.get('URL'), video_file.get('Format')
if not video_url or not vid_format:
continue
ext = determine_ext(video_url)
vid_format = vid_format.replace(' ', '')
# if geo-restricted, m3u8 is inaccessible, but mp4 is okay
if ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
video_url, video_id, ext='mp4', m3u8_id=vid_format,
note='Downloading %s m3u8 information' % vid_format,
errnote='Failed to download %s m3u8 information' % vid_format,
fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
elif ext in ('mp4', 'wvm'):
# wvm are drm-protected files
formats.append({
'ext': ext,
'url': video_url,
'format_id': vid_format,
'preference': self._FORMAT_PREFERENCES.get(ext + '-' + vid_format) or -1,
'format_note': 'DRM-protected video' if ext == 'wvm' else None
})
if not formats:
# Most likely because geo-blocked
raise ExtractorError('No downloadable videos found', expected=True)
self._sort_formats(formats)
duration = int_or_none(info.get('Duration'))
description = info.get('Description')
created_at = parse_iso8601(info.get('CreationDate') or None)
average_rating = float_or_none(info.get('Rating'))
view_count = int_or_none(info.get('ViewCounter') or info.get('view_counter'))
like_count = int_or_none(info.get('LikeCounter') or info.get('like_counter'))
thumbnails = []
for picture in info.get('Pictures', []):
if not isinstance(picture, dict):
continue
pic_url = picture.get('URL')
if not pic_url:
continue
thumbnail = {
'url': pic_url,
}
pic_size = picture.get('PicSize', '')
m = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', pic_size)
if m:
thumbnail.update({
'width': int(m.group('width')),
'height': int(m.group('height')),
})
thumbnails.append(thumbnail)
return {
'id': video_id,
'title': title,
'description': description,
'duration': duration,
'timestamp': created_at,
'average_rating': average_rating,
'view_count': view_count,
'like_count': like_count,
'thumbnails': thumbnails,
'formats': formats,
}

View File

@ -23,6 +23,7 @@ from ..utils import (
unsmuggle_url, unsmuggle_url,
urlencode_postdata, urlencode_postdata,
unescapeHTML, unescapeHTML,
parse_filesize,
) )
@ -184,6 +185,20 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader_id': 'user28849593', 'uploader_id': 'user28849593',
}, },
}, },
{
# contains original format
'url': 'https://vimeo.com/33951933',
'md5': '53c688fa95a55bf4b7293d37a89c5c53',
'info_dict': {
'id': '33951933',
'ext': 'mp4',
'title': 'FOX CLASSICS - Forever Classic ID - A Full Minute',
'uploader': 'The DMCI',
'uploader_id': 'dmci',
'upload_date': '20111220',
'description': 'md5:ae23671e82d05415868f7ad1aec21147',
},
},
{ {
'url': 'https://vimeo.com/109815029', 'url': 'https://vimeo.com/109815029',
'note': 'Video not completely processed, "failed" seed status', 'note': 'Video not completely processed, "failed" seed status',
@ -392,6 +407,21 @@ class VimeoIE(VimeoBaseInfoExtractor):
comment_count = None comment_count = None
formats = [] formats = []
download_request = sanitized_Request('https://vimeo.com/%s?action=load_download_config' % video_id, headers={
'X-Requested-With': 'XMLHttpRequest'})
download_data = self._download_json(download_request, video_id, fatal=False)
if download_data:
source_file = download_data.get('source_file')
if source_file and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
formats.append({
'url': source_file['download_url'],
'ext': source_file['extension'].lower(),
'width': int_or_none(source_file.get('width')),
'height': int_or_none(source_file.get('height')),
'filesize': parse_filesize(source_file.get('size')),
'format_id': source_file.get('public_name', 'Original'),
'preference': 1,
})
config_files = config['video'].get('files') or config['request'].get('files', {}) config_files = config['video'].get('files') or config['request'].get('files', {})
for f in config_files.get('progressive', []): for f in config_files.get('progressive', []):
video_url = f.get('url') video_url = f.get('url')
@ -408,12 +438,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
m3u8_url = config_files.get('hls', {}).get('url') m3u8_url = config_files.get('hls', {}).get('url')
if m3u8_url: if m3u8_url:
m3u8_formats = self._extract_m3u8_formats( m3u8_formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native', 0, 'hls', fatal=False) m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
if m3u8_formats: if m3u8_formats:
formats.extend(m3u8_formats) formats.extend(m3u8_formats)
# Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
# at the same time without actual units specified. This lead to wrong sorting. # at the same time without actual units specified. This lead to wrong sorting.
self._sort_formats(formats, field_preference=('height', 'width', 'fps', 'format_id')) self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'format_id'))
subtitles = {} subtitles = {}
text_tracks = config['request'].get('text_tracks') text_tracks = config['request'].get('text_tracks')

View File

@ -221,7 +221,7 @@ class YoukuIE(InfoExtractor):
'Youku said: Sorry, this video is available in China only', expected=True) 'Youku said: Sorry, this video is available in China only', expected=True)
else: else:
msg = 'Youku server reported error %i' % error.get('code') msg = 'Youku server reported error %i' % error.get('code')
if error is not None: if error_note is not None:
msg += ': ' + error_note msg += ': ' + error_note
raise ExtractorError(msg) raise ExtractorError(msg)

View File

@ -26,6 +26,7 @@ from ..compat import (
from ..utils import ( from ..utils import (
clean_html, clean_html,
encode_dict, encode_dict,
error_to_compat_str,
ExtractorError, ExtractorError,
float_or_none, float_or_none,
get_element_by_attribute, get_element_by_attribute,
@ -33,6 +34,7 @@ from ..utils import (
int_or_none, int_or_none,
orderedSet, orderedSet,
parse_duration, parse_duration,
remove_quotes,
remove_start, remove_start,
sanitized_Request, sanitized_Request,
smuggle_url, smuggle_url,
@ -395,12 +397,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20120506', 'upload_date': '20120506',
'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]', 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
'alt_title': 'I Love It (feat. Charli XCX)',
'description': 'md5:782e8651347686cba06e58f71ab51773', 'description': 'md5:782e8651347686cba06e58f71ab51773',
'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli', 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop', 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
'iconic ep', 'iconic', 'love', 'it'], 'iconic ep', 'iconic', 'love', 'it'],
'uploader': 'Icona Pop', 'uploader': 'Icona Pop',
'uploader_id': 'IconaPop', 'uploader_id': 'IconaPop',
'creator': 'Icona Pop',
} }
}, },
{ {
@ -411,9 +415,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20130703', 'upload_date': '20130703',
'title': 'Justin Timberlake - Tunnel Vision (Explicit)', 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
'alt_title': 'Tunnel Vision',
'description': 'md5:64249768eec3bc4276236606ea996373', 'description': 'md5:64249768eec3bc4276236606ea996373',
'uploader': 'justintimberlakeVEVO', 'uploader': 'justintimberlakeVEVO',
'uploader_id': 'justintimberlakeVEVO', 'uploader_id': 'justintimberlakeVEVO',
'creator': 'Justin Timberlake',
'age_limit': 18, 'age_limit': 18,
} }
}, },
@ -492,10 +498,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'id': 'nfWlot6h_JM', 'id': 'nfWlot6h_JM',
'ext': 'm4a', 'ext': 'm4a',
'title': 'Taylor Swift - Shake It Off', 'title': 'Taylor Swift - Shake It Off',
'alt_title': 'Shake It Off',
'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3', 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
'uploader': 'TaylorSwiftVEVO', 'uploader': 'TaylorSwiftVEVO',
'uploader_id': 'TaylorSwiftVEVO', 'uploader_id': 'TaylorSwiftVEVO',
'upload_date': '20140818', 'upload_date': '20140818',
'creator': 'Taylor Swift',
}, },
'params': { 'params': {
'youtube_include_dash_manifest': True, 'youtube_include_dash_manifest': True,
@ -551,9 +559,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20100430', 'upload_date': '20100430',
'uploader_id': 'deadmau5', 'uploader_id': 'deadmau5',
'creator': 'deadmau5',
'description': 'md5:12c56784b8032162bb936a5f76d55360', 'description': 'md5:12c56784b8032162bb936a5f76d55360',
'uploader': 'deadmau5', 'uploader': 'deadmau5',
'title': 'Deadmau5 - Some Chords (HD)', 'title': 'Deadmau5 - Some Chords (HD)',
'alt_title': 'Some Chords',
}, },
'expected_warnings': [ 'expected_warnings': [
'DASH manifest missing', 'DASH manifest missing',
@ -701,10 +711,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'id': 'lsguqyKfVQg', 'id': 'lsguqyKfVQg',
'ext': 'mp4', 'ext': 'mp4',
'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21', 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
'alt_title': 'Dark Walk',
'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a', 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
'upload_date': '20151119', 'upload_date': '20151119',
'uploader_id': 'IronSoulElf', 'uploader_id': 'IronSoulElf',
'uploader': 'IronSoulElf', 'uploader': 'IronSoulElf',
'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -892,7 +904,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id, 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
video_id, note=False) video_id, note=False)
except ExtractorError as err: except ExtractorError as err:
self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err)) self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
return {} return {}
sub_lang_list = {} sub_lang_list = {}
@ -1308,6 +1320,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
upload_date = unified_strdate(upload_date) upload_date = unified_strdate(upload_date)
m_music = re.search(
r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
video_webpage)
if m_music:
video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
video_creator = clean_html(m_music.group('creator'))
else:
video_alt_title = video_creator = None
m_cat_container = self._search_regex( m_cat_container = self._search_regex(
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>', r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
video_webpage, 'categories', default=None) video_webpage, 'categories', default=None)
@ -1537,7 +1558,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': video_uploader, 'uploader': video_uploader,
'uploader_id': video_uploader_id, 'uploader_id': video_uploader_id,
'upload_date': upload_date, 'upload_date': upload_date,
'creator': video_creator,
'title': video_title, 'title': video_title,
'alt_title': video_alt_title,
'thumbnail': video_thumbnail, 'thumbnail': video_thumbnail,
'description': video_description, 'description': video_description,
'categories': video_categories, 'categories': video_categories,
@ -1752,6 +1775,10 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
}, },
}] }]
@classmethod
def suitable(cls, url):
return False if YoutubePlaylistsIE.suitable(url) else super(YoutubeChannelIE, cls).suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
channel_id = self._match_id(url) channel_id = self._match_id(url)
@ -1825,10 +1852,10 @@ class YoutubeUserIE(YoutubeChannelIE):
return super(YoutubeUserIE, cls).suitable(url) return super(YoutubeUserIE, cls).suitable(url)
class YoutubeUserPlaylistsIE(YoutubePlaylistsBaseInfoExtractor): class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
IE_DESC = 'YouTube.com user playlists' IE_DESC = 'YouTube.com user/channel playlists'
_VALID_URL = r'https?://(?:\w+\.)?youtube\.com/user/(?P<id>[^/]+)/playlists' _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
IE_NAME = 'youtube:user:playlists' IE_NAME = 'youtube:playlists'
_TESTS = [{ _TESTS = [{
'url': 'http://www.youtube.com/user/ThirstForScience/playlists', 'url': 'http://www.youtube.com/user/ThirstForScience/playlists',
@ -1845,6 +1872,13 @@ class YoutubeUserPlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
'id': 'igorkle1', 'id': 'igorkle1',
'title': 'Игорь Клейнер', 'title': 'Игорь Клейнер',
}, },
}, {
'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
'playlist_mincount': 17,
'info_dict': {
'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
'title': 'Chem Player',
},
}] }]

View File

@ -232,7 +232,7 @@ class JSInterpreter(object):
def extract_function(self, funcname): def extract_function(self, funcname):
func_m = re.search( func_m = re.search(
r'''(?x) r'''(?x)
(?:function\s+%s|[{;]%s\s*=\s*function|var\s+%s\s*=\s*function)\s* (?:function\s+%s|[{;,]%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
\((?P<args>[^)]*)\)\s* \((?P<args>[^)]*)\)\s*
\{(?P<code>[^}]+)\}''' % ( \{(?P<code>[^}]+)\}''' % (
re.escape(funcname), re.escape(funcname), re.escape(funcname)), re.escape(funcname), re.escape(funcname), re.escape(funcname)),

View File

@ -9,7 +9,7 @@ import subprocess
import sys import sys
from zipimport import zipimporter from zipimport import zipimporter
from .compat import compat_str from .utils import encode_compat_str
from .version import __version__ from .version import __version__
@ -61,7 +61,7 @@ def update_self(to_screen, verbose, opener):
newversion = opener.open(VERSION_URL).read().decode('utf-8').strip() newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()
except Exception: except Exception:
if verbose: if verbose:
to_screen(compat_str(traceback.format_exc())) to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t find the current version. Please try again later.') to_screen('ERROR: can\'t find the current version. Please try again later.')
return return
if newversion == __version__: if newversion == __version__:
@ -74,7 +74,7 @@ def update_self(to_screen, verbose, opener):
versions_info = json.loads(versions_info) versions_info = json.loads(versions_info)
except Exception: except Exception:
if verbose: if verbose:
to_screen(compat_str(traceback.format_exc())) to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t obtain versions info. Please try again later.') to_screen('ERROR: can\'t obtain versions info. Please try again later.')
return return
if 'signature' not in versions_info: if 'signature' not in versions_info:
@ -123,7 +123,7 @@ def update_self(to_screen, verbose, opener):
urlh.close() urlh.close()
except (IOError, OSError): except (IOError, OSError):
if verbose: if verbose:
to_screen(compat_str(traceback.format_exc())) to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to download latest version') to_screen('ERROR: unable to download latest version')
return return
@ -137,7 +137,7 @@ def update_self(to_screen, verbose, opener):
outf.write(newcontent) outf.write(newcontent)
except (IOError, OSError): except (IOError, OSError):
if verbose: if verbose:
to_screen(compat_str(traceback.format_exc())) to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to write the new version') to_screen('ERROR: unable to write the new version')
return return
@ -157,7 +157,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
return # Do not show premature success messages return # Do not show premature success messages
except (IOError, OSError): except (IOError, OSError):
if verbose: if verbose:
to_screen(compat_str(traceback.format_exc())) to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to overwrite current version') to_screen('ERROR: unable to overwrite current version')
return return
@ -169,7 +169,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
urlh.close() urlh.close()
except (IOError, OSError): except (IOError, OSError):
if verbose: if verbose:
to_screen(compat_str(traceback.format_exc())) to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to download latest version') to_screen('ERROR: unable to download latest version')
return return
@ -183,7 +183,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
outf.write(newcontent) outf.write(newcontent)
except (IOError, OSError): except (IOError, OSError):
if verbose: if verbose:
to_screen(compat_str(traceback.format_exc())) to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to overwrite current version') to_screen('ERROR: unable to overwrite current version')
return return

View File

@ -1406,6 +1406,15 @@ def remove_end(s, end):
return s return s
def remove_quotes(s):
if s is None or len(s) < 2:
return s
for quote in ('"', "'", ):
if s[0] == quote and s[-1] == quote:
return s[1:-1]
return s
def url_basename(url): def url_basename(url):
path = compat_urlparse.urlparse(url).path path = compat_urlparse.urlparse(url).path
return path.strip('/').split('/')[-1] return path.strip('/').split('/')[-1]
@ -1703,6 +1712,10 @@ def encode_dict(d, encoding='utf-8'):
return dict((encode(k), encode(v)) for k, v in d.items()) return dict((encode(k), encode(v)) for k, v in d.items())
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
US_RATINGS = { US_RATINGS = {
'G': 0, 'G': 0,
'PG': 10, 'PG': 10,
@ -1797,6 +1810,15 @@ def args_to_str(args):
return ' '.join(shlex_quote(a) for a in args) return ' '.join(shlex_quote(a) for a in args)
def error_to_compat_str(err):
err_str = str(err)
# On python 2 error byte string must be decoded with proper
# encoding rather than ascii
if sys.version_info[0] < 3:
err_str = err_str.decode(preferredencoding())
return err_str
def mimetype2ext(mt): def mimetype2ext(mt):
_, _, res = mt.rpartition('/') _, _, res = mt.rpartition('/')
@ -1967,15 +1989,15 @@ def match_filter_func(filter_str):
def parse_dfxp_time_expr(time_expr): def parse_dfxp_time_expr(time_expr):
if not time_expr: if not time_expr:
return 0.0 return
mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr) mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
if mobj: if mobj:
return float(mobj.group('time_offset')) return float(mobj.group('time_offset'))
mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr) mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
if mobj: if mobj:
return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3)) return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
def srt_subtitles_timecode(seconds): def srt_subtitles_timecode(seconds):
@ -2011,10 +2033,15 @@ def dfxp2srt(dfxp_data):
raise ValueError('Invalid dfxp/TTML subtitle') raise ValueError('Invalid dfxp/TTML subtitle')
for para, index in zip(paras, itertools.count(1)): for para, index in zip(paras, itertools.count(1)):
begin_time = parse_dfxp_time_expr(para.attrib['begin']) begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
end_time = parse_dfxp_time_expr(para.attrib.get('end')) end_time = parse_dfxp_time_expr(para.attrib.get('end'))
dur = parse_dfxp_time_expr(para.attrib.get('dur'))
if begin_time is None:
continue
if not end_time: if not end_time:
end_time = begin_time + parse_dfxp_time_expr(para.attrib['dur']) if not dur:
continue
end_time = begin_time + dur
out.append('%d\n%s --> %s\n%s\n\n' % ( out.append('%d\n%s --> %s\n%s\n\n' % (
index, index,
srt_subtitles_timecode(begin_time), srt_subtitles_timecode(begin_time),

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2015.12.13' __version__ = '2015.12.21'