1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-02-14 05:22:51 +08:00

Merge branch 'master' of https://github.com/rg3/youtube-dl into upstream/master

This commit is contained in:
Mister Hat 2015-11-06 15:43:34 -06:00
commit b374f63129
13 changed files with 218 additions and 132 deletions

View File

@ -572,7 +572,7 @@ class YoutubeDL(object):
if v is not None) if v is not None)
template_dict = collections.defaultdict(lambda: 'NA', template_dict) template_dict = collections.defaultdict(lambda: 'NA', template_dict)
outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL)) outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
tmpl = compat_expanduser(outtmpl) tmpl = compat_expanduser(outtmpl)
filename = tmpl % template_dict filename = tmpl % template_dict
# Temporary fix for #4787 # Temporary fix for #4787
@ -580,7 +580,7 @@ class YoutubeDL(object):
# to workaround encoding issues with subprocess on python2 @ Windows # to workaround encoding issues with subprocess on python2 @ Windows
if sys.version_info < (3, 0) and sys.platform == 'win32': if sys.version_info < (3, 0) and sys.platform == 'win32':
filename = encodeFilename(filename, True).decode(preferredencoding()) filename = encodeFilename(filename, True).decode(preferredencoding())
return filename return sanitize_path(filename)
except ValueError as err: except ValueError as err:
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')') self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
return None return None

View File

@ -212,7 +212,10 @@ from .gfycat import GfycatIE
from .giantbomb import GiantBombIE from .giantbomb import GiantBombIE
from .giga import GigaIE from .giga import GigaIE
from .glide import GlideIE from .glide import GlideIE
from .globo import GloboIE from .globo import (
GloboIE,
GloboArticleIE,
)
from .godtube import GodTubeIE from .godtube import GodTubeIE
from .goldenmoustache import GoldenMoustacheIE from .goldenmoustache import GoldenMoustacheIE
from .golem import GolemIE from .golem import GolemIE

View File

@ -14,79 +14,58 @@ from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,
int_or_none, int_or_none,
str_or_none,
) )
class GloboIE(InfoExtractor): class GloboIE(InfoExtractor):
_VALID_URL = 'https?://.+?\.globo\.com/(?P<id>.+)' _VALID_URL = '(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
_API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist' _API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist'
_SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s' _SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s'
_VIDEOID_REGEXES = [
r'\bdata-video-id="(\d+)"',
r'\bdata-player-videosids="(\d+)"',
r'<div[^>]+\bid="(\d+)"',
]
_RESIGN_EXPIRATION = 86400 _RESIGN_EXPIRATION = 86400
_TESTS = [ _TESTS = [{
{ 'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/',
'url': 'http://globotv.globo.com/sportv/futebol-nacional/v/os-gols-de-atletico-mg-3-x-2-santos-pela-24a-rodada-do-brasileirao/3654973/', 'md5': 'b3ccc801f75cd04a914d51dadb83a78d',
'md5': '03ebf41cb7ade43581608b7d9b71fab0', 'info_dict': {
'info_dict': { 'id': '3607726',
'id': '3654973', 'ext': 'mp4',
'ext': 'mp4', 'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa',
'title': 'Os gols de Atlético-MG 3 x 2 Santos pela 24ª rodada do Brasileirão', 'duration': 103.204,
'duration': 251.585, 'uploader': 'Globo.com',
'uploader': 'SporTV', 'uploader_id': '265',
'uploader_id': 698,
'like_count': int,
}
}, },
{ }, {
'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/', 'url': 'http://globoplay.globo.com/v/4581987/',
'md5': 'b3ccc801f75cd04a914d51dadb83a78d', 'md5': 'f36a1ecd6a50da1577eee6dd17f67eff',
'info_dict': { 'info_dict': {
'id': '3607726', 'id': '4581987',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa', 'title': 'Acidentes de trânsito estão entre as maiores causas de queda de energia em SP',
'duration': 103.204, 'duration': 137.973,
'uploader': 'Globo.com', 'uploader': 'Rede Globo',
'uploader_id': 265, 'uploader_id': '196',
'like_count': int,
}
}, },
{ }, {
'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html', 'url': 'http://canalbrasil.globo.com/programas/sangue-latino/videos/3928201.html',
'md5': '307fdeae4390ccfe6ba1aa198cf6e72b', 'only_matching': True,
'info_dict': { }, {
'id': '3652183', 'url': 'http://globosatplay.globo.com/globonews/v/4472924/',
'ext': 'mp4', 'only_matching': True,
'title': 'Receita Federal explica como vai fiscalizar bagagens de quem retorna ao Brasil de avião', }, {
'duration': 110.711, 'url': 'http://globotv.globo.com/t/programa/v/clipe-sexo-e-as-negas-adeus/3836166/',
'uploader': 'Rede Globo', 'only_matching': True,
'uploader_id': 196, }, {
'like_count': int, 'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/',
} 'only_matching': True,
}, }, {
{ 'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html',
'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/', 'only_matching': True,
'md5': 'c1defca721ce25b2354e927d3e4b3dec', }]
'info_dict': {
'id': '3928201',
'ext': 'mp4',
'title': 'Ator e diretor argentino, Ricado Darín fala sobre utopias e suas perdas',
'duration': 1472.906,
'uploader': 'Canal Brasil',
'uploader_id': 705,
'like_count': int,
}
},
]
class MD5(): class MD5:
HEX_FORMAT_LOWERCASE = 0 HEX_FORMAT_LOWERCASE = 0
HEX_FORMAT_UPPERCASE = 1 HEX_FORMAT_UPPERCASE = 1
BASE64_PAD_CHARACTER_DEFAULT_COMPLIANCE = '' BASE64_PAD_CHARACTER_DEFAULT_COMPLIANCE = ''
@ -353,9 +332,6 @@ class GloboIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_id = self._search_regex(self._VIDEOID_REGEXES, webpage, 'video id')
video = self._download_json( video = self._download_json(
self._API_URL_TEMPLATE % video_id, video_id)['videos'][0] self._API_URL_TEMPLATE % video_id, video_id)['videos'][0]
@ -364,7 +340,7 @@ class GloboIE(InfoExtractor):
formats = [] formats = []
for resource in video['resources']: for resource in video['resources']:
resource_id = resource.get('_id') resource_id = resource.get('_id')
if not resource_id: if not resource_id or resource_id.endswith('manifest'):
continue continue
security = self._download_json( security = self._download_json(
@ -393,20 +369,23 @@ class GloboIE(InfoExtractor):
resource_url = resource['url'] resource_url = resource['url']
signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash') signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash')
if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'): if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
formats.extend(self._extract_m3u8_formats(signed_url, resource_id, 'mp4')) m3u8_formats = self._extract_m3u8_formats(
signed_url, resource_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
else: else:
formats.append({ formats.append({
'url': signed_url, 'url': signed_url,
'format_id': resource_id, 'format_id': 'http-%s' % resource_id,
'height': resource.get('height'), 'height': int_or_none(resource.get('height')),
}) })
self._sort_formats(formats) self._sort_formats(formats)
duration = float_or_none(video.get('duration'), 1000) duration = float_or_none(video.get('duration'), 1000)
like_count = int_or_none(video.get('likes'))
uploader = video.get('channel') uploader = video.get('channel')
uploader_id = video.get('channel_id') uploader_id = str_or_none(video.get('channel_id'))
return { return {
'id': video_id, 'id': video_id,
@ -414,6 +393,46 @@ class GloboIE(InfoExtractor):
'duration': duration, 'duration': duration,
'uploader': uploader, 'uploader': uploader,
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'like_count': like_count,
'formats': formats 'formats': formats
} }
class GloboArticleIE(InfoExtractor):
_VALID_URL = 'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/]+)\.html'
_VIDEOID_REGEXES = [
r'\bdata-video-id=["\'](\d{7,})',
r'\bdata-player-videosids=["\'](\d{7,})',
r'\bvideosIDs\s*:\s*["\'](\d{7,})',
r'\bdata-id=["\'](\d{7,})',
r'<div[^>]+\bid=["\'](\d{7,})',
]
_TESTS = [{
'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html',
'md5': '307fdeae4390ccfe6ba1aa198cf6e72b',
'info_dict': {
'id': '3652183',
'ext': 'mp4',
'title': 'Receita Federal explica como vai fiscalizar bagagens de quem retorna ao Brasil de avião',
'duration': 110.711,
'uploader': 'Rede Globo',
'uploader_id': '196',
}
}, {
'url': 'http://gq.globo.com/Prazeres/Poder/noticia/2015/10/all-o-desafio-assista-ao-segundo-capitulo-da-serie.html',
'only_matching': True,
}, {
'url': 'http://gshow.globo.com/programas/tv-xuxa/O-Programa/noticia/2014/01/xuxa-e-junno-namoram-muuuito-em-luau-de-zeze-di-camargo-e-luciano.html',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return False if GloboIE.suitable(url) else super(GloboArticleIE, cls).suitable(url)
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(self._VIDEOID_REGEXES, webpage, 'video id')
return self.url_result('globo:%s' % video_id, 'Globo')

View File

@ -82,6 +82,11 @@ class LyndaBaseIE(InfoExtractor):
expected=True) expected=True)
raise ExtractorError('Unable to log in') raise ExtractorError('Unable to log in')
def _logout(self):
self._download_webpage(
'http://www.lynda.com/ajax/logout.aspx', None,
'Logging out', 'Unable to log out', fatal=False)
class LyndaIE(LyndaBaseIE): class LyndaIE(LyndaBaseIE):
IE_NAME = 'lynda' IE_NAME = 'lynda'
@ -108,51 +113,47 @@ class LyndaIE(LyndaBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
page = self._download_webpage( video = self._download_json(
'http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, 'http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
video_id, 'Downloading video JSON') video_id, 'Downloading video JSON')
video_json = json.loads(page)
if 'Status' in video_json: if 'Status' in video:
raise ExtractorError( raise ExtractorError(
'lynda returned error: %s' % video_json['Message'], expected=True) 'lynda returned error: %s' % video['Message'], expected=True)
if video_json['HasAccess'] is False: if video.get('HasAccess') is False:
self.raise_login_required('Video %s is only available for members' % video_id) self.raise_login_required('Video %s is only available for members' % video_id)
video_id = compat_str(video_json['ID']) video_id = compat_str(video.get('ID') or video_id)
duration = video_json['DurationInSeconds'] duration = int_or_none(video.get('DurationInSeconds'))
title = video_json['Title'] title = video['Title']
formats = [] formats = []
fmts = video_json.get('Formats') fmts = video.get('Formats')
if fmts: if fmts:
formats.extend([ formats.extend([{
{ 'url': f['Url'],
'url': fmt['Url'], 'ext': f.get('Extension'),
'ext': fmt['Extension'], 'width': int_or_none(f.get('Width')),
'width': fmt['Width'], 'height': int_or_none(f.get('Height')),
'height': fmt['Height'], 'filesize': int_or_none(f.get('FileSize')),
'filesize': fmt['FileSize'], 'format_id': compat_str(f.get('Resolution')) if f.get('Resolution') else None,
'format_id': str(fmt['Resolution']) } for f in fmts if f.get('Url')])
} for fmt in fmts])
prioritized_streams = video_json.get('PrioritizedStreams') prioritized_streams = video.get('PrioritizedStreams')
if prioritized_streams: if prioritized_streams:
for prioritized_stream_id, prioritized_stream in prioritized_streams.items(): for prioritized_stream_id, prioritized_stream in prioritized_streams.items():
formats.extend([ formats.extend([{
{ 'url': video_url,
'url': video_url, 'width': int_or_none(format_id),
'width': int_or_none(format_id), 'format_id': '%s-%s' % (prioritized_stream_id, format_id),
'format_id': '%s-%s' % (prioritized_stream_id, format_id), } for format_id, video_url in prioritized_stream.items()])
} for format_id, video_url in prioritized_stream.items()
])
self._check_formats(formats, video_id) self._check_formats(formats, video_id)
self._sort_formats(formats) self._sort_formats(formats)
subtitles = self.extract_subtitles(video_id, page) subtitles = self.extract_subtitles(video_id)
return { return {
'id': video_id, 'id': video_id,
@ -183,7 +184,7 @@ class LyndaIE(LyndaBaseIE):
if srt: if srt:
return srt return srt
def _get_subtitles(self, video_id, webpage): def _get_subtitles(self, video_id):
url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
subs = self._download_json(url, None, False) subs = self._download_json(url, None, False)
if subs: if subs:
@ -205,12 +206,13 @@ class LyndaCourseIE(LyndaBaseIE):
course_path = mobj.group('coursepath') course_path = mobj.group('coursepath')
course_id = mobj.group('courseid') course_id = mobj.group('courseid')
page = self._download_webpage( course = self._download_json(
'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id, 'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
course_id, 'Downloading course JSON') course_id, 'Downloading course JSON')
course_json = json.loads(page)
if 'Status' in course_json and course_json['Status'] == 'NotFound': self._logout()
if course.get('Status') == 'NotFound':
raise ExtractorError( raise ExtractorError(
'Course %s does not exist' % course_id, expected=True) 'Course %s does not exist' % course_id, expected=True)
@ -220,12 +222,13 @@ class LyndaCourseIE(LyndaBaseIE):
# Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided # Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
# by single video API anymore # by single video API anymore
for chapter in course_json['Chapters']: for chapter in course['Chapters']:
for video in chapter['Videos']: for video in chapter.get('Videos', []):
if video['HasAccess'] is False: if video.get('HasAccess') is False:
unaccessible_videos += 1 unaccessible_videos += 1
continue continue
videos.append(video['ID']) if video.get('ID'):
videos.append(video['ID'])
if unaccessible_videos > 0: if unaccessible_videos > 0:
self._downloader.report_warning( self._downloader.report_warning(
@ -238,6 +241,6 @@ class LyndaCourseIE(LyndaBaseIE):
'Lynda') 'Lynda')
for video_id in videos] for video_id in videos]
course_title = course_json['Title'] course_title = course.get('Title')
return self.playlist_result(entries, course_id, course_title) return self.playlist_result(entries, course_id, course_title)

View File

@ -4,6 +4,7 @@ from __future__ import unicode_literals
import random import random
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import ( from ..utils import (
xpath_text, xpath_text,
int_or_none, int_or_none,
@ -51,6 +52,8 @@ class MioMioIE(InfoExtractor):
mioplayer_path = self._search_regex( mioplayer_path = self._search_regex(
r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path') r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path')
http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path}
xml_config = self._search_regex( xml_config = self._search_regex(
r'flashvars="type=(?:sina|video)&amp;(.+?)&amp;', r'flashvars="type=(?:sina|video)&amp;(.+?)&amp;',
webpage, 'xml config') webpage, 'xml config')
@ -60,14 +63,12 @@ class MioMioIE(InfoExtractor):
'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)), 'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)),
video_id) video_id)
# the following xml contains the actual configuration information on the video file(s) vid_config_request = compat_urllib_request.Request(
vid_config = self._download_xml(
'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config), 'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config),
video_id) headers=http_headers)
http_headers = { # the following xml contains the actual configuration information on the video file(s)
'Referer': 'http://www.miomio.tv%s' % mioplayer_path, vid_config = self._download_xml(vid_config_request, video_id)
}
if not int_or_none(xpath_text(vid_config, 'timelength')): if not int_or_none(xpath_text(vid_config, 'timelength')):
raise ExtractorError('Unable to load videos!', expected=True) raise ExtractorError('Unable to load videos!', expected=True)

View File

@ -23,7 +23,7 @@ class NDRBaseIE(InfoExtractor):
class NDRIE(NDRBaseIE): class NDRIE(NDRBaseIE):
IE_NAME = 'ndr' IE_NAME = 'ndr'
IE_DESC = 'NDR.de - Norddeutscher Rundfunk' IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
_VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)+(?P<id>[^/?#]+),[\da-z]+\.html' _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
_TESTS = [{ _TESTS = [{
# httpVideo, same content id # httpVideo, same content id
'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html', 'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
@ -78,6 +78,9 @@ class NDRIE(NDRBaseIE):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
'only_matching': True,
}] }]
def _extract_embed(self, webpage, display_id): def _extract_embed(self, webpage, display_id):
@ -102,7 +105,7 @@ class NDRIE(NDRBaseIE):
class NJoyIE(NDRBaseIE): class NJoyIE(NDRBaseIE):
IE_NAME = 'njoy' IE_NAME = 'njoy'
IE_DESC = 'N-JOY' IE_DESC = 'N-JOY'
_VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)+(?:(?P<display_id>[^/?#]+),)?(?P<id>[\da-z]+)\.html' _VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)*(?:(?P<display_id>[^/?#]+),)?(?P<id>[\da-z]+)\.html'
_TESTS = [{ _TESTS = [{
# httpVideo, same content id # httpVideo, same content id
'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html', 'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html',
@ -235,7 +238,7 @@ class NDREmbedBaseIE(InfoExtractor):
class NDREmbedIE(NDREmbedBaseIE): class NDREmbedIE(NDREmbedBaseIE):
IE_NAME = 'ndr:embed' IE_NAME = 'ndr:embed'
_VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)+(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html' _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
_TESTS = [{ _TESTS = [{
'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html', 'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
'md5': '8b9306142fe65bbdefb5ce24edb6b0a9', 'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
@ -329,7 +332,7 @@ class NDREmbedIE(NDREmbedBaseIE):
class NJoyEmbedIE(NDREmbedBaseIE): class NJoyEmbedIE(NDREmbedBaseIE):
IE_NAME = 'njoy:embed' IE_NAME = 'njoy:embed'
_VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)+(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html' _VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html'
_TESTS = [{ _TESTS = [{
# httpVideo # httpVideo
'url': 'http://www.n-joy.de/events/reeperbahnfestival/doku948-player_image-bc168e87-5263-4d6d-bd27-bb643005a6de_theme-n-joy.html', 'url': 'http://www.n-joy.de/events/reeperbahnfestival/doku948-player_image-bc168e87-5263-4d6d-bd27-bb643005a6de_theme-n-joy.html',

View File

@ -8,6 +8,7 @@ from ..utils import (
ExtractorError, ExtractorError,
determine_ext, determine_ext,
int_or_none, int_or_none,
strip_jsonp,
unified_strdate, unified_strdate,
US_RATINGS, US_RATINGS,
) )
@ -153,6 +154,22 @@ class PBSIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, # requires ffmpeg 'skip_download': True, # requires ffmpeg
}, },
},
{
# Frontline video embedded via flp2012.js
'url': 'http://www.pbs.org/wgbh/pages/frontline/the-atomic-artists',
'info_dict': {
'id': '2070868960',
'display_id': 'the-atomic-artists',
'ext': 'mp4',
'title': 'FRONTLINE - The Atomic Artists',
'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
'duration': 723,
'thumbnail': 're:^https?://.*\.jpg$',
},
'params': {
'skip_download': True, # requires ffmpeg
},
} }
] ]
_ERRORS = { _ERRORS = {
@ -191,9 +208,30 @@ class PBSIE(InfoExtractor):
if media_id: if media_id:
return media_id, presumptive_id, upload_date return media_id, presumptive_id, upload_date
url = self._search_regex( # Frontline video embedded via flp
r'(?s)<iframe[^>]+?(?:[a-z-]+?=["\'].*?["\'][^>]+?)*?\bsrc=["\']([^\'"]+partnerplayer[^\'"]+)["\']', video_id = self._search_regex(
webpage, 'player URL') r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None)
if video_id:
# prg_id calculation is reverse engineered from
# http://www.pbs.org/wgbh/pages/frontline/js/flp2012.js
prg_id = self._search_regex(
r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid')[7:]
if 'q' in prg_id:
prg_id = prg_id.split('q')[1]
prg_id = int(prg_id, 16)
getdir = self._download_json(
'http://www.pbs.org/wgbh/pages/frontline/.json/getdir/getdir%d.json' % prg_id,
presumptive_id, 'Downloading getdir JSON',
transform_source=strip_jsonp)
return getdir['mid'], presumptive_id, upload_date
for iframe in re.findall(r'(?s)<iframe(.+?)></iframe>', webpage):
url = self._search_regex(
r'src=(["\'])(?P<url>.+?partnerplayer.+?)\1', iframe,
'player URL', default=None, group='url')
if url:
break
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
player_id = mobj.group('player_id') player_id = mobj.group('player_id')

View File

@ -12,7 +12,8 @@ from ..utils import parse_iso8601
class PeriscopeIE(InfoExtractor): class PeriscopeIE(InfoExtractor):
IE_DESC = 'Periscope' IE_DESC = 'Periscope'
_VALID_URL = r'https?://(?:www\.)?periscope\.tv/w/(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:www\.)?periscope\.tv/w/(?P<id>[^/?#]+)'
_TEST = { # Alive example URLs can be found here http://onperiscope.com/
_TESTS = [{
'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==', 'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
'md5': '65b57957972e503fcbbaeed8f4fa04ca', 'md5': '65b57957972e503fcbbaeed8f4fa04ca',
'info_dict': { 'info_dict': {
@ -25,11 +26,15 @@ class PeriscopeIE(InfoExtractor):
'uploader_id': '1465763', 'uploader_id': '1465763',
}, },
'skip': 'Expires in 24 hours', 'skip': 'Expires in 24 hours',
} }, {
'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv',
'only_matching': True,
}]
def _call_api(self, method, token): def _call_api(self, method, value):
attribute = 'token' if len(value) > 13 else 'broadcast_id'
return self._download_json( return self._download_json(
'https://api.periscope.tv/api/v2/%s?token=%s' % (method, token), token) 'https://api.periscope.tv/api/v2/%s?%s=%s' % (method, attribute, value), value)
def _real_extract(self, url): def _real_extract(self, url):
token = self._match_id(url) token = self._match_id(url)

View File

@ -20,7 +20,7 @@ from ..utils import (
class ProSiebenSat1IE(InfoExtractor): class ProSiebenSat1IE(InfoExtractor):
IE_NAME = 'prosiebensat1' IE_NAME = 'prosiebensat1'
IE_DESC = 'ProSiebenSat.1 Digital' IE_DESC = 'ProSiebenSat.1 Digital'
_VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany)\.(?:de|at)|ran\.de|fem\.com)/(?P<id>.+)' _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)'
_TESTS = [ _TESTS = [
{ {

View File

@ -8,7 +8,6 @@ import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_HTTPError, compat_HTTPError,
compat_urllib_parse,
compat_urllib_request, compat_urllib_request,
compat_urlparse, compat_urlparse,
) )

View File

@ -281,9 +281,13 @@ class VKIE(InfoExtractor):
mobj.group(1) + ' ' + mobj.group(2) mobj.group(1) + ' ' + mobj.group(2)
upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2)) upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2))
view_count = str_to_int(self._search_regex( view_count = None
r'"mv_views_count_number"[^>]*>([\d,.]+) views<', views = self._html_search_regex(
info_page, 'view count', fatal=False)) r'"mv_views_count_number"[^>]*>(.+?\bviews?)<',
info_page, 'view count', fatal=False)
if views:
view_count = str_to_int(self._search_regex(
r'([\d,.]+)', views, 'view count', fatal=False))
formats = [{ formats = [{
'format_id': k, 'format_id': k,

View File

@ -1107,6 +1107,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not video_info: if not video_info:
video_info = get_video_info video_info = get_video_info
if 'token' in get_video_info: if 'token' in get_video_info:
# Different get_video_info requests may report different results, e.g.
# some may report video unavailability, but some may serve it without
# any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
# the original webpage as well as el=info and el=embedded get_video_info
# requests report video unavailability due to geo restriction while
# el=detailpage succeeds and returns valid data). This is probably
# due to YouTube measures against IP ranges of hosting providers.
# Work around this by preferring the first successful video_info containing
# the token if no such video_info has been found yet.
if 'token' not in video_info:
video_info = get_video_info
break break
if 'token' not in video_info: if 'token' not in video_info:
if 'reason' in video_info: if 'reason' in video_info:

View File

@ -272,7 +272,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
return [], information return [], information
try: try:
self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path) self._downloader.to_screen('[ffmpeg] Destination: ' + new_path)
self.run_ffmpeg(path, new_path, acodec, more_opts) self.run_ffmpeg(path, new_path, acodec, more_opts)
except AudioConversionError as e: except AudioConversionError as e:
raise PostProcessingError( raise PostProcessingError(