1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-13 16:49:58 +08:00

Merge branch 'track-upstream-master'

This commit is contained in:
Your Name 2014-08-17 14:17:01 -04:00
commit b41f8ed971
5 changed files with 156 additions and 3 deletions

View File

@ -126,6 +126,7 @@ from .helsinki import HelsinkiIE
from .hentaistigma import HentaiStigmaIE
from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE
from .howstuffworks import HowStuffWorksIE
from .huffpost import HuffPostIE
from .hypem import HypemIE
from .iconosquare import IconosquareIE

View File

@ -30,7 +30,7 @@ class DFBIE(InfoExtractor):
video_id)
video_info = player_info.find('video')
f4m_info = self._download_xml(video_info.find('url').text, video_id)
f4m_info = self._download_xml(self._proto_relative_url(video_info.find('url').text.strip()), video_id)
token_el = f4m_info.find('token')
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'

View File

@ -706,6 +706,13 @@ class GenericIE(InfoExtractor):
url = unescapeHTML(mobj.group('url'))
return self.url_result(url, ie='MTVServicesEmbedded')
# Look for embedded yahoo player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'Yahoo')
# Start with something easy: JW Player in SWFObject
found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if not found:

View File

@ -0,0 +1,134 @@
from __future__ import unicode_literals
import re
import json
import random
import string
from .common import InfoExtractor
from ..utils import find_xpath_attr
class HowStuffWorksIE(InfoExtractor):
_VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*\d+-(?P<id>.+?)-video\.htm'
_TESTS = [
{
'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
'info_dict': {
'id': '450221',
'display_id': 'cool-jobs-iditarod-musher',
'ext': 'flv',
'title': 'Cool Jobs - Iditarod Musher',
'description': 'md5:82bb58438a88027b8186a1fccb365f90',
'thumbnail': 're:^https?://.*\.jpg$',
},
'params': {
# md5 is not consistent
'skip_download': True
}
},
{
'url': 'http://adventure.howstuffworks.com/39516-deadliest-catch-jakes-farewell-pots-video.htm',
'info_dict': {
'id': '553470',
'display_id': 'deadliest-catch-jakes-farewell-pots',
'ext': 'mp4',
'title': 'Deadliest Catch: Jake\'s Farewell Pots',
'description': 'md5:9632c346d5e43ee238028c9cefd8dbbc',
'thumbnail': 're:^https?://.*\.jpg$',
},
'params': {
# md5 is not consistent
'skip_download': True
}
},
{
'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm',
'info_dict': {
'id': '440011',
'display_id': 'sword-swallowing-1-by-dan-meyer',
'ext': 'flv',
'title': 'Sword Swallowing #1 by Dan Meyer',
'description': 'md5:b2409e88172913e2e7d3d1159b0ef735',
'thumbnail': 're:^https?://.*\.jpg$',
},
'params': {
# md5 is not consistent
'skip_download': True
}
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('id')
webpage = self._download_webpage(url, display_id)
content_id = self._search_regex(r'var siteSectionId="(\d+)";', webpage, 'content id')
mp4 = self._search_regex(
r'''(?xs)var\s+clip\s*=\s*{\s*
.+?\s*
content_id\s*:\s*%s\s*,\s*
.+?\s*
mp4\s*:\s*\[(.*?),?\]\s*
};\s*
videoData\.push\(clip\);''' % content_id,
webpage, 'mp4', fatal=False, default=None)
smil = self._download_xml(
'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % content_id,
content_id, 'Downloading video SMIL')
http_base = find_xpath_attr(
smil,
'./{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'),
'name',
'httpBase').get('content')
def random_string(str_len=0):
return ''.join([random.choice(string.ascii_uppercase) for _ in range(str_len)])
URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=%s&g=%s' % (random_string(5), random_string(12))
formats = []
if mp4:
for video in json.loads('[%s]' % mp4):
bitrate = video['bitrate']
fmt = {
'url': video['src'].replace('http://pmd.video.howstuffworks.com', http_base) + URL_SUFFIX,
'format_id': bitrate,
}
m = re.search(r'(?P<vbr>\d+)[Kk]', bitrate)
if m:
fmt['vbr'] = int(m.group('vbr'))
formats.append(fmt)
else:
for video in smil.findall(
'.//{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')):
vbr = int(video.attrib['system-bitrate']) / 1000
formats.append({
'url': '%s/%s%s' % (http_base, video.attrib['src'], URL_SUFFIX),
'format_id': '%dk' % vbr,
'vbr': vbr,
})
self._sort_formats(formats)
title = self._og_search_title(webpage)
TITLE_SUFFIX = ' : HowStuffWorks'
if title.endswith(TITLE_SUFFIX):
title = title[:-len(TITLE_SUFFIX)]
description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
return {
'id': content_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'formats': formats,
}

View File

@ -15,7 +15,7 @@ from ..utils import (
class YahooIE(InfoExtractor):
IE_DESC = 'Yahoo screen and movies'
_VALID_URL = r'https?://(?:screen|movies)\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html'
_VALID_URL = r'(?P<url>https?://(?:screen|movies)\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)'
_TESTS = [
{
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
@ -46,12 +46,23 @@ class YahooIE(InfoExtractor):
'title': 'The World Loves Spider-Man',
'description': '''People all over the world are celebrating the release of \"The Amazing Spider-Man 2.\" We're taking a look at the enthusiastic response Spider-Man has received from viewers all over the world.''',
}
}
},
{
'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
'md5': '60e8ac193d8fb71997caa8fce54c6460',
'info_dict': {
'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb',
'ext': 'mp4',
'title': "Yahoo Saves 'Community'",
'description': 'md5:4d4145af2fd3de00cbb6c1d664105053',
}
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
url = mobj.group('url')
webpage = self._download_webpage(url, video_id)
items_json = self._search_regex(