1
0
mirror of https://github.com/l1ving/youtube-dl synced 2026-06-08 00:04:47 +08:00
This commit is contained in:
luceatnobis
2015-09-29 14:06:26 +02:00
Unverified
36 changed files with 516 additions and 341 deletions
+1
View File
@@ -5,6 +5,7 @@ python:
- "3.2"
- "3.3"
- "3.4"
- "3.5"
sudo: false
script: nosetests test --verbose
notifications:
+1
View File
@@ -143,3 +143,4 @@ Shaun Walbridge
Lee Jenkins
Anssi Hannula
Lukáš Lalinský
Qijiang Fan
+1
View File
@@ -281,6 +281,7 @@ The `-o` option allows users to indicate a template for the output file names. T
- `playlist`: The sequence will be replaced by the name or the id of the playlist that contains the video.
- `playlist_index`: The sequence will be replaced by the index of the video in the playlist padded with leading zeros according to the total length of the playlist.
- `format_id`: The sequence will be replaced by the format code specified by `--format`.
- `duration`: The sequence will be replaced by the length of the video in seconds.
The current default template is `%(title)s-%(id)s.%(ext)s`.
+1 -1
View File
@@ -5,7 +5,7 @@ import os
from os.path import dirname as dirn
import sys
sys.path.append(dirn(dirn((os.path.abspath(__file__)))))
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
import youtube_dl
BASH_COMPLETION_FILE = "youtube-dl.bash-completion"
+1 -1
View File
@@ -6,7 +6,7 @@ import os
from os.path import dirname as dirn
import sys
sys.path.append(dirn(dirn((os.path.abspath(__file__)))))
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
import youtube_dl
from youtube_dl.utils import shell_quote
+1 -1
View File
@@ -6,7 +6,7 @@ import os
import textwrap
# We must be able to import youtube_dl
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
import youtube_dl
+1 -1
View File
@@ -9,7 +9,7 @@ import sys
# Import youtube_dl
ROOT_DIR = os.path.join(os.path.dirname(__file__), '..')
sys.path.append(ROOT_DIR)
sys.path.insert(0, ROOT_DIR)
import youtube_dl
+1 -1
View File
@@ -5,7 +5,7 @@ import os
from os.path import dirname as dirn
import sys
sys.path.append(dirn(dirn((os.path.abspath(__file__)))))
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
import youtube_dl
ZSH_COMPLETION_FILE = "youtube-dl.zsh"
+11 -10
View File
@@ -101,7 +101,7 @@
- **ComCarCoff**
- **ComedyCentral**
- **ComedyCentralShows**: The Daily Show / The Colbert Report
- **CondeNast**: Condé Nast media group: Condé Nast, GQ, Glamour, Vanity Fair, Vogue, W Magazine, WIRED
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
- **Cracked**
- **Criterion**
- **CrooksAndLiars**
@@ -122,7 +122,6 @@
- **defense.gouv.fr**
- **DHM**: Filmarchiv - Deutsches Historisches Museum
- **Discovery**
- **divxstage**: DivxStage
- **Dotsub**
- **DouyuTV**: 斗鱼
- **dramafever**
@@ -159,7 +158,6 @@
- **faz.net**
- **fc2**
- **fernsehkritik.tv**
- **fernsehkritik.tv:postecke**
- **Firstpost**
- **FiveTV**
- **Flickr**
@@ -209,7 +207,6 @@
- **hitbox**
- **hitbox:live**
- **HornBunny**
- **HostingBulk**
- **HotNewHipHop**
- **Howcast**
- **HowStuffWorks**
@@ -286,7 +283,7 @@
- **Minhateca**
- **MinistryGrid**
- **miomio.tv**
- **mitele.es**
- **MiTele**: mitele.es
- **mixcloud**
- **MLB**
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
@@ -317,7 +314,6 @@
- **Myvi**
- **myvideo**
- **MyVidster**
- **N-JOY**
- **n-tv.de**
- **NationalGeographic**
- **Naver**
@@ -326,7 +322,9 @@
- **NBCNews**
- **NBCSports**
- **NBCSportsVPlayer**
- **ndr**: NDR.de - Mediathek
- **ndr**: NDR.de - Norddeutscher Rundfunk
- **ndr:embed**
- **ndr:embed:base**
- **NDTV**
- **NerdCubedFeed**
- **Nerdist**
@@ -349,12 +347,16 @@
- **nhl.com:videocenter**: NHL videocenter category
- **niconico**: ニコニコ動画
- **NiconicoPlaylist**
- **njoy**: N-JOY
- **njoy:embed**
- **Noco**
- **Normalboots**
- **NosVideo**
- **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
- **novamov**: NovaMov
- **Nowness**
- **nowness**
- **nowness:playlist**
- **nowness:series**
- **NowTV**
- **nowvideo**: NowVideo
- **npo**: npo.nl and ntr.nl
@@ -375,7 +377,6 @@
- **OnionStudios**
- **Ooyala**
- **OoyalaExternal**
- **OpenFilm**
- **orf:fm4**: radio FM4
- **orf:iptv**: iptv.ORF.at
- **orf:oe1**: Radio Österreich 1
@@ -530,7 +531,7 @@
- **techtv.mit.edu**
- **ted**
- **TeleBruxelles**
- **telecinco.es**
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es
- **Telegraaf**
- **TeleMB**
- **TeleTask**
+1 -1
View File
@@ -1,5 +1,5 @@
[tox]
envlist = py26,py27,py33,py34
envlist = py26,py27,py33,py34,py35
[testenv]
deps =
nose
+1 -1
View File
@@ -11,7 +11,7 @@ if __package__ is None and not hasattr(sys, "frozen"):
# direct call of __main__.py
import os.path
path = os.path.realpath(os.path.abspath(__file__))
sys.path.append(os.path.dirname(os.path.dirname(path)))
sys.path.insert(0, os.path.dirname(os.path.dirname(path)))
import youtube_dl
+12 -3
View File
@@ -28,9 +28,18 @@ class HlsFD(FileDownloader):
return False
ffpp.check_version()
args = [
encodeArgument(opt)
for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
args = [ffpp.executable, '-y']
if info_dict['http_headers']:
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
args += [
'-headers',
''.join('%s: %s\r\n' % (key, val) for key, val in info_dict['http_headers'].items())]
args += ['-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc']
args = [encodeArgument(opt) for opt in args]
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
self._debug_cmd(args)
+1 -5
View File
@@ -169,10 +169,7 @@ from .firstpost import FirstpostIE
from .firsttv import FirstTVIE
from .fivemin import FiveMinIE
from .fivetv import FiveTVIE
from .fktv import (
FKTVIE,
FKTVPosteckeIE,
)
from .fktv import FKTVIE
from .flickr import FlickrIE
from .folketinget import FolketingetIE
from .footyroom import FootyRoomIE
@@ -228,7 +225,6 @@ from .historicfilms import HistoricFilmsIE
from .history import HistoryIE
from .hitbox import HitboxIE, HitboxLiveIE
from .hornbunny import HornBunnyIE
from .hostingbulk import HostingBulkIE
from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE
from .howstuffworks import HowStuffWorksIE
+11 -1
View File
@@ -4,6 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import (
find_xpath_attr,
unified_strdate,
@@ -77,7 +81,13 @@ class ArteTVPlus7IE(InfoExtractor):
def _extract_from_webpage(self, webpage, video_id, lang):
json_url = self._html_search_regex(
[r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'],
webpage, 'json vp url')
webpage, 'json vp url', default=None)
if not json_url:
iframe_url = self._html_search_regex(
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
webpage, 'iframe url', group='url')
json_url = compat_parse_qs(
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
return self._extract_from_json_url(json_url, video_id, lang)
def _extract_from_json_url(self, json_url, video_id, lang):
+19 -1
View File
@@ -21,6 +21,9 @@ class BBCCoUkIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
_MEDIASELECTOR_URLS = [
# Provides HQ HLS streams with even better quality that pc mediaset but fails
# with geolocation in some cases when it's even not geo restricted at all (e.g.
# http://www.bbc.co.uk/programmes/b06bp7lf)
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
]
@@ -153,6 +156,21 @@ class BBCCoUkIE(InfoExtractor):
'skip_download': True,
},
'skip': 'geolocation',
}, {
# iptv-all mediaset fails with geolocation however there is no geo restriction
# for this programme at all
'url': 'http://www.bbc.co.uk/programmes/b06bp7lf',
'info_dict': {
'id': 'b06bp7kf',
'ext': 'flv',
'title': "Annie Mac's Friday Night, B.Traits sits in for Annie",
'description': 'B.Traits sits in for Annie Mac with a Mini-Mix from Disclosure.',
'duration': 10800,
},
'params': {
# rtmp download
'skip_download': True,
},
}, {
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
'only_matching': True,
@@ -294,7 +312,7 @@ class BBCCoUkIE(InfoExtractor):
return self._download_media_selector_url(
mediaselector_url % programme_id, programme_id)
except BBCCoUkIE.MediaSelectionError as e:
if e.id == 'notukerror':
if e.id in ('notukerror', 'geolocation'):
last_exception = e
continue
self._raise_extractor_error(e)
+7
View File
@@ -152,6 +152,7 @@ class InfoExtractor(object):
description: Full video description.
uploader: Full name of the video uploader.
creator: The main artist who created the video.
release_date: The date (YYYYMMDD) when the video was released.
timestamp: UNIX timestamp of the moment the video became available.
upload_date: Video upload date (YYYYMMDD).
If not explicitly set, calculated from timestamp.
@@ -516,6 +517,12 @@ class InfoExtractor(object):
'%s. Use --username and --password or --netrc to provide account credentials.' % msg,
expected=True)
@staticmethod
def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'):
raise ExtractorError(
'%s. You might want to use --proxy to workaround.' % msg,
expected=True)
# Methods for following #608
@staticmethod
def url_result(url, ie=None, video_id=None, video_title=None):
+41 -13
View File
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..compat import (
@@ -12,6 +11,7 @@ from ..compat import (
)
from ..utils import (
orderedSet,
remove_end,
)
@@ -24,21 +24,33 @@ class CondeNastIE(InfoExtractor):
# The keys are the supported sites and the values are the name to be shown
# to the user and in the extractor description.
_SITES = {
'wired': 'WIRED',
'gq': 'GQ',
'vogue': 'Vogue',
'glamour': 'Glamour',
'wmagazine': 'W Magazine',
'vanityfair': 'Vanity Fair',
'allure': 'Allure',
'architecturaldigest': 'Architectural Digest',
'arstechnica': 'Ars Technica',
'bonappetit': 'Bon Appétit',
'brides': 'Brides',
'cnevids': 'Condé Nast',
'cntraveler': 'Condé Nast Traveler',
'details': 'Details',
'epicurious': 'Epicurious',
'glamour': 'Glamour',
'golfdigest': 'Golf Digest',
'gq': 'GQ',
'newyorker': 'The New Yorker',
'self': 'SELF',
'teenvogue': 'Teen Vogue',
'vanityfair': 'Vanity Fair',
'vogue': 'Vogue',
'wired': 'WIRED',
'wmagazine': 'W Magazine',
}
_VALID_URL = r'http://(video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
_VALID_URL = r'http://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed(?:js)?)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed)/.+?' % '|'.join(_SITES.keys())
EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed(?:js)?)/.+?' % '|'.join(_SITES.keys())
_TEST = {
_TESTS = [{
'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
'md5': '1921f713ed48aabd715691f774c451f7',
'info_dict': {
@@ -47,7 +59,16 @@ class CondeNastIE(InfoExtractor):
'title': '3D Printed Speakers Lit With LED',
'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
}
}
}, {
# JS embed
'url': 'http://player.cnevids.com/embedjs/55f9cf8b61646d1acf00000c/5511d76261646d5566020000.js',
'md5': 'f1a6f9cafb7083bab74a710f65d08999',
'info_dict': {
'id': '55f9cf8b61646d1acf00000c',
'ext': 'mp4',
'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
}
}]
def _extract_series(self, url, webpage):
title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>',
@@ -86,8 +107,8 @@ class CondeNastIE(InfoExtractor):
info_url = base_info_url + data
info_page = self._download_webpage(info_url, video_id,
'Downloading video info')
video_info = self._search_regex(r'var video = ({.+?});', info_page, 'video info')
video_info = json.loads(video_info)
video_info = self._search_regex(r'var\s+video\s*=\s*({.+?});', info_page, 'video info')
video_info = self._parse_json(video_info, video_id)
formats = [{
'format_id': '%s-%s' % (fdata['type'].split('/')[-1], fdata['quality']),
@@ -111,6 +132,13 @@ class CondeNastIE(InfoExtractor):
url_type = mobj.group('type')
item_id = mobj.group('id')
# Convert JS embed to regular embed
if url_type == 'embedjs':
parsed_url = compat_urlparse.urlparse(url)
url = compat_urlparse.urlunparse(parsed_url._replace(
path=remove_end(parsed_url.path, '.js').replace('/embedjs/', '/embed/')))
url_type = 'embed'
self.to_screen('Extracting from %s with the Condé Nast extractor' % self._SITES[site])
webpage = self._download_webpage(url, item_id)
+19 -8
View File
@@ -21,7 +21,7 @@ class EaglePlatformIE(InfoExtractor):
_TESTS = [{
# http://lenta.ru/news/2015/03/06/navalny/
'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
'md5': '0b7994faa2bd5c0f69a3db6db28d078d',
'md5': '70f5187fb620f2c1d503b3b22fd4efe3',
'info_dict': {
'id': '227304',
'ext': 'mp4',
@@ -36,7 +36,7 @@ class EaglePlatformIE(InfoExtractor):
# http://muz-tv.ru/play/7129/
# http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
'url': 'eagleplatform:media.clipyou.ru:12820',
'md5': '6c2ebeab03b739597ce8d86339d5a905',
'md5': '90b26344ba442c8e44aa4cf8f301164a',
'info_dict': {
'id': '12820',
'ext': 'mp4',
@@ -48,7 +48,8 @@ class EaglePlatformIE(InfoExtractor):
'skip': 'Georestricted',
}]
def _handle_error(self, response):
@staticmethod
def _handle_error(response):
status = int_or_none(response.get('status', 200))
if status != 200:
raise ExtractorError(' '.join(response['errors']), expected=True)
@@ -58,6 +59,9 @@ class EaglePlatformIE(InfoExtractor):
self._handle_error(response)
return response
def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'):
return self._download_json(url_or_request, video_id, note)['data'][0]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')
@@ -69,7 +73,7 @@ class EaglePlatformIE(InfoExtractor):
title = media['title']
description = media.get('description')
thumbnail = media.get('snapshot')
thumbnail = self._proto_relative_url(media.get('snapshot'), 'http:')
duration = int_or_none(media.get('duration'))
view_count = int_or_none(media.get('views'))
@@ -78,13 +82,20 @@ class EaglePlatformIE(InfoExtractor):
if age_restriction:
age_limit = 0 if age_restriction == 'allow_all' else 18
m3u8_data = self._download_json(
self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:'),
video_id, 'Downloading m3u8 JSON')
secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:')
m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
formats = self._extract_m3u8_formats(
m3u8_data['data'][0], video_id,
m3u8_url, video_id,
'mp4', entry_protocol='m3u8_native')
mp4_url = self._get_video_url(
# Secure mp4 URL is constructed according to Player.prototype.mp4 from
# http://lentaru.media.eagleplatform.com/player/player.js
re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4', secure_m3u8),
video_id, 'Downloading mp4 JSON')
formats.append({'url': mp4_url, 'format_id': 'mp4'})
self._sort_formats(formats)
return {
+29 -56
View File
@@ -1,13 +1,12 @@
from __future__ import unicode_literals
import re
import random
import json
from .common import InfoExtractor
from ..utils import (
get_element_by_id,
clean_html,
determine_ext,
ExtractorError,
)
@@ -17,66 +16,40 @@ class FKTVIE(InfoExtractor):
_TEST = {
'url': 'http://fernsehkritik.tv/folge-1',
'md5': '21f0b0c99bce7d5b524eb1b17b1c6d79',
'info_dict': {
'id': '00011',
'ext': 'flv',
'id': '1',
'ext': 'mp4',
'title': 'Folge 1 vom 10. April 2007',
'description': 'md5:fb4818139c7cfe6907d4b83412a6864f',
'thumbnail': 're:^https?://.*\.jpg$',
},
}
def _real_extract(self, url):
episode = int(self._match_id(url))
episode = self._match_id(url)
video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%s.jpg' % episode
start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%s/Start' % episode,
episode)
playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage,
'playlist', flags=re.DOTALL)
files = json.loads(re.sub('{[^{}]*?}', '{}', playlist))
webpage = self._download_webpage(
'http://fernsehkritik.tv/folge-%s/play' % episode, episode)
title = clean_html(self._html_search_regex(
'<h3>([^<]+)</h3>', webpage, 'title'))
matches = re.search(
r'(?s)<video(?:(?!poster)[^>])+(?:poster="([^"]+)")?[^>]*>(.*)</video>',
webpage)
if matches is None:
raise ExtractorError('Unable to extract the video')
videos = []
for i, _ in enumerate(files, 1):
video_id = '%04d%d' % (episode, i)
video_url = 'http://fernsehkritik.tv/js/directme.php?file=%s%s.flv' % (episode, '' if i == 1 else '-%d' % i)
videos.append({
'ext': 'flv',
'id': video_id,
'url': video_url,
'title': clean_html(get_element_by_id('eptitle', start_webpage)),
'description': clean_html(get_element_by_id('contentlist', start_webpage)),
'thumbnail': video_thumbnail
})
poster, sources = matches.groups()
if poster is None:
self.report_warning('unable to extract thumbnail')
urls = re.findall(r'<source[^>]+src="([^"]+)"', sources)
formats = [{
'url': furl,
'format_id': determine_ext(furl),
} for furl in urls]
return {
'_type': 'multi_video',
'entries': videos,
'id': 'folge-%s' % episode,
}
class FKTVPosteckeIE(InfoExtractor):
IE_NAME = 'fernsehkritik.tv:postecke'
_VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/inline-video/postecke\.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)'
_TEST = {
'url': 'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120',
'md5': '262f0adbac80317412f7e57b4808e5c4',
'info_dict': {
'id': '0120',
'ext': 'flv',
'title': 'Postecke 120',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
episode = int(mobj.group('ep'))
server = random.randint(2, 4)
video_id = '%04d' % episode
video_url = 'http://dl%d.fernsehkritik.tv/postecke/postecke%d.flv' % (server, episode)
video_title = 'Postecke %d' % episode
return {
'id': video_id,
'url': video_url,
'title': video_title,
'id': episode,
'title': title,
'formats': formats,
'thumbnail': poster,
}
+25 -7
View File
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
import os
import re
import sys
from .common import InfoExtractor
from .youtube import YoutubeIE
@@ -49,6 +50,7 @@ from .dailymotion import DailymotionCloudIE
from .onionstudios import OnionStudiosIE
from .snagfilms import SnagFilmsEmbedIE
from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE
class GenericIE(InfoExtractor):
@@ -230,6 +232,22 @@ class GenericIE(InfoExtractor):
'skip_download': False,
}
},
{
# redirect in Refresh HTTP header
'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
'info_dict': {
'id': 'pO8h3EaFRdo',
'ext': 'mp4',
'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
'upload_date': '20150917',
'uploader_id': 'brtvofficial',
'uploader': 'Boiler Room',
},
'params': {
'skip_download': False,
},
},
{
'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
@@ -1594,12 +1612,9 @@ class GenericIE(InfoExtractor):
return self.url_result(url, ie='Vulture')
# Look for embedded mtvservices player
mobj = re.search(
r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
webpage)
if mobj is not None:
url = unescapeHTML(mobj.group('url'))
return self.url_result(url, ie='MTVServicesEmbedded')
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
if mtvservices_url:
return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
# Look for embedded yahoo player
mobj = re.search(
@@ -1638,7 +1653,7 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'), 'MLB')
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
webpage)
if mobj is not None:
return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
@@ -1808,6 +1823,9 @@ class GenericIE(InfoExtractor):
# Look also in Refresh HTTP header
refresh_header = head_response.headers.get('Refresh')
if refresh_header:
# In python 2 response HTTP headers are bytestrings
if sys.version_info < (3, 0) and isinstance(refresh_header, str):
refresh_header = refresh_header.decode('iso-8859-1')
found = re.search(REDIRECT_REGEX, refresh_header)
if found:
new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
-80
View File
@@ -1,80 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
ExtractorError,
int_or_none,
urlencode_postdata,
)
class HostingBulkIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://(?:www\.)?hostingbulk\.com/
(?:embed-)?(?P<id>[A-Za-z0-9]{12})(?:-\d+x\d+)?\.html'''
_FILE_DELETED_REGEX = r'<b>File Not Found</b>'
_TEST = {
'url': 'http://hostingbulk.com/n0ulw1hv20fm.html',
'md5': '6c8653c8ecf7ebfa83b76e24b7b2fe3f',
'info_dict': {
'id': 'n0ulw1hv20fm',
'ext': 'mp4',
'title': 'md5:5afeba33f48ec87219c269e054afd622',
'filesize': 6816081,
'thumbnail': 're:^http://.*\.jpg$',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
url = 'http://hostingbulk.com/{0:}.html'.format(video_id)
# Custom request with cookie to set language to English, so our file
# deleted regex would work.
request = compat_urllib_request.Request(
url, headers={'Cookie': 'lang=english'})
webpage = self._download_webpage(request, video_id)
if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
raise ExtractorError('Video %s does not exist' % video_id,
expected=True)
title = self._html_search_regex(r'<h3>(.*?)</h3>', webpage, 'title')
filesize = int_or_none(
self._search_regex(
r'<small>\((\d+)\sbytes?\)</small>',
webpage,
'filesize',
fatal=False
)
)
thumbnail = self._search_regex(
r'<img src="([^"]+)".+?class="pic"',
webpage, 'thumbnail', fatal=False)
fields = self._hidden_inputs(webpage)
request = compat_urllib_request.Request(url, urlencode_postdata(fields))
request.add_header('Content-type', 'application/x-www-form-urlencoded')
response = self._request_webpage(request, video_id,
'Submiting download request')
video_url = response.geturl()
formats = [{
'format_id': 'sd',
'filesize': filesize,
'url': video_url,
}]
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'formats': formats,
}
+17 -7
View File
@@ -1,7 +1,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import int_or_none
from ..utils import (
int_or_none,
get_element_by_id,
remove_end,
)
class IconosquareIE(InfoExtractor):
@@ -12,7 +16,7 @@ class IconosquareIE(InfoExtractor):
'info_dict': {
'id': '522207370455279102_24101272',
'ext': 'mp4',
'title': 'Instagram media by @aguynamedpatrick (Patrick Janelle)',
'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d',
'timestamp': 1376471991,
'upload_date': '20130814',
@@ -29,8 +33,7 @@ class IconosquareIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
media = self._parse_json(
self._search_regex(
r'window\.media\s*=\s*({.+?});\n', webpage, 'media'),
get_element_by_id('mediaJson', webpage),
video_id)
formats = [{
@@ -41,9 +44,7 @@ class IconosquareIE(InfoExtractor):
} for format_id, f in media['videos'].items()]
self._sort_formats(formats)
title = self._html_search_regex(
r'<title>(.+?)(?: *\(Videos?\))? \| (?:Iconosquare|Statigram)</title>',
webpage, 'title')
title = remove_end(self._og_search_title(webpage), ' - via Iconosquare')
timestamp = int_or_none(media.get('created_time') or media.get('caption', {}).get('created_time'))
description = media.get('caption', {}).get('text')
@@ -61,6 +62,14 @@ class IconosquareIE(InfoExtractor):
'height': int_or_none(t.get('height'))
} for thumbnail_id, t in media.get('images', {}).items()]
comments = [{
'id': comment.get('id'),
'text': comment['text'],
'timestamp': int_or_none(comment.get('created_time')),
'author': comment.get('from', {}).get('full_name'),
'author_id': comment.get('from', {}).get('username'),
} for comment in media.get('comments', {}).get('data', []) if 'text' in comment]
return {
'id': video_id,
'title': title,
@@ -72,4 +81,5 @@ class IconosquareIE(InfoExtractor):
'comment_count': comment_count,
'like_count': like_count,
'formats': formats,
'comments': comments,
}
+11 -7
View File
@@ -95,6 +95,10 @@ class IqiyiIE(InfoExtractor):
('10', 'h1'),
]
@staticmethod
def md5_text(text):
return hashlib.md5(text.encode('utf-8')).hexdigest()
def construct_video_urls(self, data, video_id, _uuid):
def do_xor(x, y):
a = y % 3
@@ -121,7 +125,7 @@ class IqiyiIE(InfoExtractor):
note='Download path key of segment %d for format %s' % (segment_index + 1, format_id)
)['t']
t = str(int(math.floor(int(tm) / (600.0))))
return hashlib.md5((t + mg + x).encode('utf8')).hexdigest()
return self.md5_text(t + mg + x)
video_urls_dict = {}
for format_item in data['vp']['tkl'][0]['vs']:
@@ -179,20 +183,19 @@ class IqiyiIE(InfoExtractor):
def get_raw_data(self, tvid, video_id, enc_key, _uuid):
tm = str(int(time.time()))
tail = tm + tvid
param = {
'key': 'fvip',
'src': hashlib.md5(b'youtube-dl').hexdigest(),
'src': self.md5_text('youtube-dl'),
'tvId': tvid,
'vid': video_id,
'vinfo': 1,
'tm': tm,
'enc': hashlib.md5(
(enc_key + tm + tvid).encode('utf8')).hexdigest(),
'enc': self.md5_text((enc_key + tail)[1:64:2] + tail),
'qyid': _uuid,
'tn': random.random(),
'um': 0,
'authkey': hashlib.md5(
(tm + tvid).encode('utf8')).hexdigest()
'authkey': self.md5_text(self.md5_text('') + tail),
}
api_url = 'http://cache.video.qiyi.com/vms' + '?' + \
@@ -201,7 +204,8 @@ class IqiyiIE(InfoExtractor):
return raw_data
def get_enc_key(self, swf_url, video_id):
enc_key = '3601ba290e4f4662848c710e2122007e' # last update at 2015-08-10 for Zombie
# TODO: automatic key extraction
enc_key = 'eac64f22daf001da6ba9aa8da4d501508bbe90a4d4091fea3b0582a85b38c2cc' # last update at 2015-09-23-23 for Zombie::bite
return enc_key
def _real_extract(self, url):
+16 -23
View File
@@ -1,46 +1,39 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class KeekIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<id>\w+)'
_VALID_URL = r'https?://(?:www\.)?keek\.com/keek/(?P<id>\w+)'
IE_NAME = 'keek'
_TEST = {
'url': 'https://www.keek.com/ytdl/keeks/NODfbab',
'md5': '09c5c109067536c1cec8bac8c21fea05',
'url': 'https://www.keek.com/keek/NODfbab',
'md5': '9b0636f8c0f7614afa4ea5e4c6e57e83',
'info_dict': {
'id': 'NODfbab',
'ext': 'mp4',
'uploader': 'youtube-dl project',
'uploader_id': 'ytdl',
'title': 'test chars: "\'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de .',
'title': 'md5:35d42050a3ece241d5ddd7fdcc6fd896',
'uploader': 'ytdl',
'uploader_id': 'eGT5bab',
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
video_url = 'http://cdn.keek.com/keek/video/%s' % video_id
thumbnail = 'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
webpage = self._download_webpage(url, video_id)
raw_desc = self._html_search_meta('description', webpage)
if raw_desc:
uploader = self._html_search_regex(
r'Watch (.*?)\s+\(', raw_desc, 'uploader', fatal=False)
uploader_id = self._html_search_regex(
r'Watch .*?\(@(.+?)\)', raw_desc, 'uploader_id', fatal=False)
else:
uploader = None
uploader_id = None
return {
'id': video_id,
'url': video_url,
'url': self._og_search_video_url(webpage),
'ext': 'mp4',
'title': self._og_search_title(webpage),
'thumbnail': thumbnail,
'uploader': uploader,
'uploader_id': uploader_id,
'title': self._og_search_description(webpage).strip(),
'thumbnail': self._og_search_thumbnail(webpage),
'uploader': self._search_regex(
r'data-username=(["\'])(?P<uploader>.+?)\1', webpage,
'uploader', fatal=False, group='uploader'),
'uploader_id': self._search_regex(
r'data-user-id=(["\'])(?P<uploader_id>.+?)\1', webpage,
'uploader id', fatal=False, group='uploader_id'),
}
+4 -1
View File
@@ -57,6 +57,7 @@ class KuwoIE(KuwoBaseIE):
'upload_date': '20080122',
'description': 'md5:ed13f58e3c3bf3f7fd9fbc4e5a7aa75c'
},
'skip': 'this song has been offline because of copyright issues',
}, {
'url': 'http://www.kuwo.cn/yinyue/6446136/',
'info_dict': {
@@ -76,9 +77,11 @@ class KuwoIE(KuwoBaseIE):
webpage = self._download_webpage(
url, song_id, note='Download song detail info',
errnote='Unable to get song detail info')
if '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage:
raise ExtractorError('this song has been offline because of copyright issues', expected=True)
song_name = self._html_search_regex(
r'<h1[^>]+title="([^"]+)">', webpage, 'song name')
r'(?s)class="(?:[^"\s]+\s+)*title(?:\s+[^"\s]+)*".*?<h1[^>]+title="([^"]+)"', webpage, 'song name')
singer_name = self._html_search_regex(
r'<div[^>]+class="s_img">\s*<a[^>]+title="([^>]+)"',
webpage, 'singer name', fatal=False)
+14 -1
View File
@@ -200,7 +200,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
if mgid is None or ':' not in mgid:
mgid = self._search_regex(
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
webpage, 'mgid')
webpage, 'mgid', default=None)
if not mgid:
sm4_embed = self._html_search_meta(
'sm4:video:embed', webpage, 'sm4 embed', default='')
mgid = self._search_regex(
r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid')
videos_info = self._get_videos_info(mgid)
return videos_info
@@ -222,6 +228,13 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
},
}
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media.mtvnservices.com/embed/.+?)\1', webpage)
if mobj:
return mobj.group('url')
def _get_feed_url(self, uri):
video_id = self._id_from_uri(uri)
site_id = uri.replace(video_id, '')
+103 -51
View File
@@ -16,53 +16,104 @@ from ..utils import (
class NFLIE(InfoExtractor):
IE_NAME = 'nfl.com'
_VALID_URL = r'''(?x)https?://
(?P<host>(?:www\.)?(?:nfl\.com|.*?\.clubs\.nfl\.com))/
(?:.+?/)*
(?P<id>(?:[a-z0-9]{16}|\w{8}\-(?:\w{4}\-){3}\w{12}))'''
_TESTS = [
{
'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
'md5': '394ef771ddcd1354f665b471d78ec4c6',
'info_dict': {
'id': '0ap3000000398478',
'ext': 'mp4',
'title': 'Week 3: Redskins vs. Eagles highlights',
'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
'upload_date': '20140921',
'timestamp': 1411337580,
'thumbnail': 're:^https?://.*\.jpg$',
}
},
{
'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266',
'md5': 'cf85bdb4bc49f6e9d3816d130c78279c',
'info_dict': {
'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266',
'ext': 'mp4',
'title': 'LIVE: Post Game vs. Browns',
'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8',
'upload_date': '20131229',
'timestamp': 1388354455,
'thumbnail': 're:^https?://.*\.jpg$',
}
},
{
'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish',
'info_dict': {
'id': '0ap3000000467607',
'ext': 'mp4',
'title': 'Frustrations flare on the field',
'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.',
'timestamp': 1422850320,
'upload_date': '20150202',
},
},
{
'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood',
'only_matching': True,
_VALID_URL = r'''(?x)
https?://
(?P<host>
(?:www\.)?
(?:
(?:
nfl|
buffalobills|
miamidolphins|
patriots|
newyorkjets|
baltimoreravens|
bengals|
clevelandbrowns|
steelers|
houstontexans|
colts|
jaguars|
titansonline|
denverbroncos|
kcchiefs|
raiders|
chargers|
dallascowboys|
giants|
philadelphiaeagles|
redskins|
chicagobears|
detroitlions|
packers|
vikings|
atlantafalcons|
panthers|
neworleanssaints|
buccaneers|
azcardinals|
stlouisrams|
49ers|
seahawks
)\.com|
.+?\.clubs\.nfl\.com
)
)/
(?:.+?/)*
(?P<id>[^/#?&]+)
'''
_TESTS = [{
'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
'md5': '394ef771ddcd1354f665b471d78ec4c6',
'info_dict': {
'id': '0ap3000000398478',
'ext': 'mp4',
'title': 'Week 3: Redskins vs. Eagles highlights',
'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
'upload_date': '20140921',
'timestamp': 1411337580,
'thumbnail': 're:^https?://.*\.jpg$',
}
]
}, {
'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266',
'md5': 'cf85bdb4bc49f6e9d3816d130c78279c',
'info_dict': {
'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266',
'ext': 'mp4',
'title': 'LIVE: Post Game vs. Browns',
'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8',
'upload_date': '20131229',
'timestamp': 1388354455,
'thumbnail': 're:^https?://.*\.jpg$',
}
}, {
'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish',
'info_dict': {
'id': '0ap3000000467607',
'ext': 'mp4',
'title': 'Frustrations flare on the field',
'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.',
'timestamp': 1422850320,
'upload_date': '20150202',
},
}, {
'url': 'http://www.patriots.com/video/2015/09/18/10-days-gillette',
'md5': '4c319e2f625ffd0b481b4382c6fc124c',
'info_dict': {
'id': 'n-238346',
'ext': 'mp4',
'title': '10 Days at Gillette',
'description': 'md5:8cd9cd48fac16de596eadc0b24add951',
'timestamp': 1442618809,
'upload_date': '20150918',
},
}, {
'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood',
'only_matching': True,
}, {
'url': 'http://www.buffalobills.com/video/videos/Rex_Ryan_Show_World_Wide_Rex/b1dcfab2-3190-4bb1-bfc0-d6e603d6601a',
'only_matching': True,
}]
@staticmethod
def prepend_host(host, url):
@@ -95,13 +146,14 @@ class NFLIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
config_url = NFLIE.prepend_host(host, self._search_regex(
r'(?:config|configURL)\s*:\s*"([^"]+)"', webpage, 'config URL',
default='static/content/static/config/video/config.json'))
r'(?:(?:config|configURL)\s*:\s*|<nflcs:avplayer[^>]+data-config\s*=\s*)(["\'])(?P<config>.+?)\1',
webpage, 'config URL', default='static/content/static/config/video/config.json',
group='config'))
# For articles, the id in the url is not the video id
video_id = self._search_regex(
r'contentId\s*:\s*"([^"]+)"', webpage, 'video id', default=video_id)
config = self._download_json(config_url, video_id,
note='Downloading player config')
r'(?:<nflcs:avplayer[^>]+data-contentId\s*=\s*|contentId\s*:\s*)(["\'])(?P<id>.+?)\1',
webpage, 'video id', default=video_id, group='id')
config = self._download_json(config_url, video_id, 'Downloading player config')
url_template = NFLIE.prepend_host(
host, '{contentURLTemplate:}'.format(**config))
video_data = self._download_json(
+21 -5
View File
@@ -72,7 +72,7 @@ class NHLBaseInfoExtractor(InfoExtractor):
class NHLIE(NHLBaseInfoExtractor):
IE_NAME = 'nhl.com'
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console)?(?:\?(?:.*?[?&])?)(?:id|hlg)=(?P<id>[-0-9a-zA-Z,]+)'
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console|embed)?(?:\?(?:.*?[?&])?)(?:id|hlg|playlist)=(?P<id>[-0-9a-zA-Z,]+)'
_TESTS = [{
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
@@ -136,6 +136,9 @@ class NHLIE(NHLBaseInfoExtractor):
'params': {
'skip_download': True, # Requires rtmpdump
}
}, {
'url': 'http://video.nhl.com/videocenter/embed?playlist=836127',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -146,9 +149,9 @@ class NHLIE(NHLBaseInfoExtractor):
class NHLNewsIE(NHLBaseInfoExtractor):
IE_NAME = 'nhl.com:news'
IE_DESC = 'NHL news'
_VALID_URL = r'https?://(?:www\.)?nhl\.com/ice/news\.html?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
_VALID_URL = r'https?://(?:.+?\.)?nhl\.com/(?:ice|club)/news\.html?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
_TEST = {
_TESTS = [{
'url': 'http://www.nhl.com/ice/news.htm?id=750727',
'md5': '4b3d1262e177687a3009937bd9ec0be8',
'info_dict': {
@@ -159,13 +162,26 @@ class NHLNewsIE(NHLBaseInfoExtractor):
'duration': 37,
'upload_date': '20150128',
},
}
}, {
# iframe embed
'url': 'http://sabres.nhl.com/club/news.htm?id=780189',
'md5': '9f663d1c006c90ac9fb82777d4294e12',
'info_dict': {
'id': '836127',
'ext': 'mp4',
'title': 'Morning Skate: OTT vs. BUF (9/23/15)',
'description': "Brian Duff chats with Tyler Ennis prior to Buffalo's first preseason home game.",
'duration': 93,
'upload_date': '20150923',
},
}]
def _real_extract(self, url):
news_id = self._match_id(url)
webpage = self._download_webpage(url, news_id)
video_id = self._search_regex(
[r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'"],
[r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'",
r'<iframe[^>]+src=["\']https?://video.*?\.nhl\.com/videocenter/embed\?.*\bplaylist=(\d+)'],
webpage, 'video id')
return self._real_extract_video(video_id)
+63 -32
View File
@@ -1,7 +1,6 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import str_to_int
@@ -9,61 +8,93 @@ from ..utils import str_to_int
class NineGagIE(InfoExtractor):
IE_NAME = '9gag'
_VALID_URL = r'''(?x)^https?://(?:www\.)?9gag\.tv/
(?:
v/(?P<numid>[0-9]+)|
p/(?P<id>[a-zA-Z0-9]+)/(?P<display_id>[^?#/]+)
)
'''
_VALID_URL = r'https?://(?:www\.)?9gag(?:\.com/tv|\.tv)/(?:p|embed)/(?P<id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^?#/]+))?'
_TESTS = [{
"url": "http://9gag.tv/v/1912",
"info_dict": {
"id": "1912",
"ext": "mp4",
"description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
"title": "\"People Are Awesome 2013\" Is Absolutely Awesome",
'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome',
'info_dict': {
'id': 'Kk2X5',
'ext': 'mp4',
'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)',
'title': '\"People Are Awesome 2013\" Is Absolutely Awesome',
'uploader_id': 'UCdEH6EjDKwtTe-sO2f0_1XA',
'uploader': 'CompilationChannel',
'upload_date': '20131110',
"view_count": int,
"thumbnail": "re:^https?://",
'view_count': int,
},
'add_ie': ['Youtube']
'add_ie': ['Youtube'],
}, {
'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar',
'url': 'http://9gag.com/tv/p/aKolP3',
'info_dict': {
'id': 'KklwM',
'id': 'aKolP3',
'ext': 'mp4',
'display_id': 'alternate-banned-opening-scene-of-gravity',
"description": "While Gravity was a pretty awesome movie already, YouTuber Krishna Shenoi came up with a way to improve upon it, introducing a much better solution to Sandra Bullock's seemingly endless tumble in space. The ending is priceless.",
'title': "Banned Opening Scene Of \"Gravity\" That Changes The Whole Movie",
'uploader': 'Krishna Shenoi',
'upload_date': '20140401',
'uploader_id': 'krishnashenoi93',
'title': 'This Guy Travelled 11 countries In 44 days Just To Make This Amazing Video',
'description': "I just saw more in 1 minute than I've seen in 1 year. This guy's video is epic!!",
'uploader_id': 'rickmereki',
'uploader': 'Rick Mereki',
'upload_date': '20110803',
'view_count': int,
},
'add_ie': ['Vimeo'],
}, {
'url': 'http://9gag.com/tv/p/KklwM',
'only_matching': True,
}, {
'url': 'http://9gag.tv/p/Kk2X5',
'only_matching': True,
}, {
'url': 'http://9gag.com/tv/embed/a5Dmvl',
'only_matching': True,
}]
_EXTERNAL_VIDEO_PROVIDER = {
'1': {
'url': '%s',
'ie_key': 'Youtube',
},
'2': {
'url': 'http://player.vimeo.com/video/%s',
'ie_key': 'Vimeo',
},
'3': {
'url': 'http://instagram.com/p/%s',
'ie_key': 'Instagram',
},
'4': {
'url': 'http://vine.co/v/%s',
'ie_key': 'Vine',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('numid') or mobj.group('id')
video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id
webpage = self._download_webpage(url, display_id)
post_view = json.loads(self._html_search_regex(
r'var postView = new app\.PostView\({\s*post:\s*({.+?}),\s*posts:\s*prefetchedCurrentPost', webpage, 'post view'))
post_view = self._parse_json(
self._search_regex(
r'var\s+postView\s*=\s*new\s+app\.PostView\({\s*post:\s*({.+?})\s*,\s*posts:\s*prefetchedCurrentPost',
webpage, 'post view'),
display_id)
youtube_id = post_view['videoExternalId']
ie_key = None
source_url = post_view.get('sourceUrl')
if not source_url:
external_video_id = post_view['videoExternalId']
external_video_provider = post_view['videoExternalProvider']
source_url = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['url'] % external_video_id
ie_key = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['ie_key']
title = post_view['title']
description = post_view['description']
view_count = str_to_int(post_view['externalView'])
description = post_view.get('description')
view_count = str_to_int(post_view.get('externalView'))
thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')
return {
'_type': 'url_transparent',
'url': youtube_id,
'ie_key': 'Youtube',
'url': source_url,
'ie_key': ie_key,
'id': video_id,
'display_id': display_id,
'title': title,
+32 -5
View File
@@ -25,7 +25,7 @@ class QQMusicIE(InfoExtractor):
'id': '004295Et37taLD',
'ext': 'mp3',
'title': '可惜没如果',
'upload_date': '20141227',
'release_date': '20141227',
'creator': '林俊杰',
'description': 'md5:d327722d0361576fde558f1ac68a7065',
'thumbnail': 're:^https?://.*\.jpg$',
@@ -38,11 +38,26 @@ class QQMusicIE(InfoExtractor):
'id': '004MsGEo3DdNxV',
'ext': 'mp3',
'title': '如果',
'upload_date': '20050626',
'release_date': '20050626',
'creator': '李季美',
'description': 'md5:46857d5ed62bc4ba84607a805dccf437',
'thumbnail': 're:^https?://.*\.jpg$',
}
}, {
'note': 'lyrics not in .lrc format',
'url': 'http://y.qq.com/#type=song&mid=001JyApY11tIp6',
'info_dict': {
'id': '001JyApY11tIp6',
'ext': 'mp3',
'title': 'Shadows Over Transylvania',
'release_date': '19970225',
'creator': 'Dark Funeral',
'description': 'md5:ed14d5bd7ecec19609108052c25b2c11',
'thumbnail': 're:^https?://.*\.jpg$',
},
'params': {
'skip_download': True,
},
}]
_FORMATS = {
@@ -112,15 +127,27 @@ class QQMusicIE(InfoExtractor):
self._check_formats(formats, mid)
self._sort_formats(formats)
return {
actual_lrc_lyrics = ''.join(
line + '\n' for line in re.findall(
r'(?m)^(\[[0-9]{2}:[0-9]{2}(?:\.[0-9]{2,})?\][^\n]*|\[[^\]]*\])', lrc_content))
info_dict = {
'id': mid,
'formats': formats,
'title': song_name,
'upload_date': publish_time,
'release_date': publish_time,
'creator': singer,
'description': lrc_content,
'thumbnail': thumbnail_url,
'thumbnail': thumbnail_url
}
if actual_lrc_lyrics:
info_dict['subtitles'] = {
'origin': [{
'ext': 'lrc',
'data': actual_lrc_lyrics,
}]
}
return info_dict
class QQPlaylistBaseIE(InfoExtractor):
+1 -1
View File
@@ -113,7 +113,7 @@ class SoundcloudIE(InfoExtractor):
},
]
_CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
_CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea'
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
def report_resolve(self, video_id):
+1
View File
@@ -119,6 +119,7 @@ class VidmeIE(InfoExtractor):
'url': f['uri'],
'width': int_or_none(f.get('width')),
'height': int_or_none(f.get('height')),
'preference': 0 if f.get('type', '').endswith('clip') else 1,
} for f in video.get('formats', []) if f.get('uri')]
self._sort_formats(formats)
+43 -14
View File
@@ -3,12 +3,14 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_urllib_request,
compat_urllib_parse,
compat_urllib_parse_unquote,
)
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
parse_iso8601,
HEADRequest,
@@ -16,14 +18,14 @@ from ..utils import (
class ViewsterIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?viewster\.com/(?:serie|movie)/(?P<id>\d+-\d+-\d+)'
_VALID_URL = r'https?://(?:www\.)?viewster\.com/(?:serie|movie)/(?P<id>\d+-\d+-\d+)'
_TESTS = [{
# movie, Type=Movie
'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/',
'md5': '14d3cfffe66d57b41ae2d9c873416f01',
'md5': 'e642d1b27fcf3a4ffa79f194f5adde36',
'info_dict': {
'id': '1140-11855-000',
'ext': 'flv',
'ext': 'mp4',
'title': 'The listening Project',
'description': 'md5:bac720244afd1a8ea279864e67baa071',
'timestamp': 1214870400,
@@ -33,10 +35,10 @@ class ViewsterIE(InfoExtractor):
}, {
# series episode, Type=Episode
'url': 'http://www.viewster.com/serie/1284-19427-001/the-world-and-a-wall/',
'md5': 'd5434c80fcfdb61651cc2199a88d6ba3',
'md5': '9243079a8531809efe1b089db102c069',
'info_dict': {
'id': '1284-19427-001',
'ext': 'flv',
'ext': 'mp4',
'title': 'The World and a Wall',
'description': 'md5:24814cf74d3453fdf5bfef9716d073e3',
'timestamp': 1428192000,
@@ -61,6 +63,14 @@ class ViewsterIE(InfoExtractor):
'description': 'md5:e7097a8fc97151e25f085c9eb7a1cdb1',
},
'playlist_mincount': 16,
}, {
# geo restricted series
'url': 'https://www.viewster.com/serie/1280-18794-002/',
'only_matching': True,
}, {
# geo restricted video
'url': 'https://www.viewster.com/serie/1280-18794-002/what-is-extraterritoriality-lawo/',
'only_matching': True,
}]
_ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
@@ -74,8 +84,8 @@ class ViewsterIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
# Get 'api_token' cookie
self._request_webpage(HEADRequest(url), video_id)
cookies = self._get_cookies(url)
self._request_webpage(HEADRequest('http://www.viewster.com/'), video_id)
cookies = self._get_cookies('http://www.viewster.com/')
self._AUTH_TOKEN = compat_urllib_parse_unquote(cookies['api_token'].value)
info = self._download_json(
@@ -85,10 +95,16 @@ class ViewsterIE(InfoExtractor):
entry_id = info.get('Id') or info['id']
# unfinished serie has no Type
if info.get('Type') in ['Serie', None]:
episodes = self._download_json(
'https://public-api.viewster.com/series/%s/episodes' % entry_id,
video_id, 'Downloading series JSON')
if info.get('Type') in ('Serie', None):
try:
episodes = self._download_json(
'https://public-api.viewster.com/series/%s/episodes' % entry_id,
video_id, 'Downloading series JSON')
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
self.raise_geo_restricted()
else:
raise
entries = [
self.url_result(
'http://www.viewster.com/movie/%s' % episode['OriginId'], 'Viewster')
@@ -98,7 +114,7 @@ class ViewsterIE(InfoExtractor):
return self.playlist_result(entries, video_id, title, description)
formats = []
for media_type in ('application/f4m+xml', 'application/x-mpegURL'):
for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'):
media = self._download_json(
'https://public-api.viewster.com/movies/%s/video?mediaType=%s'
% (entry_id, compat_urllib_parse.quote(media_type)),
@@ -120,9 +136,22 @@ class ViewsterIE(InfoExtractor):
fatal=False # m3u8 sometimes fail
))
else:
formats.append({
format_id = media.get('Bitrate')
f = {
'url': video_url,
})
'format_id': 'mp4-%s' % format_id,
'height': int_or_none(media.get('Height')),
'width': int_or_none(media.get('Width')),
'preference': 1,
}
if format_id and not f['height']:
f['height'] = int_or_none(self._search_regex(
r'^(\d+)[pP]$', format_id, 'height', default=None))
formats.append(f)
if not formats and not info.get('LanguageSets') and not info.get('VODSettings'):
self.raise_geo_restricted()
self._sort_formats(formats)
synopsis = info.get('Synopsis', {})
+3 -1
View File
@@ -63,7 +63,9 @@ class XHamsterIE(InfoExtractor):
mrss_url = '%s://xhamster.com/movies/%s/%s.html' % (proto, video_id, seo)
webpage = self._download_webpage(mrss_url, video_id)
title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage, 'title')
title = self._html_search_regex(
[r'<title>(?P<title>.+?)(?:, (?:[^,]+? )?Porn: xHamster| - xHamster\.com)</title>',
r'<h1>([^<]+)</h1>'], webpage, 'title')
# Only a few videos have an description
mobj = re.search(r'<span>Description: </span>([^<]+)', webpage)
+1 -1
View File
@@ -619,7 +619,7 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
# expected HTTP responses to meet HTTP/1.0 or later (see also
# https://github.com/rg3/youtube-dl/issues/6727)
if sys.version_info < (3, 0):
kwargs['strict'] = True
kwargs[b'strict'] = True
hc = http_class(*args, **kwargs)
source_address = ydl_handler._params.get('source_address')
if source_address is not None:
+1 -1
View File
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2015.09.09'
__version__ = '2015.09.28'