mirror of
https://github.com/l1ving/youtube-dl
synced 2026-06-08 00:04:47 +08:00
Merge branch 'master' of https://github.com/rg3/youtube-dl
This commit is contained in:
@@ -5,6 +5,7 @@ python:
|
||||
- "3.2"
|
||||
- "3.3"
|
||||
- "3.4"
|
||||
- "3.5"
|
||||
sudo: false
|
||||
script: nosetests test --verbose
|
||||
notifications:
|
||||
|
||||
@@ -143,3 +143,4 @@ Shaun Walbridge
|
||||
Lee Jenkins
|
||||
Anssi Hannula
|
||||
Lukáš Lalinský
|
||||
Qijiang Fan
|
||||
|
||||
@@ -281,6 +281,7 @@ The `-o` option allows users to indicate a template for the output file names. T
|
||||
- `playlist`: The sequence will be replaced by the name or the id of the playlist that contains the video.
|
||||
- `playlist_index`: The sequence will be replaced by the index of the video in the playlist padded with leading zeros according to the total length of the playlist.
|
||||
- `format_id`: The sequence will be replaced by the format code specified by `--format`.
|
||||
- `duration`: The sequence will be replaced by the length of the video in seconds.
|
||||
|
||||
The current default template is `%(title)s-%(id)s.%(ext)s`.
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import os
|
||||
from os.path import dirname as dirn
|
||||
import sys
|
||||
|
||||
sys.path.append(dirn(dirn((os.path.abspath(__file__)))))
|
||||
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
|
||||
import youtube_dl
|
||||
|
||||
BASH_COMPLETION_FILE = "youtube-dl.bash-completion"
|
||||
|
||||
@@ -6,7 +6,7 @@ import os
|
||||
from os.path import dirname as dirn
|
||||
import sys
|
||||
|
||||
sys.path.append(dirn(dirn((os.path.abspath(__file__)))))
|
||||
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
|
||||
import youtube_dl
|
||||
from youtube_dl.utils import shell_quote
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ import os
|
||||
import textwrap
|
||||
|
||||
# We must be able to import youtube_dl
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
import youtube_dl
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ import sys
|
||||
|
||||
# Import youtube_dl
|
||||
ROOT_DIR = os.path.join(os.path.dirname(__file__), '..')
|
||||
sys.path.append(ROOT_DIR)
|
||||
sys.path.insert(0, ROOT_DIR)
|
||||
import youtube_dl
|
||||
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import os
|
||||
from os.path import dirname as dirn
|
||||
import sys
|
||||
|
||||
sys.path.append(dirn(dirn((os.path.abspath(__file__)))))
|
||||
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
|
||||
import youtube_dl
|
||||
|
||||
ZSH_COMPLETION_FILE = "youtube-dl.zsh"
|
||||
|
||||
+11
-10
@@ -101,7 +101,7 @@
|
||||
- **ComCarCoff**
|
||||
- **ComedyCentral**
|
||||
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
||||
- **CondeNast**: Condé Nast media group: Condé Nast, GQ, Glamour, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **Cracked**
|
||||
- **Criterion**
|
||||
- **CrooksAndLiars**
|
||||
@@ -122,7 +122,6 @@
|
||||
- **defense.gouv.fr**
|
||||
- **DHM**: Filmarchiv - Deutsches Historisches Museum
|
||||
- **Discovery**
|
||||
- **divxstage**: DivxStage
|
||||
- **Dotsub**
|
||||
- **DouyuTV**: 斗鱼
|
||||
- **dramafever**
|
||||
@@ -159,7 +158,6 @@
|
||||
- **faz.net**
|
||||
- **fc2**
|
||||
- **fernsehkritik.tv**
|
||||
- **fernsehkritik.tv:postecke**
|
||||
- **Firstpost**
|
||||
- **FiveTV**
|
||||
- **Flickr**
|
||||
@@ -209,7 +207,6 @@
|
||||
- **hitbox**
|
||||
- **hitbox:live**
|
||||
- **HornBunny**
|
||||
- **HostingBulk**
|
||||
- **HotNewHipHop**
|
||||
- **Howcast**
|
||||
- **HowStuffWorks**
|
||||
@@ -286,7 +283,7 @@
|
||||
- **Minhateca**
|
||||
- **MinistryGrid**
|
||||
- **miomio.tv**
|
||||
- **mitele.es**
|
||||
- **MiTele**: mitele.es
|
||||
- **mixcloud**
|
||||
- **MLB**
|
||||
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
||||
@@ -317,7 +314,6 @@
|
||||
- **Myvi**
|
||||
- **myvideo**
|
||||
- **MyVidster**
|
||||
- **N-JOY**
|
||||
- **n-tv.de**
|
||||
- **NationalGeographic**
|
||||
- **Naver**
|
||||
@@ -326,7 +322,9 @@
|
||||
- **NBCNews**
|
||||
- **NBCSports**
|
||||
- **NBCSportsVPlayer**
|
||||
- **ndr**: NDR.de - Mediathek
|
||||
- **ndr**: NDR.de - Norddeutscher Rundfunk
|
||||
- **ndr:embed**
|
||||
- **ndr:embed:base**
|
||||
- **NDTV**
|
||||
- **NerdCubedFeed**
|
||||
- **Nerdist**
|
||||
@@ -349,12 +347,16 @@
|
||||
- **nhl.com:videocenter**: NHL videocenter category
|
||||
- **niconico**: ニコニコ動画
|
||||
- **NiconicoPlaylist**
|
||||
- **njoy**: N-JOY
|
||||
- **njoy:embed**
|
||||
- **Noco**
|
||||
- **Normalboots**
|
||||
- **NosVideo**
|
||||
- **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
|
||||
- **novamov**: NovaMov
|
||||
- **Nowness**
|
||||
- **nowness**
|
||||
- **nowness:playlist**
|
||||
- **nowness:series**
|
||||
- **NowTV**
|
||||
- **nowvideo**: NowVideo
|
||||
- **npo**: npo.nl and ntr.nl
|
||||
@@ -375,7 +377,6 @@
|
||||
- **OnionStudios**
|
||||
- **Ooyala**
|
||||
- **OoyalaExternal**
|
||||
- **OpenFilm**
|
||||
- **orf:fm4**: radio FM4
|
||||
- **orf:iptv**: iptv.ORF.at
|
||||
- **orf:oe1**: Radio Österreich 1
|
||||
@@ -530,7 +531,7 @@
|
||||
- **techtv.mit.edu**
|
||||
- **ted**
|
||||
- **TeleBruxelles**
|
||||
- **telecinco.es**
|
||||
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es
|
||||
- **Telegraaf**
|
||||
- **TeleMB**
|
||||
- **TeleTask**
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tox]
|
||||
envlist = py26,py27,py33,py34
|
||||
envlist = py26,py27,py33,py34,py35
|
||||
[testenv]
|
||||
deps =
|
||||
nose
|
||||
|
||||
@@ -11,7 +11,7 @@ if __package__ is None and not hasattr(sys, "frozen"):
|
||||
# direct call of __main__.py
|
||||
import os.path
|
||||
path = os.path.realpath(os.path.abspath(__file__))
|
||||
sys.path.append(os.path.dirname(os.path.dirname(path)))
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(path)))
|
||||
|
||||
import youtube_dl
|
||||
|
||||
|
||||
@@ -28,9 +28,18 @@ class HlsFD(FileDownloader):
|
||||
return False
|
||||
ffpp.check_version()
|
||||
|
||||
args = [
|
||||
encodeArgument(opt)
|
||||
for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
|
||||
args = [ffpp.executable, '-y']
|
||||
|
||||
if info_dict['http_headers']:
|
||||
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
|
||||
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
|
||||
args += [
|
||||
'-headers',
|
||||
''.join('%s: %s\r\n' % (key, val) for key, val in info_dict['http_headers'].items())]
|
||||
|
||||
args += ['-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc']
|
||||
|
||||
args = [encodeArgument(opt) for opt in args]
|
||||
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
|
||||
|
||||
self._debug_cmd(args)
|
||||
|
||||
@@ -169,10 +169,7 @@ from .firstpost import FirstpostIE
|
||||
from .firsttv import FirstTVIE
|
||||
from .fivemin import FiveMinIE
|
||||
from .fivetv import FiveTVIE
|
||||
from .fktv import (
|
||||
FKTVIE,
|
||||
FKTVPosteckeIE,
|
||||
)
|
||||
from .fktv import FKTVIE
|
||||
from .flickr import FlickrIE
|
||||
from .folketinget import FolketingetIE
|
||||
from .footyroom import FootyRoomIE
|
||||
@@ -228,7 +225,6 @@ from .historicfilms import HistoricFilmsIE
|
||||
from .history import HistoryIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
from .hornbunny import HornBunnyIE
|
||||
from .hostingbulk import HostingBulkIE
|
||||
from .hotnewhiphop import HotNewHipHopIE
|
||||
from .howcast import HowcastIE
|
||||
from .howstuffworks import HowStuffWorksIE
|
||||
|
||||
@@ -4,6 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
find_xpath_attr,
|
||||
unified_strdate,
|
||||
@@ -77,7 +81,13 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
def _extract_from_webpage(self, webpage, video_id, lang):
|
||||
json_url = self._html_search_regex(
|
||||
[r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'],
|
||||
webpage, 'json vp url')
|
||||
webpage, 'json vp url', default=None)
|
||||
if not json_url:
|
||||
iframe_url = self._html_search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
|
||||
webpage, 'iframe url', group='url')
|
||||
json_url = compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
||||
def _extract_from_json_url(self, json_url, video_id, lang):
|
||||
|
||||
@@ -21,6 +21,9 @@ class BBCCoUkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
|
||||
|
||||
_MEDIASELECTOR_URLS = [
|
||||
# Provides HQ HLS streams with even better quality than the pc mediaset but fails
|
||||
# with geolocation in some cases even when it's not geo restricted at all (e.g.
|
||||
# http://www.bbc.co.uk/programmes/b06bp7lf)
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
|
||||
]
|
||||
@@ -153,6 +156,21 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'geolocation',
|
||||
}, {
|
||||
# iptv-all mediaset fails with geolocation; however, there is no geo restriction
|
||||
# for this programme at all
|
||||
'url': 'http://www.bbc.co.uk/programmes/b06bp7lf',
|
||||
'info_dict': {
|
||||
'id': 'b06bp7kf',
|
||||
'ext': 'flv',
|
||||
'title': "Annie Mac's Friday Night, B.Traits sits in for Annie",
|
||||
'description': 'B.Traits sits in for Annie Mac with a Mini-Mix from Disclosure.',
|
||||
'duration': 10800,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
||||
'only_matching': True,
|
||||
@@ -294,7 +312,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
return self._download_media_selector_url(
|
||||
mediaselector_url % programme_id, programme_id)
|
||||
except BBCCoUkIE.MediaSelectionError as e:
|
||||
if e.id == 'notukerror':
|
||||
if e.id in ('notukerror', 'geolocation'):
|
||||
last_exception = e
|
||||
continue
|
||||
self._raise_extractor_error(e)
|
||||
|
||||
@@ -152,6 +152,7 @@ class InfoExtractor(object):
|
||||
description: Full video description.
|
||||
uploader: Full name of the video uploader.
|
||||
creator: The main artist who created the video.
|
||||
release_date: The date (YYYYMMDD) when the video was released.
|
||||
timestamp: UNIX timestamp of the moment the video became available.
|
||||
upload_date: Video upload date (YYYYMMDD).
|
||||
If not explicitly set, calculated from timestamp.
|
||||
@@ -516,6 +517,12 @@ class InfoExtractor(object):
|
||||
'%s. Use --username and --password or --netrc to provide account credentials.' % msg,
|
||||
expected=True)
|
||||
|
||||
@staticmethod
def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'):
    """Abort extraction because the video is geo restricted.

    msg -- explanation shown to the user; a hint suggesting --proxy is
           appended to it.

    Always raises ExtractorError with expected=True.
    """
    raise ExtractorError(
        '%s. You might want to use --proxy to workaround.' % msg,
        expected=True)
|
||||
|
||||
# Methods for following #608
|
||||
@staticmethod
|
||||
def url_result(url, ie=None, video_id=None, video_title=None):
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@@ -12,6 +11,7 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
orderedSet,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
@@ -24,21 +24,33 @@ class CondeNastIE(InfoExtractor):
|
||||
# The keys are the supported sites and the values are the name to be shown
|
||||
# to the user and in the extractor description.
|
||||
_SITES = {
|
||||
'wired': 'WIRED',
|
||||
'gq': 'GQ',
|
||||
'vogue': 'Vogue',
|
||||
'glamour': 'Glamour',
|
||||
'wmagazine': 'W Magazine',
|
||||
'vanityfair': 'Vanity Fair',
|
||||
'allure': 'Allure',
|
||||
'architecturaldigest': 'Architectural Digest',
|
||||
'arstechnica': 'Ars Technica',
|
||||
'bonappetit': 'Bon Appétit',
|
||||
'brides': 'Brides',
|
||||
'cnevids': 'Condé Nast',
|
||||
'cntraveler': 'Condé Nast Traveler',
|
||||
'details': 'Details',
|
||||
'epicurious': 'Epicurious',
|
||||
'glamour': 'Glamour',
|
||||
'golfdigest': 'Golf Digest',
|
||||
'gq': 'GQ',
|
||||
'newyorker': 'The New Yorker',
|
||||
'self': 'SELF',
|
||||
'teenvogue': 'Teen Vogue',
|
||||
'vanityfair': 'Vanity Fair',
|
||||
'vogue': 'Vogue',
|
||||
'wired': 'WIRED',
|
||||
'wmagazine': 'W Magazine',
|
||||
}
|
||||
|
||||
_VALID_URL = r'http://(video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
|
||||
_VALID_URL = r'http://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed(?:js)?)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
|
||||
IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
|
||||
|
||||
EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed)/.+?' % '|'.join(_SITES.keys())
|
||||
EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed(?:js)?)/.+?' % '|'.join(_SITES.keys())
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
|
||||
'md5': '1921f713ed48aabd715691f774c451f7',
|
||||
'info_dict': {
|
||||
@@ -47,7 +59,16 @@ class CondeNastIE(InfoExtractor):
|
||||
'title': '3D Printed Speakers Lit With LED',
|
||||
'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# JS embed
|
||||
'url': 'http://player.cnevids.com/embedjs/55f9cf8b61646d1acf00000c/5511d76261646d5566020000.js',
|
||||
'md5': 'f1a6f9cafb7083bab74a710f65d08999',
|
||||
'info_dict': {
|
||||
'id': '55f9cf8b61646d1acf00000c',
|
||||
'ext': 'mp4',
|
||||
'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
|
||||
}
|
||||
}]
|
||||
|
||||
def _extract_series(self, url, webpage):
|
||||
title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>',
|
||||
@@ -86,8 +107,8 @@ class CondeNastIE(InfoExtractor):
|
||||
info_url = base_info_url + data
|
||||
info_page = self._download_webpage(info_url, video_id,
|
||||
'Downloading video info')
|
||||
video_info = self._search_regex(r'var video = ({.+?});', info_page, 'video info')
|
||||
video_info = json.loads(video_info)
|
||||
video_info = self._search_regex(r'var\s+video\s*=\s*({.+?});', info_page, 'video info')
|
||||
video_info = self._parse_json(video_info, video_id)
|
||||
|
||||
formats = [{
|
||||
'format_id': '%s-%s' % (fdata['type'].split('/')[-1], fdata['quality']),
|
||||
@@ -111,6 +132,13 @@ class CondeNastIE(InfoExtractor):
|
||||
url_type = mobj.group('type')
|
||||
item_id = mobj.group('id')
|
||||
|
||||
# Convert JS embed to regular embed
|
||||
if url_type == 'embedjs':
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
url = compat_urlparse.urlunparse(parsed_url._replace(
|
||||
path=remove_end(parsed_url.path, '.js').replace('/embedjs/', '/embed/')))
|
||||
url_type = 'embed'
|
||||
|
||||
self.to_screen('Extracting from %s with the Condé Nast extractor' % self._SITES[site])
|
||||
webpage = self._download_webpage(url, item_id)
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ class EaglePlatformIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
# http://lenta.ru/news/2015/03/06/navalny/
|
||||
'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
|
||||
'md5': '0b7994faa2bd5c0f69a3db6db28d078d',
|
||||
'md5': '70f5187fb620f2c1d503b3b22fd4efe3',
|
||||
'info_dict': {
|
||||
'id': '227304',
|
||||
'ext': 'mp4',
|
||||
@@ -36,7 +36,7 @@ class EaglePlatformIE(InfoExtractor):
|
||||
# http://muz-tv.ru/play/7129/
|
||||
# http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
|
||||
'url': 'eagleplatform:media.clipyou.ru:12820',
|
||||
'md5': '6c2ebeab03b739597ce8d86339d5a905',
|
||||
'md5': '90b26344ba442c8e44aa4cf8f301164a',
|
||||
'info_dict': {
|
||||
'id': '12820',
|
||||
'ext': 'mp4',
|
||||
@@ -48,7 +48,8 @@ class EaglePlatformIE(InfoExtractor):
|
||||
'skip': 'Georestricted',
|
||||
}]
|
||||
|
||||
def _handle_error(self, response):
|
||||
@staticmethod
|
||||
def _handle_error(response):
|
||||
status = int_or_none(response.get('status', 200))
|
||||
if status != 200:
|
||||
raise ExtractorError(' '.join(response['errors']), expected=True)
|
||||
@@ -58,6 +59,9 @@ class EaglePlatformIE(InfoExtractor):
|
||||
self._handle_error(response)
|
||||
return response
|
||||
|
||||
def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'):
|
||||
return self._download_json(url_or_request, video_id, note)['data'][0]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')
|
||||
@@ -69,7 +73,7 @@ class EaglePlatformIE(InfoExtractor):
|
||||
|
||||
title = media['title']
|
||||
description = media.get('description')
|
||||
thumbnail = media.get('snapshot')
|
||||
thumbnail = self._proto_relative_url(media.get('snapshot'), 'http:')
|
||||
duration = int_or_none(media.get('duration'))
|
||||
view_count = int_or_none(media.get('views'))
|
||||
|
||||
@@ -78,13 +82,20 @@ class EaglePlatformIE(InfoExtractor):
|
||||
if age_restriction:
|
||||
age_limit = 0 if age_restriction == 'allow_all' else 18
|
||||
|
||||
m3u8_data = self._download_json(
|
||||
self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:'),
|
||||
video_id, 'Downloading m3u8 JSON')
|
||||
secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:')
|
||||
|
||||
m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_data['data'][0], video_id,
|
||||
m3u8_url, video_id,
|
||||
'mp4', entry_protocol='m3u8_native')
|
||||
|
||||
mp4_url = self._get_video_url(
|
||||
# Secure mp4 URL is constructed according to Player.prototype.mp4 from
|
||||
# http://lentaru.media.eagleplatform.com/player/player.js
|
||||
re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4', secure_m3u8),
|
||||
video_id, 'Downloading mp4 JSON')
|
||||
formats.append({'url': mp4_url, 'format_id': 'mp4'})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import random
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_element_by_id,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@@ -17,66 +16,40 @@ class FKTVIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://fernsehkritik.tv/folge-1',
|
||||
'md5': '21f0b0c99bce7d5b524eb1b17b1c6d79',
|
||||
'info_dict': {
|
||||
'id': '00011',
|
||||
'ext': 'flv',
|
||||
'id': '1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Folge 1 vom 10. April 2007',
|
||||
'description': 'md5:fb4818139c7cfe6907d4b83412a6864f',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
episode = int(self._match_id(url))
|
||||
episode = self._match_id(url)
|
||||
|
||||
video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%s.jpg' % episode
|
||||
start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%s/Start' % episode,
|
||||
episode)
|
||||
playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage,
|
||||
'playlist', flags=re.DOTALL)
|
||||
files = json.loads(re.sub('{[^{}]*?}', '{}', playlist))
|
||||
webpage = self._download_webpage(
|
||||
'http://fernsehkritik.tv/folge-%s/play' % episode, episode)
|
||||
title = clean_html(self._html_search_regex(
|
||||
'<h3>([^<]+)</h3>', webpage, 'title'))
|
||||
matches = re.search(
|
||||
r'(?s)<video(?:(?!poster)[^>])+(?:poster="([^"]+)")?[^>]*>(.*)</video>',
|
||||
webpage)
|
||||
if matches is None:
|
||||
raise ExtractorError('Unable to extract the video')
|
||||
|
||||
videos = []
|
||||
for i, _ in enumerate(files, 1):
|
||||
video_id = '%04d%d' % (episode, i)
|
||||
video_url = 'http://fernsehkritik.tv/js/directme.php?file=%s%s.flv' % (episode, '' if i == 1 else '-%d' % i)
|
||||
videos.append({
|
||||
'ext': 'flv',
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': clean_html(get_element_by_id('eptitle', start_webpage)),
|
||||
'description': clean_html(get_element_by_id('contentlist', start_webpage)),
|
||||
'thumbnail': video_thumbnail
|
||||
})
|
||||
poster, sources = matches.groups()
|
||||
if poster is None:
|
||||
self.report_warning('unable to extract thumbnail')
|
||||
|
||||
urls = re.findall(r'<source[^>]+src="([^"]+)"', sources)
|
||||
formats = [{
|
||||
'url': furl,
|
||||
'format_id': determine_ext(furl),
|
||||
} for furl in urls]
|
||||
return {
|
||||
'_type': 'multi_video',
|
||||
'entries': videos,
|
||||
'id': 'folge-%s' % episode,
|
||||
}
|
||||
|
||||
|
||||
class FKTVPosteckeIE(InfoExtractor):
|
||||
IE_NAME = 'fernsehkritik.tv:postecke'
|
||||
_VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/inline-video/postecke\.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)'
|
||||
_TEST = {
|
||||
'url': 'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120',
|
||||
'md5': '262f0adbac80317412f7e57b4808e5c4',
|
||||
'info_dict': {
|
||||
'id': '0120',
|
||||
'ext': 'flv',
|
||||
'title': 'Postecke 120',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
episode = int(mobj.group('ep'))
|
||||
|
||||
server = random.randint(2, 4)
|
||||
video_id = '%04d' % episode
|
||||
video_url = 'http://dl%d.fernsehkritik.tv/postecke/postecke%d.flv' % (server, episode)
|
||||
video_title = 'Postecke %d' % episode
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': video_title,
|
||||
'id': episode,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': poster,
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
@@ -49,6 +50,7 @@ from .dailymotion import DailymotionCloudIE
|
||||
from .onionstudios import OnionStudiosIE
|
||||
from .snagfilms import SnagFilmsEmbedIE
|
||||
from .screenwavemedia import ScreenwaveMediaIE
|
||||
from .mtv import MTVServicesEmbeddedIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -230,6 +232,22 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': False,
|
||||
}
|
||||
},
|
||||
{
|
||||
# redirect in Refresh HTTP header
|
||||
'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
|
||||
'info_dict': {
|
||||
'id': 'pO8h3EaFRdo',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
|
||||
'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
|
||||
'upload_date': '20150917',
|
||||
'uploader_id': 'brtvofficial',
|
||||
'uploader': 'Boiler Room',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': False,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
|
||||
'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
|
||||
@@ -1594,12 +1612,9 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(url, ie='Vulture')
|
||||
|
||||
# Look for embedded mtvservices player
|
||||
mobj = re.search(
|
||||
r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
url = unescapeHTML(mobj.group('url'))
|
||||
return self.url_result(url, ie='MTVServicesEmbedded')
|
||||
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
|
||||
if mtvservices_url:
|
||||
return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
|
||||
|
||||
# Look for embedded yahoo player
|
||||
mobj = re.search(
|
||||
@@ -1638,7 +1653,7 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(mobj.group('url'), 'MLB')
|
||||
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
|
||||
r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
|
||||
@@ -1808,6 +1823,9 @@ class GenericIE(InfoExtractor):
|
||||
# Look also in Refresh HTTP header
|
||||
refresh_header = head_response.headers.get('Refresh')
|
||||
if refresh_header:
|
||||
# In Python 2, HTTP response headers are bytestrings
|
||||
if sys.version_info < (3, 0) and isinstance(refresh_header, str):
|
||||
refresh_header = refresh_header.decode('iso-8859-1')
|
||||
found = re.search(REDIRECT_REGEX, refresh_header)
|
||||
if found:
|
||||
new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
|
||||
|
||||
@@ -1,80 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class HostingBulkIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?hostingbulk\.com/
|
||||
(?:embed-)?(?P<id>[A-Za-z0-9]{12})(?:-\d+x\d+)?\.html'''
|
||||
_FILE_DELETED_REGEX = r'<b>File Not Found</b>'
|
||||
_TEST = {
|
||||
'url': 'http://hostingbulk.com/n0ulw1hv20fm.html',
|
||||
'md5': '6c8653c8ecf7ebfa83b76e24b7b2fe3f',
|
||||
'info_dict': {
|
||||
'id': 'n0ulw1hv20fm',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:5afeba33f48ec87219c269e054afd622',
|
||||
'filesize': 6816081,
|
||||
'thumbnail': 're:^http://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
url = 'http://hostingbulk.com/{0:}.html'.format(video_id)
|
||||
|
||||
# Custom request with cookie to set language to English, so our file
|
||||
# deleted regex would work.
|
||||
request = compat_urllib_request.Request(
|
||||
url, headers={'Cookie': 'lang=english'})
|
||||
webpage = self._download_webpage(request, video_id)
|
||||
|
||||
if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
|
||||
raise ExtractorError('Video %s does not exist' % video_id,
|
||||
expected=True)
|
||||
|
||||
title = self._html_search_regex(r'<h3>(.*?)</h3>', webpage, 'title')
|
||||
filesize = int_or_none(
|
||||
self._search_regex(
|
||||
r'<small>\((\d+)\sbytes?\)</small>',
|
||||
webpage,
|
||||
'filesize',
|
||||
fatal=False
|
||||
)
|
||||
)
|
||||
thumbnail = self._search_regex(
|
||||
r'<img src="([^"]+)".+?class="pic"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
fields = self._hidden_inputs(webpage)
|
||||
|
||||
request = compat_urllib_request.Request(url, urlencode_postdata(fields))
|
||||
request.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
response = self._request_webpage(request, video_id,
|
||||
'Submiting download request')
|
||||
video_url = response.geturl()
|
||||
|
||||
formats = [{
|
||||
'format_id': 'sd',
|
||||
'filesize': filesize,
|
||||
'url': video_url,
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
}
|
||||
@@ -1,7 +1,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
get_element_by_id,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class IconosquareIE(InfoExtractor):
|
||||
@@ -12,7 +16,7 @@ class IconosquareIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '522207370455279102_24101272',
|
||||
'ext': 'mp4',
|
||||
'title': 'Instagram media by @aguynamedpatrick (Patrick Janelle)',
|
||||
'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
|
||||
'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d',
|
||||
'timestamp': 1376471991,
|
||||
'upload_date': '20130814',
|
||||
@@ -29,8 +33,7 @@ class IconosquareIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
media = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.media\s*=\s*({.+?});\n', webpage, 'media'),
|
||||
get_element_by_id('mediaJson', webpage),
|
||||
video_id)
|
||||
|
||||
formats = [{
|
||||
@@ -41,9 +44,7 @@ class IconosquareIE(InfoExtractor):
|
||||
} for format_id, f in media['videos'].items()]
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.+?)(?: *\(Videos?\))? \| (?:Iconosquare|Statigram)</title>',
|
||||
webpage, 'title')
|
||||
title = remove_end(self._og_search_title(webpage), ' - via Iconosquare')
|
||||
|
||||
timestamp = int_or_none(media.get('created_time') or media.get('caption', {}).get('created_time'))
|
||||
description = media.get('caption', {}).get('text')
|
||||
@@ -61,6 +62,14 @@ class IconosquareIE(InfoExtractor):
|
||||
'height': int_or_none(t.get('height'))
|
||||
} for thumbnail_id, t in media.get('images', {}).items()]
|
||||
|
||||
comments = [{
|
||||
'id': comment.get('id'),
|
||||
'text': comment['text'],
|
||||
'timestamp': int_or_none(comment.get('created_time')),
|
||||
'author': comment.get('from', {}).get('full_name'),
|
||||
'author_id': comment.get('from', {}).get('username'),
|
||||
} for comment in media.get('comments', {}).get('data', []) if 'text' in comment]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
@@ -72,4 +81,5 @@ class IconosquareIE(InfoExtractor):
|
||||
'comment_count': comment_count,
|
||||
'like_count': like_count,
|
||||
'formats': formats,
|
||||
'comments': comments,
|
||||
}
|
||||
|
||||
@@ -95,6 +95,10 @@ class IqiyiIE(InfoExtractor):
|
||||
('10', 'h1'),
|
||||
]
|
||||
|
||||
@staticmethod
def md5_text(text):
    """Return the hexadecimal MD5 digest of ``text``.

    ``text`` is encoded as UTF-8 before being hashed.
    """
    return hashlib.md5(text.encode('utf-8')).hexdigest()
|
||||
|
||||
def construct_video_urls(self, data, video_id, _uuid):
|
||||
def do_xor(x, y):
|
||||
a = y % 3
|
||||
@@ -121,7 +125,7 @@ class IqiyiIE(InfoExtractor):
|
||||
note='Download path key of segment %d for format %s' % (segment_index + 1, format_id)
|
||||
)['t']
|
||||
t = str(int(math.floor(int(tm) / (600.0))))
|
||||
return hashlib.md5((t + mg + x).encode('utf8')).hexdigest()
|
||||
return self.md5_text(t + mg + x)
|
||||
|
||||
video_urls_dict = {}
|
||||
for format_item in data['vp']['tkl'][0]['vs']:
|
||||
@@ -179,20 +183,19 @@ class IqiyiIE(InfoExtractor):
|
||||
|
||||
def get_raw_data(self, tvid, video_id, enc_key, _uuid):
|
||||
tm = str(int(time.time()))
|
||||
tail = tm + tvid
|
||||
param = {
|
||||
'key': 'fvip',
|
||||
'src': hashlib.md5(b'youtube-dl').hexdigest(),
|
||||
'src': self.md5_text('youtube-dl'),
|
||||
'tvId': tvid,
|
||||
'vid': video_id,
|
||||
'vinfo': 1,
|
||||
'tm': tm,
|
||||
'enc': hashlib.md5(
|
||||
(enc_key + tm + tvid).encode('utf8')).hexdigest(),
|
||||
'enc': self.md5_text((enc_key + tail)[1:64:2] + tail),
|
||||
'qyid': _uuid,
|
||||
'tn': random.random(),
|
||||
'um': 0,
|
||||
'authkey': hashlib.md5(
|
||||
(tm + tvid).encode('utf8')).hexdigest()
|
||||
'authkey': self.md5_text(self.md5_text('') + tail),
|
||||
}
|
||||
|
||||
api_url = 'http://cache.video.qiyi.com/vms' + '?' + \
|
||||
@@ -201,7 +204,8 @@ class IqiyiIE(InfoExtractor):
|
||||
return raw_data
|
||||
|
||||
def get_enc_key(self, swf_url, video_id):
|
||||
enc_key = '3601ba290e4f4662848c710e2122007e' # last update at 2015-08-10 for Zombie
|
||||
# TODO: automatic key extraction
|
||||
enc_key = 'eac64f22daf001da6ba9aa8da4d501508bbe90a4d4091fea3b0582a85b38c2cc' # last update at 2015-09-23-23 for Zombie::bite
|
||||
return enc_key
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -1,46 +1,39 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class KeekIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?keek\.com/keek/(?P<id>\w+)'
|
||||
IE_NAME = 'keek'
|
||||
_TEST = {
|
||||
'url': 'https://www.keek.com/ytdl/keeks/NODfbab',
|
||||
'md5': '09c5c109067536c1cec8bac8c21fea05',
|
||||
'url': 'https://www.keek.com/keek/NODfbab',
|
||||
'md5': '9b0636f8c0f7614afa4ea5e4c6e57e83',
|
||||
'info_dict': {
|
||||
'id': 'NODfbab',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'youtube-dl project',
|
||||
'uploader_id': 'ytdl',
|
||||
'title': 'test chars: "\'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de .',
|
||||
'title': 'md5:35d42050a3ece241d5ddd7fdcc6fd896',
|
||||
'uploader': 'ytdl',
|
||||
'uploader_id': 'eGT5bab',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video_url = 'http://cdn.keek.com/keek/video/%s' % video_id
|
||||
thumbnail = 'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
raw_desc = self._html_search_meta('description', webpage)
|
||||
if raw_desc:
|
||||
uploader = self._html_search_regex(
|
||||
r'Watch (.*?)\s+\(', raw_desc, 'uploader', fatal=False)
|
||||
uploader_id = self._html_search_regex(
|
||||
r'Watch .*?\(@(.+?)\)', raw_desc, 'uploader_id', fatal=False)
|
||||
else:
|
||||
uploader = None
|
||||
uploader_id = None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'url': self._og_search_video_url(webpage),
|
||||
'ext': 'mp4',
|
||||
'title': self._og_search_title(webpage),
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'title': self._og_search_description(webpage).strip(),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'uploader': self._search_regex(
|
||||
r'data-username=(["\'])(?P<uploader>.+?)\1', webpage,
|
||||
'uploader', fatal=False, group='uploader'),
|
||||
'uploader_id': self._search_regex(
|
||||
r'data-user-id=(["\'])(?P<uploader_id>.+?)\1', webpage,
|
||||
'uploader id', fatal=False, group='uploader_id'),
|
||||
}
|
||||
|
||||
@@ -57,6 +57,7 @@ class KuwoIE(KuwoBaseIE):
|
||||
'upload_date': '20080122',
|
||||
'description': 'md5:ed13f58e3c3bf3f7fd9fbc4e5a7aa75c'
|
||||
},
|
||||
'skip': 'this song has been offline because of copyright issues',
|
||||
}, {
|
||||
'url': 'http://www.kuwo.cn/yinyue/6446136/',
|
||||
'info_dict': {
|
||||
@@ -76,9 +77,11 @@ class KuwoIE(KuwoBaseIE):
|
||||
webpage = self._download_webpage(
|
||||
url, song_id, note='Download song detail info',
|
||||
errnote='Unable to get song detail info')
|
||||
if '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage:
|
||||
raise ExtractorError('this song has been offline because of copyright issues', expected=True)
|
||||
|
||||
song_name = self._html_search_regex(
|
||||
r'<h1[^>]+title="([^"]+)">', webpage, 'song name')
|
||||
r'(?s)class="(?:[^"\s]+\s+)*title(?:\s+[^"\s]+)*".*?<h1[^>]+title="([^"]+)"', webpage, 'song name')
|
||||
singer_name = self._html_search_regex(
|
||||
r'<div[^>]+class="s_img">\s*<a[^>]+title="([^>]+)"',
|
||||
webpage, 'singer name', fatal=False)
|
||||
|
||||
@@ -200,7 +200,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
if mgid is None or ':' not in mgid:
|
||||
mgid = self._search_regex(
|
||||
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
|
||||
webpage, 'mgid')
|
||||
webpage, 'mgid', default=None)
|
||||
|
||||
if not mgid:
|
||||
sm4_embed = self._html_search_meta(
|
||||
'sm4:video:embed', webpage, 'sm4 embed', default='')
|
||||
mgid = self._search_regex(
|
||||
r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid')
|
||||
|
||||
videos_info = self._get_videos_info(mgid)
|
||||
return videos_info
|
||||
@@ -222,6 +228,13 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
|
||||
},
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media.mtvnservices.com/embed/.+?)\1', webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
def _get_feed_url(self, uri):
|
||||
video_id = self._id_from_uri(uri)
|
||||
site_id = uri.replace(video_id, '')
|
||||
|
||||
+103
-51
@@ -16,53 +16,104 @@ from ..utils import (
|
||||
|
||||
class NFLIE(InfoExtractor):
|
||||
IE_NAME = 'nfl.com'
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?P<host>(?:www\.)?(?:nfl\.com|.*?\.clubs\.nfl\.com))/
|
||||
(?:.+?/)*
|
||||
(?P<id>(?:[a-z0-9]{16}|\w{8}\-(?:\w{4}\-){3}\w{12}))'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
|
||||
'md5': '394ef771ddcd1354f665b471d78ec4c6',
|
||||
'info_dict': {
|
||||
'id': '0ap3000000398478',
|
||||
'ext': 'mp4',
|
||||
'title': 'Week 3: Redskins vs. Eagles highlights',
|
||||
'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
|
||||
'upload_date': '20140921',
|
||||
'timestamp': 1411337580,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266',
|
||||
'md5': 'cf85bdb4bc49f6e9d3816d130c78279c',
|
||||
'info_dict': {
|
||||
'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266',
|
||||
'ext': 'mp4',
|
||||
'title': 'LIVE: Post Game vs. Browns',
|
||||
'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8',
|
||||
'upload_date': '20131229',
|
||||
'timestamp': 1388354455,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish',
|
||||
'info_dict': {
|
||||
'id': '0ap3000000467607',
|
||||
'ext': 'mp4',
|
||||
'title': 'Frustrations flare on the field',
|
||||
'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.',
|
||||
'timestamp': 1422850320,
|
||||
'upload_date': '20150202',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood',
|
||||
'only_matching': True,
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?P<host>
|
||||
(?:www\.)?
|
||||
(?:
|
||||
(?:
|
||||
nfl|
|
||||
buffalobills|
|
||||
miamidolphins|
|
||||
patriots|
|
||||
newyorkjets|
|
||||
baltimoreravens|
|
||||
bengals|
|
||||
clevelandbrowns|
|
||||
steelers|
|
||||
houstontexans|
|
||||
colts|
|
||||
jaguars|
|
||||
titansonline|
|
||||
denverbroncos|
|
||||
kcchiefs|
|
||||
raiders|
|
||||
chargers|
|
||||
dallascowboys|
|
||||
giants|
|
||||
philadelphiaeagles|
|
||||
redskins|
|
||||
chicagobears|
|
||||
detroitlions|
|
||||
packers|
|
||||
vikings|
|
||||
atlantafalcons|
|
||||
panthers|
|
||||
neworleanssaints|
|
||||
buccaneers|
|
||||
azcardinals|
|
||||
stlouisrams|
|
||||
49ers|
|
||||
seahawks
|
||||
)\.com|
|
||||
.+?\.clubs\.nfl\.com
|
||||
)
|
||||
)/
|
||||
(?:.+?/)*
|
||||
(?P<id>[^/#?&]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
|
||||
'md5': '394ef771ddcd1354f665b471d78ec4c6',
|
||||
'info_dict': {
|
||||
'id': '0ap3000000398478',
|
||||
'ext': 'mp4',
|
||||
'title': 'Week 3: Redskins vs. Eagles highlights',
|
||||
'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
|
||||
'upload_date': '20140921',
|
||||
'timestamp': 1411337580,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
]
|
||||
}, {
|
||||
'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266',
|
||||
'md5': 'cf85bdb4bc49f6e9d3816d130c78279c',
|
||||
'info_dict': {
|
||||
'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266',
|
||||
'ext': 'mp4',
|
||||
'title': 'LIVE: Post Game vs. Browns',
|
||||
'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8',
|
||||
'upload_date': '20131229',
|
||||
'timestamp': 1388354455,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish',
|
||||
'info_dict': {
|
||||
'id': '0ap3000000467607',
|
||||
'ext': 'mp4',
|
||||
'title': 'Frustrations flare on the field',
|
||||
'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.',
|
||||
'timestamp': 1422850320,
|
||||
'upload_date': '20150202',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.patriots.com/video/2015/09/18/10-days-gillette',
|
||||
'md5': '4c319e2f625ffd0b481b4382c6fc124c',
|
||||
'info_dict': {
|
||||
'id': 'n-238346',
|
||||
'ext': 'mp4',
|
||||
'title': '10 Days at Gillette',
|
||||
'description': 'md5:8cd9cd48fac16de596eadc0b24add951',
|
||||
'timestamp': 1442618809,
|
||||
'upload_date': '20150918',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.buffalobills.com/video/videos/Rex_Ryan_Show_World_Wide_Rex/b1dcfab2-3190-4bb1-bfc0-d6e603d6601a',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def prepend_host(host, url):
|
||||
@@ -95,13 +146,14 @@ class NFLIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
config_url = NFLIE.prepend_host(host, self._search_regex(
|
||||
r'(?:config|configURL)\s*:\s*"([^"]+)"', webpage, 'config URL',
|
||||
default='static/content/static/config/video/config.json'))
|
||||
r'(?:(?:config|configURL)\s*:\s*|<nflcs:avplayer[^>]+data-config\s*=\s*)(["\'])(?P<config>.+?)\1',
|
||||
webpage, 'config URL', default='static/content/static/config/video/config.json',
|
||||
group='config'))
|
||||
# For articles, the id in the url is not the video id
|
||||
video_id = self._search_regex(
|
||||
r'contentId\s*:\s*"([^"]+)"', webpage, 'video id', default=video_id)
|
||||
config = self._download_json(config_url, video_id,
|
||||
note='Downloading player config')
|
||||
r'(?:<nflcs:avplayer[^>]+data-contentId\s*=\s*|contentId\s*:\s*)(["\'])(?P<id>.+?)\1',
|
||||
webpage, 'video id', default=video_id, group='id')
|
||||
config = self._download_json(config_url, video_id, 'Downloading player config')
|
||||
url_template = NFLIE.prepend_host(
|
||||
host, '{contentURLTemplate:}'.format(**config))
|
||||
video_data = self._download_json(
|
||||
|
||||
@@ -72,7 +72,7 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
class NHLIE(NHLBaseInfoExtractor):
|
||||
IE_NAME = 'nhl.com'
|
||||
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console)?(?:\?(?:.*?[?&])?)(?:id|hlg)=(?P<id>[-0-9a-zA-Z,]+)'
|
||||
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console|embed)?(?:\?(?:.*?[?&])?)(?:id|hlg|playlist)=(?P<id>[-0-9a-zA-Z,]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
|
||||
@@ -136,6 +136,9 @@ class NHLIE(NHLBaseInfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True, # Requires rtmpdump
|
||||
}
|
||||
}, {
|
||||
'url': 'http://video.nhl.com/videocenter/embed?playlist=836127',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -146,9 +149,9 @@ class NHLIE(NHLBaseInfoExtractor):
|
||||
class NHLNewsIE(NHLBaseInfoExtractor):
|
||||
IE_NAME = 'nhl.com:news'
|
||||
IE_DESC = 'NHL news'
|
||||
_VALID_URL = r'https?://(?:www\.)?nhl\.com/ice/news\.html?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
|
||||
_VALID_URL = r'https?://(?:.+?\.)?nhl\.com/(?:ice|club)/news\.html?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nhl.com/ice/news.htm?id=750727',
|
||||
'md5': '4b3d1262e177687a3009937bd9ec0be8',
|
||||
'info_dict': {
|
||||
@@ -159,13 +162,26 @@ class NHLNewsIE(NHLBaseInfoExtractor):
|
||||
'duration': 37,
|
||||
'upload_date': '20150128',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# iframe embed
|
||||
'url': 'http://sabres.nhl.com/club/news.htm?id=780189',
|
||||
'md5': '9f663d1c006c90ac9fb82777d4294e12',
|
||||
'info_dict': {
|
||||
'id': '836127',
|
||||
'ext': 'mp4',
|
||||
'title': 'Morning Skate: OTT vs. BUF (9/23/15)',
|
||||
'description': "Brian Duff chats with Tyler Ennis prior to Buffalo's first preseason home game.",
|
||||
'duration': 93,
|
||||
'upload_date': '20150923',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
news_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, news_id)
|
||||
video_id = self._search_regex(
|
||||
[r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'"],
|
||||
[r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'",
|
||||
r'<iframe[^>]+src=["\']https?://video.*?\.nhl\.com/videocenter/embed\?.*\bplaylist=(\d+)'],
|
||||
webpage, 'video id')
|
||||
return self._real_extract_video(video_id)
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import str_to_int
|
||||
@@ -9,61 +8,93 @@ from ..utils import str_to_int
|
||||
|
||||
class NineGagIE(InfoExtractor):
|
||||
IE_NAME = '9gag'
|
||||
_VALID_URL = r'''(?x)^https?://(?:www\.)?9gag\.tv/
|
||||
(?:
|
||||
v/(?P<numid>[0-9]+)|
|
||||
p/(?P<id>[a-zA-Z0-9]+)/(?P<display_id>[^?#/]+)
|
||||
)
|
||||
'''
|
||||
_VALID_URL = r'https?://(?:www\.)?9gag(?:\.com/tv|\.tv)/(?:p|embed)/(?P<id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^?#/]+))?'
|
||||
|
||||
_TESTS = [{
|
||||
"url": "http://9gag.tv/v/1912",
|
||||
"info_dict": {
|
||||
"id": "1912",
|
||||
"ext": "mp4",
|
||||
"description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
|
||||
"title": "\"People Are Awesome 2013\" Is Absolutely Awesome",
|
||||
'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome',
|
||||
'info_dict': {
|
||||
'id': 'Kk2X5',
|
||||
'ext': 'mp4',
|
||||
'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)',
|
||||
'title': '\"People Are Awesome 2013\" Is Absolutely Awesome',
|
||||
'uploader_id': 'UCdEH6EjDKwtTe-sO2f0_1XA',
|
||||
'uploader': 'CompilationChannel',
|
||||
'upload_date': '20131110',
|
||||
"view_count": int,
|
||||
"thumbnail": "re:^https?://",
|
||||
'view_count': int,
|
||||
},
|
||||
'add_ie': ['Youtube']
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar',
|
||||
'url': 'http://9gag.com/tv/p/aKolP3',
|
||||
'info_dict': {
|
||||
'id': 'KklwM',
|
||||
'id': 'aKolP3',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'alternate-banned-opening-scene-of-gravity',
|
||||
"description": "While Gravity was a pretty awesome movie already, YouTuber Krishna Shenoi came up with a way to improve upon it, introducing a much better solution to Sandra Bullock's seemingly endless tumble in space. The ending is priceless.",
|
||||
'title': "Banned Opening Scene Of \"Gravity\" That Changes The Whole Movie",
|
||||
'uploader': 'Krishna Shenoi',
|
||||
'upload_date': '20140401',
|
||||
'uploader_id': 'krishnashenoi93',
|
||||
'title': 'This Guy Travelled 11 countries In 44 days Just To Make This Amazing Video',
|
||||
'description': "I just saw more in 1 minute than I've seen in 1 year. This guy's video is epic!!",
|
||||
'uploader_id': 'rickmereki',
|
||||
'uploader': 'Rick Mereki',
|
||||
'upload_date': '20110803',
|
||||
'view_count': int,
|
||||
},
|
||||
'add_ie': ['Vimeo'],
|
||||
}, {
|
||||
'url': 'http://9gag.com/tv/p/KklwM',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://9gag.tv/p/Kk2X5',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://9gag.com/tv/embed/a5Dmvl',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_EXTERNAL_VIDEO_PROVIDER = {
|
||||
'1': {
|
||||
'url': '%s',
|
||||
'ie_key': 'Youtube',
|
||||
},
|
||||
'2': {
|
||||
'url': 'http://player.vimeo.com/video/%s',
|
||||
'ie_key': 'Vimeo',
|
||||
},
|
||||
'3': {
|
||||
'url': 'http://instagram.com/p/%s',
|
||||
'ie_key': 'Instagram',
|
||||
},
|
||||
'4': {
|
||||
'url': 'http://vine.co/v/%s',
|
||||
'ie_key': 'Vine',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('numid') or mobj.group('id')
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
post_view = json.loads(self._html_search_regex(
|
||||
r'var postView = new app\.PostView\({\s*post:\s*({.+?}),\s*posts:\s*prefetchedCurrentPost', webpage, 'post view'))
|
||||
post_view = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+postView\s*=\s*new\s+app\.PostView\({\s*post:\s*({.+?})\s*,\s*posts:\s*prefetchedCurrentPost',
|
||||
webpage, 'post view'),
|
||||
display_id)
|
||||
|
||||
youtube_id = post_view['videoExternalId']
|
||||
ie_key = None
|
||||
source_url = post_view.get('sourceUrl')
|
||||
if not source_url:
|
||||
external_video_id = post_view['videoExternalId']
|
||||
external_video_provider = post_view['videoExternalProvider']
|
||||
source_url = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['url'] % external_video_id
|
||||
ie_key = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['ie_key']
|
||||
title = post_view['title']
|
||||
description = post_view['description']
|
||||
view_count = str_to_int(post_view['externalView'])
|
||||
description = post_view.get('description')
|
||||
view_count = str_to_int(post_view.get('externalView'))
|
||||
thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': youtube_id,
|
||||
'ie_key': 'Youtube',
|
||||
'url': source_url,
|
||||
'ie_key': ie_key,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
|
||||
@@ -25,7 +25,7 @@ class QQMusicIE(InfoExtractor):
|
||||
'id': '004295Et37taLD',
|
||||
'ext': 'mp3',
|
||||
'title': '可惜没如果',
|
||||
'upload_date': '20141227',
|
||||
'release_date': '20141227',
|
||||
'creator': '林俊杰',
|
||||
'description': 'md5:d327722d0361576fde558f1ac68a7065',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
@@ -38,11 +38,26 @@ class QQMusicIE(InfoExtractor):
|
||||
'id': '004MsGEo3DdNxV',
|
||||
'ext': 'mp3',
|
||||
'title': '如果',
|
||||
'upload_date': '20050626',
|
||||
'release_date': '20050626',
|
||||
'creator': '李季美',
|
||||
'description': 'md5:46857d5ed62bc4ba84607a805dccf437',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}, {
|
||||
'note': 'lyrics not in .lrc format',
|
||||
'url': 'http://y.qq.com/#type=song&mid=001JyApY11tIp6',
|
||||
'info_dict': {
|
||||
'id': '001JyApY11tIp6',
|
||||
'ext': 'mp3',
|
||||
'title': 'Shadows Over Transylvania',
|
||||
'release_date': '19970225',
|
||||
'creator': 'Dark Funeral',
|
||||
'description': 'md5:ed14d5bd7ecec19609108052c25b2c11',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
_FORMATS = {
|
||||
@@ -112,15 +127,27 @@ class QQMusicIE(InfoExtractor):
|
||||
self._check_formats(formats, mid)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
actual_lrc_lyrics = ''.join(
|
||||
line + '\n' for line in re.findall(
|
||||
r'(?m)^(\[[0-9]{2}:[0-9]{2}(?:\.[0-9]{2,})?\][^\n]*|\[[^\]]*\])', lrc_content))
|
||||
|
||||
info_dict = {
|
||||
'id': mid,
|
||||
'formats': formats,
|
||||
'title': song_name,
|
||||
'upload_date': publish_time,
|
||||
'release_date': publish_time,
|
||||
'creator': singer,
|
||||
'description': lrc_content,
|
||||
'thumbnail': thumbnail_url,
|
||||
'thumbnail': thumbnail_url
|
||||
}
|
||||
if actual_lrc_lyrics:
|
||||
info_dict['subtitles'] = {
|
||||
'origin': [{
|
||||
'ext': 'lrc',
|
||||
'data': actual_lrc_lyrics,
|
||||
}]
|
||||
}
|
||||
return info_dict
|
||||
|
||||
|
||||
class QQPlaylistBaseIE(InfoExtractor):
|
||||
|
||||
@@ -113,7 +113,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
},
|
||||
]
|
||||
|
||||
_CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
|
||||
_CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea'
|
||||
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
|
||||
|
||||
def report_resolve(self, video_id):
|
||||
|
||||
@@ -119,6 +119,7 @@ class VidmeIE(InfoExtractor):
|
||||
'url': f['uri'],
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
'preference': 0 if f.get('type', '').endswith('clip') else 1,
|
||||
} for f in video.get('formats', []) if f.get('uri')]
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -3,12 +3,14 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
HEADRequest,
|
||||
@@ -16,14 +18,14 @@ from ..utils import (
|
||||
|
||||
|
||||
class ViewsterIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?viewster\.com/(?:serie|movie)/(?P<id>\d+-\d+-\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?viewster\.com/(?:serie|movie)/(?P<id>\d+-\d+-\d+)'
|
||||
_TESTS = [{
|
||||
# movie, Type=Movie
|
||||
'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/',
|
||||
'md5': '14d3cfffe66d57b41ae2d9c873416f01',
|
||||
'md5': 'e642d1b27fcf3a4ffa79f194f5adde36',
|
||||
'info_dict': {
|
||||
'id': '1140-11855-000',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'The listening Project',
|
||||
'description': 'md5:bac720244afd1a8ea279864e67baa071',
|
||||
'timestamp': 1214870400,
|
||||
@@ -33,10 +35,10 @@ class ViewsterIE(InfoExtractor):
|
||||
}, {
|
||||
# series episode, Type=Episode
|
||||
'url': 'http://www.viewster.com/serie/1284-19427-001/the-world-and-a-wall/',
|
||||
'md5': 'd5434c80fcfdb61651cc2199a88d6ba3',
|
||||
'md5': '9243079a8531809efe1b089db102c069',
|
||||
'info_dict': {
|
||||
'id': '1284-19427-001',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'The World and a Wall',
|
||||
'description': 'md5:24814cf74d3453fdf5bfef9716d073e3',
|
||||
'timestamp': 1428192000,
|
||||
@@ -61,6 +63,14 @@ class ViewsterIE(InfoExtractor):
|
||||
'description': 'md5:e7097a8fc97151e25f085c9eb7a1cdb1',
|
||||
},
|
||||
'playlist_mincount': 16,
|
||||
}, {
|
||||
# geo restricted series
|
||||
'url': 'https://www.viewster.com/serie/1280-18794-002/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# geo restricted video
|
||||
'url': 'https://www.viewster.com/serie/1280-18794-002/what-is-extraterritoriality-lawo/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
|
||||
@@ -74,8 +84,8 @@ class ViewsterIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
# Get 'api_token' cookie
|
||||
self._request_webpage(HEADRequest(url), video_id)
|
||||
cookies = self._get_cookies(url)
|
||||
self._request_webpage(HEADRequest('http://www.viewster.com/'), video_id)
|
||||
cookies = self._get_cookies('http://www.viewster.com/')
|
||||
self._AUTH_TOKEN = compat_urllib_parse_unquote(cookies['api_token'].value)
|
||||
|
||||
info = self._download_json(
|
||||
@@ -85,10 +95,16 @@ class ViewsterIE(InfoExtractor):
|
||||
entry_id = info.get('Id') or info['id']
|
||||
|
||||
# unfinished serie has no Type
|
||||
if info.get('Type') in ['Serie', None]:
|
||||
episodes = self._download_json(
|
||||
'https://public-api.viewster.com/series/%s/episodes' % entry_id,
|
||||
video_id, 'Downloading series JSON')
|
||||
if info.get('Type') in ('Serie', None):
|
||||
try:
|
||||
episodes = self._download_json(
|
||||
'https://public-api.viewster.com/series/%s/episodes' % entry_id,
|
||||
video_id, 'Downloading series JSON')
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||
self.raise_geo_restricted()
|
||||
else:
|
||||
raise
|
||||
entries = [
|
||||
self.url_result(
|
||||
'http://www.viewster.com/movie/%s' % episode['OriginId'], 'Viewster')
|
||||
@@ -98,7 +114,7 @@ class ViewsterIE(InfoExtractor):
|
||||
return self.playlist_result(entries, video_id, title, description)
|
||||
|
||||
formats = []
|
||||
for media_type in ('application/f4m+xml', 'application/x-mpegURL'):
|
||||
for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'):
|
||||
media = self._download_json(
|
||||
'https://public-api.viewster.com/movies/%s/video?mediaType=%s'
|
||||
% (entry_id, compat_urllib_parse.quote(media_type)),
|
||||
@@ -120,9 +136,22 @@ class ViewsterIE(InfoExtractor):
|
||||
fatal=False # m3u8 sometimes fail
|
||||
))
|
||||
else:
|
||||
formats.append({
|
||||
format_id = media.get('Bitrate')
|
||||
f = {
|
||||
'url': video_url,
|
||||
})
|
||||
'format_id': 'mp4-%s' % format_id,
|
||||
'height': int_or_none(media.get('Height')),
|
||||
'width': int_or_none(media.get('Width')),
|
||||
'preference': 1,
|
||||
}
|
||||
if format_id and not f['height']:
|
||||
f['height'] = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None))
|
||||
formats.append(f)
|
||||
|
||||
if not formats and not info.get('LanguageSets') and not info.get('VODSettings'):
|
||||
self.raise_geo_restricted()
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
synopsis = info.get('Synopsis', {})
|
||||
|
||||
@@ -63,7 +63,9 @@ class XHamsterIE(InfoExtractor):
|
||||
mrss_url = '%s://xhamster.com/movies/%s/%s.html' % (proto, video_id, seo)
|
||||
webpage = self._download_webpage(mrss_url, video_id)
|
||||
|
||||
title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage, 'title')
|
||||
title = self._html_search_regex(
|
||||
[r'<title>(?P<title>.+?)(?:, (?:[^,]+? )?Porn: xHamster| - xHamster\.com)</title>',
|
||||
r'<h1>([^<]+)</h1>'], webpage, 'title')
|
||||
|
||||
# Only a few videos have an description
|
||||
mobj = re.search(r'<span>Description: </span>([^<]+)', webpage)
|
||||
|
||||
+1
-1
@@ -619,7 +619,7 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
|
||||
# expected HTTP responses to meet HTTP/1.0 or later (see also
|
||||
# https://github.com/rg3/youtube-dl/issues/6727)
|
||||
if sys.version_info < (3, 0):
|
||||
kwargs['strict'] = True
|
||||
kwargs[b'strict'] = True
|
||||
hc = http_class(*args, **kwargs)
|
||||
source_address = ydl_handler._params.get('source_address')
|
||||
if source_address is not None:
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2015.09.09'
|
||||
__version__ = '2015.09.28'
|
||||
|
||||
Reference in New Issue
Block a user