
Merge remote-tracking branch 'upstream/master' into myversion

Andrew Udvare 2018-04-13 21:41:24 -04:00
commit f0fe336307
51 changed files with 1327 additions and 478 deletions

.github/ISSUE_TEMPLATE.md

@@ -6,8 +6,8 @@
 ---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.03.14*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.04.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.03.14**
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.04.09**
 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2018.03.14
+[debug] youtube-dl version 2018.04.09
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}

ChangeLog

@@ -1,3 +1,84 @@
version 2018.04.09

Core
* [YoutubeDL] Do not save/restore console title while simulate (#16103)
* [extractor/common] Relax JSON-LD context check (#16006)

Extractors
+ [generic] Add support for tube8 embeds
+ [generic] Add support for share-videos.se embeds (#16089, #16115)
* [odnoklassniki] Extend URL regular expression (#16081)
* [steam] Bypass mature content check (#16113)
+ [acast] Extract more metadata
* [acast] Fix extraction (#16118)
* [instagram:user] Fix extraction (#16119)
* [drtuber] Fix title extraction (#16107, #16108)
* [liveleak] Extend URL regular expression (#16117)
+ [openload] Add support for oload.xyz
* [openload] Relax stream URL regular expression
* [openload] Fix extraction (#16099)
+ [svtplay:series] Add support for season URLs
+ [svtplay:series] Add support for series (#11130, #16059)


version 2018.04.03

Extractors
+ [tvnow] Add support for shows (#15837)
* [dramafever] Fix authentication (#16067)
* [afreecatv] Use partial view only when necessary (#14450)
+ [afreecatv] Add support for authentication (#14450)
+ [nationalgeographic] Add support for new URL schema (#16001, #16054)
* [xvideos] Fix thumbnail extraction (#15978, #15979)
* [medialaan] Fix vod id (#16038)
+ [openload] Add support for oload.site (#16039)
* [naver] Fix extraction (#16029)
* [dramafever] Partially switch to API v5 (#16026)
* [abc:iview] Unescape title and series meta fields (#15994)
* [videa] Extend URL regular expression (#16003)


version 2018.03.26.1

Core
+ [downloader/external] Add elapsed time to progress hook (#10876)
* [downloader/external,fragment] Fix download finalization when writing file
  to stdout (#10809, #10876, #15799)

Extractors
* [vrv] Fix extraction on python2 (#15928)
* [afreecatv] Update referrer (#15947)
+ [24video] Add support for 24video.sexy (#15973)
* [crackle] Bypass geo restriction
* [crackle] Fix extraction (#15969)
+ [lenta] Add support for lenta.ru (#15953)
+ [instagram:user] Add pagination (#15934)
* [youku] Update ccode (#15939)
* [libsyn] Adapt to new page structure


version 2018.03.20

Core
* [extractor/common] Improve thumbnail extraction for HTML5 entries
* Generalize XML manifest processing code and improve XSPF parsing
+ [extractor/common] Add _download_xml_handle
+ [extractor/common] Add support for relative URIs in _parse_xspf (#15794)

Extractors
+ [7plus] Extract series metadata (#15862, #15906)
* [9now] Bypass geo restriction (#15920)
* [cbs] Skip unavailable assets (#13490, #13506, #15776)
+ [canalc2] Add support for HTML5 videos (#15916, #15919)
+ [ceskatelevize] Add support for iframe embeds (#15918)
+ [prosiebensat1] Add support for galileo.tv (#15894)
+ [generic] Add support for xfileshare embeds (#15879)
* [bilibili] Switch to v2 playurl API
* [bilibili] Fix and improve extraction (#15048, #15430, #15622, #15863)
* [heise] Improve extraction (#15496, #15784, #15026)
* [instagram] Fix user videos extraction (#15858)


version 2018.03.14

Extractors

README.md

@@ -223,7 +223,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
 ## Filesystem Options:
     -a, --batch-file FILE            File containing URLs to download ('-' for
-                                     stdin)
+                                     stdin), one URL per line. Lines starting
+                                     with '#', ';' or ']' are considered as
+                                     comments and ignored.
     --id                             Use only video ID in file name
     -o, --output TEMPLATE            Output filename template, see the "OUTPUT
                                      TEMPLATE" for all the info

docs/supportedsites.md

@@ -419,6 +419,7 @@
 - **Lecture2Go**
 - **LEGO**
 - **Lemonde**
+- **Lenta**
 - **LePlaylist**
 - **LetvCloud**: 乐视云
 - **Libsyn**
@@ -803,6 +804,7 @@
 - **SunPorno**
 - **SVT**
 - **SVTPlay**: SVT Play and Öppet arkiv
+- **SVTSeries**
 - **SWRMediathek**
 - **Syfy**
 - **SztvHu**
@@ -886,6 +888,7 @@
 - **TVNoe**
 - **TVNow**
 - **TVNowList**
+- **TVNowShow**
 - **tvp**: Telewizja Polska
 - **tvp:embed**: Telewizja Polska
 - **tvp:series**

test/test_InfoExtractor.py

@@ -694,6 +694,55 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
        self.ie._sort_formats(formats)
        expect_value(self, formats, expected_formats, None)

    def test_parse_xspf(self):
        _TEST_CASES = [
            (
                'foo_xspf',
                'https://example.org/src/foo_xspf.xspf',
                [{
                    'id': 'foo_xspf',
                    'title': 'Pandemonium',
                    'description': 'Visit http://bigbrother404.bandcamp.com',
                    'duration': 202.416,
                    'formats': [{
                        'manifest_url': 'https://example.org/src/foo_xspf.xspf',
                        'url': 'https://example.org/src/cd1/track%201.mp3',
                    }],
                }, {
                    'id': 'foo_xspf',
                    'title': 'Final Cartridge (Nichico Twelve Remix)',
                    'description': 'Visit http://bigbrother404.bandcamp.com',
                    'duration': 255.857,
                    'formats': [{
                        'manifest_url': 'https://example.org/src/foo_xspf.xspf',
                        'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3',
                    }],
                }, {
                    'id': 'foo_xspf',
                    'title': 'Rebuilding Nightingale',
                    'description': 'Visit http://bigbrother404.bandcamp.com',
                    'duration': 287.915,
                    'formats': [{
                        'manifest_url': 'https://example.org/src/foo_xspf.xspf',
                        'url': 'https://example.org/src/track3.mp3',
                    }, {
                        'manifest_url': 'https://example.org/src/foo_xspf.xspf',
                        'url': 'https://example.com/track3.mp3',
                    }]
                }]
            ),
        ]

        for xspf_file, xspf_url, expected_entries in _TEST_CASES:
            with io.open('./test/testdata/xspf/%s.xspf' % xspf_file,
                         mode='r', encoding='utf-8') as f:
                entries = self.ie._parse_xspf(
                    compat_etree_fromstring(f.read().encode('utf-8')),
                    xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url)
                expect_value(self, entries, expected_entries, None)
                for i in range(len(entries)):
                    expect_dict(self, entries[i], expected_entries[i])


if __name__ == '__main__':
    unittest.main()

test/testdata/xspf/foo_xspf.xspf (new file, 34 lines)

@@ -0,0 +1,34 @@
<?xml version="1.0" encoding="UTF-8"?>
<playlist version="1" xmlns="http://xspf.org/ns/0/">
    <date>2018-03-09T18:01:43Z</date>
    <trackList>
        <track>
            <location>cd1/track%201.mp3</location>
            <title>Pandemonium</title>
            <creator>Foilverb</creator>
            <annotation>Visit http://bigbrother404.bandcamp.com</annotation>
            <album>Pandemonium EP</album>
            <trackNum>1</trackNum>
            <duration>202416</duration>
        </track>
        <track>
            <location>../%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3</location>
            <title>Final Cartridge (Nichico Twelve Remix)</title>
            <annotation>Visit http://bigbrother404.bandcamp.com</annotation>
            <creator>Foilverb</creator>
            <album>Pandemonium EP</album>
            <trackNum>2</trackNum>
            <duration>255857</duration>
        </track>
        <track>
            <location>track3.mp3</location>
            <location>https://example.com/track3.mp3</location>
            <title>Rebuilding Nightingale</title>
            <annotation>Visit http://bigbrother404.bandcamp.com</annotation>
            <creator>Foilverb</creator>
            <album>Pandemonium EP</album>
            <trackNum>3</trackNum>
            <duration>287915</duration>
        </track>
    </trackList>
</playlist>

youtube_dl/YoutubeDL.py

@@ -532,6 +532,8 @@ class YoutubeDL(object):
     def save_console_title(self):
         if not self.params.get('consoletitle', False):
             return
+        if self.params.get('simulate', False):
+            return
         if compat_os_name != 'nt' and 'TERM' in os.environ:
             # Save the title on stack
             self._write_string('\033[22;0t', self._screen_file)
@@ -539,6 +541,8 @@ class YoutubeDL(object):
     def restore_console_title(self):
         if not self.params.get('consoletitle', False):
             return
+        if self.params.get('simulate', False):
+            return
         if compat_os_name != 'nt' and 'TERM' in os.environ:
             # Restore the title from stack
             self._write_string('\033[23;0t', self._screen_file)

youtube_dl/downloader/common.py

@@ -249,12 +249,13 @@ class FileDownloader(object):
         if self.params.get('noprogress', False):
             self.to_screen('[download] Download completed')
         else:
-            s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+            msg_template = '100%%'
+            if s.get('total_bytes') is not None:
+                s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+                msg_template += ' of %(_total_bytes_str)s'
             if s.get('elapsed') is not None:
                 s['_elapsed_str'] = self.format_seconds(s['elapsed'])
-                msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s'
-            else:
-                msg_template = '100%% of %(_total_bytes_str)s'
+                msg_template += ' in %(_elapsed_str)s'
             self._report_progress_status(
                 msg_template % s, is_last_line=True)
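A minimal self-contained sketch (not from the patch) of what the new template logic produces; the byte and time strings are assumed stand-ins for format_bytes()/format_seconds() output:

    s = {'total_bytes': 10485760, 'elapsed': 5.2}
    msg_template = '100%%'
    if s.get('total_bytes') is not None:
        s['_total_bytes_str'] = '10.00MiB'  # assumed format_bytes() output
        msg_template += ' of %(_total_bytes_str)s'
    if s.get('elapsed') is not None:
        s['_elapsed_str'] = '00:05'  # assumed format_seconds() output
        msg_template += ' in %(_elapsed_str)s'
    print(msg_template % s)  # -> 100% of 10.00MiB in 00:05
    # with neither key available the message degrades to plain '100%'

Unlike the old code, a missing total_bytes (e.g. when writing to stdout) no longer triggers a KeyError during interpolation.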

youtube_dl/downloader/external.py

@@ -1,9 +1,10 @@
 from __future__ import unicode_literals

 import os.path
+import re
 import subprocess
 import sys
-import re
+import time

 from .common import FileDownloader
 from ..compat import (
@@ -30,6 +31,7 @@ class ExternalFD(FileDownloader):
         tmpfilename = self.temp_name(filename)
         try:
+            started = time.time()
             retval = self._call_downloader(tmpfilename, info_dict)
         except KeyboardInterrupt:
             if not info_dict.get('is_live'):
@@ -41,15 +43,20 @@ class ExternalFD(FileDownloader):
             self.to_screen('[%s] Interrupted by user' % self.get_basename())

         if retval == 0:
-            fsize = os.path.getsize(encodeFilename(tmpfilename))
-            self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
-            self.try_rename(tmpfilename, filename)
-            self._hook_progress({
-                'downloaded_bytes': fsize,
-                'total_bytes': fsize,
-                'filename': filename,
-                'status': 'finished',
-            })
+            status = {
+                'filename': filename,
+                'status': 'finished',
+                'elapsed': time.time() - started,
+            }
+            if filename != '-':
+                fsize = os.path.getsize(encodeFilename(tmpfilename))
+                self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
+                self.try_rename(tmpfilename, filename)
+                status.update({
+                    'downloaded_bytes': fsize,
+                    'total_bytes': fsize,
+                })
+            self._hook_progress(status)
             return True
         else:
             self.to_stderr('\n')
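A usage sketch (assuming the youtube_dl package is importable and curl is installed) showing where the new 'elapsed' field surfaces for external downloads:

    from __future__ import unicode_literals

    import youtube_dl

    def hook(status):
        # external downloaders now report 'elapsed' in the finished status
        if status['status'] == 'finished':
            print('%s finished in %.1fs' % (status['filename'], status.get('elapsed', 0)))

    ydl_opts = {
        'external_downloader': 'curl',
        'progress_hooks': [hook],
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])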

youtube_dl/downloader/fragment.py

@@ -241,12 +241,16 @@ class FragmentFD(FileDownloader):
         if os.path.isfile(ytdl_filename):
             os.remove(ytdl_filename)
         elapsed = time.time() - ctx['started']
-        self.try_rename(ctx['tmpfilename'], ctx['filename'])
-        fsize = os.path.getsize(encodeFilename(ctx['filename']))
+
+        if ctx['tmpfilename'] == '-':
+            downloaded_bytes = ctx['complete_frags_downloaded_bytes']
+        else:
+            self.try_rename(ctx['tmpfilename'], ctx['filename'])
+            downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))

         self._hook_progress({
-            'downloaded_bytes': fsize,
-            'total_bytes': fsize,
+            'downloaded_bytes': downloaded_bytes,
+            'total_bytes': downloaded_bytes,
             'filename': ctx['filename'],
             'status': 'finished',
             'elapsed': elapsed,
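The `-` branch is the pipe-to-stdout case (e.g. `youtube-dl -o - URL | mpv -`): there is no output file to rename or stat, so the finished-hook byte count now comes from the per-fragment accounting instead of os.path.getsize().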

youtube_dl/extractor/abc.py

@@ -13,6 +13,7 @@ from ..utils import (
     int_or_none,
     parse_iso8601,
     try_get,
+    unescapeHTML,
     update_url_query,
 )
@@ -109,16 +110,17 @@ class ABCIViewIE(InfoExtractor):
     # ABC iview programs are normally available for 14 days only.
     _TESTS = [{
-        'url': 'http://iview.abc.net.au/programs/call-the-midwife/ZW0898A003S00',
+        'url': 'https://iview.abc.net.au/programs/ben-and-hollys-little-kingdom/ZY9247A021S00',
         'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
         'info_dict': {
-            'id': 'ZW0898A003S00',
+            'id': 'ZY9247A021S00',
             'ext': 'mp4',
-            'title': 'Series 5 Ep 3',
-            'description': 'md5:e0ef7d4f92055b86c4f33611f180ed79',
-            'upload_date': '20171228',
-            'uploader_id': 'abc1',
-            'timestamp': 1514499187,
+            'title': "Gaston's Visit",
+            'series': "Ben And Holly's Little Kingdom",
+            'description': 'md5:18db170ad71cf161e006a4c688e33155',
+            'upload_date': '20180318',
+            'uploader_id': 'abc4kids',
+            'timestamp': 1521400959,
         },
         'params': {
             'skip_download': True,
@@ -169,12 +171,12 @@ class ABCIViewIE(InfoExtractor):
         return {
             'id': video_id,
-            'title': title,
+            'title': unescapeHTML(title),
             'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
             'thumbnail': self._html_search_meta(['og:image', 'twitter:image:src'], webpage),
             'duration': int_or_none(video_params.get('eventDuration')),
             'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
-            'series': video_params.get('seriesTitle'),
+            'series': unescapeHTML(video_params.get('seriesTitle')),
             'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
             'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage, default=None)),
             'episode': self._html_search_meta('episode_title', webpage, default=None),

youtube_dl/extractor/acast.py

@@ -7,7 +7,9 @@ import functools
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
+    float_or_none,
     int_or_none,
+    try_get,
     unified_timestamp,
     OnDemandPagedList,
 )
@@ -24,40 +26,58 @@ class ACastIE(InfoExtractor):
             'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
             'ext': 'mp3',
             'title': '"Where Are You?": Taipei 101, Taiwan',
+            'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
             'timestamp': 1196172000,
             'upload_date': '20071127',
-            'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
             'duration': 211,
+            'creator': 'Concierge',
+            'series': 'Condé Nast Traveler Podcast',
+            'episode': '"Where Are You?": Taipei 101, Taiwan',
         }
     }, {
         # test with multiple blings
         'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
-        'md5': 'e87d5b8516cd04c0d81b6ee1caca28d0',
+        'md5': 'a02393c74f3bdb1801c3ec2695577ce0',
         'info_dict': {
             'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
             'ext': 'mp3',
             'title': '2. Raggarmordet - Röster ur det förflutna',
+            'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
             'timestamp': 1477346700,
             'upload_date': '20161024',
-            'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
-            'duration': 2766,
+            'duration': 2766.602563,
+            'creator': 'Anton Berg & Martin Johnson',
+            'series': 'Spår',
+            'episode': '2. Raggarmordet - Röster ur det förflutna',
         }
     }]

     def _real_extract(self, url):
         channel, display_id = re.match(self._VALID_URL, url).groups()
+        s = self._download_json(
+            'https://play-api.acast.com/stitch/%s/%s' % (channel, display_id),
+            display_id)['result']
+        media_url = s['url']
         cast_data = self._download_json(
-            'https://play-api.acast.com/splash/%s/%s' % (channel, display_id), display_id)
-        e = cast_data['result']['episode']
+            'https://play-api.acast.com/splash/%s/%s' % (channel, display_id),
+            display_id)['result']
+        e = cast_data['episode']
+        title = e['name']
         return {
             'id': compat_str(e['id']),
             'display_id': display_id,
-            'url': e['mediaUrl'],
-            'title': e['name'],
-            'description': e.get('description'),
+            'url': media_url,
+            'title': title,
+            'description': e.get('description') or e.get('summary'),
             'thumbnail': e.get('image'),
             'timestamp': unified_timestamp(e.get('publishingDate')),
-            'duration': int_or_none(e.get('duration')),
+            'duration': float_or_none(s.get('duration') or e.get('duration')),
+            'filesize': int_or_none(e.get('contentLength')),
+            'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str),
+            'series': try_get(cast_data, lambda x: x['show']['name'], compat_str),
+            'season_number': int_or_none(e.get('seasonNumber')),
+            'episode': title,
+            'episode_number': int_or_none(e.get('episodeNumber')),
         }

youtube_dl/extractor/afreecatv.py

@@ -9,6 +9,7 @@ from ..utils import (
     determine_ext,
     ExtractorError,
     int_or_none,
+    urlencode_postdata,
     xpath_text,
 )
@@ -28,6 +29,7 @@ class AfreecaTVIE(InfoExtractor):
                         )
                         (?P<id>\d+)
                     '''
+    _NETRC_MACHINE = 'afreecatv'
     _TESTS = [{
         'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
         'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
@@ -139,22 +141,22 @@ class AfreecaTVIE(InfoExtractor):
             'skip_download': True,
         },
     }, {
-        # adult video
-        'url': 'http://vod.afreecatv.com/PLAYER/STATION/26542731',
+        # PARTIAL_ADULT
+        'url': 'http://vod.afreecatv.com/PLAYER/STATION/32028439',
         'info_dict': {
-            'id': '20171001_F1AE1711_196617479_1',
+            'id': '20180327_27901457_202289533_1',
             'ext': 'mp4',
-            'title': '[생]서아 초심 찾기 방송 (part 1)',
+            'title': '[생]빨개요♥ (part 1)',
             'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
-            'uploader': 'BJ서아',
+            'uploader': '[SA]서아',
             'uploader_id': 'bjdyrksu',
-            'upload_date': '20171001',
-            'duration': 3600,
+            'upload_date': '20180327',
+            'duration': 3601,
+            'age_limit': 18,
         },
         'params': {
             'skip_download': True,
         },
+        'expected_warnings': ['adult content'],
     }, {
         'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
         'only_matching': True,
@@ -172,6 +174,51 @@ class AfreecaTVIE(InfoExtractor):
                 video_key['part'] = int(m.group('part'))
         return video_key

+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        username, password = self._get_login_info()
+        if username is None:
+            return
+
+        login_form = {
+            'szWork': 'login',
+            'szType': 'json',
+            'szUid': username,
+            'szPassword': password,
+            'isSaveId': 'false',
+            'szScriptVar': 'oLoginRet',
+            'szAction': '',
+        }
+
+        response = self._download_json(
+            'https://login.afreecatv.com/app/LoginAction.php', None,
+            'Logging in', data=urlencode_postdata(login_form))
+
+        _ERRORS = {
+            -4: 'Your account has been suspended due to a violation of our terms and policies.',
+            -5: 'https://member.afreecatv.com/app/user_delete_progress.php',
+            -6: 'https://login.afreecatv.com/membership/changeMember.php',
+            -8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
+            -9: 'https://member.afreecatv.com/app/pop_login_block.php',
+            -11: 'https://login.afreecatv.com/afreeca/second_login.php',
+            -12: 'https://member.afreecatv.com/app/user_security.php',
+            0: 'The username does not exist or you have entered the wrong password.',
+            -1: 'The username does not exist or you have entered the wrong password.',
+            -3: 'You have entered your username/password incorrectly.',
+            -7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
+            -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
+            -32008: 'You have failed to log in. Please contact our Help Center.',
+        }
+
+        result = int_or_none(response.get('RESULT'))
+        if result != 1:
+            error = _ERRORS.get(result, 'You have failed to log in.')
+            raise ExtractorError(
+                'Unable to login: %s said: %s' % (self.IE_NAME, error),
+                expected=True)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -188,21 +235,41 @@ class AfreecaTVIE(InfoExtractor):
         video_id = self._search_regex(
             r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)

-        video_xml = self._download_xml(
-            'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
-            video_id, headers={
-                'Referer': 'http://vod.afreecatv.com/embed.php',
-            }, query={
+        partial_view = False
+        for _ in range(2):
+            query = {
                 'nTitleNo': video_id,
                 'nStationNo': station_id,
                 'nBbsNo': bbs_id,
-                'partialView': 'SKIP_ADULT',
-            })
+            }
+            if partial_view:
+                query['partialView'] = 'SKIP_ADULT'
+            video_xml = self._download_xml(
+                'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
+                video_id, 'Downloading video info XML%s'
+                % (' (skipping adult)' if partial_view else ''),
+                video_id, headers={
+                    'Referer': url,
+                }, query=query)

-        flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
-        if flag and flag != 'SUCCEED':
+            flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
+            if flag and flag == 'SUCCEED':
+                break
+            if flag == 'PARTIAL_ADULT':
+                self._downloader.report_warning(
+                    'In accordance with local laws and regulations, underage users are restricted from watching adult content. '
+                    'Only content suitable for all ages will be downloaded. '
+                    'Provide account credentials if you wish to download restricted content.')
+                partial_view = True
+                continue
+            elif flag == 'ADULT':
+                error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.'
+            else:
+                error = flag
             raise ExtractorError(
-                '%s said: %s' % (self.IE_NAME, flag), expected=True)
+                '%s said: %s' % (self.IE_NAME, error), expected=True)
+        else:
+            raise ExtractorError('Unable to download video info')

         video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
         if video_element is None or video_element.text is None:
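Since the extractor now declares _NETRC_MACHINE, credentials can be supplied either on the command line (`youtube-dl -u ACCOUNT -p PASSWORD URL`) or, together with the `--netrc` flag, via a ~/.netrc entry of the form (placeholders, not real credentials):

    machine afreecatv login ACCOUNT password PASSWORD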

youtube_dl/extractor/bilibili.py

@@ -117,9 +117,9 @@ class BiliBiliIE(InfoExtractor):
                 r'cid(?:["\']:|=)(\d+)', webpage, 'cid',
                 default=None
             ) or compat_parse_qs(self._search_regex(
-                [r'1EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
-                 r'1EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
-                 r'1<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
+                [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
+                 r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
+                 r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
                 webpage, 'player parameters'))['cid'][0]
         else:
             if 'no_bangumi_tip' not in smuggled_data:

youtube_dl/extractor/canalc2.py

@@ -31,6 +31,10 @@ class Canalc2IE(InfoExtractor):
         webpage = self._download_webpage(
             'http://www.canalc2.tv/video/%s' % video_id, video_id)

+        title = self._html_search_regex(
+            r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.+?)</h3>',
+            webpage, 'title')
+
         formats = []
         for _, video_url in re.findall(r'file\s*=\s*(["\'])(.+?)\1', webpage):
             if video_url.startswith('rtmp://'):
@@ -49,17 +53,21 @@ class Canalc2IE(InfoExtractor):
                 'url': video_url,
                 'format_id': 'http',
             })
-        self._sort_formats(formats)

-        title = self._html_search_regex(
-            r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.*?)</h3>', webpage, 'title')
-        duration = parse_duration(self._search_regex(
-            r'id=["\']video_duree["\'][^>]*>([^<]+)',
-            webpage, 'duration', fatal=False))
-
-        return {
-            'id': video_id,
-            'title': title,
-            'duration': duration,
+        if formats:
+            info = {
                 'formats': formats,
             }
+        else:
+            info = self._parse_html5_media_entries(url, webpage, url)[0]
+
+        self._sort_formats(info['formats'])
+
+        info.update({
+            'id': video_id,
+            'title': title,
+            'duration': parse_duration(self._search_regex(
+                r'id=["\']video_duree["\'][^>]*>([^<]+)',
+                webpage, 'duration', fatal=False)),
+        })
+        return info

youtube_dl/extractor/cbc.py

@@ -5,7 +5,10 @@ import json
 import re

 from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+    compat_str,
+    compat_HTTPError,
+)
 from ..utils import (
     js_to_json,
     smuggle_url,
@@ -206,23 +209,41 @@ class CBCWatchBaseIE(InfoExtractor):
     def _call_api(self, path, video_id):
         url = path if path.startswith('http') else self._API_BASE_URL + path
+        for _ in range(2):
+            try:
                 result = self._download_xml(url, video_id, headers={
                     'X-Clearleap-DeviceId': self._device_id,
                     'X-Clearleap-DeviceToken': self._device_token,
                 })
+            except ExtractorError as e:
+                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+                    # Device token has expired, re-acquiring device token
+                    self._register_device()
+                    continue
+                raise
         error_message = xpath_text(result, 'userMessage') or xpath_text(result, 'systemMessage')
         if error_message:
             raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message))
         return result

     def _real_initialize(self):
-        if not self._device_id or not self._device_token:
+        if self._valid_device_token():
+            return
         device = self._downloader.cache.load('cbcwatch', 'device') or {}
         self._device_id, self._device_token = device.get('id'), device.get('token')
-        if not self._device_id or not self._device_token:
+        if self._valid_device_token():
+            return
+        self._register_device()
+
+    def _valid_device_token(self):
+        return self._device_id and self._device_token
+
+    def _register_device(self):
+        self._device_id = self._device_token = None
         result = self._download_xml(
             self._API_BASE_URL + 'device/register',
-            None, data=b'<device><type>web</type></device>')
+            None, 'Acquiring device token',
+            data=b'<device><type>web</type></device>')
         self._device_id = xpath_text(result, 'deviceId', fatal=True)
         self._device_token = xpath_text(result, 'deviceToken', fatal=True)
         self._downloader.cache.store(

youtube_dl/extractor/cbs.py

@@ -2,6 +2,7 @@ from __future__ import unicode_literals

 from .theplatform import ThePlatformFeedIE
 from ..utils import (
+    ExtractorError,
     int_or_none,
     find_xpath_attr,
     xpath_element,
@@ -61,6 +62,7 @@ class CBSIE(CBSBaseIE):
         asset_types = []
         subtitles = {}
         formats = []
+        last_e = None
         for item in items_data.findall('.//item'):
             asset_type = xpath_text(item, 'assetType')
             if not asset_type or asset_type in asset_types:
@@ -74,11 +76,17 @@ class CBSIE(CBSBaseIE):
                 query['formats'] = 'MPEG4,M3U'
             elif asset_type in ('RTMP', 'WIFI', '3G'):
                 query['formats'] = 'MPEG4,FLV'
+            try:
                 tp_formats, tp_subtitles = self._extract_theplatform_smil(
                     update_url_query(tp_release_url, query), content_id,
                     'Downloading %s SMIL data' % asset_type)
+            except ExtractorError as e:
+                last_e = e
+                continue
             formats.extend(tp_formats)
             subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+        if last_e and not formats:
+            raise last_e
         self._sort_formats(formats)

         info = self._extract_theplatform_metadata(tp_path, content_id)

youtube_dl/extractor/ceskatelevize.py

@@ -13,6 +13,7 @@ from ..utils import (
     float_or_none,
     sanitized_Request,
     unescapeHTML,
+    update_url_query,
     urlencode_postdata,
     USER_AGENTS,
 )
@@ -265,6 +266,10 @@ class CeskaTelevizePoradyIE(InfoExtractor):
             # m3u8 download
             'skip_download': True,
         },
+    }, {
+        # iframe embed
+        'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
@@ -272,8 +277,11 @@ class CeskaTelevizePoradyIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)

-        data_url = unescapeHTML(self._search_regex(
-            r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
-            webpage, 'iframe player url', group='url'))
+        data_url = update_url_query(unescapeHTML(self._search_regex(
+            (r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+             r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
+            webpage, 'iframe player url', group='url')), query={
+            'autoStart': 'true',
+        })

         return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())

youtube_dl/extractor/common.py

@@ -644,19 +644,31 @@ class InfoExtractor(object):
         content, _ = res
         return content

+    def _download_xml_handle(
+            self, url_or_request, video_id, note='Downloading XML',
+            errnote='Unable to download XML', transform_source=None,
+            fatal=True, encoding=None, data=None, headers={}, query={}):
+        """Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle)"""
+        res = self._download_webpage_handle(
+            url_or_request, video_id, note, errnote, fatal=fatal,
+            encoding=encoding, data=data, headers=headers, query=query)
+        if res is False:
+            return res
+        xml_string, urlh = res
+        return self._parse_xml(
+            xml_string, video_id, transform_source=transform_source,
+            fatal=fatal), urlh
+
     def _download_xml(self, url_or_request, video_id,
                       note='Downloading XML', errnote='Unable to download XML',
                       transform_source=None, fatal=True, encoding=None,
                       data=None, headers={}, query={}):
         """Return the xml as an xml.etree.ElementTree.Element"""
-        xml_string = self._download_webpage(
-            url_or_request, video_id, note, errnote, fatal=fatal,
-            encoding=encoding, data=data, headers=headers, query=query)
-        if xml_string is False:
-            return xml_string
-        return self._parse_xml(
-            xml_string, video_id, transform_source=transform_source,
-            fatal=fatal)
+        res = self._download_xml_handle(
+            url_or_request, video_id, note=note, errnote=errnote,
+            transform_source=transform_source, fatal=fatal, encoding=encoding,
+            data=data, headers=headers, query=query)
+        return res if res is False else res[0]
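A sketch of how an extractor can use the new helper (variable names are hypothetical); the point of the handle is access to the final, post-redirect URL:

    # inside an InfoExtractor method
    doc, urlh = self._download_xml_handle(manifest_url, video_id)
    base = urlh.geturl()  # final URL after redirects, usable as a base URL
    # doc is an xml.etree.ElementTree.Element, as with _download_xml()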
     def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
         if transform_source:

@@ -1013,7 +1025,7 @@ class InfoExtractor(object):
             })

         for e in json_ld:
-            if e.get('@context') == 'http://schema.org':
+            if isinstance(e.get('@context'), compat_str) and re.match(r'^https?://schema.org/?$', e.get('@context')):
                 item_type = e.get('@type')
                 if expected_type is not None and expected_type != item_type:
                     return info
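The relaxed check accepts the scheme and trailing-slash variants that sites actually emit; a quick self-contained demonstration of the regex:

    import re

    for ctx in ('http://schema.org', 'https://schema.org', 'https://schema.org/'):
        assert re.match(r'^https?://schema.org/?$', ctx)
    # anything beyond an optional trailing slash still fails to match
    assert not re.match(r'^https?://schema.org/?$', 'http://schema.org/Movie')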
@@ -1694,22 +1706,24 @@ class InfoExtractor(object):
             })
         return subtitles

-    def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True):
+    def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True):
         xspf = self._download_xml(
-            playlist_url, playlist_id, 'Downloading xpsf playlist',
+            xspf_url, playlist_id, 'Downloading xpsf playlist',
             'Unable to download xspf manifest', fatal=fatal)
         if xspf is False:
             return []
-        return self._parse_xspf(xspf, playlist_id)
+        return self._parse_xspf(
+            xspf, playlist_id, xspf_url=xspf_url,
+            xspf_base_url=base_url(xspf_url))

-    def _parse_xspf(self, playlist, playlist_id):
+    def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None):
         NS_MAP = {
             'xspf': 'http://xspf.org/ns/0/',
             's1': 'http://static.streamone.nl/player/ns/0',
         }

         entries = []
-        for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
+        for track in xspf_doc.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
             title = xpath_text(
                 track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
             description = xpath_text(
@@ -1719,12 +1733,18 @@ class InfoExtractor(object):
             duration = float_or_none(
                 xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)

-            formats = [{
-                'url': location.text,
-                'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
-                'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
-                'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
-            } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))]
+            formats = []
+            for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP)):
+                format_url = urljoin(xspf_base_url, location.text)
+                if not format_url:
+                    continue
+                formats.append({
+                    'url': format_url,
+                    'manifest_url': xspf_url,
+                    'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
+                    'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
+                    'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
+                })
             self._sort_formats(formats)

             entries.append({
@@ -1738,18 +1758,18 @@ class InfoExtractor(object):
         return entries
     def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
-        res = self._download_webpage_handle(
+        res = self._download_xml_handle(
             mpd_url, video_id,
             note=note or 'Downloading MPD manifest',
             errnote=errnote or 'Failed to download MPD manifest',
             fatal=fatal)
         if res is False:
             return []
-        mpd, urlh = res
+        mpd_doc, urlh = res
         mpd_base_url = base_url(urlh.geturl())

         return self._parse_mpd_formats(
-            compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
+            mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url,
             formats_dict=formats_dict, mpd_url=mpd_url)

     def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):

@@ -2023,17 +2043,16 @@ class InfoExtractor(object):
         return formats

     def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
-        res = self._download_webpage_handle(
+        res = self._download_xml_handle(
             ism_url, video_id,
             note=note or 'Downloading ISM manifest',
             errnote=errnote or 'Failed to download ISM manifest',
             fatal=fatal)
         if res is False:
             return []
-        ism, urlh = res
-        return self._parse_ism_formats(
-            compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)
+        ism_doc, urlh = res
+        return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)

     def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
         """

@@ -2131,8 +2150,8 @@ class InfoExtractor(object):
     def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None):
-        def absolute_url(video_url):
-            return compat_urlparse.urljoin(base_url, video_url)
+        def absolute_url(item_url):
+            return urljoin(base_url, item_url)

         def parse_content_type(content_type):
             if not content_type:

@@ -2189,7 +2208,7 @@ class InfoExtractor(object):
                 if src:
                     _, formats = _media_formats(src, media_type)
                     media_info['formats'].extend(formats)
-            media_info['thumbnail'] = media_attributes.get('poster')
+            media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
             if media_content:
                 for source_tag in re.findall(r'<source[^>]+>', media_content):
                     source_attributes = extract_attributes(source_tag)

youtube_dl/extractor/crackle.py

@@ -1,31 +1,45 @@
 # coding: utf-8
 from __future__ import unicode_literals, division

+import re
+
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..compat import (
+    compat_str,
+    compat_HTTPError,
+)
+from ..utils import (
+    determine_ext,
+    float_or_none,
+    int_or_none,
+    parse_age_limit,
+    parse_duration,
+    ExtractorError
+)


 class CrackleIE(InfoExtractor):
+    _GEO_COUNTRIES = ['US']
     _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
     _TEST = {
-        'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934',
+        # geo restricted to CA
+        'url': 'https://www.crackle.com/andromeda/2502343',
         'info_dict': {
-            'id': '2498934',
+            'id': '2502343',
             'ext': 'mp4',
-            'title': 'Everybody Respects A Bloody Nose',
-            'description': 'Jerry is kaffeeklatsching in L.A. with funnyman J.B. Smoove (Saturday Night Live, Real Husbands of Hollywood). Theyre headed for brew at 10 Speed Coffee in a 1964 Studebaker Avanti.',
-            'thumbnail': r're:^https?://.*\.jpg',
-            'duration': 906,
-            'series': 'Comedians In Cars Getting Coffee',
-            'season_number': 8,
-            'episode_number': 4,
-            'subtitles': {
-                'en-US': [
-                    {'ext': 'vtt'},
-                    {'ext': 'tt'},
-                ]
-            },
+            'title': 'Under The Night',
+            'description': 'md5:d2b8ca816579ae8a7bf28bfff8cefc8a',
+            'duration': 2583,
+            'view_count': int,
+            'average_rating': 0,
+            'age_limit': 14,
+            'genre': 'Action, Sci-Fi',
+            'creator': 'Allan Kroeker',
+            'artist': 'Keith Hamilton Cobb, Kevin Sorbo, Lisa Ryder, Lexa Doig, Robert Hewitt Wolfe',
+            'release_year': 2000,
+            'series': 'Andromeda',
+            'episode': 'Under The Night',
+            'season_number': 1,
+            'episode_number': 1,
         },
         'params': {
             # m3u8 download
@@ -33,109 +47,118 @@ class CrackleIE(InfoExtractor):
         }
     }

-    _THUMBNAIL_RES = [
-        (120, 90),
-        (208, 156),
-        (220, 124),
-        (220, 220),
-        (240, 180),
-        (250, 141),
-        (315, 236),
-        (320, 180),
-        (360, 203),
-        (400, 300),
-        (421, 316),
-        (460, 330),
-        (460, 460),
-        (462, 260),
-        (480, 270),
-        (587, 330),
-        (640, 480),
-        (700, 330),
-        (700, 394),
-        (854, 480),
-        (1024, 1024),
-        (1920, 1080),
-    ]
-
-    # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
-    _MEDIA_FILE_SLOTS = {
-        'c544.flv': {
-            'width': 544,
-            'height': 306,
-        },
-        '360p.mp4': {
-            'width': 640,
-            'height': 360,
-        },
-        '480p.mp4': {
-            'width': 852,
-            'height': 478,
-        },
-        '480p_1mbps.mp4': {
-            'width': 852,
-            'height': 478,
-        },
-    }
-
     def _real_extract(self, url):
         video_id = self._match_id(url)

-        config_doc = self._download_xml(
-            'http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx?site=16',
-            video_id, 'Downloading config')
-
-        item = self._download_xml(
-            'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id,
-            video_id, headers=self.geo_verification_headers()).find('i')
-        title = item.attrib['t']
-
-        subtitles = {}
-        formats = self._extract_m3u8_formats(
-            'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id),
-            video_id, 'mp4', m3u8_id='hls', fatal=None)
-        thumbnails = []
-        path = item.attrib.get('p')
-        if path:
-            for width, height in self._THUMBNAIL_RES:
-                res = '%dx%d' % (width, height)
-                thumbnails.append({
-                    'id': res,
-                    'url': 'http://images-us-am.crackle.com/%stnl_%s.jpg' % (path, res),
-                    'width': width,
-                    'height': height,
-                    'resolution': res,
-                })
-            http_base_url = 'http://ahttp.crackle.com/' + path
-            for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items():
-                formats.append({
-                    'url': http_base_url + mfs_path,
-                    'format_id': 'http-' + mfs_path.split('.')[0],
-                    'width': mfs_info['width'],
-                    'height': mfs_info['height'],
-                })
-        for cc in item.findall('cc'):
-            locale = cc.attrib.get('l')
-            v = cc.attrib.get('v')
-            if locale and v:
-                if locale not in subtitles:
-                    subtitles[locale] = []
-                for url_ext, ext in (('vtt', 'vtt'), ('xml', 'tt')):
-                    subtitles.setdefault(locale, []).append({
-                        'url': '%s/%s%s_%s.%s' % (config_doc.attrib['strSubtitleServer'], path, locale, v, url_ext),
-                        'ext': ext,
-                    })
-        self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': item.attrib.get('d'),
-            'duration': int(item.attrib.get('r'), 16) / 1000 if item.attrib.get('r') else None,
-            'series': item.attrib.get('sn'),
-            'season_number': int_or_none(item.attrib.get('se')),
-            'episode_number': int_or_none(item.attrib.get('ep')),
-            'thumbnails': thumbnails,
-            'subtitles': subtitles,
-            'formats': formats,
-        }
+        country_code = self._downloader.params.get('geo_bypass_country', None)
+        countries = [country_code] if country_code else (
+            'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI')
+
+        last_e = None
+
+        for country in countries:
+            try:
+                media = self._download_json(
+                    'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s'
+                    % (video_id, country), video_id,
+                    'Downloading media JSON as %s' % country,
+                    'Unable to download media JSON', query={
+                        'disableProtocols': 'true',
+                        'format': 'json'
+                    })
+            except ExtractorError as e:
+                # 401 means geo restriction, trying next country
+                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+                    last_e = e
+                    continue
+                raise
+
+            media_urls = media.get('MediaURLs')
+            if not media_urls or not isinstance(media_urls, list):
+                continue
+
+            title = media['Title']
+
+            formats = []
+            for e in media['MediaURLs']:
+                if e.get('UseDRM') is True:
+                    continue
+                format_url = e.get('Path')
+                if not format_url or not isinstance(format_url, compat_str):
+                    continue
+                ext = determine_ext(format_url)
+                if ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                        m3u8_id='hls', fatal=False))
+                elif ext == 'mpd':
+                    formats.extend(self._extract_mpd_formats(
+                        format_url, video_id, mpd_id='dash', fatal=False))
+            self._sort_formats(formats)
+
+            description = media.get('Description')
+            duration = int_or_none(media.get(
+                'DurationInSeconds')) or parse_duration(media.get('Duration'))
+            view_count = int_or_none(media.get('CountViews'))
+            average_rating = float_or_none(media.get('UserRating'))
+            age_limit = parse_age_limit(media.get('Rating'))
+            genre = media.get('Genre')
+            release_year = int_or_none(media.get('ReleaseYear'))
+            creator = media.get('Directors')
+            artist = media.get('Cast')
+
+            if media.get('MediaTypeDisplayValue') == 'Full Episode':
+                series = media.get('ShowName')
+                episode = title
+                season_number = int_or_none(media.get('Season'))
+                episode_number = int_or_none(media.get('Episode'))
+            else:
+                series = episode = season_number = episode_number = None
+
+            subtitles = {}
+            cc_files = media.get('ClosedCaptionFiles')
+            if isinstance(cc_files, list):
+                for cc_file in cc_files:
+                    if not isinstance(cc_file, dict):
+                        continue
+                    cc_url = cc_file.get('Path')
+                    if not cc_url or not isinstance(cc_url, compat_str):
+                        continue
+                    lang = cc_file.get('Locale') or 'en'
+                    subtitles.setdefault(lang, []).append({'url': cc_url})
+
+            thumbnails = []
+            images = media.get('Images')
+            if isinstance(images, list):
+                for image_key, image_url in images.items():
+                    mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
+                    if not mobj:
+                        continue
+                    thumbnails.append({
+                        'url': image_url,
+                        'width': int(mobj.group(1)),
+                        'height': int(mobj.group(2)),
+                    })
+
+            return {
+                'id': video_id,
+                'title': title,
+                'description': description,
+                'duration': duration,
+                'view_count': view_count,
+                'average_rating': average_rating,
+                'age_limit': age_limit,
+                'genre': genre,
+                'creator': creator,
+                'artist': artist,
+                'release_year': release_year,
+                'series': series,
+                'episode': episode,
+                'season_number': season_number,
+                'episode_number': episode_number,
+                'thumbnails': thumbnails,
+                'subtitles': subtitles,
+                'formats': formats,
+            }
+
+        raise last_e

View File

@ -2,26 +2,26 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import itertools import itertools
import json
from .amp import AMPIE from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_HTTPError, compat_HTTPError,
compat_str,
compat_urlparse, compat_urlparse,
) )
from ..utils import ( from ..utils import (
ExtractorError,
clean_html, clean_html,
ExtractorError,
int_or_none, int_or_none,
remove_end, parse_age_limit,
sanitized_Request, parse_duration,
urlencode_postdata unified_timestamp,
) )
class DramaFeverBaseIE(AMPIE): class DramaFeverBaseIE(InfoExtractor):
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
_NETRC_MACHINE = 'dramafever' _NETRC_MACHINE = 'dramafever'
_GEO_COUNTRIES = ['US', 'CA']
_CONSUMER_SECRET = 'DA59dtVXYLxajktV' _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
@ -38,8 +38,8 @@ class DramaFeverBaseIE(AMPIE):
'consumer secret', default=self._CONSUMER_SECRET) 'consumer secret', default=self._CONSUMER_SECRET)
def _real_initialize(self): def _real_initialize(self):
self._login()
self._consumer_secret = self._get_consumer_secret() self._consumer_secret = self._get_consumer_secret()
self._login()
def _login(self): def _login(self):
(username, password) = self._get_login_info() (username, password) = self._get_login_info()
@ -51,18 +51,28 @@ class DramaFeverBaseIE(AMPIE):
'password': password, 'password': password,
} }
request = sanitized_Request( try:
self._LOGIN_URL, urlencode_postdata(login_form)) response = self._download_json(
response = self._download_webpage( 'https://www.dramafever.com/api/users/login', None, 'Logging in',
request, None, 'Logging in') data=json.dumps(login_form).encode('utf-8'), headers={
'x-consumer-key': self._consumer_secret,
})
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (403, 404):
response = self._parse_json(
e.cause.read().decode('utf-8'), None)
else:
raise
if all(logout_pattern not in response # Successful login
for logout_pattern in ['href="/accounts/logout/"', '>Log out<']): if response.get('result') or response.get('guid') or response.get('user_guid'):
error = self._html_search_regex( return
r'(?s)<h\d[^>]+\bclass="hidden-xs prompt"[^>]*>(.+?)</h\d',
response, 'error message', default=None) errors = response.get('errors')
if error: if errors and isinstance(errors, list):
raise ExtractorError('Unable to login: %s' % error, expected=True) error = errors[0]
message = error.get('message') or error['reason']
raise ExtractorError('Unable to login: %s' % message, expected=True)
raise ExtractorError('Unable to log in') raise ExtractorError('Unable to log in')
@ -70,18 +80,20 @@ class DramaFeverIE(DramaFeverBaseIE):
IE_NAME = 'dramafever' IE_NAME = 'dramafever'
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)' _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/', 'url': 'https://www.dramafever.com/drama/4274/1/Heirs/',
'info_dict': { 'info_dict': {
'id': '4512.1', 'id': '4274.1',
'ext': 'flv', 'ext': 'wvm',
'title': 'Cooking with Shin', 'title': 'Heirs - Episode 1',
'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0', 'description': 'md5:362a24ba18209f6276e032a651c50bc2',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 3783,
'timestamp': 1381354993,
'upload_date': '20131009',
'series': 'Heirs',
'season_number': 1,
'episode': 'Episode 1', 'episode': 'Episode 1',
'episode_number': 1, 'episode_number': 1,
'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1404336058,
'upload_date': '20140702',
'duration': 344,
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
@ -110,50 +122,95 @@ class DramaFeverIE(DramaFeverBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
def _call_api(self, path, video_id, note, fatal=False):
return self._download_json(
'https://www.dramafever.com/api/5/' + path,
video_id, note=note, headers={
'x-consumer-key': self._consumer_secret,
}, fatal=fatal)
def _get_subtitles(self, video_id):
subtitles = {}
subs = self._call_api(
'video/%s/subtitles/webvtt/' % video_id, video_id,
'Downloading subtitles JSON', fatal=False)
if not subs or not isinstance(subs, list):
return subtitles
for sub in subs:
if not isinstance(sub, dict):
continue
sub_url = sub.get('url')
if not sub_url or not isinstance(sub_url, compat_str):
continue
subtitles.setdefault(
sub.get('code') or sub.get('language') or 'en', []).append({
'url': sub_url
})
return subtitles
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url).replace('/', '.') video_id = self._match_id(url).replace('/', '.')
try:
info = self._extract_feed_info(
'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
self.raise_geo_restricted(
msg='Currently unavailable in your country',
countries=self._GEO_COUNTRIES)
raise
# title is postfixed with video id for some reason, removing
if info.get('title'):
info['title'] = remove_end(info['title'], video_id).strip()
series_id, episode_number = video_id.split('.') series_id, episode_number = video_id.split('.')
episode_info = self._download_json(
# We only need a single episode info, so restricting page size to one episode
# and dealing with page number as with episode number
r'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_number=%s&page_size=1'
% (self._consumer_secret, series_id, episode_number),
video_id, 'Downloading episode info JSON', fatal=False)
if episode_info:
value = episode_info.get('value')
if isinstance(value, list):
for v in value:
if v.get('type') == 'Episode':
subfile = v.get('subfile') or v.get('new_subfile')
if subfile and subfile != 'http://www.dramafever.com/st/':
info.setdefault('subtitles', {}).setdefault('English', []).append({
'ext': 'srt',
'url': subfile,
})
episode_number = int_or_none(v.get('number'))
episode_fallback = 'Episode'
if episode_number:
episode_fallback += ' %d' % episode_number
info['episode'] = v.get('title') or episode_fallback
info['episode_number'] = episode_number
break
return info video = self._call_api(
'series/%s/episodes/%s/' % (series_id, episode_number), video_id,
'Downloading video JSON')
formats = []
download_assets = video.get('download_assets')
if download_assets and isinstance(download_assets, dict):
for format_id, format_dict in download_assets.items():
if not isinstance(format_dict, dict):
continue
format_url = format_dict.get('url')
if not format_url or not isinstance(format_url, compat_str):
continue
formats.append({
'url': format_url,
'format_id': format_id,
'filesize': int_or_none(video.get('filesize')),
})
stream = self._call_api(
'video/%s/stream/' % video_id, video_id, 'Downloading stream JSON',
fatal=False)
if stream:
stream_url = stream.get('stream_url')
if stream_url:
formats.extend(self._extract_m3u8_formats(
stream_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
self._sort_formats(formats)
title = video.get('title') or 'Episode %s' % episode_number
description = video.get('description')
thumbnail = video.get('thumbnail')
timestamp = unified_timestamp(video.get('release_date'))
duration = parse_duration(video.get('duration'))
age_limit = parse_age_limit(video.get('tv_rating'))
series = video.get('series_title')
season_number = int_or_none(video.get('season'))
if series:
title = '%s - %s' % (series, title)
subtitles = self.extract_subtitles(video_id)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'timestamp': timestamp,
'age_limit': age_limit,
'series': series,
'season_number': season_number,
'episode_number': int_or_none(episode_number),
'formats': formats,
'subtitles': subtitles,
}
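The rewrite drops the old feed.json flow entirely: metadata, download formats, the HLS stream and the WebVTT subtitles all come from the api/5 endpoints via the new _call_api helper, which only wraps _download_json with an x-consumer-key header. A minimal standalone sketch of that request shape, outside the InfoExtractor machinery (the consumer secret and path are placeholders):

    import json
    import urllib.request

    def call_api(path, consumer_secret):
        # Same shape as DramaFeverIE._call_api above: GET
        # https://www.dramafever.com/api/5/<path> with x-consumer-key set.
        req = urllib.request.Request(
            'https://www.dramafever.com/api/5/' + path,
            headers={'x-consumer-key': consumer_secret})
        return json.load(urllib.request.urlopen(req))

    # call_api('series/4274/episodes/1/', secret)  # -> episode JSON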
class DramaFeverSeriesIE(DramaFeverBaseIE):

View File

@@ -66,7 +66,9 @@ class DrTuberIE(InfoExtractor):
self._sort_formats(formats)
title = self._html_search_regex(
(r'<h1[^>]+class=["\']title[^>]+>([^<]+)',
r'<title>([^<]+)\s*@\s+DrTuber',
r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
r'<p[^>]+class="title_substrate">([^<]+)</p>',
r'<title>([^<]+) - \d+'),
webpage, 'title')
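The fix works because _html_search_regex accepts a tuple of patterns and returns the first one that matches, so the patterns for the current page layout are simply placed ahead of the legacy ones. The same fallback cascade in plain re, as a sketch against a hypothetical page snippet:

    import re

    def first_match(patterns, webpage):
        # Try each pattern in order, like _html_search_regex with a tuple.
        for pattern in patterns:
            mobj = re.search(pattern, webpage)
            if mobj:
                return mobj.group(1)

    title = first_match(
        (r'<h1[^>]+class=["\']title[^>]+>([^<]+)',
         r'<title>([^<]+)\s*@\s+DrTuber'),
        '<h1 class="title">Some Title</h1>')  # hypothetical snippet -> 'Some Title'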

View File

@@ -532,13 +532,14 @@ from .lcp import (
)
from .learnr import LearnrIE
from .lecture2go import Lecture2GoIE
from .leeco import (
LeIE,
LePlaylistIE,
LetvCloudIE,
)
from .lego import LEGOIE
from .lemonde import LemondeIE
from .lenta import LentaIE
from .libraryofcongress import LibraryOfCongressIE
from .libsyn import LibsynIE
from .lifenews import (
@@ -1030,6 +1031,7 @@ from .sunporno import SunPornoIE
from .svt import (
SVTIE,
SVTPlayIE,
SVTSeriesIE,
)
from .swrmediathek import SWRMediathekIE
from .syfy import SyfyIE
@@ -1135,6 +1137,7 @@ from .tvnoe import TVNoeIE
from .tvnow import (
TVNowIE,
TVNowListIE,
TVNowShowIE,
)
from .tvp import (
TVPEmbedIE,

View File

@@ -41,7 +41,7 @@ class FXNetworksIE(AdobePassIE):
if 'The content you are trying to access is not available in your region.' in webpage:
self.raise_geo_restricted()
video_data = extract_attributes(self._search_regex(
r'(<a.+?rel="https?://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data'))
player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None)
release_url = video_data['rel']
title = video_data['data-title']

View File

@@ -58,6 +58,7 @@ from .xhamster import XHamsterEmbedIE
from .tnaflix import TNAFlixNetworkEmbedIE
from .drtuber import DrTuberIE
from .redtube import RedTubeIE
from .tube8 import Tube8IE
from .vimeo import VimeoIE
from .dailymotion import DailymotionIE
from .dailymail import DailyMailIE
@@ -104,6 +105,7 @@ from .mediasite import MediasiteIE
from .springboardplatform import SpringboardPlatformIE
from .yapfiles import YapFilesIE
from .vice import ViceIE
from .xfileshare import XFileShareIE
class GenericIE(InfoExtractor):
@@ -1269,24 +1271,6 @@ class GenericIE(InfoExtractor):
},
'add_ie': ['Kaltura'],
},
# referrer protected EaglePlatform embed
{
'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
@@ -1984,7 +1968,17 @@ class GenericIE(InfoExtractor):
'params': {
'skip_download': True,
},
},
{
'url': 'http://share-videos.se/auto/video/83645793?uid=13',
'md5': 'b68d276de422ab07ee1d49388103f457',
'info_dict': {
'id': '83645793',
'title': 'Lock up and get excited',
'ext': 'mp4'
},
'skip': 'TODO: fix nested playlists processing in tests',
},
# {
# # TODO: find another test
# # http://schema.org/VideoObject
@@ -2231,7 +2225,11 @@ class GenericIE(InfoExtractor):
self._sort_formats(smil['formats'])
return smil
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
return self.playlist_result(
self._parse_xspf(
doc, video_id, xspf_url=url,
xspf_base_url=compat_str(full_response.geturl())),
video_id)
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'] = self._parse_mpd_formats(
doc,
@@ -2559,6 +2557,11 @@ class GenericIE(InfoExtractor):
if redtube_urls:
return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
# Look for embedded Tube8 player
tube8_urls = Tube8IE._extract_urls(webpage)
if tube8_urls:
return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
# Look for embedded Tvigle player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
@@ -2971,6 +2974,18 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
vice_urls, video_id, video_title, ie=ViceIE.ie_key())
xfileshare_urls = XFileShareIE._extract_urls(webpage)
if xfileshare_urls:
return self.playlist_from_matches(
xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
webpage)]
if sharevideos_urls:
return self.playlist_from_matches(
sharevideos_urls, video_id, video_title)
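share-videos.se gets no dedicated extractor; the generic extractor just harvests every matching embed iframe and returns the URLs as a playlist. A sketch of the same harvesting step with a hypothetical page:

    import re

    SHARE_VIDEOS_RE = (
        r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?'
        r'//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1')

    html = '<iframe src="https://embed.share-videos.se/auto/embed/83645793?uid=13"></iframe>'
    urls = [m.group('url') for m in re.finditer(SHARE_VIDEOS_RE, html)]
    # -> ['https://embed.share-videos.se/auto/embed/83645793?uid=13']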
def merge_dicts(dict1, dict2):
merged = {}
for k, v in dict1.items():

View File

@@ -1,5 +1,7 @@
from __future__ import unicode_literals
import itertools
import hashlib
import json
import re
@@ -237,23 +239,43 @@ class InstagramUserIE(InfoExtractor):
}
}
def _entries(self, data):
def get_count(suffix):
return int_or_none(try_get(
node, lambda x: x['edge_media_' + suffix]['count']))
uploader_id = data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
csrf_token = data['config']['csrf_token']
rhx_gis = data.get('rhx_gis') or '3c7ca9dcefcf966d11dacf1f151335e8'
self._set_cookie('instagram.com', 'ig_pr', '1')
cursor = ''
for page_num in itertools.count(1):
variables = json.dumps({
'id': uploader_id,
'first': 100,
'after': cursor,
})
s = '%s:%s:%s' % (rhx_gis, csrf_token, variables)
media = self._download_json(
'https://www.instagram.com/graphql/query/', uploader_id,
'Downloading JSON page %d' % page_num, headers={
'X-Requested-With': 'XMLHttpRequest',
'X-Instagram-GIS': hashlib.md5(s.encode('utf-8')).hexdigest(),
}, query={
'query_hash': '472f257a40c653c64c666ce877d59d2b',
'variables': variables,
})['data']['user']['edge_owner_to_timeline_media']
edges = media.get('edges')
if not edges or not isinstance(edges, list):
break
for edge in edges:
node = edge.get('node')
if not node or not isinstance(node, dict):
continue
if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True:
continue
video_id = node.get('shortcode')
@@ -285,11 +307,27 @@ class InstagramUserIE(InfoExtractor):
yield info
page_info = media.get('page_info')
if not page_info or not isinstance(page_info, dict):
break
has_next_page = page_info.get('has_next_page')
if not has_next_page:
break
cursor = page_info.get('end_cursor')
if not cursor or not isinstance(cursor, compat_str):
break
def _real_extract(self, url):
username = self._match_id(url)
webpage = self._download_webpage(url, username)
data = self._parse_json(
self._search_regex(
r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data'),
username)
return self.playlist_result(
self._entries(data), username, username)
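The crux of the fix is the X-Instagram-GIS header: Instagram expects the MD5 hex digest of 'rhx_gis:csrf_token:variables', where variables is the exact JSON string sent as the query parameter. The signing step in isolation (all values here are placeholders):

    import hashlib
    import json

    def gis_signature(rhx_gis, csrf_token, variables):
        # MD5 over 'rhx_gis:csrf_token:variables', as in _entries() above.
        s = '%s:%s:%s' % (rhx_gis, csrf_token, variables)
        return hashlib.md5(s.encode('utf-8')).hexdigest()

    variables = json.dumps({'id': '42', 'first': 100, 'after': ''})
    header = gis_signature('3c7ca9dcefcf966d11dacf1f151335e8', 'csrf', variables)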

View File

@@ -0,0 +1,53 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class LentaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?lenta\.ru/[^/]+/\d+/\d+/\d+/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://lenta.ru/news/2018/03/22/savshenko_go/',
'info_dict': {
'id': '964400',
'ext': 'mp4',
'title': 'Надежду Савченко задержали',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 61,
'view_count': int,
},
'params': {
'skip_download': True,
},
}, {
# EaglePlatform iframe embed
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
'info_dict': {
'id': '227304',
'ext': 'mp4',
'title': 'Навальный вышел на свободу',
'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 87,
'view_count': int,
'age_limit': 0,
},
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'vid\s*:\s*["\']?(\d+)', webpage, 'eagleplatform id',
default=None)
if video_id:
return self.url_result(
'eagleplatform:lentaru.media.eagleplatform.com:%s' % video_id,
ie='EaglePlatform', video_id=video_id)
return self.url_result(url, ie='Generic')
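The new extractor downloads nothing itself: it looks up the numeric EaglePlatform id in the page and delegates via url_result, with the generic extractor as the fallback when no id is found. The lookup step in isolation (the page snippet is hypothetical):

    import re

    webpage = 'window.video = { vid: "964400" }'  # hypothetical page snippet
    mobj = re.search(r'vid\s*:\s*["\']?(\d+)', webpage)
    video_id = mobj.group(1) if mobj else None
    # -> handed off as eagleplatform:lentaru.media.eagleplatform.com:964400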

View File

@@ -1,24 +1,28 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
parse_duration,
unified_strdate,
)
class LibsynIE(InfoExtractor):
_VALID_URL = r'(?P<mainurl>https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+))'
_TESTS = [{
'url': 'http://html5-player.libsyn.com/embed/episode/id/6385796/',
'md5': '2a55e75496c790cdeb058e7e6c087746',
'info_dict': {
'id': '6385796',
'ext': 'mp3',
'title': "Champion Minded - Developing a Growth Mindset",
'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.',
'upload_date': '20180320',
'thumbnail': 're:^https?://.*',
},
}, {
@@ -39,31 +43,45 @@ class LibsynIE(InfoExtractor):
url = m.group('mainurl')
webpage = self._download_webpage(url, video_id)
podcast_title = self._search_regex(
r'<h3>([^<]+)</h3>', webpage, 'podcast title', default=None)
if podcast_title:
podcast_title = podcast_title.strip()
episode_title = self._search_regex(
r'(?:<div class="episode-title">|<h4>)([^<]+)</', webpage, 'episode title')
if episode_title:
episode_title = episode_title.strip()
title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
description = self._html_search_regex(
r'<p\s+id="info_text_body">(.+?)</p>', webpage,
'description', default=None)
if description:
# Strip non-breaking and normal spaces
description = description.replace('\u00A0', ' ').strip()
release_date = unified_strdate(self._search_regex(
r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))
data_json = self._search_regex(r'var\s+playlistItem\s*=\s*(\{.*?\});\n', webpage, 'JSON data block')
data = json.loads(data_json)
formats = [{
'url': data['media_url'],
'format_id': 'main',
}, {
'url': data['media_url_libsyn'],
'format_id': 'libsyn',
}]
thumbnail = data.get('thumbnail_url')
duration = parse_duration(data.get('duration'))
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'upload_date': release_date,
'duration': duration,
'formats': formats,
}
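Rather than scraping scattered mediaURL variables, the extractor now reads the player's playlistItem JSON object, which carries both media URLs plus thumbnail and duration in one place. A sketch of the same parse against a hypothetical page:

    import json
    import re

    webpage = ('var playlistItem = {"media_url": "https://traffic.libsyn.com/ep.mp3",'
               ' "media_url_libsyn": "https://html5-player.libsyn.com/ep.mp3"};\n')
    data = json.loads(re.search(
        r'var\s+playlistItem\s*=\s*(\{.*?\});\n', webpage).group(1))
    formats = [{'url': data['media_url'], 'format_id': 'main'},
               {'url': data['media_url_libsyn'], 'format_id': 'libsyn'}]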

View File

@@ -7,7 +7,7 @@ from ..utils import int_or_none
class LiveLeakIE(InfoExtractor):
_VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?.*?\b[it]=(?P<id>[\w_]+)'
_TESTS = [{
'url': 'http://www.liveleak.com/view?i=757_1364311680',
'md5': '0813c2430bea7a46bf13acf3406992f4',
@@ -79,6 +79,9 @@ class LiveLeakIE(InfoExtractor):
'title': 'Fuel Depot in China Explosion caught on video',
},
'playlist_count': 3,
}, {
'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
'only_matching': True,
}]
@staticmethod

View File

@@ -141,6 +141,7 @@ class MedialaanIE(GigyaBaseIE):
vod_id = config.get('vodId') or self._search_regex(
(r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
r'"vodId"\s*:\s*"(.+?)"',
r'<[^>]+id=["\']vod-(\d+)'),
webpage, 'video_id', default=None)

View File

@@ -68,11 +68,11 @@ class NationalGeographicVideoIE(InfoExtractor):
class NationalGeographicIE(ThePlatformIE, AdobePassIE):
IE_NAME = 'natgeo'
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:(?:wild/)?[^/]+/)?(?:videos|episodes)|u)/(?P<id>[^/?]+)'
_TESTS = [
{
'url': 'http://channel.nationalgeographic.com/u/kdi9Ld0PN2molUUIMSBGxoeDhD729KRjQcnxtetilWPMevo8ZwUBIDuPR0Q3D2LVaTsk0MPRkRWDB8ZhqWVeyoxfsZZm36yRp1j-zPfsHEyI_EgAeFY/',
'md5': '518c9aa655686cf81493af5cc21e2a04',
'info_dict': {
'id': 'vKInpacll2pC',
@@ -86,7 +86,7 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE):
'add_ie': ['ThePlatform'],
},
{
'url': 'http://channel.nationalgeographic.com/u/kdvOstqYaBY-vSBPyYgAZRUL4sWUJ5XUUPEhc7ISyBHqoIO4_dzfY3K6EjHIC0hmFXoQ7Cpzm6RkET7S3oMlm6CFnrQwSUwo/',
'md5': 'c4912f656b4cbe58f3e000c489360989',
'info_dict': {
'id': 'Pok5lWCkiEFA',
@@ -106,6 +106,14 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE):
{
'url': 'http://channel.nationalgeographic.com/videos/treasures-rediscovered/',
'only_matching': True,
},
{
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
'only_matching': True,
},
{
'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
'only_matching': True,
}
]

View File

@@ -1,8 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -43,9 +41,14 @@ class NaverIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
vid = self._search_regex(
r'videoId["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
'video id', fatal=None, group='value')
in_key = self._search_regex(
r'inKey["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
'key', default=None, group='value')
if not vid or not in_key:
error = self._html_search_regex(
r'(?s)<div class="(?:nation_error|nation_box|error_box)">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
webpage, 'error', default=None)
@@ -53,9 +56,9 @@ class NaverIE(InfoExtractor):
raise ExtractorError(error, expected=True)
raise ExtractorError('couldn\'t extract vid and key')
video_data = self._download_json(
'http://play.rmcnmv.naver.com/vod/play/v2.0/' + vid,
video_id, query={
'key': in_key,
})
meta = video_data['meta']
title = meta['subject']
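Both new regexes use the same quote-agnostic idiom: capture the opening quote in group 1, accept any character that is not that quote via a negative lookahead, and close with a backreference. A sketch showing it handles either quote style (the snippets are hypothetical):

    import re

    PATTERN = r'videoId["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'

    for snippet in ('"videoId": "ABC123"', '"videoId": \'ABC123\''):
        print(re.search(PATTERN, snippet).group('value'))  # ABC123 both times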

View File

@@ -4,15 +4,17 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
float_or_none,
smuggle_url,
)
class NineNowIE(InfoExtractor):
IE_NAME = '9now.com.au'
_VALID_URL = r'https?://(?:www\.)?9now\.com\.au/(?:[^/]+/){2}(?P<id>[^/?#]+)'
_GEO_COUNTRIES = ['AU']
_TESTS = [{
# clip
'url': 'https://www.9now.com.au/afl-footy-show/2016/clip-ciql02091000g0hp5oktrnytc',
@@ -75,7 +77,9 @@ class NineNowIE(InfoExtractor):
return {
'_type': 'url_transparent',
'url': smuggle_url(
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
{'geo_countries': self._GEO_COUNTRIES}),
'id': video_id,
'title': title,
'description': common_data.get('description'),

View File

@@ -19,7 +19,18 @@ from ..utils import (
class OdnoklassnikiIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:(?:www|m|mobile)\.)?
(?:odnoklassniki|ok)\.ru/
(?:
video(?:embed)?/|
web-api/video/moviePlayer/|
live/|
dk\?.*?st\.mvId=
)
(?P<id>[\d-]+)
'''
_TESTS = [{
# metadata in JSON
'url': 'http://ok.ru/video/20079905452',
@@ -101,6 +112,9 @@ class OdnoklassnikiIE(InfoExtractor):
}, {
'url': 'https://www.ok.ru/live/484531969818',
'only_matching': True,
}, {
'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):

View File

@@ -243,7 +243,7 @@ class PhantomJSwrapper(object):
class OpenloadIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
_TESTS = [{
'url': 'https://openload.co/f/kUEfGclsU9o',
@@ -298,6 +298,9 @@ class OpenloadIE(InfoExtractor):
}, {
'url': 'https://oload.stream/f/KnG-kKZdcfY',
'only_matching': True,
}, {
'url': 'https://oload.xyz/f/WwRBpzW8Wtk',
'only_matching': True,
}]
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
@@ -334,10 +337,11 @@ class OpenloadIE(InfoExtractor):
decoded_id = (get_element_by_id('streamurl', webpage) or
get_element_by_id('streamuri', webpage) or
get_element_by_id('streamurj', webpage) or
self._search_regex(
(r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<',
r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)'), webpage,
'stream URL'))
video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id

View File

@@ -33,7 +33,7 @@ class PornHubIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:
(?:[^/]+\.)?pornhub\.com/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/
)
(?P<id>[\da-z]+)
@@ -264,7 +264,7 @@ class PornHubPlaylistBaseIE(InfoExtractor):
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
_VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/playlist/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.pornhub.com/playlist/4667351',
'info_dict': {
@@ -272,11 +272,14 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE):
'title': 'Nataly Hot',
},
'playlist_mincount': 2,
}, {
'url': 'https://de.pornhub.com/playlist/4667351',
'only_matching': True,
}]
class PornHubUserVideosIE(PornHubPlaylistBaseIE):
_VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/(?:user|channel)s/(?P<id>[^/]+)/videos'
_TESTS = [{
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
'info_dict': {
@@ -305,6 +308,9 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
# Most Viewed Videos
'url': 'https://www.pornhub.com/channels/povd/videos?o=vi',
'only_matching': True,
}, {
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -133,7 +133,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
(?:
prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia
)\.(?:de|at|ch)|
ran\.de|fem\.com|advopedia\.de|galileo\.tv/video
)
/(?P<id>.+)
'''
@@ -326,6 +326,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge',
'only_matching': True,
},
{
# geo restricted to Germany
'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden',
'only_matching': True,
},
{
'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel',
'only_matching': True,
@@ -343,7 +348,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
r'"clip_id"\s*:\s+"(\d+)"',
r'clipid: "(\d+)"',
r'clip[iI]d=(\d+)',
r'clip[iI][dD]\s*=\s*["\'](\d+)',
r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
r'proMamsId&quot;\s*:\s*&quot;(\d+)',
r'proMamsId"\s*:\s*"(\d+)',

View File

@@ -4,22 +4,30 @@ from __future__ import unicode_literals
import re
from .brightcove import BrightcoveNewIE
from ..compat import compat_str
from ..utils import (
try_get,
update_url_query,
)
class SevenPlusIE(BrightcoveNewIE):
IE_NAME = '7plus'
_VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P<path>[^?]+\?.*?\bepisode-id=(?P<id>[^&#]+))'
_TESTS = [{
'url': 'https://7plus.com.au/MTYS?episode-id=MTYS7-003',
'info_dict': {
'id': 'MTYS7-003',
'ext': 'mp4',
'title': 'S7 E3 - Wind Surf',
'description': 'md5:29c6a69f21accda7601278f81b46483d',
'uploader_id': '5303576322001',
'upload_date': '20171201',
'timestamp': 1512106377,
'series': 'Mighty Ships',
'season_number': 7,
'episode_number': 3,
'episode': 'Wind Surf',
},
'params': {
'format': 'bestvideo',
@@ -63,5 +71,14 @@ class SevenPlusIE(BrightcoveNewIE):
value = item.get(src_key)
if value:
info[dst_key] = value
info['series'] = try_get(
item, lambda x: x['seriesLogo']['name'], compat_str)
mobj = re.search(r'^S(\d+)\s+E(\d+)\s+-\s+(.+)$', info['title'])
if mobj:
info.update({
'season_number': int(mobj.group(1)),
'episode_number': int(mobj.group(2)),
'episode': mobj.group(3),
})
return info

View File

@@ -75,6 +75,9 @@ class SteamIE(InfoExtractor):
gameID = m.group('gameID')
playlist_id = gameID
videourl = self._VIDEO_PAGE_TEMPLATE % playlist_id
self._set_cookie('steampowered.com', 'mature_content', '1')
webpage = self._download_webpage(videourl, playlist_id)
if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:

View File

@@ -4,11 +4,17 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import (
determine_ext,
dict_get,
int_or_none,
try_get,
urljoin,
compat_str,
)
@@ -122,7 +128,11 @@ class SVTIE(SVTBaseIE):
return info_dict
class SVTPlayBaseIE(SVTBaseIE):
_SVTPLAY_RE = r'root\s*\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P<json>{.+?})\s*;\s*\n'
class SVTPlayIE(SVTPlayBaseIE):
IE_DESC = 'SVT Play and Öppet arkiv'
_VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp)/(?P<id>[0-9]+)'
_TESTS = [{
@@ -157,8 +167,8 @@ class SVTPlayIE(SVTBaseIE):
data = self._parse_json(
self._search_regex(
self._SVTPLAY_RE, webpage, 'embedded data', default='{}',
group='json'),
video_id, fatal=False)
thumbnail = self._og_search_thumbnail(webpage)
@@ -189,3 +199,84 @@ class SVTPlayIE(SVTBaseIE):
r'\s*\|\s*.+?$', '',
info_dict.get('episode') or self._og_search_title(webpage))
return info_dict
class SVTSeriesIE(SVTPlayBaseIE):
_VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)'
_TESTS = [{
'url': 'https://www.svtplay.se/rederiet',
'info_dict': {
'id': 'rederiet',
'title': 'Rederiet',
'description': 'md5:505d491a58f4fcf6eb418ecab947e69e',
},
'playlist_mincount': 318,
}, {
'url': 'https://www.svtplay.se/rederiet?tab=sasong2',
'info_dict': {
'id': 'rederiet-sasong2',
'title': 'Rederiet - Säsong 2',
'description': 'md5:505d491a58f4fcf6eb418ecab947e69e',
},
'playlist_count': 12,
}]
@classmethod
def suitable(cls, url):
return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTSeriesIE, cls).suitable(url)
def _real_extract(self, url):
series_id = self._match_id(url)
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
season_slug = qs.get('tab', [None])[0]
if season_slug:
series_id += '-%s' % season_slug
webpage = self._download_webpage(
url, series_id, 'Downloading series page')
root = self._parse_json(
self._search_regex(
self._SVTPLAY_RE, webpage, 'content', group='json'),
series_id)
season_name = None
entries = []
for season in root['relatedVideoContent']['relatedVideosAccordion']:
if not isinstance(season, dict):
continue
if season_slug:
if season.get('slug') != season_slug:
continue
season_name = season.get('name')
videos = season.get('videos')
if not isinstance(videos, list):
continue
for video in videos:
content_url = video.get('contentUrl')
if not content_url or not isinstance(content_url, compat_str):
continue
entries.append(
self.url_result(
urljoin(url, content_url),
ie=SVTPlayIE.ie_key(),
video_title=video.get('title')
))
metadata = root.get('metaData')
if not isinstance(metadata, dict):
metadata = {}
title = metadata.get('title')
season_name = season_name or season_slug
if title and season_name:
title = '%s - %s' % (title, season_name)
elif season_slug:
title = season_slug
return self.playlist_result(
entries, series_id, title, metadata.get('description'))
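Season tabs are plain query parameters, so the series extractor derives its scope from the tab argument before walking the accordion data. The slug handling in isolation (using the Python 3 equivalents of the compat helpers):

    from urllib.parse import parse_qs, urlparse

    url = 'https://www.svtplay.se/rederiet?tab=sasong2'
    season_slug = parse_qs(urlparse(url).query).get('tab', [None])[0]
    series_id = 'rederiet' + ('-%s' % season_slug if season_slug else '')
    # -> 'rederiet-sasong2'; without ?tab=... the whole series is extracted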

View File

@@ -31,6 +31,12 @@ class Tube8IE(KeezMoviesIE):
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?tube8\.com/embed/(?:[^/]+/)+\d+)',
webpage)
def _real_extract(self, url):
webpage, info = self._extract_info(url)

View File

@@ -10,6 +10,7 @@ from ..utils import (
int_or_none,
parse_iso8601,
parse_duration,
try_get,
update_url_query,
)
@@ -58,14 +59,22 @@ class TVNowBaseIE(InfoExtractor):
duration = parse_duration(info.get('duration'))
f = info.get('format', {})
thumbnails = [{
'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id,
}]
thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
if thumbnail:
thumbnails.append({
'url': thumbnail,
})
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnails': thumbnails,
'timestamp': timestamp,
'duration': duration,
'series': f.get('title'),
@@ -77,7 +86,12 @@ class TVNowBaseIE(InfoExtractor):
class TVNowIE(TVNowBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
(?P<show_id>[^/]+)/
(?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
'''
_TESTS = [{
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
@@ -99,27 +113,30 @@ class TVNowIE(TVNowBaseIE):
}, {
# rtl2
'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
'only_matching': True,
}, {
# rtlnitro
'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
'only_matching': True,
}, {
# superrtl
'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
'only_matching': True,
}, {
# ntv
'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
'only_matching': True,
}, {
# vox
'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
'only_matching': True,
}, {
# rtlplus
'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
'only_matching': True,
}, {
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -133,8 +150,30 @@ class TVNowIE(TVNowBaseIE):
return self._extract_video(info, display_id)
class TVNowListBaseIE(TVNowBaseIE):
_SHOW_VALID_URL = r'''(?x)
(?P<base_url>
https?://
(?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
(?P<show_id>[^/]+)
)
'''
def _extract_list_info(self, display_id, show_id):
fields = list(self._SHOW_FIELDS)
fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
fields.extend(
'formatTabs.formatTabPages.container.movies.%s' % field
for field in self._VIDEO_FIELDS)
return self._call_api(
'formats/seo', display_id, query={
'fields': ','.join(fields),
'name': show_id + '.php'
})
class TVNowListIE(TVNowListBaseIE):
_VALID_URL = r'%s/(?:list|jahr)/(?P<id>[^?\#&]+)' % TVNowListBaseIE._SHOW_VALID_URL
_SHOW_FIELDS = ('title', )
_SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
@@ -147,38 +186,94 @@ class TVNowListIE(TVNowBaseIE):
'title': '30 Minuten Deutschland - Aktuell',
},
'playlist_mincount': 1,
}, {
'url': 'https://www.tvnow.de/vox/ab-ins-beet/list/staffel-14',
'only_matching': True,
}, {
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/2018/3',
'only_matching': True,
}] }]
@classmethod
def suitable(cls, url):
return (False if TVNowIE.suitable(url)
else super(TVNowListIE, cls).suitable(url))
def _real_extract(self, url):
base_url, show_id, season_id = re.match(self._VALID_URL, url).groups()
list_info = self._extract_list_info(season_id, show_id)
season = next(
season for season in list_info['formatTabs']['items']
if season.get('seoheadline') == season_id)
title = list_info.get('title')
headline = season.get('headline')
if title and headline:
title = '%s - %s' % (title, headline)
else:
title = headline or title
entries = []
for container in season['formatTabPages']['items']:
items = try_get(
container, lambda x: x['container']['movies']['items'],
list) or []
for info in items:
seo_url = info.get('seoUrl')
if not seo_url:
continue
video_id = info.get('id')
entries.append(self.url_result(
'%s/%s/player' % (base_url, seo_url), TVNowIE.ie_key(),
compat_str(video_id) if video_id else None))
return self.playlist_result(
entries, compat_str(season.get('id') or season_id), title)
class TVNowShowIE(TVNowListBaseIE):
_VALID_URL = TVNowListBaseIE._SHOW_VALID_URL
_SHOW_FIELDS = ('id', 'title', )
_SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
_VIDEO_FIELDS = ()
_TESTS = [{
'url': 'https://www.tvnow.at/vox/ab-ins-beet',
'info_dict': {
'id': 'ab-ins-beet',
'title': 'Ab ins Beet!',
},
'playlist_mincount': 7,
}, {
'url': 'https://www.tvnow.at/vox/ab-ins-beet/list',
'only_matching': True,
}, {
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return (False if TVNowIE.suitable(url) or TVNowListIE.suitable(url)
else super(TVNowShowIE, cls).suitable(url))
def _real_extract(self, url):
base_url, show_id = re.match(self._VALID_URL, url).groups()
list_info = self._extract_list_info(show_id, show_id)
entries = []
for season_info in list_info['formatTabs']['items']:
season_url = season_info.get('seoheadline')
if not season_url:
continue
season_id = season_info.get('id')
entries.append(self.url_result(
'%s/list/%s' % (base_url, season_url), TVNowListIE.ie_key(),
compat_str(season_id) if season_id else None,
season_info.get('headline')))
return self.playlist_result(entries, show_id, list_info.get('title'))
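Both list and show extraction now funnel through _extract_list_info, which queries the formats/seo endpoint for exactly the fields each subclass declares. A sketch of how the fields parameter is assembled (the _VIDEO_FIELDS subset here is a guess for illustration):

    _SHOW_FIELDS = ('id', 'title')
    _SEASON_FIELDS = ('id', 'headline', 'seoheadline')
    _VIDEO_FIELDS = ('id', 'seoUrl')  # hypothetical subset

    fields = list(_SHOW_FIELDS)
    fields.extend('formatTabs.%s' % f for f in _SEASON_FIELDS)
    fields.extend(
        'formatTabs.formatTabPages.container.movies.%s' % f
        for f in _VIDEO_FIELDS)
    query = {'fields': ','.join(fields), 'name': 'ab-ins-beet.php'}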

View File

@@ -14,7 +14,7 @@ from ..utils import (
class TwentyFourVideoIE(InfoExtractor):
IE_NAME = '24video'
_VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sexy?|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.24video.net/video/view/1044982',

View File

@@ -28,7 +28,7 @@ from ..utils import (
class TwitchBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:(?:www|go|m)\.)?twitch\.tv'
_API_BASE = 'https://api.twitch.tv'
_USHER_BASE = 'https://usher.ttvnw.net'
@@ -226,7 +226,7 @@ class TwitchVodIE(TwitchItemBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:
(?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v|videos)/|
player\.twitch\.tv/\?.*?\bvideo=v
)
(?P<id>\d+)
@@ -279,6 +279,9 @@ class TwitchVodIE(TwitchItemBaseIE):
}, {
'url': 'https://www.twitch.tv/videos/6528877',
'only_matching': True,
}, {
'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -390,14 +393,17 @@ class TwitchProfileIE(TwitchPlaylistBaseIE):
_VALID_URL = r'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
_PLAYLIST_TYPE = 'profile'
_TESTS = [{
'url': 'http://www.twitch.tv/vanillatv/profile',
'info_dict': {
'id': 'vanillatv',
'title': 'VanillaTV',
},
'playlist_mincount': 412,
}, {
'url': 'http://m.twitch.tv/vanillatv/profile',
'only_matching': True,
}]
class TwitchVideosBaseIE(TwitchPlaylistBaseIE):
@@ -411,14 +417,17 @@ class TwitchAllVideosIE(TwitchVideosBaseIE):
_PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive,upload,highlight'
_PLAYLIST_TYPE = 'all videos'
_TESTS = [{
'url': 'https://www.twitch.tv/spamfish/videos/all',
'info_dict': {
'id': 'spamfish',
'title': 'Spamfish',
},
'playlist_mincount': 869,
}, {
'url': 'https://m.twitch.tv/spamfish/videos/all',
'only_matching': True,
}]
class TwitchUploadsIE(TwitchVideosBaseIE):
@@ -427,14 +436,17 @@ class TwitchUploadsIE(TwitchVideosBaseIE):
_PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'upload'
_PLAYLIST_TYPE = 'uploads'
_TESTS = [{
'url': 'https://www.twitch.tv/spamfish/videos/uploads',
'info_dict': {
'id': 'spamfish',
'title': 'Spamfish',
},
'playlist_mincount': 0,
}, {
'url': 'https://m.twitch.tv/spamfish/videos/uploads',
'only_matching': True,
}]
class TwitchPastBroadcastsIE(TwitchVideosBaseIE):
@@ -443,14 +455,17 @@ class TwitchPastBroadcastsIE(TwitchVideosBaseIE):
_PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive'
_PLAYLIST_TYPE = 'past broadcasts'
_TESTS = [{
'url': 'https://www.twitch.tv/spamfish/videos/past-broadcasts',
'info_dict': {
'id': 'spamfish',
'title': 'Spamfish',
},
'playlist_mincount': 0,
}, {
'url': 'https://m.twitch.tv/spamfish/videos/past-broadcasts',
'only_matching': True,
}]
class TwitchHighlightsIE(TwitchVideosBaseIE):
@@ -459,14 +474,17 @@ class TwitchHighlightsIE(TwitchVideosBaseIE):
_PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'highlight'
_PLAYLIST_TYPE = 'highlights'
_TESTS = [{
'url': 'https://www.twitch.tv/spamfish/videos/highlights',
'info_dict': {
'id': 'spamfish',
'title': 'Spamfish',
},
'playlist_mincount': 805,
}, {
'url': 'https://m.twitch.tv/spamfish/videos/highlights',
'only_matching': True,
}]
class TwitchStreamIE(TwitchBaseIE):
@@ -474,7 +492,7 @@ class TwitchStreamIE(TwitchBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:
(?:(?:www|go|m)\.)?twitch\.tv/|
player\.twitch\.tv/\?.*?\bchannel=
)
(?P<id>[^/#?]+)
@@ -508,6 +526,9 @@ class TwitchStreamIE(TwitchBaseIE):
}, {
'url': 'https://go.twitch.tv/food',
'only_matching': True,
}, {
'url': 'https://m.twitch.tv/food',
'only_matching': True,
}]
@classmethod

View File

@@ -16,7 +16,7 @@ from ..utils import (
class VideaIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
videa(?:kid)?\.hu/
(?:
videok/(?:[^/]+/)*[^?#&]+-|
player\?.*?\bv=|
@@ -31,7 +31,7 @@ class VideaIE(InfoExtractor):
'id': '8YfIAjxwWGwT8HVQ',
'ext': 'mp4',
'title': 'Az őrült kígyász 285 kígyót enged szabadon',
'thumbnail': r're:^https?://.*',
'duration': 21,
},
}, {
@@ -43,6 +43,15 @@ class VideaIE(InfoExtractor):
}, {
'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
'only_matching': True,
}, {
'url': 'https://videakid.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
'only_matching': True,
}, {
'url': 'https://videakid.hu/player?v=8YfIAjxwWGwT8HVQ',
'only_matching': True,
}, {
'url': 'https://videakid.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
'only_matching': True,
}]
@staticmethod

View File

@@ -12,7 +12,7 @@ import time
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlencode,
compat_urllib_parse,
)
from ..utils import (
float_or_none,
@@ -39,11 +39,11 @@ class VRVBaseIE(InfoExtractor):
data = json.dumps(data).encode()
headers['Content-Type'] = 'application/json'
method = 'POST' if data else 'GET'
base_string = '&'.join([method, compat_urllib_parse.quote(base_url, ''), compat_urllib_parse.quote(encoded_query, '')])
oauth_signature = base64.b64encode(hmac.new(
(self._API_PARAMS['oAuthSecret'] + '&').encode('ascii'),
base_string.encode(), hashlib.sha1).digest()).decode()
encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '')
return self._download_json(
'?'.join([base_url, encoded_query]), video_id,
note='Downloading %s JSON metadata' % note, headers=headers, data=data)
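The signing scheme is standard OAuth-style HMAC-SHA1: percent-encode the base URL and the encoded query, join them with the HTTP method, and sign with the consumer secret plus a trailing '&'. A standalone sketch (the secret and query values are placeholders):

    import base64
    import hashlib
    import hmac
    from urllib.parse import quote

    def oauth_signature(method, base_url, encoded_query, oauth_secret):
        # '&'-joined method/URL/query signed with HMAC-SHA1, as above.
        base_string = '&'.join(
            [method, quote(base_url, ''), quote(encoded_query, '')])
        digest = hmac.new((oauth_secret + '&').encode('ascii'),
                          base_string.encode(), hashlib.sha1).digest()
        return base64.b64encode(digest).decode()

    sig = oauth_signature('GET', 'https://api.vrv.co/core/index',
                          'oauth_consumer_key=key', 'secret')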

View File

@@ -118,6 +118,15 @@ class XFileShareIE(InfoExtractor):
'only_matching': True
}]
@staticmethod
def _extract_urls(webpage):
return [
mobj.group('url')
for mobj in re.finditer(
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1'
% '|'.join(site for site in list(zip(*XFileShareIE._SITES))[0]),
webpage)]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
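_extract_urls builds a single alternation from the first element of every _SITES tuple (the entries are already regex fragments), so one scan of the page covers every supported host. A sketch with a sample site list:

    import re

    _SITES = ((r'openload\.(?:co|io)', 'Openload'), (r'rapidvideo\.ws', 'RapidVideo.WS'))

    pattern = (r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)'
               r'/embed-[0-9a-zA-Z]+.*?)\1'
               % '|'.join(site for site in list(zip(*_SITES))[0]))
    html = '<iframe src="https://openload.co/embed-abcdef123456"></iframe>'
    urls = [m.group('url') for m in re.finditer(pattern, html)]
    # -> ['https://openload.co/embed-abcdef123456']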

View File

@@ -58,7 +58,9 @@ class XVideosIE(InfoExtractor):
group='title') or self._og_search_title(webpage)
thumbnail = self._search_regex(
(r'setThumbUrl\(\s*(["\'])(?P<thumbnail>(?:(?!\1).)+)\1',
r'url_bigthumb=(?P<thumbnail>.+?)&amp'),
webpage, 'thumbnail', fatal=False, group='thumbnail')
duration = int_or_none(self._og_search_property(
'duration', webpage, default=None)) or parse_duration(
self._search_regex(

View File

@@ -154,7 +154,7 @@ class YoukuIE(InfoExtractor):
# request basic data
basic_data_params = {
'vid': video_id,
'ccode': '0590',
'client_ip': '192.168.1.1',
'utid': cna,
'client_ts': time.time() / 1000,

View File

@@ -676,7 +676,8 @@ def parseOpts(overrideArguments=None):
filesystem.add_option(
'-a', '--batch-file',
dest='batchfile', metavar='FILE',
help="File containing URLs to download ('-' for stdin), one URL per line. "
"Lines starting with '#', ';' or ']' are considered as comments and ignored.")
filesystem.add_option(
'--id', default=False,
action='store_true', dest='useid', help='Use only video ID in file name')
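With the expanded help text, a file passed via -a/--batch-file documents its own format: one URL per line, with '#', ';' or ']' starting comment lines that are ignored. A hypothetical batch file:

    # music videos
    https://www.youtube.com/watch?v=BaW_jenozKc
    ; disabled for now
    ]http://example.com/skipped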

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2018.04.09'