mirror of
https://github.com/l1ving/youtube-dl
synced 2025-03-12 14:10:29 +08:00
Merge remote-tracking branch 'upstream/master' into myversion
This commit is contained in:
commit
f0fe336307
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.03.14*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.03.14**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.04.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.04.09**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2018.03.14
|
||||
[debug] youtube-dl version 2018.04.09
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
81
ChangeLog
81
ChangeLog
@ -1,3 +1,84 @@
|
||||
version 2018.04.09
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Do not save/restore console title while simulate (#16103)
|
||||
* [extractor/common] Relax JSON-LD context check (#16006)
|
||||
|
||||
Extractors
|
||||
+ [generic] Add support for tube8 embeds
|
||||
+ [generic] Add support for share-videos.se embeds (#16089, #16115)
|
||||
* [odnoklassniki] Extend URL regular expression (#16081)
|
||||
* [steam] Bypass mature content check (#16113)
|
||||
+ [acast] Extract more metadata
|
||||
* [acast] Fix extraction (#16118)
|
||||
* [instagram:user] Fix extraction (#16119)
|
||||
* [drtuber] Fix title extraction (#16107, #16108)
|
||||
* [liveleak] Extend URL regular expression (#16117)
|
||||
+ [openload] Add support for oload.xyz
|
||||
* [openload] Relax stream URL regular expression
|
||||
* [openload] Fix extraction (#16099)
|
||||
+ [svtplay:series] Add support for season URLs
|
||||
+ [svtplay:series] Add support for series (#11130, #16059)
|
||||
|
||||
|
||||
version 2018.04.03
|
||||
|
||||
Extractors
|
||||
+ [tvnow] Add support for shows (#15837)
|
||||
* [dramafever] Fix authentication (#16067)
|
||||
* [afreecatv] Use partial view only when necessary (#14450)
|
||||
+ [afreecatv] Add support for authentication (#14450)
|
||||
+ [nationalgeographic] Add support for new URL schema (#16001, #16054)
|
||||
* [xvideos] Fix thumbnail extraction (#15978, #15979)
|
||||
* [medialaan] Fix vod id (#16038)
|
||||
+ [openload] Add support for oload.site (#16039)
|
||||
* [naver] Fix extraction (#16029)
|
||||
* [dramafever] Partially switch to API v5 (#16026)
|
||||
* [abc:iview] Unescape title and series meta fields (#15994)
|
||||
* [videa] Extend URL regular expression (#16003)
|
||||
|
||||
|
||||
version 2018.03.26.1
|
||||
|
||||
Core
|
||||
+ [downloader/external] Add elapsed time to progress hook (#10876)
|
||||
* [downloader/external,fragment] Fix download finalization when writing file
|
||||
to stdout (#10809, #10876, #15799)
|
||||
|
||||
Extractors
|
||||
* [vrv] Fix extraction on python2 (#15928)
|
||||
* [afreecatv] Update referrer (#15947)
|
||||
+ [24video] Add support for 24video.sexy (#15973)
|
||||
* [crackle] Bypass geo restriction
|
||||
* [crackle] Fix extraction (#15969)
|
||||
+ [lenta] Add support for lenta.ru (#15953)
|
||||
+ [instagram:user] Add pagination (#15934)
|
||||
* [youku] Update ccode (#15939)
|
||||
* [libsyn] Adapt to new page structure
|
||||
|
||||
|
||||
version 2018.03.20
|
||||
|
||||
Core
|
||||
* [extractor/common] Improve thumbnail extraction for HTML5 entries
|
||||
* Generalize XML manifest processing code and improve XSPF parsing
|
||||
+ [extractor/common] Add _download_xml_handle
|
||||
+ [extractor/common] Add support for relative URIs in _parse_xspf (#15794)
|
||||
|
||||
Extractors
|
||||
+ [7plus] Extract series metadata (#15862, #15906)
|
||||
* [9now] Bypass geo restriction (#15920)
|
||||
* [cbs] Skip unavailable assets (#13490, #13506, #15776)
|
||||
+ [canalc2] Add support for HTML5 videos (#15916, #15919)
|
||||
+ [ceskatelevize] Add support for iframe embeds (#15918)
|
||||
+ [prosiebensat1] Add support for galileo.tv (#15894)
|
||||
+ [generic] Add support for xfileshare embeds (#15879)
|
||||
* [bilibili] Switch to v2 playurl API
|
||||
* [bilibili] Fix and improve extraction (#15048, #15430, #15622, #15863)
|
||||
* [heise] Improve extraction (#15496, #15784, #15026)
|
||||
* [instagram] Fix user videos extraction (#15858)
|
||||
|
||||
|
||||
version 2018.03.14
|
||||
|
||||
Extractors
|
||||
|
@ -223,7 +223,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
|
||||
## Filesystem Options:
|
||||
-a, --batch-file FILE File containing URLs to download ('-' for
|
||||
stdin)
|
||||
stdin), one URL per line. Lines starting
|
||||
with '#', ';' or ']' are considered as
|
||||
comments and ignored.
|
||||
--id Use only video ID in file name
|
||||
-o, --output TEMPLATE Output filename template, see the "OUTPUT
|
||||
TEMPLATE" for all the info
|
||||
|
@ -419,6 +419,7 @@
|
||||
- **Lecture2Go**
|
||||
- **LEGO**
|
||||
- **Lemonde**
|
||||
- **Lenta**
|
||||
- **LePlaylist**
|
||||
- **LetvCloud**: 乐视云
|
||||
- **Libsyn**
|
||||
@ -803,6 +804,7 @@
|
||||
- **SunPorno**
|
||||
- **SVT**
|
||||
- **SVTPlay**: SVT Play and Öppet arkiv
|
||||
- **SVTSeries**
|
||||
- **SWRMediathek**
|
||||
- **Syfy**
|
||||
- **SztvHu**
|
||||
@ -886,6 +888,7 @@
|
||||
- **TVNoe**
|
||||
- **TVNow**
|
||||
- **TVNowList**
|
||||
- **TVNowShow**
|
||||
- **tvp**: Telewizja Polska
|
||||
- **tvp:embed**: Telewizja Polska
|
||||
- **tvp:series**
|
||||
|
@ -694,6 +694,55 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||
self.ie._sort_formats(formats)
|
||||
expect_value(self, formats, expected_formats, None)
|
||||
|
||||
def test_parse_xspf(self):
|
||||
_TEST_CASES = [
|
||||
(
|
||||
'foo_xspf',
|
||||
'https://example.org/src/foo_xspf.xspf',
|
||||
[{
|
||||
'id': 'foo_xspf',
|
||||
'title': 'Pandemonium',
|
||||
'description': 'Visit http://bigbrother404.bandcamp.com',
|
||||
'duration': 202.416,
|
||||
'formats': [{
|
||||
'manifest_url': 'https://example.org/src/foo_xspf.xspf',
|
||||
'url': 'https://example.org/src/cd1/track%201.mp3',
|
||||
}],
|
||||
}, {
|
||||
'id': 'foo_xspf',
|
||||
'title': 'Final Cartridge (Nichico Twelve Remix)',
|
||||
'description': 'Visit http://bigbrother404.bandcamp.com',
|
||||
'duration': 255.857,
|
||||
'formats': [{
|
||||
'manifest_url': 'https://example.org/src/foo_xspf.xspf',
|
||||
'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3',
|
||||
}],
|
||||
}, {
|
||||
'id': 'foo_xspf',
|
||||
'title': 'Rebuilding Nightingale',
|
||||
'description': 'Visit http://bigbrother404.bandcamp.com',
|
||||
'duration': 287.915,
|
||||
'formats': [{
|
||||
'manifest_url': 'https://example.org/src/foo_xspf.xspf',
|
||||
'url': 'https://example.org/src/track3.mp3',
|
||||
}, {
|
||||
'manifest_url': 'https://example.org/src/foo_xspf.xspf',
|
||||
'url': 'https://example.com/track3.mp3',
|
||||
}]
|
||||
}]
|
||||
),
|
||||
]
|
||||
|
||||
for xspf_file, xspf_url, expected_entries in _TEST_CASES:
|
||||
with io.open('./test/testdata/xspf/%s.xspf' % xspf_file,
|
||||
mode='r', encoding='utf-8') as f:
|
||||
entries = self.ie._parse_xspf(
|
||||
compat_etree_fromstring(f.read().encode('utf-8')),
|
||||
xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url)
|
||||
expect_value(self, entries, expected_entries, None)
|
||||
for i in range(len(entries)):
|
||||
expect_dict(self, entries[i], expected_entries[i])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
34
test/testdata/xspf/foo_xspf.xspf
vendored
Normal file
34
test/testdata/xspf/foo_xspf.xspf
vendored
Normal file
@ -0,0 +1,34 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<playlist version="1" xmlns="http://xspf.org/ns/0/">
|
||||
<date>2018-03-09T18:01:43Z</date>
|
||||
<trackList>
|
||||
<track>
|
||||
<location>cd1/track%201.mp3</location>
|
||||
<title>Pandemonium</title>
|
||||
<creator>Foilverb</creator>
|
||||
<annotation>Visit http://bigbrother404.bandcamp.com</annotation>
|
||||
<album>Pandemonium EP</album>
|
||||
<trackNum>1</trackNum>
|
||||
<duration>202416</duration>
|
||||
</track>
|
||||
<track>
|
||||
<location>../%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3</location>
|
||||
<title>Final Cartridge (Nichico Twelve Remix)</title>
|
||||
<annotation>Visit http://bigbrother404.bandcamp.com</annotation>
|
||||
<creator>Foilverb</creator>
|
||||
<album>Pandemonium EP</album>
|
||||
<trackNum>2</trackNum>
|
||||
<duration>255857</duration>
|
||||
</track>
|
||||
<track>
|
||||
<location>track3.mp3</location>
|
||||
<location>https://example.com/track3.mp3</location>
|
||||
<title>Rebuilding Nightingale</title>
|
||||
<annotation>Visit http://bigbrother404.bandcamp.com</annotation>
|
||||
<creator>Foilverb</creator>
|
||||
<album>Pandemonium EP</album>
|
||||
<trackNum>3</trackNum>
|
||||
<duration>287915</duration>
|
||||
</track>
|
||||
</trackList>
|
||||
</playlist>
|
@ -532,6 +532,8 @@ class YoutubeDL(object):
|
||||
def save_console_title(self):
|
||||
if not self.params.get('consoletitle', False):
|
||||
return
|
||||
if self.params.get('simulate', False):
|
||||
return
|
||||
if compat_os_name != 'nt' and 'TERM' in os.environ:
|
||||
# Save the title on stack
|
||||
self._write_string('\033[22;0t', self._screen_file)
|
||||
@ -539,6 +541,8 @@ class YoutubeDL(object):
|
||||
def restore_console_title(self):
|
||||
if not self.params.get('consoletitle', False):
|
||||
return
|
||||
if self.params.get('simulate', False):
|
||||
return
|
||||
if compat_os_name != 'nt' and 'TERM' in os.environ:
|
||||
# Restore the title from stack
|
||||
self._write_string('\033[23;0t', self._screen_file)
|
||||
|
@ -249,12 +249,13 @@ class FileDownloader(object):
|
||||
if self.params.get('noprogress', False):
|
||||
self.to_screen('[download] Download completed')
|
||||
else:
|
||||
s['_total_bytes_str'] = format_bytes(s['total_bytes'])
|
||||
msg_template = '100%%'
|
||||
if s.get('total_bytes') is not None:
|
||||
s['_total_bytes_str'] = format_bytes(s['total_bytes'])
|
||||
msg_template += ' of %(_total_bytes_str)s'
|
||||
if s.get('elapsed') is not None:
|
||||
s['_elapsed_str'] = self.format_seconds(s['elapsed'])
|
||||
msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s'
|
||||
else:
|
||||
msg_template = '100%% of %(_total_bytes_str)s'
|
||||
msg_template += ' in %(_elapsed_str)s'
|
||||
self._report_progress_status(
|
||||
msg_template % s, is_last_line=True)
|
||||
|
||||
|
@ -1,9 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os.path
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..compat import (
|
||||
@ -30,6 +31,7 @@ class ExternalFD(FileDownloader):
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
try:
|
||||
started = time.time()
|
||||
retval = self._call_downloader(tmpfilename, info_dict)
|
||||
except KeyboardInterrupt:
|
||||
if not info_dict.get('is_live'):
|
||||
@ -41,15 +43,20 @@ class ExternalFD(FileDownloader):
|
||||
self.to_screen('[%s] Interrupted by user' % self.get_basename())
|
||||
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
status = {
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
'elapsed': time.time() - started,
|
||||
}
|
||||
if filename != '-':
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
status.update({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
})
|
||||
self._hook_progress(status)
|
||||
return True
|
||||
else:
|
||||
self.to_stderr('\n')
|
||||
|
@ -241,12 +241,16 @@ class FragmentFD(FileDownloader):
|
||||
if os.path.isfile(ytdl_filename):
|
||||
os.remove(ytdl_filename)
|
||||
elapsed = time.time() - ctx['started']
|
||||
self.try_rename(ctx['tmpfilename'], ctx['filename'])
|
||||
fsize = os.path.getsize(encodeFilename(ctx['filename']))
|
||||
|
||||
if ctx['tmpfilename'] == '-':
|
||||
downloaded_bytes = ctx['complete_frags_downloaded_bytes']
|
||||
else:
|
||||
self.try_rename(ctx['tmpfilename'], ctx['filename'])
|
||||
downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
'downloaded_bytes': downloaded_bytes,
|
||||
'total_bytes': downloaded_bytes,
|
||||
'filename': ctx['filename'],
|
||||
'status': 'finished',
|
||||
'elapsed': elapsed,
|
||||
|
@ -13,6 +13,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
@ -109,16 +110,17 @@ class ABCIViewIE(InfoExtractor):
|
||||
|
||||
# ABC iview programs are normally available for 14 days only.
|
||||
_TESTS = [{
|
||||
'url': 'http://iview.abc.net.au/programs/call-the-midwife/ZW0898A003S00',
|
||||
'url': 'https://iview.abc.net.au/programs/ben-and-hollys-little-kingdom/ZY9247A021S00',
|
||||
'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
|
||||
'info_dict': {
|
||||
'id': 'ZW0898A003S00',
|
||||
'id': 'ZY9247A021S00',
|
||||
'ext': 'mp4',
|
||||
'title': 'Series 5 Ep 3',
|
||||
'description': 'md5:e0ef7d4f92055b86c4f33611f180ed79',
|
||||
'upload_date': '20171228',
|
||||
'uploader_id': 'abc1',
|
||||
'timestamp': 1514499187,
|
||||
'title': "Gaston's Visit",
|
||||
'series': "Ben And Holly's Little Kingdom",
|
||||
'description': 'md5:18db170ad71cf161e006a4c688e33155',
|
||||
'upload_date': '20180318',
|
||||
'uploader_id': 'abc4kids',
|
||||
'timestamp': 1521400959,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -169,12 +171,12 @@ class ABCIViewIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': unescapeHTML(title),
|
||||
'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
|
||||
'thumbnail': self._html_search_meta(['og:image', 'twitter:image:src'], webpage),
|
||||
'duration': int_or_none(video_params.get('eventDuration')),
|
||||
'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
|
||||
'series': video_params.get('seriesTitle'),
|
||||
'series': unescapeHTML(video_params.get('seriesTitle')),
|
||||
'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
|
||||
'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage, default=None)),
|
||||
'episode': self._html_search_meta('episode_title', webpage, default=None),
|
||||
|
@ -7,7 +7,9 @@ import functools
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
OnDemandPagedList,
|
||||
)
|
||||
@ -24,40 +26,58 @@ class ACastIE(InfoExtractor):
|
||||
'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
|
||||
'ext': 'mp3',
|
||||
'title': '"Where Are You?": Taipei 101, Taiwan',
|
||||
'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
|
||||
'timestamp': 1196172000,
|
||||
'upload_date': '20071127',
|
||||
'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
|
||||
'duration': 211,
|
||||
'creator': 'Concierge',
|
||||
'series': 'Condé Nast Traveler Podcast',
|
||||
'episode': '"Where Are You?": Taipei 101, Taiwan',
|
||||
}
|
||||
}, {
|
||||
# test with multiple blings
|
||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||
'md5': 'e87d5b8516cd04c0d81b6ee1caca28d0',
|
||||
'md5': 'a02393c74f3bdb1801c3ec2695577ce0',
|
||||
'info_dict': {
|
||||
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'ext': 'mp3',
|
||||
'title': '2. Raggarmordet - Röster ur det förflutna',
|
||||
'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
|
||||
'timestamp': 1477346700,
|
||||
'upload_date': '20161024',
|
||||
'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
|
||||
'duration': 2766,
|
||||
'duration': 2766.602563,
|
||||
'creator': 'Anton Berg & Martin Johnson',
|
||||
'series': 'Spår',
|
||||
'episode': '2. Raggarmordet - Röster ur det förflutna',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel, display_id = re.match(self._VALID_URL, url).groups()
|
||||
s = self._download_json(
|
||||
'https://play-api.acast.com/stitch/%s/%s' % (channel, display_id),
|
||||
display_id)['result']
|
||||
media_url = s['url']
|
||||
cast_data = self._download_json(
|
||||
'https://play-api.acast.com/splash/%s/%s' % (channel, display_id), display_id)
|
||||
e = cast_data['result']['episode']
|
||||
'https://play-api.acast.com/splash/%s/%s' % (channel, display_id),
|
||||
display_id)['result']
|
||||
e = cast_data['episode']
|
||||
title = e['name']
|
||||
return {
|
||||
'id': compat_str(e['id']),
|
||||
'display_id': display_id,
|
||||
'url': e['mediaUrl'],
|
||||
'title': e['name'],
|
||||
'description': e.get('description'),
|
||||
'url': media_url,
|
||||
'title': title,
|
||||
'description': e.get('description') or e.get('summary'),
|
||||
'thumbnail': e.get('image'),
|
||||
'timestamp': unified_timestamp(e.get('publishingDate')),
|
||||
'duration': int_or_none(e.get('duration')),
|
||||
'duration': float_or_none(s.get('duration') or e.get('duration')),
|
||||
'filesize': int_or_none(e.get('contentLength')),
|
||||
'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str),
|
||||
'series': try_get(cast_data, lambda x: x['show']['name'], compat_str),
|
||||
'season_number': int_or_none(e.get('seasonNumber')),
|
||||
'episode': title,
|
||||
'episode_number': int_or_none(e.get('episodeNumber')),
|
||||
}
|
||||
|
||||
|
||||
|
@ -9,6 +9,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
@ -28,6 +29,7 @@ class AfreecaTVIE(InfoExtractor):
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_NETRC_MACHINE = 'afreecatv'
|
||||
_TESTS = [{
|
||||
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
|
||||
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
|
||||
@ -139,22 +141,22 @@ class AfreecaTVIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# adult video
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/26542731',
|
||||
# PARTIAL_ADULT
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/32028439',
|
||||
'info_dict': {
|
||||
'id': '20171001_F1AE1711_196617479_1',
|
||||
'id': '20180327_27901457_202289533_1',
|
||||
'ext': 'mp4',
|
||||
'title': '[생]서아 초심 찾기 방송 (part 1)',
|
||||
'title': '[생]빨개요♥ (part 1)',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'BJ서아',
|
||||
'uploader': '[SA]서아',
|
||||
'uploader_id': 'bjdyrksu',
|
||||
'upload_date': '20171001',
|
||||
'duration': 3600,
|
||||
'age_limit': 18,
|
||||
'upload_date': '20180327',
|
||||
'duration': 3601,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['adult content'],
|
||||
}, {
|
||||
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
|
||||
'only_matching': True,
|
||||
@ -172,6 +174,51 @@ class AfreecaTVIE(InfoExtractor):
|
||||
video_key['part'] = int(m.group('part'))
|
||||
return video_key
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_form = {
|
||||
'szWork': 'login',
|
||||
'szType': 'json',
|
||||
'szUid': username,
|
||||
'szPassword': password,
|
||||
'isSaveId': 'false',
|
||||
'szScriptVar': 'oLoginRet',
|
||||
'szAction': '',
|
||||
}
|
||||
|
||||
response = self._download_json(
|
||||
'https://login.afreecatv.com/app/LoginAction.php', None,
|
||||
'Logging in', data=urlencode_postdata(login_form))
|
||||
|
||||
_ERRORS = {
|
||||
-4: 'Your account has been suspended due to a violation of our terms and policies.',
|
||||
-5: 'https://member.afreecatv.com/app/user_delete_progress.php',
|
||||
-6: 'https://login.afreecatv.com/membership/changeMember.php',
|
||||
-8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
|
||||
-9: 'https://member.afreecatv.com/app/pop_login_block.php',
|
||||
-11: 'https://login.afreecatv.com/afreeca/second_login.php',
|
||||
-12: 'https://member.afreecatv.com/app/user_security.php',
|
||||
0: 'The username does not exist or you have entered the wrong password.',
|
||||
-1: 'The username does not exist or you have entered the wrong password.',
|
||||
-3: 'You have entered your username/password incorrectly.',
|
||||
-7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
|
||||
-10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
|
||||
-32008: 'You have failed to log in. Please contact our Help Center.',
|
||||
}
|
||||
|
||||
result = int_or_none(response.get('RESULT'))
|
||||
if result != 1:
|
||||
error = _ERRORS.get(result, 'You have failed to log in.')
|
||||
raise ExtractorError(
|
||||
'Unable to login: %s said: %s' % (self.IE_NAME, error),
|
||||
expected=True)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@ -188,21 +235,41 @@ class AfreecaTVIE(InfoExtractor):
|
||||
video_id = self._search_regex(
|
||||
r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)
|
||||
|
||||
video_xml = self._download_xml(
|
||||
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
|
||||
video_id, headers={
|
||||
'Referer': 'http://vod.afreecatv.com/embed.php',
|
||||
}, query={
|
||||
partial_view = False
|
||||
for _ in range(2):
|
||||
query = {
|
||||
'nTitleNo': video_id,
|
||||
'nStationNo': station_id,
|
||||
'nBbsNo': bbs_id,
|
||||
'partialView': 'SKIP_ADULT',
|
||||
})
|
||||
}
|
||||
if partial_view:
|
||||
query['partialView'] = 'SKIP_ADULT'
|
||||
video_xml = self._download_xml(
|
||||
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
|
||||
video_id, 'Downloading video info XML%s'
|
||||
% (' (skipping adult)' if partial_view else ''),
|
||||
video_id, headers={
|
||||
'Referer': url,
|
||||
}, query=query)
|
||||
|
||||
flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
|
||||
if flag and flag != 'SUCCEED':
|
||||
flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
|
||||
if flag and flag == 'SUCCEED':
|
||||
break
|
||||
if flag == 'PARTIAL_ADULT':
|
||||
self._downloader.report_warning(
|
||||
'In accordance with local laws and regulations, underage users are restricted from watching adult content. '
|
||||
'Only content suitable for all ages will be downloaded. '
|
||||
'Provide account credentials if you wish to download restricted content.')
|
||||
partial_view = True
|
||||
continue
|
||||
elif flag == 'ADULT':
|
||||
error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.'
|
||||
else:
|
||||
error = flag
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, flag), expected=True)
|
||||
'%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
else:
|
||||
raise ExtractorError('Unable to download video info')
|
||||
|
||||
video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
|
||||
if video_element is None or video_element.text is None:
|
||||
|
@ -117,9 +117,9 @@ class BiliBiliIE(InfoExtractor):
|
||||
r'cid(?:["\']:|=)(\d+)', webpage, 'cid',
|
||||
default=None
|
||||
) or compat_parse_qs(self._search_regex(
|
||||
[r'1EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
||||
r'1EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
|
||||
r'1<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
||||
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
||||
r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
|
||||
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
||||
webpage, 'player parameters'))['cid'][0]
|
||||
else:
|
||||
if 'no_bangumi_tip' not in smuggled_data:
|
||||
|
@ -31,6 +31,10 @@ class Canalc2IE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
'http://www.canalc2.tv/video/%s' % video_id, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.+?)</h3>',
|
||||
webpage, 'title')
|
||||
|
||||
formats = []
|
||||
for _, video_url in re.findall(r'file\s*=\s*(["\'])(.+?)\1', webpage):
|
||||
if video_url.startswith('rtmp://'):
|
||||
@ -49,17 +53,21 @@ class Canalc2IE(InfoExtractor):
|
||||
'url': video_url,
|
||||
'format_id': 'http',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.*?)</h3>', webpage, 'title')
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'id=["\']video_duree["\'][^>]*>([^<]+)',
|
||||
webpage, 'duration', fatal=False))
|
||||
if formats:
|
||||
info = {
|
||||
'formats': formats,
|
||||
}
|
||||
else:
|
||||
info = self._parse_html5_media_entries(url, webpage, url)[0]
|
||||
|
||||
return {
|
||||
self._sort_formats(info['formats'])
|
||||
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
'duration': parse_duration(self._search_regex(
|
||||
r'id=["\']video_duree["\'][^>]*>([^<]+)',
|
||||
webpage, 'duration', fatal=False)),
|
||||
})
|
||||
return info
|
||||
|
@ -5,7 +5,10 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
smuggle_url,
|
||||
@ -206,30 +209,48 @@ class CBCWatchBaseIE(InfoExtractor):
|
||||
|
||||
def _call_api(self, path, video_id):
|
||||
url = path if path.startswith('http') else self._API_BASE_URL + path
|
||||
result = self._download_xml(url, video_id, headers={
|
||||
'X-Clearleap-DeviceId': self._device_id,
|
||||
'X-Clearleap-DeviceToken': self._device_token,
|
||||
})
|
||||
for _ in range(2):
|
||||
try:
|
||||
result = self._download_xml(url, video_id, headers={
|
||||
'X-Clearleap-DeviceId': self._device_id,
|
||||
'X-Clearleap-DeviceToken': self._device_token,
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
# Device token has expired, re-acquiring device token
|
||||
self._register_device()
|
||||
continue
|
||||
raise
|
||||
error_message = xpath_text(result, 'userMessage') or xpath_text(result, 'systemMessage')
|
||||
if error_message:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message))
|
||||
return result
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._device_id or not self._device_token:
|
||||
device = self._downloader.cache.load('cbcwatch', 'device') or {}
|
||||
self._device_id, self._device_token = device.get('id'), device.get('token')
|
||||
if not self._device_id or not self._device_token:
|
||||
result = self._download_xml(
|
||||
self._API_BASE_URL + 'device/register',
|
||||
None, data=b'<device><type>web</type></device>')
|
||||
self._device_id = xpath_text(result, 'deviceId', fatal=True)
|
||||
self._device_token = xpath_text(result, 'deviceToken', fatal=True)
|
||||
self._downloader.cache.store(
|
||||
'cbcwatch', 'device', {
|
||||
'id': self._device_id,
|
||||
'token': self._device_token,
|
||||
})
|
||||
if self._valid_device_token():
|
||||
return
|
||||
device = self._downloader.cache.load('cbcwatch', 'device') or {}
|
||||
self._device_id, self._device_token = device.get('id'), device.get('token')
|
||||
if self._valid_device_token():
|
||||
return
|
||||
self._register_device()
|
||||
|
||||
def _valid_device_token(self):
|
||||
return self._device_id and self._device_token
|
||||
|
||||
def _register_device(self):
|
||||
self._device_id = self._device_token = None
|
||||
result = self._download_xml(
|
||||
self._API_BASE_URL + 'device/register',
|
||||
None, 'Acquiring device token',
|
||||
data=b'<device><type>web</type></device>')
|
||||
self._device_id = xpath_text(result, 'deviceId', fatal=True)
|
||||
self._device_token = xpath_text(result, 'deviceToken', fatal=True)
|
||||
self._downloader.cache.store(
|
||||
'cbcwatch', 'device', {
|
||||
'id': self._device_id,
|
||||
'token': self._device_token,
|
||||
})
|
||||
|
||||
def _parse_rss_feed(self, rss):
|
||||
channel = xpath_element(rss, 'channel', fatal=True)
|
||||
|
@ -2,6 +2,7 @@ from __future__ import unicode_literals
|
||||
|
||||
from .theplatform import ThePlatformFeedIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
find_xpath_attr,
|
||||
xpath_element,
|
||||
@ -61,6 +62,7 @@ class CBSIE(CBSBaseIE):
|
||||
asset_types = []
|
||||
subtitles = {}
|
||||
formats = []
|
||||
last_e = None
|
||||
for item in items_data.findall('.//item'):
|
||||
asset_type = xpath_text(item, 'assetType')
|
||||
if not asset_type or asset_type in asset_types:
|
||||
@ -74,11 +76,17 @@ class CBSIE(CBSBaseIE):
|
||||
query['formats'] = 'MPEG4,M3U'
|
||||
elif asset_type in ('RTMP', 'WIFI', '3G'):
|
||||
query['formats'] = 'MPEG4,FLV'
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
update_url_query(tp_release_url, query), content_id,
|
||||
'Downloading %s SMIL data' % asset_type)
|
||||
try:
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
update_url_query(tp_release_url, query), content_id,
|
||||
'Downloading %s SMIL data' % asset_type)
|
||||
except ExtractorError as e:
|
||||
last_e = e
|
||||
continue
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
if last_e and not formats:
|
||||
raise last_e
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = self._extract_theplatform_metadata(tp_path, content_id)
|
||||
|
@ -13,6 +13,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
sanitized_Request,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
USER_AGENTS,
|
||||
)
|
||||
@ -265,6 +266,10 @@ class CeskaTelevizePoradyIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# iframe embed
|
||||
'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -272,8 +277,11 @@ class CeskaTelevizePoradyIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
data_url = unescapeHTML(self._search_regex(
|
||||
r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'iframe player url', group='url'))
|
||||
data_url = update_url_query(unescapeHTML(self._search_regex(
|
||||
(r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
|
||||
webpage, 'iframe player url', group='url')), query={
|
||||
'autoStart': 'true',
|
||||
})
|
||||
|
||||
return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())
|
||||
|
@ -644,19 +644,31 @@ class InfoExtractor(object):
|
||||
content, _ = res
|
||||
return content
|
||||
|
||||
def _download_xml_handle(
|
||||
self, url_or_request, video_id, note='Downloading XML',
|
||||
errnote='Unable to download XML', transform_source=None,
|
||||
fatal=True, encoding=None, data=None, headers={}, query={}):
|
||||
"""Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle)"""
|
||||
res = self._download_webpage_handle(
|
||||
url_or_request, video_id, note, errnote, fatal=fatal,
|
||||
encoding=encoding, data=data, headers=headers, query=query)
|
||||
if res is False:
|
||||
return res
|
||||
xml_string, urlh = res
|
||||
return self._parse_xml(
|
||||
xml_string, video_id, transform_source=transform_source,
|
||||
fatal=fatal), urlh
|
||||
|
||||
def _download_xml(self, url_or_request, video_id,
|
||||
note='Downloading XML', errnote='Unable to download XML',
|
||||
transform_source=None, fatal=True, encoding=None,
|
||||
data=None, headers={}, query={}):
|
||||
"""Return the xml as an xml.etree.ElementTree.Element"""
|
||||
xml_string = self._download_webpage(
|
||||
url_or_request, video_id, note, errnote, fatal=fatal,
|
||||
encoding=encoding, data=data, headers=headers, query=query)
|
||||
if xml_string is False:
|
||||
return xml_string
|
||||
return self._parse_xml(
|
||||
xml_string, video_id, transform_source=transform_source,
|
||||
fatal=fatal)
|
||||
res = self._download_xml_handle(
|
||||
url_or_request, video_id, note=note, errnote=errnote,
|
||||
transform_source=transform_source, fatal=fatal, encoding=encoding,
|
||||
data=data, headers=headers, query=query)
|
||||
return res if res is False else res[0]
|
||||
|
||||
def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
|
||||
if transform_source:
|
||||
@ -1013,7 +1025,7 @@ class InfoExtractor(object):
|
||||
})
|
||||
|
||||
for e in json_ld:
|
||||
if e.get('@context') == 'http://schema.org':
|
||||
if isinstance(e.get('@context'), compat_str) and re.match(r'^https?://schema.org/?$', e.get('@context')):
|
||||
item_type = e.get('@type')
|
||||
if expected_type is not None and expected_type != item_type:
|
||||
return info
|
||||
@ -1694,22 +1706,24 @@ class InfoExtractor(object):
|
||||
})
|
||||
return subtitles
|
||||
|
||||
def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True):
|
||||
def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True):
|
||||
xspf = self._download_xml(
|
||||
playlist_url, playlist_id, 'Downloading xpsf playlist',
|
||||
xspf_url, playlist_id, 'Downloading xpsf playlist',
|
||||
'Unable to download xspf manifest', fatal=fatal)
|
||||
if xspf is False:
|
||||
return []
|
||||
return self._parse_xspf(xspf, playlist_id)
|
||||
return self._parse_xspf(
|
||||
xspf, playlist_id, xspf_url=xspf_url,
|
||||
xspf_base_url=base_url(xspf_url))
|
||||
|
||||
def _parse_xspf(self, playlist, playlist_id):
|
||||
def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None):
|
||||
NS_MAP = {
|
||||
'xspf': 'http://xspf.org/ns/0/',
|
||||
's1': 'http://static.streamone.nl/player/ns/0',
|
||||
}
|
||||
|
||||
entries = []
|
||||
for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
|
||||
for track in xspf_doc.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
|
||||
title = xpath_text(
|
||||
track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
|
||||
description = xpath_text(
|
||||
@ -1719,12 +1733,18 @@ class InfoExtractor(object):
|
||||
duration = float_or_none(
|
||||
xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
|
||||
|
||||
formats = [{
|
||||
'url': location.text,
|
||||
'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
|
||||
'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
|
||||
'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
|
||||
} for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))]
|
||||
formats = []
|
||||
for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP)):
|
||||
format_url = urljoin(xspf_base_url, location.text)
|
||||
if not format_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'manifest_url': xspf_url,
|
||||
'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
|
||||
'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
|
||||
'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
@ -1738,18 +1758,18 @@ class InfoExtractor(object):
|
||||
return entries
|
||||
|
||||
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
|
||||
res = self._download_webpage_handle(
|
||||
res = self._download_xml_handle(
|
||||
mpd_url, video_id,
|
||||
note=note or 'Downloading MPD manifest',
|
||||
errnote=errnote or 'Failed to download MPD manifest',
|
||||
fatal=fatal)
|
||||
if res is False:
|
||||
return []
|
||||
mpd, urlh = res
|
||||
mpd_doc, urlh = res
|
||||
mpd_base_url = base_url(urlh.geturl())
|
||||
|
||||
return self._parse_mpd_formats(
|
||||
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
|
||||
mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url,
|
||||
formats_dict=formats_dict, mpd_url=mpd_url)
|
||||
|
||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
|
||||
@ -2023,17 +2043,16 @@ class InfoExtractor(object):
|
||||
return formats
|
||||
|
||||
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
|
||||
res = self._download_webpage_handle(
|
||||
res = self._download_xml_handle(
|
||||
ism_url, video_id,
|
||||
note=note or 'Downloading ISM manifest',
|
||||
errnote=errnote or 'Failed to download ISM manifest',
|
||||
fatal=fatal)
|
||||
if res is False:
|
||||
return []
|
||||
ism, urlh = res
|
||||
ism_doc, urlh = res
|
||||
|
||||
return self._parse_ism_formats(
|
||||
compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)
|
||||
return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
|
||||
|
||||
def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
|
||||
"""
|
||||
@ -2131,8 +2150,8 @@ class InfoExtractor(object):
|
||||
return formats
|
||||
|
||||
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None):
|
||||
def absolute_url(video_url):
|
||||
return compat_urlparse.urljoin(base_url, video_url)
|
||||
def absolute_url(item_url):
|
||||
return urljoin(base_url, item_url)
|
||||
|
||||
def parse_content_type(content_type):
|
||||
if not content_type:
|
||||
@ -2189,7 +2208,7 @@ class InfoExtractor(object):
|
||||
if src:
|
||||
_, formats = _media_formats(src, media_type)
|
||||
media_info['formats'].extend(formats)
|
||||
media_info['thumbnail'] = media_attributes.get('poster')
|
||||
media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
|
||||
if media_content:
|
||||
for source_tag in re.findall(r'<source[^>]+>', media_content):
|
||||
source_attributes = extract_attributes(source_tag)
|
||||
|
@ -1,31 +1,45 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals, division
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
|
||||
class CrackleIE(InfoExtractor):
|
||||
_GEO_COUNTRIES = ['US']
|
||||
_VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934',
|
||||
# geo restricted to CA
|
||||
'url': 'https://www.crackle.com/andromeda/2502343',
|
||||
'info_dict': {
|
||||
'id': '2498934',
|
||||
'id': '2502343',
|
||||
'ext': 'mp4',
|
||||
'title': 'Everybody Respects A Bloody Nose',
|
||||
'description': 'Jerry is kaffeeklatsching in L.A. with funnyman J.B. Smoove (Saturday Night Live, Real Husbands of Hollywood). They’re headed for brew at 10 Speed Coffee in a 1964 Studebaker Avanti.',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 906,
|
||||
'series': 'Comedians In Cars Getting Coffee',
|
||||
'season_number': 8,
|
||||
'episode_number': 4,
|
||||
'subtitles': {
|
||||
'en-US': [
|
||||
{'ext': 'vtt'},
|
||||
{'ext': 'tt'},
|
||||
]
|
||||
},
|
||||
'title': 'Under The Night',
|
||||
'description': 'md5:d2b8ca816579ae8a7bf28bfff8cefc8a',
|
||||
'duration': 2583,
|
||||
'view_count': int,
|
||||
'average_rating': 0,
|
||||
'age_limit': 14,
|
||||
'genre': 'Action, Sci-Fi',
|
||||
'creator': 'Allan Kroeker',
|
||||
'artist': 'Keith Hamilton Cobb, Kevin Sorbo, Lisa Ryder, Lexa Doig, Robert Hewitt Wolfe',
|
||||
'release_year': 2000,
|
||||
'series': 'Andromeda',
|
||||
'episode': 'Under The Night',
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@ -33,109 +47,118 @@ class CrackleIE(InfoExtractor):
|
||||
}
|
||||
}
|
||||
|
||||
_THUMBNAIL_RES = [
|
||||
(120, 90),
|
||||
(208, 156),
|
||||
(220, 124),
|
||||
(220, 220),
|
||||
(240, 180),
|
||||
(250, 141),
|
||||
(315, 236),
|
||||
(320, 180),
|
||||
(360, 203),
|
||||
(400, 300),
|
||||
(421, 316),
|
||||
(460, 330),
|
||||
(460, 460),
|
||||
(462, 260),
|
||||
(480, 270),
|
||||
(587, 330),
|
||||
(640, 480),
|
||||
(700, 330),
|
||||
(700, 394),
|
||||
(854, 480),
|
||||
(1024, 1024),
|
||||
(1920, 1080),
|
||||
]
|
||||
|
||||
# extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
|
||||
_MEDIA_FILE_SLOTS = {
|
||||
'c544.flv': {
|
||||
'width': 544,
|
||||
'height': 306,
|
||||
},
|
||||
'360p.mp4': {
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
},
|
||||
'480p.mp4': {
|
||||
'width': 852,
|
||||
'height': 478,
|
||||
},
|
||||
'480p_1mbps.mp4': {
|
||||
'width': 852,
|
||||
'height': 478,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
config_doc = self._download_xml(
|
||||
'http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx?site=16',
|
||||
video_id, 'Downloading config')
|
||||
country_code = self._downloader.params.get('geo_bypass_country', None)
|
||||
countries = [country_code] if country_code else (
|
||||
'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI')
|
||||
|
||||
item = self._download_xml(
|
||||
'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id,
|
||||
video_id, headers=self.geo_verification_headers()).find('i')
|
||||
title = item.attrib['t']
|
||||
last_e = None
|
||||
|
||||
subtitles = {}
|
||||
formats = self._extract_m3u8_formats(
|
||||
'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id),
|
||||
video_id, 'mp4', m3u8_id='hls', fatal=None)
|
||||
thumbnails = []
|
||||
path = item.attrib.get('p')
|
||||
if path:
|
||||
for width, height in self._THUMBNAIL_RES:
|
||||
res = '%dx%d' % (width, height)
|
||||
thumbnails.append({
|
||||
'id': res,
|
||||
'url': 'http://images-us-am.crackle.com/%stnl_%s.jpg' % (path, res),
|
||||
'width': width,
|
||||
'height': height,
|
||||
'resolution': res,
|
||||
})
|
||||
http_base_url = 'http://ahttp.crackle.com/' + path
|
||||
for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items():
|
||||
formats.append({
|
||||
'url': http_base_url + mfs_path,
|
||||
'format_id': 'http-' + mfs_path.split('.')[0],
|
||||
'width': mfs_info['width'],
|
||||
'height': mfs_info['height'],
|
||||
})
|
||||
for cc in item.findall('cc'):
|
||||
locale = cc.attrib.get('l')
|
||||
v = cc.attrib.get('v')
|
||||
if locale and v:
|
||||
if locale not in subtitles:
|
||||
subtitles[locale] = []
|
||||
for url_ext, ext in (('vtt', 'vtt'), ('xml', 'tt')):
|
||||
subtitles.setdefault(locale, []).append({
|
||||
'url': '%s/%s%s_%s.%s' % (config_doc.attrib['strSubtitleServer'], path, locale, v, url_ext),
|
||||
'ext': ext,
|
||||
})
|
||||
self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
|
||||
for country in countries:
|
||||
try:
|
||||
media = self._download_json(
|
||||
'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s'
|
||||
% (video_id, country), video_id,
|
||||
'Downloading media JSON as %s' % country,
|
||||
'Unable to download media JSON', query={
|
||||
'disableProtocols': 'true',
|
||||
'format': 'json'
|
||||
})
|
||||
except ExtractorError as e:
|
||||
# 401 means geo restriction, trying next country
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
last_e = e
|
||||
continue
|
||||
raise
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': item.attrib.get('d'),
|
||||
'duration': int(item.attrib.get('r'), 16) / 1000 if item.attrib.get('r') else None,
|
||||
'series': item.attrib.get('sn'),
|
||||
'season_number': int_or_none(item.attrib.get('se')),
|
||||
'episode_number': int_or_none(item.attrib.get('ep')),
|
||||
'thumbnails': thumbnails,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
media_urls = media.get('MediaURLs')
|
||||
if not media_urls or not isinstance(media_urls, list):
|
||||
continue
|
||||
|
||||
title = media['Title']
|
||||
|
||||
formats = []
|
||||
for e in media['MediaURLs']:
|
||||
if e.get('UseDRM') is True:
|
||||
continue
|
||||
format_url = e.get('Path')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = media.get('Description')
|
||||
duration = int_or_none(media.get(
|
||||
'DurationInSeconds')) or parse_duration(media.get('Duration'))
|
||||
view_count = int_or_none(media.get('CountViews'))
|
||||
average_rating = float_or_none(media.get('UserRating'))
|
||||
age_limit = parse_age_limit(media.get('Rating'))
|
||||
genre = media.get('Genre')
|
||||
release_year = int_or_none(media.get('ReleaseYear'))
|
||||
creator = media.get('Directors')
|
||||
artist = media.get('Cast')
|
||||
|
||||
if media.get('MediaTypeDisplayValue') == 'Full Episode':
|
||||
series = media.get('ShowName')
|
||||
episode = title
|
||||
season_number = int_or_none(media.get('Season'))
|
||||
episode_number = int_or_none(media.get('Episode'))
|
||||
else:
|
||||
series = episode = season_number = episode_number = None
|
||||
|
||||
subtitles = {}
|
||||
cc_files = media.get('ClosedCaptionFiles')
|
||||
if isinstance(cc_files, list):
|
||||
for cc_file in cc_files:
|
||||
if not isinstance(cc_file, dict):
|
||||
continue
|
||||
cc_url = cc_file.get('Path')
|
||||
if not cc_url or not isinstance(cc_url, compat_str):
|
||||
continue
|
||||
lang = cc_file.get('Locale') or 'en'
|
||||
subtitles.setdefault(lang, []).append({'url': cc_url})
|
||||
|
||||
thumbnails = []
|
||||
images = media.get('Images')
|
||||
if isinstance(images, list):
|
||||
for image_key, image_url in images.items():
|
||||
mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
|
||||
if not mobj:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': image_url,
|
||||
'width': int(mobj.group(1)),
|
||||
'height': int(mobj.group(2)),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'average_rating': average_rating,
|
||||
'age_limit': age_limit,
|
||||
'genre': genre,
|
||||
'creator': creator,
|
||||
'artist': artist,
|
||||
'release_year': release_year,
|
||||
'series': series,
|
||||
'episode': episode,
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'thumbnails': thumbnails,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
raise last_e
|
||||
|
@ -2,26 +2,26 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import json
|
||||
|
||||
from .amp import AMPIE
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
remove_end,
|
||||
sanitized_Request,
|
||||
urlencode_postdata
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class DramaFeverBaseIE(AMPIE):
|
||||
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
|
||||
class DramaFeverBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'dramafever'
|
||||
_GEO_COUNTRIES = ['US', 'CA']
|
||||
|
||||
_CONSUMER_SECRET = 'DA59dtVXYLxajktV'
|
||||
|
||||
@ -38,8 +38,8 @@ class DramaFeverBaseIE(AMPIE):
|
||||
'consumer secret', default=self._CONSUMER_SECRET)
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
self._consumer_secret = self._get_consumer_secret()
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
@ -51,37 +51,49 @@ class DramaFeverBaseIE(AMPIE):
|
||||
'password': password,
|
||||
}
|
||||
|
||||
request = sanitized_Request(
|
||||
self._LOGIN_URL, urlencode_postdata(login_form))
|
||||
response = self._download_webpage(
|
||||
request, None, 'Logging in')
|
||||
try:
|
||||
response = self._download_json(
|
||||
'https://www.dramafever.com/api/users/login', None, 'Logging in',
|
||||
data=json.dumps(login_form).encode('utf-8'), headers={
|
||||
'x-consumer-key': self._consumer_secret,
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (403, 404):
|
||||
response = self._parse_json(
|
||||
e.cause.read().decode('utf-8'), None)
|
||||
else:
|
||||
raise
|
||||
|
||||
if all(logout_pattern not in response
|
||||
for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):
|
||||
error = self._html_search_regex(
|
||||
r'(?s)<h\d[^>]+\bclass="hidden-xs prompt"[^>]*>(.+?)</h\d',
|
||||
response, 'error message', default=None)
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
# Successful login
|
||||
if response.get('result') or response.get('guid') or response.get('user_guid'):
|
||||
return
|
||||
|
||||
errors = response.get('errors')
|
||||
if errors and isinstance(errors, list):
|
||||
error = errors[0]
|
||||
message = error.get('message') or error['reason']
|
||||
raise ExtractorError('Unable to login: %s' % message, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
|
||||
class DramaFeverIE(DramaFeverBaseIE):
|
||||
IE_NAME = 'dramafever'
|
||||
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
|
||||
'url': 'https://www.dramafever.com/drama/4274/1/Heirs/',
|
||||
'info_dict': {
|
||||
'id': '4512.1',
|
||||
'ext': 'flv',
|
||||
'title': 'Cooking with Shin',
|
||||
'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0',
|
||||
'id': '4274.1',
|
||||
'ext': 'wvm',
|
||||
'title': 'Heirs - Episode 1',
|
||||
'description': 'md5:362a24ba18209f6276e032a651c50bc2',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 3783,
|
||||
'timestamp': 1381354993,
|
||||
'upload_date': '20131009',
|
||||
'series': 'Heirs',
|
||||
'season_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'timestamp': 1404336058,
|
||||
'upload_date': '20140702',
|
||||
'duration': 344,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@ -110,50 +122,95 @@ class DramaFeverIE(DramaFeverBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _call_api(self, path, video_id, note, fatal=False):
|
||||
return self._download_json(
|
||||
'https://www.dramafever.com/api/5/' + path,
|
||||
video_id, note=note, headers={
|
||||
'x-consumer-key': self._consumer_secret,
|
||||
}, fatal=fatal)
|
||||
|
||||
def _get_subtitles(self, video_id):
|
||||
subtitles = {}
|
||||
subs = self._call_api(
|
||||
'video/%s/subtitles/webvtt/' % video_id, video_id,
|
||||
'Downloading subtitles JSON', fatal=False)
|
||||
if not subs or not isinstance(subs, list):
|
||||
return subtitles
|
||||
for sub in subs:
|
||||
if not isinstance(sub, dict):
|
||||
continue
|
||||
sub_url = sub.get('url')
|
||||
if not sub_url or not isinstance(sub_url, compat_str):
|
||||
continue
|
||||
subtitles.setdefault(
|
||||
sub.get('code') or sub.get('language') or 'en', []).append({
|
||||
'url': sub_url
|
||||
})
|
||||
return subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url).replace('/', '.')
|
||||
|
||||
try:
|
||||
info = self._extract_feed_info(
|
||||
'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError):
|
||||
self.raise_geo_restricted(
|
||||
msg='Currently unavailable in your country',
|
||||
countries=self._GEO_COUNTRIES)
|
||||
raise
|
||||
|
||||
# title is postfixed with video id for some reason, removing
|
||||
if info.get('title'):
|
||||
info['title'] = remove_end(info['title'], video_id).strip()
|
||||
|
||||
series_id, episode_number = video_id.split('.')
|
||||
episode_info = self._download_json(
|
||||
# We only need a single episode info, so restricting page size to one episode
|
||||
# and dealing with page number as with episode number
|
||||
r'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_number=%s&page_size=1'
|
||||
% (self._consumer_secret, series_id, episode_number),
|
||||
video_id, 'Downloading episode info JSON', fatal=False)
|
||||
if episode_info:
|
||||
value = episode_info.get('value')
|
||||
if isinstance(value, list):
|
||||
for v in value:
|
||||
if v.get('type') == 'Episode':
|
||||
subfile = v.get('subfile') or v.get('new_subfile')
|
||||
if subfile and subfile != 'http://www.dramafever.com/st/':
|
||||
info.setdefault('subtitles', {}).setdefault('English', []).append({
|
||||
'ext': 'srt',
|
||||
'url': subfile,
|
||||
})
|
||||
episode_number = int_or_none(v.get('number'))
|
||||
episode_fallback = 'Episode'
|
||||
if episode_number:
|
||||
episode_fallback += ' %d' % episode_number
|
||||
info['episode'] = v.get('title') or episode_fallback
|
||||
info['episode_number'] = episode_number
|
||||
break
|
||||
|
||||
return info
|
||||
video = self._call_api(
|
||||
'series/%s/episodes/%s/' % (series_id, episode_number), video_id,
|
||||
'Downloading video JSON')
|
||||
|
||||
formats = []
|
||||
download_assets = video.get('download_assets')
|
||||
if download_assets and isinstance(download_assets, dict):
|
||||
for format_id, format_dict in download_assets.items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
format_url = format_dict.get('url')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
continue
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
'filesize': int_or_none(video.get('filesize')),
|
||||
})
|
||||
|
||||
stream = self._call_api(
|
||||
'video/%s/stream/' % video_id, video_id, 'Downloading stream JSON',
|
||||
fatal=False)
|
||||
if stream:
|
||||
stream_url = stream.get('stream_url')
|
||||
if stream_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = video.get('title') or 'Episode %s' % episode_number
|
||||
description = video.get('description')
|
||||
thumbnail = video.get('thumbnail')
|
||||
timestamp = unified_timestamp(video.get('release_date'))
|
||||
duration = parse_duration(video.get('duration'))
|
||||
age_limit = parse_age_limit(video.get('tv_rating'))
|
||||
series = video.get('series_title')
|
||||
season_number = int_or_none(video.get('season'))
|
||||
|
||||
if series:
|
||||
title = '%s - %s' % (series, title)
|
||||
|
||||
subtitles = self.extract_subtitles(video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'age_limit': age_limit,
|
||||
'series': series,
|
||||
'season_number': season_number,
|
||||
'episode_number': int_or_none(episode_number),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class DramaFeverSeriesIE(DramaFeverBaseIE):
|
||||
|
@ -66,7 +66,9 @@ class DrTuberIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_regex(
|
||||
(r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
|
||||
(r'<h1[^>]+class=["\']title[^>]+>([^<]+)',
|
||||
r'<title>([^<]+)\s*@\s+DrTuber',
|
||||
r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
|
||||
r'<p[^>]+class="title_substrate">([^<]+)</p>',
|
||||
r'<title>([^<]+) - \d+'),
|
||||
webpage, 'title')
|
||||
|
@ -532,13 +532,14 @@ from .lcp import (
|
||||
)
|
||||
from .learnr import LearnrIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .lego import LEGOIE
|
||||
from .lemonde import LemondeIE
|
||||
from .leeco import (
|
||||
LeIE,
|
||||
LePlaylistIE,
|
||||
LetvCloudIE,
|
||||
)
|
||||
from .lego import LEGOIE
|
||||
from .lemonde import LemondeIE
|
||||
from .lenta import LentaIE
|
||||
from .libraryofcongress import LibraryOfCongressIE
|
||||
from .libsyn import LibsynIE
|
||||
from .lifenews import (
|
||||
@ -1030,6 +1031,7 @@ from .sunporno import SunPornoIE
|
||||
from .svt import (
|
||||
SVTIE,
|
||||
SVTPlayIE,
|
||||
SVTSeriesIE,
|
||||
)
|
||||
from .swrmediathek import SWRMediathekIE
|
||||
from .syfy import SyfyIE
|
||||
@ -1135,6 +1137,7 @@ from .tvnoe import TVNoeIE
|
||||
from .tvnow import (
|
||||
TVNowIE,
|
||||
TVNowListIE,
|
||||
TVNowShowIE,
|
||||
)
|
||||
from .tvp import (
|
||||
TVPEmbedIE,
|
||||
|
@ -41,7 +41,7 @@ class FXNetworksIE(AdobePassIE):
|
||||
if 'The content you are trying to access is not available in your region.' in webpage:
|
||||
self.raise_geo_restricted()
|
||||
video_data = extract_attributes(self._search_regex(
|
||||
r'(<a.+?rel="http://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data'))
|
||||
r'(<a.+?rel="https?://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data'))
|
||||
player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None)
|
||||
release_url = video_data['rel']
|
||||
title = video_data['data-title']
|
||||
|
@ -58,6 +58,7 @@ from .xhamster import XHamsterEmbedIE
|
||||
from .tnaflix import TNAFlixNetworkEmbedIE
|
||||
from .drtuber import DrTuberIE
|
||||
from .redtube import RedTubeIE
|
||||
from .tube8 import Tube8IE
|
||||
from .vimeo import VimeoIE
|
||||
from .dailymotion import DailymotionIE
|
||||
from .dailymail import DailyMailIE
|
||||
@ -104,6 +105,7 @@ from .mediasite import MediasiteIE
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
from .yapfiles import YapFilesIE
|
||||
from .vice import ViceIE
|
||||
from .xfileshare import XFileShareIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@ -1269,24 +1271,6 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': ['Kaltura'],
|
||||
},
|
||||
# EaglePlatform embed (generic URL)
|
||||
{
|
||||
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
||||
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
|
||||
'info_dict': {
|
||||
'id': '227304',
|
||||
'ext': 'mp4',
|
||||
'title': 'Навальный вышел на свободу',
|
||||
'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 87,
|
||||
'view_count': int,
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# referrer protected EaglePlatform embed
|
||||
{
|
||||
'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
|
||||
@ -1984,7 +1968,17 @@ class GenericIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://share-videos.se/auto/video/83645793?uid=13',
|
||||
'md5': 'b68d276de422ab07ee1d49388103f457',
|
||||
'info_dict': {
|
||||
'id': '83645793',
|
||||
'title': 'Lock up and get excited',
|
||||
'ext': 'mp4'
|
||||
},
|
||||
'skip': 'TODO: fix nested playlists processing in tests',
|
||||
},
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
@ -2231,7 +2225,11 @@ class GenericIE(InfoExtractor):
|
||||
self._sort_formats(smil['formats'])
|
||||
return smil
|
||||
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
|
||||
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
|
||||
return self.playlist_result(
|
||||
self._parse_xspf(
|
||||
doc, video_id, xspf_url=url,
|
||||
xspf_base_url=compat_str(full_response.geturl())),
|
||||
video_id)
|
||||
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
||||
info_dict['formats'] = self._parse_mpd_formats(
|
||||
doc,
|
||||
@ -2559,6 +2557,11 @@ class GenericIE(InfoExtractor):
|
||||
if redtube_urls:
|
||||
return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
|
||||
|
||||
# Look for embedded Tube8 player
|
||||
tube8_urls = Tube8IE._extract_urls(webpage)
|
||||
if tube8_urls:
|
||||
return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
|
||||
|
||||
# Look for embedded Tvigle player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
|
||||
@ -2971,6 +2974,18 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
vice_urls, video_id, video_title, ie=ViceIE.ie_key())
|
||||
|
||||
xfileshare_urls = XFileShareIE._extract_urls(webpage)
|
||||
if xfileshare_urls:
|
||||
return self.playlist_from_matches(
|
||||
xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
|
||||
|
||||
sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
|
||||
r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
|
||||
webpage)]
|
||||
if sharevideos_urls:
|
||||
return self.playlist_from_matches(
|
||||
sharevideos_urls, video_id, video_title)
|
||||
|
||||
def merge_dicts(dict1, dict2):
|
||||
merged = {}
|
||||
for k, v in dict1.items():
|
||||
|
@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import hashlib
|
||||
import json
|
||||
import re
|
||||
|
||||
@ -237,59 +239,95 @@ class InstagramUserIE(InfoExtractor):
|
||||
}
|
||||
}
|
||||
|
||||
def _entries(self, uploader_id):
|
||||
def _entries(self, data):
|
||||
def get_count(suffix):
|
||||
return int_or_none(try_get(
|
||||
node, lambda x: x['edge_media_' + suffix]['count']))
|
||||
|
||||
edges = self._download_json(
|
||||
'https://www.instagram.com/graphql/query/', uploader_id, query={
|
||||
'query_hash': '472f257a40c653c64c666ce877d59d2b',
|
||||
'variables': json.dumps({
|
||||
'id': uploader_id,
|
||||
'first': 999999999,
|
||||
})
|
||||
})['data']['user']['edge_owner_to_timeline_media']['edges']
|
||||
uploader_id = data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
|
||||
csrf_token = data['config']['csrf_token']
|
||||
rhx_gis = data.get('rhx_gis') or '3c7ca9dcefcf966d11dacf1f151335e8'
|
||||
|
||||
for edge in edges:
|
||||
node = edge['node']
|
||||
self._set_cookie('instagram.com', 'ig_pr', '1')
|
||||
|
||||
if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True:
|
||||
continue
|
||||
video_id = node.get('shortcode')
|
||||
if not video_id:
|
||||
continue
|
||||
|
||||
info = self.url_result(
|
||||
'https://instagram.com/p/%s/' % video_id,
|
||||
ie=InstagramIE.ie_key(), video_id=video_id)
|
||||
|
||||
description = try_get(
|
||||
node, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
|
||||
compat_str)
|
||||
thumbnail = node.get('thumbnail_src') or node.get('display_src')
|
||||
timestamp = int_or_none(node.get('taken_at_timestamp'))
|
||||
|
||||
comment_count = get_count('to_comment')
|
||||
like_count = get_count('preview_like')
|
||||
view_count = int_or_none(node.get('video_view_count'))
|
||||
|
||||
info.update({
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'comment_count': comment_count,
|
||||
'like_count': like_count,
|
||||
'view_count': view_count,
|
||||
cursor = ''
|
||||
for page_num in itertools.count(1):
|
||||
variables = json.dumps({
|
||||
'id': uploader_id,
|
||||
'first': 100,
|
||||
'after': cursor,
|
||||
})
|
||||
s = '%s:%s:%s' % (rhx_gis, csrf_token, variables)
|
||||
media = self._download_json(
|
||||
'https://www.instagram.com/graphql/query/', uploader_id,
|
||||
'Downloading JSON page %d' % page_num, headers={
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'X-Instagram-GIS': hashlib.md5(s.encode('utf-8')).hexdigest(),
|
||||
}, query={
|
||||
'query_hash': '472f257a40c653c64c666ce877d59d2b',
|
||||
'variables': variables,
|
||||
})['data']['user']['edge_owner_to_timeline_media']
|
||||
|
||||
yield info
|
||||
edges = media.get('edges')
|
||||
if not edges or not isinstance(edges, list):
|
||||
break
|
||||
|
||||
for edge in edges:
|
||||
node = edge.get('node')
|
||||
if not node or not isinstance(node, dict):
|
||||
continue
|
||||
if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True:
|
||||
continue
|
||||
video_id = node.get('shortcode')
|
||||
if not video_id:
|
||||
continue
|
||||
|
||||
info = self.url_result(
|
||||
'https://instagram.com/p/%s/' % video_id,
|
||||
ie=InstagramIE.ie_key(), video_id=video_id)
|
||||
|
||||
description = try_get(
|
||||
node, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
|
||||
compat_str)
|
||||
thumbnail = node.get('thumbnail_src') or node.get('display_src')
|
||||
timestamp = int_or_none(node.get('taken_at_timestamp'))
|
||||
|
||||
comment_count = get_count('to_comment')
|
||||
like_count = get_count('preview_like')
|
||||
view_count = int_or_none(node.get('video_view_count'))
|
||||
|
||||
info.update({
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'comment_count': comment_count,
|
||||
'like_count': like_count,
|
||||
'view_count': view_count,
|
||||
})
|
||||
|
||||
yield info
|
||||
|
||||
page_info = media.get('page_info')
|
||||
if not page_info or not isinstance(page_info, dict):
|
||||
break
|
||||
|
||||
has_next_page = page_info.get('has_next_page')
|
||||
if not has_next_page:
|
||||
break
|
||||
|
||||
cursor = page_info.get('end_cursor')
|
||||
if not cursor or not isinstance(cursor, compat_str):
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
username = self._match_id(url)
|
||||
uploader_id = self._download_json(
|
||||
'https://instagram.com/%s/' % username, username, query={
|
||||
'__a': 1,
|
||||
})['graphql']['user']['id']
|
||||
|
||||
webpage = self._download_webpage(url, username)
|
||||
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data'),
|
||||
username)
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(uploader_id), username, username)
|
||||
self._entries(data), username, username)
|
||||
|
53
youtube_dl/extractor/lenta.py
Normal file
53
youtube_dl/extractor/lenta.py
Normal file
@ -0,0 +1,53 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class LentaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?lenta\.ru/[^/]+/\d+/\d+/\d+/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://lenta.ru/news/2018/03/22/savshenko_go/',
|
||||
'info_dict': {
|
||||
'id': '964400',
|
||||
'ext': 'mp4',
|
||||
'title': 'Надежду Савченко задержали',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 61,
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# EaglePlatform iframe embed
|
||||
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
||||
'info_dict': {
|
||||
'id': '227304',
|
||||
'ext': 'mp4',
|
||||
'title': 'Навальный вышел на свободу',
|
||||
'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 87,
|
||||
'view_count': int,
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'vid\s*:\s*["\']?(\d+)', webpage, 'eagleplatform id',
|
||||
default=None)
|
||||
if video_id:
|
||||
return self.url_result(
|
||||
'eagleplatform:lentaru.media.eagleplatform.com:%s' % video_id,
|
||||
ie='EaglePlatform', video_id=video_id)
|
||||
|
||||
return self.url_result(url, ie='Generic')
|
@ -1,24 +1,28 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class LibsynIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<mainurl>https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+))'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
|
||||
'md5': '443360ee1b58007bc3dcf09b41d093bb',
|
||||
'url': 'http://html5-player.libsyn.com/embed/episode/id/6385796/',
|
||||
'md5': '2a55e75496c790cdeb058e7e6c087746',
|
||||
'info_dict': {
|
||||
'id': '3377616',
|
||||
'id': '6385796',
|
||||
'ext': 'mp3',
|
||||
'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
|
||||
'description': 'md5:601cb790edd05908957dae8aaa866465',
|
||||
'upload_date': '20150220',
|
||||
'title': "Champion Minded - Developing a Growth Mindset",
|
||||
'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.',
|
||||
'upload_date': '20180320',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
},
|
||||
}, {
|
||||
@ -39,31 +43,45 @@ class LibsynIE(InfoExtractor):
|
||||
url = m.group('mainurl')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats = [{
|
||||
'url': media_url,
|
||||
} for media_url in set(re.findall(r'var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]
|
||||
|
||||
podcast_title = self._search_regex(
|
||||
r'<h2>([^<]+)</h2>', webpage, 'podcast title', default=None)
|
||||
r'<h3>([^<]+)</h3>', webpage, 'podcast title', default=None)
|
||||
if podcast_title:
|
||||
podcast_title = podcast_title.strip()
|
||||
episode_title = self._search_regex(
|
||||
r'(?:<div class="episode-title">|<h3>)([^<]+)</', webpage, 'episode title')
|
||||
r'(?:<div class="episode-title">|<h4>)([^<]+)</', webpage, 'episode title')
|
||||
if episode_title:
|
||||
episode_title = episode_title.strip()
|
||||
|
||||
title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'<div id="info_text_body">(.+?)</div>', webpage,
|
||||
r'<p\s+id="info_text_body">(.+?)</p>', webpage,
|
||||
'description', default=None)
|
||||
thumbnail = self._search_regex(
|
||||
r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
if description:
|
||||
# Strip non-breaking and normal spaces
|
||||
description = description.replace('\u00A0', ' ').strip()
|
||||
release_date = unified_strdate(self._search_regex(
|
||||
r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))
|
||||
|
||||
data_json = self._search_regex(r'var\s+playlistItem\s*=\s*(\{.*?\});\n', webpage, 'JSON data block')
|
||||
data = json.loads(data_json)
|
||||
|
||||
formats = [{
|
||||
'url': data['media_url'],
|
||||
'format_id': 'main',
|
||||
}, {
|
||||
'url': data['media_url_libsyn'],
|
||||
'format_id': 'libsyn',
|
||||
}]
|
||||
thumbnail = data.get('thumbnail_url')
|
||||
duration = parse_duration(data.get('duration'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': release_date,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ from ..utils import int_or_none
|
||||
|
||||
|
||||
class LiveLeakIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<id>[\w_]+)(?:.*)'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?.*?\b[it]=(?P<id>[\w_]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.liveleak.com/view?i=757_1364311680',
|
||||
'md5': '0813c2430bea7a46bf13acf3406992f4',
|
||||
@ -79,6 +79,9 @@ class LiveLeakIE(InfoExtractor):
|
||||
'title': 'Fuel Depot in China Explosion caught on video',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
|
@ -141,6 +141,7 @@ class MedialaanIE(GigyaBaseIE):
|
||||
|
||||
vod_id = config.get('vodId') or self._search_regex(
|
||||
(r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
|
||||
r'"vodId"\s*:\s*"(.+?)"',
|
||||
r'<[^>]+id=["\']vod-(\d+)'),
|
||||
webpage, 'video_id', default=None)
|
||||
|
||||
|
@ -68,11 +68,11 @@ class NationalGeographicVideoIE(InfoExtractor):
|
||||
|
||||
class NationalGeographicIE(ThePlatformIE, AdobePassIE):
|
||||
IE_NAME = 'natgeo'
|
||||
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:wild/)?[^/]+/)?(?:videos|episodes)/(?P<id>[^/?]+)'
|
||||
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:(?:wild/)?[^/]+/)?(?:videos|episodes)|u)/(?P<id>[^/?]+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
|
||||
'url': 'http://channel.nationalgeographic.com/u/kdi9Ld0PN2molUUIMSBGxoeDhD729KRjQcnxtetilWPMevo8ZwUBIDuPR0Q3D2LVaTsk0MPRkRWDB8ZhqWVeyoxfsZZm36yRp1j-zPfsHEyI_EgAeFY/',
|
||||
'md5': '518c9aa655686cf81493af5cc21e2a04',
|
||||
'info_dict': {
|
||||
'id': 'vKInpacll2pC',
|
||||
@ -86,7 +86,7 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE):
|
||||
'add_ie': ['ThePlatform'],
|
||||
},
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
|
||||
'url': 'http://channel.nationalgeographic.com/u/kdvOstqYaBY-vSBPyYgAZRUL4sWUJ5XUUPEhc7ISyBHqoIO4_dzfY3K6EjHIC0hmFXoQ7Cpzm6RkET7S3oMlm6CFnrQwSUwo/',
|
||||
'md5': 'c4912f656b4cbe58f3e000c489360989',
|
||||
'info_dict': {
|
||||
'id': 'Pok5lWCkiEFA',
|
||||
@ -106,6 +106,14 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE):
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/videos/treasures-rediscovered/',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
|
@ -1,8 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@ -43,9 +41,14 @@ class NaverIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
m_id = re.search(r'var rmcPlayer = new nhn\.rmcnmv\.RMCVideoPlayer\("(.+?)", "(.+?)"',
|
||||
webpage)
|
||||
if m_id is None:
|
||||
vid = self._search_regex(
|
||||
r'videoId["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
||||
'video id', fatal=None, group='value')
|
||||
in_key = self._search_regex(
|
||||
r'inKey["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
||||
'key', default=None, group='value')
|
||||
|
||||
if not vid or not in_key:
|
||||
error = self._html_search_regex(
|
||||
r'(?s)<div class="(?:nation_error|nation_box|error_box)">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
|
||||
webpage, 'error', default=None)
|
||||
@ -53,9 +56,9 @@ class NaverIE(InfoExtractor):
|
||||
raise ExtractorError(error, expected=True)
|
||||
raise ExtractorError('couldn\'t extract vid and key')
|
||||
video_data = self._download_json(
|
||||
'http://play.rmcnmv.naver.com/vod/play/v2.0/' + m_id.group(1),
|
||||
'http://play.rmcnmv.naver.com/vod/play/v2.0/' + vid,
|
||||
video_id, query={
|
||||
'key': m_id.group(2),
|
||||
'key': in_key,
|
||||
})
|
||||
meta = video_data['meta']
|
||||
title = meta['subject']
|
||||
|
@ -4,15 +4,17 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
ExtractorError,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class NineNowIE(InfoExtractor):
|
||||
IE_NAME = '9now.com.au'
|
||||
_VALID_URL = r'https?://(?:www\.)?9now\.com\.au/(?:[^/]+/){2}(?P<id>[^/?#]+)'
|
||||
_GEO_COUNTRIES = ['AU']
|
||||
_TESTS = [{
|
||||
# clip
|
||||
'url': 'https://www.9now.com.au/afl-footy-show/2016/clip-ciql02091000g0hp5oktrnytc',
|
||||
@ -75,7 +77,9 @@ class NineNowIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
'url': smuggle_url(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
{'geo_countries': self._GEO_COUNTRIES}),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': common_data.get('description'),
|
||||
|
@ -19,7 +19,18 @@ from ..utils import (
|
||||
|
||||
|
||||
class OdnoklassnikiIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|m|mobile)\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer|live)/(?P<id>[\d-]+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:(?:www|m|mobile)\.)?
|
||||
(?:odnoklassniki|ok)\.ru/
|
||||
(?:
|
||||
video(?:embed)?/|
|
||||
web-api/video/moviePlayer/|
|
||||
live/|
|
||||
dk\?.*?st\.mvId=
|
||||
)
|
||||
(?P<id>[\d-]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
# metadata in JSON
|
||||
'url': 'http://ok.ru/video/20079905452',
|
||||
@ -101,6 +112,9 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.ok.ru/live/484531969818',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -243,7 +243,7 @@ class PhantomJSwrapper(object):
|
||||
|
||||
|
||||
class OpenloadIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://openload.co/f/kUEfGclsU9o',
|
||||
@ -298,6 +298,9 @@ class OpenloadIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://oload.stream/f/KnG-kKZdcfY',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://oload.xyz/f/WwRBpzW8Wtk',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
|
||||
@ -334,10 +337,11 @@ class OpenloadIE(InfoExtractor):
|
||||
|
||||
decoded_id = (get_element_by_id('streamurl', webpage) or
|
||||
get_element_by_id('streamuri', webpage) or
|
||||
get_element_by_id('streamurj', webpage))
|
||||
|
||||
if not decoded_id:
|
||||
raise ExtractorError('Can\'t find stream URL', video_id=video_id)
|
||||
get_element_by_id('streamurj', webpage) or
|
||||
self._search_regex(
|
||||
(r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<',
|
||||
r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)'), webpage,
|
||||
'stream URL'))
|
||||
|
||||
video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id
|
||||
|
||||
|
@ -33,7 +33,7 @@ class PornHubIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:[a-z]+\.)?pornhub\.com/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
||||
(?:[^/]+\.)?pornhub\.com/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
||||
(?:www\.)?thumbzilla\.com/video/
|
||||
)
|
||||
(?P<id>[\da-z]+)
|
||||
@ -264,7 +264,7 @@ class PornHubPlaylistBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/playlist/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pornhub.com/playlist/4667351',
|
||||
'info_dict': {
|
||||
@ -272,11 +272,14 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE):
|
||||
'title': 'Nataly Hot',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
}, {
|
||||
'url': 'https://de.pornhub.com/playlist/4667351',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class PornHubUserVideosIE(PornHubPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?pornhub\.com/(?:user|channel)s/(?P<id>[^/]+)/videos'
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/(?:user|channel)s/(?P<id>[^/]+)/videos'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
|
||||
'info_dict': {
|
||||
@ -305,6 +308,9 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
|
||||
# Most Viewed Videos
|
||||
'url': 'https://www.pornhub.com/channels/povd/videos?o=vi',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -133,7 +133,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||
(?:
|
||||
prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia
|
||||
)\.(?:de|at|ch)|
|
||||
ran\.de|fem\.com|advopedia\.de
|
||||
ran\.de|fem\.com|advopedia\.de|galileo\.tv/video
|
||||
)
|
||||
/(?P<id>.+)
|
||||
'''
|
||||
@ -326,6 +326,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||
'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# geo restricted to Germany
|
||||
'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel',
|
||||
'only_matching': True,
|
||||
@ -343,7 +348,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||
r'"clip_id"\s*:\s+"(\d+)"',
|
||||
r'clipid: "(\d+)"',
|
||||
r'clip[iI]d=(\d+)',
|
||||
r'clip[iI]d\s*=\s*["\'](\d+)',
|
||||
r'clip[iI][dD]\s*=\s*["\'](\d+)',
|
||||
r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
|
||||
r'proMamsId"\s*:\s*"(\d+)',
|
||||
r'proMamsId"\s*:\s*"(\d+)',
|
||||
|
@ -4,22 +4,30 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from ..utils import update_url_query
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
try_get,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class SevenPlusIE(BrightcoveNewIE):
|
||||
IE_NAME = '7plus'
|
||||
_VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P<path>[^?]+\?.*?\bepisode-id=(?P<id>[^&#]+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://7plus.com.au/BEAT?episode-id=BEAT-001',
|
||||
'url': 'https://7plus.com.au/MTYS?episode-id=MTYS7-003',
|
||||
'info_dict': {
|
||||
'id': 'BEAT-001',
|
||||
'id': 'MTYS7-003',
|
||||
'ext': 'mp4',
|
||||
'title': 'S1 E1 - Help / Lucy In The Sky With Diamonds',
|
||||
'description': 'md5:37718bea20a8eedaca7f7361af566131',
|
||||
'title': 'S7 E3 - Wind Surf',
|
||||
'description': 'md5:29c6a69f21accda7601278f81b46483d',
|
||||
'uploader_id': '5303576322001',
|
||||
'upload_date': '20171031',
|
||||
'timestamp': 1509440068,
|
||||
'upload_date': '20171201',
|
||||
'timestamp': 1512106377,
|
||||
'series': 'Mighty Ships',
|
||||
'season_number': 7,
|
||||
'episode_number': 3,
|
||||
'episode': 'Wind Surf',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
@ -63,5 +71,14 @@ class SevenPlusIE(BrightcoveNewIE):
|
||||
value = item.get(src_key)
|
||||
if value:
|
||||
info[dst_key] = value
|
||||
info['series'] = try_get(
|
||||
item, lambda x: x['seriesLogo']['name'], compat_str)
|
||||
mobj = re.search(r'^S(\d+)\s+E(\d+)\s+-\s+(.+)$', info['title'])
|
||||
if mobj:
|
||||
info.update({
|
||||
'season_number': int(mobj.group(1)),
|
||||
'episode_number': int(mobj.group(2)),
|
||||
'episode': mobj.group(3),
|
||||
})
|
||||
|
||||
return info
|
||||
|
@ -75,6 +75,9 @@ class SteamIE(InfoExtractor):
|
||||
gameID = m.group('gameID')
|
||||
playlist_id = gameID
|
||||
videourl = self._VIDEO_PAGE_TEMPLATE % playlist_id
|
||||
|
||||
self._set_cookie('steampowered.com', 'mature_content', '1')
|
||||
|
||||
webpage = self._download_webpage(videourl, playlist_id)
|
||||
|
||||
if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:
|
||||
|
@ -4,11 +4,17 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
try_get,
|
||||
urljoin,
|
||||
compat_str,
|
||||
)
|
||||
|
||||
|
||||
@ -122,7 +128,11 @@ class SVTIE(SVTBaseIE):
|
||||
return info_dict
|
||||
|
||||
|
||||
class SVTPlayIE(SVTBaseIE):
|
||||
class SVTPlayBaseIE(SVTBaseIE):
|
||||
_SVTPLAY_RE = r'root\s*\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P<json>{.+?})\s*;\s*\n'
|
||||
|
||||
|
||||
class SVTPlayIE(SVTPlayBaseIE):
|
||||
IE_DESC = 'SVT Play and Öppet arkiv'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp)/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
@ -157,8 +167,8 @@ class SVTPlayIE(SVTBaseIE):
|
||||
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'root\["__svtplay"\]\s*=\s*([^;]+);',
|
||||
webpage, 'embedded data', default='{}'),
|
||||
self._SVTPLAY_RE, webpage, 'embedded data', default='{}',
|
||||
group='json'),
|
||||
video_id, fatal=False)
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
@ -189,3 +199,84 @@ class SVTPlayIE(SVTBaseIE):
|
||||
r'\s*\|\s*.+?$', '',
|
||||
info_dict.get('episode') or self._og_search_title(webpage))
|
||||
return info_dict
|
||||
|
||||
|
||||
class SVTSeriesIE(SVTPlayBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.svtplay.se/rederiet',
|
||||
'info_dict': {
|
||||
'id': 'rederiet',
|
||||
'title': 'Rederiet',
|
||||
'description': 'md5:505d491a58f4fcf6eb418ecab947e69e',
|
||||
},
|
||||
'playlist_mincount': 318,
|
||||
}, {
|
||||
'url': 'https://www.svtplay.se/rederiet?tab=sasong2',
|
||||
'info_dict': {
|
||||
'id': 'rederiet-sasong2',
|
||||
'title': 'Rederiet - Säsong 2',
|
||||
'description': 'md5:505d491a58f4fcf6eb418ecab947e69e',
|
||||
},
|
||||
'playlist_count': 12,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTSeriesIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
|
||||
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||
season_slug = qs.get('tab', [None])[0]
|
||||
|
||||
if season_slug:
|
||||
series_id += '-%s' % season_slug
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, series_id, 'Downloading series page')
|
||||
|
||||
root = self._parse_json(
|
||||
self._search_regex(
|
||||
self._SVTPLAY_RE, webpage, 'content', group='json'),
|
||||
series_id)
|
||||
|
||||
season_name = None
|
||||
|
||||
entries = []
|
||||
for season in root['relatedVideoContent']['relatedVideosAccordion']:
|
||||
if not isinstance(season, dict):
|
||||
continue
|
||||
if season_slug:
|
||||
if season.get('slug') != season_slug:
|
||||
continue
|
||||
season_name = season.get('name')
|
||||
videos = season.get('videos')
|
||||
if not isinstance(videos, list):
|
||||
continue
|
||||
for video in videos:
|
||||
content_url = video.get('contentUrl')
|
||||
if not content_url or not isinstance(content_url, compat_str):
|
||||
continue
|
||||
entries.append(
|
||||
self.url_result(
|
||||
urljoin(url, content_url),
|
||||
ie=SVTPlayIE.ie_key(),
|
||||
video_title=video.get('title')
|
||||
))
|
||||
|
||||
metadata = root.get('metaData')
|
||||
if not isinstance(metadata, dict):
|
||||
metadata = {}
|
||||
|
||||
title = metadata.get('title')
|
||||
season_name = season_name or season_slug
|
||||
|
||||
if title and season_name:
|
||||
title = '%s - %s' % (title, season_name)
|
||||
elif season_slug:
|
||||
title = season_slug
|
||||
|
||||
return self.playlist_result(
|
||||
entries, series_id, title, metadata.get('description'))
|
||||
|
@ -31,6 +31,12 @@ class Tube8IE(KeezMoviesIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?tube8\.com/embed/(?:[^/]+/)+\d+)',
|
||||
webpage)
|
||||
|
||||
def _real_extract(self, url):
|
||||
webpage, info = self._extract_info(url)
|
||||
|
||||
|
@ -10,6 +10,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
parse_duration,
|
||||
try_get,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
@ -58,14 +59,22 @@ class TVNowBaseIE(InfoExtractor):
|
||||
duration = parse_duration(info.get('duration'))
|
||||
|
||||
f = info.get('format', {})
|
||||
|
||||
thumbnails = [{
|
||||
'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id,
|
||||
}]
|
||||
thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
|
||||
if thumbnail:
|
||||
thumbnails.append({
|
||||
'url': thumbnail,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'series': f.get('title'),
|
||||
@ -77,7 +86,12 @@ class TVNowBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class TVNowIE(TVNowBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P<id>[^/]+)/(?:player|preview)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
|
||||
(?P<show_id>[^/]+)/
|
||||
(?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
|
||||
@ -99,27 +113,30 @@ class TVNowIE(TVNowBaseIE):
|
||||
}, {
|
||||
# rtl2
|
||||
'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
|
||||
'only_matching': 'True',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# rtlnitro
|
||||
'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
|
||||
'only_matching': 'True',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# superrtl
|
||||
'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
|
||||
'only_matching': 'True',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# ntv
|
||||
'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
|
||||
'only_matching': 'True',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# vox
|
||||
'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
|
||||
'only_matching': 'True',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# rtlplus
|
||||
'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
|
||||
'only_matching': 'True',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -133,8 +150,30 @@ class TVNowIE(TVNowBaseIE):
|
||||
return self._extract_video(info, display_id)
|
||||
|
||||
|
||||
class TVNowListIE(TVNowBaseIE):
|
||||
_VALID_URL = r'(?P<base_url>https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/)list/(?P<id>[^?/#&]+)$'
|
||||
class TVNowListBaseIE(TVNowBaseIE):
|
||||
_SHOW_VALID_URL = r'''(?x)
|
||||
(?P<base_url>
|
||||
https?://
|
||||
(?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
|
||||
(?P<show_id>[^/]+)
|
||||
)
|
||||
'''
|
||||
|
||||
def _extract_list_info(self, display_id, show_id):
|
||||
fields = list(self._SHOW_FIELDS)
|
||||
fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
|
||||
fields.extend(
|
||||
'formatTabs.formatTabPages.container.movies.%s' % field
|
||||
for field in self._VIDEO_FIELDS)
|
||||
return self._call_api(
|
||||
'formats/seo', display_id, query={
|
||||
'fields': ','.join(fields),
|
||||
'name': show_id + '.php'
|
||||
})
|
||||
|
||||
|
||||
class TVNowListIE(TVNowListBaseIE):
|
||||
_VALID_URL = r'%s/(?:list|jahr)/(?P<id>[^?\#&]+)' % TVNowListBaseIE._SHOW_VALID_URL
|
||||
|
||||
_SHOW_FIELDS = ('title', )
|
||||
_SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
|
||||
@ -147,38 +186,94 @@ class TVNowListIE(TVNowBaseIE):
|
||||
'title': '30 Minuten Deutschland - Aktuell',
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
}, {
|
||||
'url': 'https://www.tvnow.de/vox/ab-ins-beet/list/staffel-14',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/2018/3',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (False if TVNowIE.suitable(url)
|
||||
else super(TVNowListIE, cls).suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
base_url, show_id, season_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
fields = []
|
||||
fields.extend(self._SHOW_FIELDS)
|
||||
fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
|
||||
fields.extend(
|
||||
'formatTabs.formatTabPages.container.movies.%s' % field
|
||||
for field in self._VIDEO_FIELDS)
|
||||
|
||||
list_info = self._call_api(
|
||||
'formats/seo', season_id, query={
|
||||
'fields': ','.join(fields),
|
||||
'name': show_id + '.php'
|
||||
})
|
||||
list_info = self._extract_list_info(season_id, show_id)
|
||||
|
||||
season = next(
|
||||
season for season in list_info['formatTabs']['items']
|
||||
if season.get('seoheadline') == season_id)
|
||||
|
||||
title = '%s - %s' % (list_info['title'], season['headline'])
|
||||
title = list_info.get('title')
|
||||
headline = season.get('headline')
|
||||
if title and headline:
|
||||
title = '%s - %s' % (title, headline)
|
||||
else:
|
||||
title = headline or title
|
||||
|
||||
entries = []
|
||||
for container in season['formatTabPages']['items']:
|
||||
for info in ((container.get('container') or {}).get('movies') or {}).get('items') or []:
|
||||
items = try_get(
|
||||
container, lambda x: x['container']['movies']['items'],
|
||||
list) or []
|
||||
for info in items:
|
||||
seo_url = info.get('seoUrl')
|
||||
if not seo_url:
|
||||
continue
|
||||
video_id = info.get('id')
|
||||
entries.append(self.url_result(
|
||||
base_url + seo_url + '/player', 'TVNow', info.get('id')))
|
||||
'%s/%s/player' % (base_url, seo_url), TVNowIE.ie_key(),
|
||||
compat_str(video_id) if video_id else None))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, compat_str(season.get('id') or season_id), title)
|
||||
|
||||
|
||||
class TVNowShowIE(TVNowListBaseIE):
|
||||
_VALID_URL = TVNowListBaseIE._SHOW_VALID_URL
|
||||
|
||||
_SHOW_FIELDS = ('id', 'title', )
|
||||
_SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
|
||||
_VIDEO_FIELDS = ()
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tvnow.at/vox/ab-ins-beet',
|
||||
'info_dict': {
|
||||
'id': 'ab-ins-beet',
|
||||
'title': 'Ab ins Beet!',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
}, {
|
||||
'url': 'https://www.tvnow.at/vox/ab-ins-beet/list',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (False if TVNowIE.suitable(url) or TVNowListIE.suitable(url)
|
||||
else super(TVNowShowIE, cls).suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
base_url, show_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
list_info = self._extract_list_info(show_id, show_id)
|
||||
|
||||
entries = []
|
||||
for season_info in list_info['formatTabs']['items']:
|
||||
season_url = season_info.get('seoheadline')
|
||||
if not season_url:
|
||||
continue
|
||||
season_id = season_info.get('id')
|
||||
entries.append(self.url_result(
|
||||
'%s/list/%s' % (base_url, season_url), TVNowListIE.ie_key(),
|
||||
compat_str(season_id) if season_id else None,
|
||||
season_info.get('headline')))
|
||||
|
||||
return self.playlist_result(entries, show_id, list_info.get('title'))
|
||||
|
@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
class TwentyFourVideoIE(InfoExtractor):
|
||||
IE_NAME = '24video'
|
||||
_VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sex|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sexy?|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.24video.net/video/view/1044982',
|
||||
|
@ -28,7 +28,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class TwitchBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:(?:www|go)\.)?twitch\.tv'
|
||||
_VALID_URL_BASE = r'https?://(?:(?:www|go|m)\.)?twitch\.tv'
|
||||
|
||||
_API_BASE = 'https://api.twitch.tv'
|
||||
_USHER_BASE = 'https://usher.ttvnw.net'
|
||||
@ -226,7 +226,7 @@ class TwitchVodIE(TwitchItemBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:www|go)\.)?twitch\.tv/(?:[^/]+/v|videos)/|
|
||||
(?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v|videos)/|
|
||||
player\.twitch\.tv/\?.*?\bvideo=v
|
||||
)
|
||||
(?P<id>\d+)
|
||||
@ -279,6 +279,9 @@ class TwitchVodIE(TwitchItemBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.twitch.tv/videos/6528877',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -390,14 +393,17 @@ class TwitchProfileIE(TwitchPlaylistBaseIE):
|
||||
_VALID_URL = r'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
|
||||
_PLAYLIST_TYPE = 'profile'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.twitch.tv/vanillatv/profile',
|
||||
'info_dict': {
|
||||
'id': 'vanillatv',
|
||||
'title': 'VanillaTV',
|
||||
},
|
||||
'playlist_mincount': 412,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://m.twitch.tv/vanillatv/profile',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class TwitchVideosBaseIE(TwitchPlaylistBaseIE):
|
||||
@ -411,14 +417,17 @@ class TwitchAllVideosIE(TwitchVideosBaseIE):
|
||||
_PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive,upload,highlight'
|
||||
_PLAYLIST_TYPE = 'all videos'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://www.twitch.tv/spamfish/videos/all',
|
||||
'info_dict': {
|
||||
'id': 'spamfish',
|
||||
'title': 'Spamfish',
|
||||
},
|
||||
'playlist_mincount': 869,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://m.twitch.tv/spamfish/videos/all',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class TwitchUploadsIE(TwitchVideosBaseIE):
|
||||
@ -427,14 +436,17 @@ class TwitchUploadsIE(TwitchVideosBaseIE):
|
||||
_PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'upload'
|
||||
_PLAYLIST_TYPE = 'uploads'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://www.twitch.tv/spamfish/videos/uploads',
|
||||
'info_dict': {
|
||||
'id': 'spamfish',
|
||||
'title': 'Spamfish',
|
||||
},
|
||||
'playlist_mincount': 0,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://m.twitch.tv/spamfish/videos/uploads',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class TwitchPastBroadcastsIE(TwitchVideosBaseIE):
|
||||
@ -443,14 +455,17 @@ class TwitchPastBroadcastsIE(TwitchVideosBaseIE):
|
||||
_PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive'
|
||||
_PLAYLIST_TYPE = 'past broadcasts'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://www.twitch.tv/spamfish/videos/past-broadcasts',
|
||||
'info_dict': {
|
||||
'id': 'spamfish',
|
||||
'title': 'Spamfish',
|
||||
},
|
||||
'playlist_mincount': 0,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://m.twitch.tv/spamfish/videos/past-broadcasts',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class TwitchHighlightsIE(TwitchVideosBaseIE):
|
||||
@ -459,14 +474,17 @@ class TwitchHighlightsIE(TwitchVideosBaseIE):
|
||||
_PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'highlight'
|
||||
_PLAYLIST_TYPE = 'highlights'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://www.twitch.tv/spamfish/videos/highlights',
|
||||
'info_dict': {
|
||||
'id': 'spamfish',
|
||||
'title': 'Spamfish',
|
||||
},
|
||||
'playlist_mincount': 805,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://m.twitch.tv/spamfish/videos/highlights',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class TwitchStreamIE(TwitchBaseIE):
|
||||
@ -474,7 +492,7 @@ class TwitchStreamIE(TwitchBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:www|go)\.)?twitch\.tv/|
|
||||
(?:(?:www|go|m)\.)?twitch\.tv/|
|
||||
player\.twitch\.tv/\?.*?\bchannel=
|
||||
)
|
||||
(?P<id>[^/#?]+)
|
||||
@ -508,6 +526,9 @@ class TwitchStreamIE(TwitchBaseIE):
|
||||
}, {
|
||||
'url': 'https://go.twitch.tv/food',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://m.twitch.tv/food',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
|
@ -16,7 +16,7 @@ from ..utils import (
|
||||
class VideaIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
videa\.hu/
|
||||
videa(?:kid)?\.hu/
|
||||
(?:
|
||||
videok/(?:[^/]+/)*[^?#&]+-|
|
||||
player\?.*?\bv=|
|
||||
@ -31,7 +31,7 @@ class VideaIE(InfoExtractor):
|
||||
'id': '8YfIAjxwWGwT8HVQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Az őrült kígyász 285 kígyót enged szabadon',
|
||||
'thumbnail': 'http://videa.hu/static/still/1.4.1.1007274.1204470.3',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'duration': 21,
|
||||
},
|
||||
}, {
|
||||
@ -43,6 +43,15 @@ class VideaIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://videakid.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://videakid.hu/player?v=8YfIAjxwWGwT8HVQ',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://videakid.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
|
@ -12,7 +12,7 @@ import time
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urlparse,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
@ -39,11 +39,11 @@ class VRVBaseIE(InfoExtractor):
|
||||
data = json.dumps(data).encode()
|
||||
headers['Content-Type'] = 'application/json'
|
||||
method = 'POST' if data else 'GET'
|
||||
base_string = '&'.join([method, compat_urlparse.quote(base_url, ''), compat_urlparse.quote(encoded_query, '')])
|
||||
base_string = '&'.join([method, compat_urllib_parse.quote(base_url, ''), compat_urllib_parse.quote(encoded_query, '')])
|
||||
oauth_signature = base64.b64encode(hmac.new(
|
||||
(self._API_PARAMS['oAuthSecret'] + '&').encode('ascii'),
|
||||
base_string.encode(), hashlib.sha1).digest()).decode()
|
||||
encoded_query += '&oauth_signature=' + compat_urlparse.quote(oauth_signature, '')
|
||||
encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '')
|
||||
return self._download_json(
|
||||
'?'.join([base_url, encoded_query]), video_id,
|
||||
note='Downloading %s JSON metadata' % note, headers=headers, data=data)
|
||||
|
@ -118,6 +118,15 @@ class XFileShareIE(InfoExtractor):
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [
|
||||
mobj.group('url')
|
||||
for mobj in re.finditer(
|
||||
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1'
|
||||
% '|'.join(site for site in list(zip(*XFileShareIE._SITES))[0]),
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
@ -58,7 +58,9 @@ class XVideosIE(InfoExtractor):
|
||||
group='title') or self._og_search_title(webpage)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'url_bigthumb=(.+?)&', webpage, 'thumbnail', fatal=False)
|
||||
(r'setThumbUrl\(\s*(["\'])(?P<thumbnail>(?:(?!\1).)+)\1',
|
||||
r'url_bigthumb=(?P<thumbnail>.+?)&'),
|
||||
webpage, 'thumbnail', fatal=False, group='thumbnail')
|
||||
duration = int_or_none(self._og_search_property(
|
||||
'duration', webpage, default=None)) or parse_duration(
|
||||
self._search_regex(
|
||||
|
@ -154,7 +154,7 @@ class YoukuIE(InfoExtractor):
|
||||
# request basic data
|
||||
basic_data_params = {
|
||||
'vid': video_id,
|
||||
'ccode': '0507',
|
||||
'ccode': '0590',
|
||||
'client_ip': '192.168.1.1',
|
||||
'utid': cna,
|
||||
'client_ts': time.time() / 1000,
|
||||
|
@ -676,7 +676,8 @@ def parseOpts(overrideArguments=None):
|
||||
filesystem.add_option(
|
||||
'-a', '--batch-file',
|
||||
dest='batchfile', metavar='FILE',
|
||||
help='File containing URLs to download (\'-\' for stdin)')
|
||||
help="File containing URLs to download ('-' for stdin), one URL per line. "
|
||||
"Lines starting with '#', ';' or ']' are considered as comments and ignored.")
|
||||
filesystem.add_option(
|
||||
'--id', default=False,
|
||||
action='store_true', dest='useid', help='Use only video ID in file name')
|
||||
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2018.03.14'
|
||||
__version__ = '2018.04.09'
|
||||
|
Loading…
x
Reference in New Issue
Block a user