1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-10 20:07:17 +08:00

Merge branch 'master' of https://github.com/ytdl-org/youtube-dl into ytdl-org-master

# Conflicts:
#	youtube_dl/extractor/vk.py
This commit is contained in:
Avichai Cohen 2019-10-29 12:56:14 +02:00
commit 2e7b96ec6e
27 changed files with 658 additions and 690 deletions

View File

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support - [ ] I'm reporting a broken site support
- [ ] I've verified that I'm running youtube-dl version **2019.10.22** - [ ] I've verified that I'm running youtube-dl version **2019.10.29**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar issues including closed ones - [ ] I've searched the bugtracker for similar issues including closed ones
@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2019.10.22 [debug] youtube-dl version 2019.10.29
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -19,7 +19,7 @@ labels: 'site-support-request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a new site support request - [ ] I'm reporting a new site support request
- [ ] I've verified that I'm running youtube-dl version **2019.10.22** - [ ] I've verified that I'm running youtube-dl version **2019.10.29**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've checked that none of provided URLs violate any copyrights
- [ ] I've searched the bugtracker for similar site support requests including closed ones - [ ] I've searched the bugtracker for similar site support requests including closed ones

View File

@ -18,13 +18,13 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a site feature request - [ ] I'm reporting a site feature request
- [ ] I've verified that I'm running youtube-dl version **2019.10.22** - [ ] I've verified that I'm running youtube-dl version **2019.10.29**
- [ ] I've searched the bugtracker for similar site feature requests including closed ones - [ ] I've searched the bugtracker for similar site feature requests including closed ones

View File

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support issue - [ ] I'm reporting a broken site support issue
- [ ] I've verified that I'm running youtube-dl version **2019.10.22** - [ ] I've verified that I'm running youtube-dl version **2019.10.29**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar bug reports including closed ones - [ ] I've searched the bugtracker for similar bug reports including closed ones
@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2019.10.22 [debug] youtube-dl version 2019.10.29
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -19,13 +19,13 @@ labels: 'request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a feature request - [ ] I'm reporting a feature request
- [ ] I've verified that I'm running youtube-dl version **2019.10.22** - [ ] I've verified that I'm running youtube-dl version **2019.10.29**
- [ ] I've searched the bugtracker for similar feature requests including closed ones - [ ] I've searched the bugtracker for similar feature requests including closed ones

View File

@ -1,3 +1,51 @@
version 2019.10.29
Core
* [utils] Actualize major IPv4 address blocks per country
Extractors
+ [go] Add support for abc.com and freeform.com (#22823, #22864)
+ [mtv] Add support for mtvjapan.com
* [mtv] Fix extraction for mtv.de (#22113)
* [videodetective] Fix extraction
* [internetvideoarchive] Fix extraction
* [nbcnews] Fix extraction (#12569, #12576, #21703, #21923)
- [hark] Remove extractor
- [tutv] Remove extractor
- [learnr] Remove extractor
- [macgamestore] Remove extractor
* [la7] Update Kaltura service URL (#22358)
* [thesun] Fix extraction (#16966)
- [makertv] Remove extractor
+ [tenplay] Add support for 10play.com.au (#21446)
* [soundcloud] Improve extraction
* Improve format extraction (#22123)
+ Extract uploader_id and uploader_url (#21916)
+ Extract all known thumbnails (#19071, #20659)
* Fix extraction for private playlists (#20976)
+ Add support for playlist embeds (#20976)
* Skip preview formats (#22806)
* [dplay] Improve extraction
+ Add support for dplay.fi, dplay.jp and es.dplay.com (#16969)
* Fix it.dplay.com extraction (#22826)
+ Extract creator, tags and thumbnails
* Handle playback API call errors
+ [discoverynetworks] Add support for dplay.co.uk
* [vk] Improve extraction
+ Add support for Odnoklassniki embeds
+ Extract more videos from user lists (#4470)
+ Fix wall post audio extraction (#18332)
* Improve error detection (#22568)
+ [odnoklassniki] Add support for embeds
* [puhutv] Improve extraction
* Fix subtitles extraction
* Transform HLS URLs to HTTP URLs
* Improve metadata extraction
* [ceskatelevize] Skip DRM media
+ [facebook] Extract subtitles (#22777)
* [globo] Handle alternative hash signing method
version 2019.10.22 version 2019.10.22
Core Core

View File

@ -232,7 +232,6 @@
- **DouyuShow** - **DouyuShow**
- **DouyuTV**: 斗鱼 - **DouyuTV**: 斗鱼
- **DPlay** - **DPlay**
- **DPlayIt**
- **DRBonanza** - **DRBonanza**
- **Dropbox** - **Dropbox**
- **DrTuber** - **DrTuber**
@ -339,7 +338,6 @@
- **Goshgay** - **Goshgay**
- **GPUTechConf** - **GPUTechConf**
- **Groupon** - **Groupon**
- **Hark**
- **hbo** - **hbo**
- **HearThisAt** - **HearThisAt**
- **Heise** - **Heise**
@ -432,7 +430,6 @@
- **Lcp** - **Lcp**
- **LcpPlay** - **LcpPlay**
- **Le**: 乐视网 - **Le**: 乐视网
- **Learnr**
- **Lecture2Go** - **Lecture2Go**
- **Lecturio** - **Lecturio**
- **LecturioCourse** - **LecturioCourse**
@ -466,11 +463,9 @@
- **lynda**: lynda.com videos - **lynda**: lynda.com videos
- **lynda:course**: lynda.com online courses - **lynda:course**: lynda.com online courses
- **m6** - **m6**
- **macgamestore**: MacGameStore trailers
- **mailru**: Видео@Mail.Ru - **mailru**: Видео@Mail.Ru
- **mailru:music**: Музыка@Mail.Ru - **mailru:music**: Музыка@Mail.Ru
- **mailru:music:search**: Музыка@Mail.Ru - **mailru:music:search**: Музыка@Mail.Ru
- **MakerTV**
- **MallTV** - **MallTV**
- **mangomolo:live** - **mangomolo:live**
- **mangomolo:video** - **mangomolo:video**
@ -526,8 +521,8 @@
- **mtg**: MTG services - **mtg**: MTG services
- **mtv** - **mtv**
- **mtv.de** - **mtv.de**
- **mtv81**
- **mtv:video** - **mtv:video**
- **mtvjapan**
- **mtvservices:embedded** - **mtvservices:embedded**
- **MuenchenTV**: münchen.tv - **MuenchenTV**: münchen.tv
- **MusicPlayOn** - **MusicPlayOn**
@ -815,6 +810,7 @@
- **soundcloud:set** - **soundcloud:set**
- **soundcloud:trackstation** - **soundcloud:trackstation**
- **soundcloud:user** - **soundcloud:user**
- **SoundcloudEmbed**
- **soundgasm** - **soundgasm**
- **soundgasm:profile** - **soundgasm:profile**
- **southpark.cc.com** - **southpark.cc.com**
@ -887,6 +883,7 @@
- **TeleTask** - **TeleTask**
- **Telewebion** - **Telewebion**
- **TennisTV** - **TennisTV**
- **TenPlay**
- **TF1** - **TF1**
- **TFO** - **TFO**
- **TheIntercept** - **TheIntercept**
@ -925,7 +922,6 @@
- **tunein:topic** - **tunein:topic**
- **TunePk** - **TunePk**
- **Turbo** - **Turbo**
- **Tutv**
- **tv.dfb.de** - **tv.dfb.de**
- **TV2** - **TV2**
- **tv2.hu** - **tv2.hu**

View File

@ -367,7 +367,10 @@ from .fourtube import (
FuxIE, FuxIE,
) )
from .fox import FOXIE from .fox import FOXIE
from .fox9 import FOX9IE from .fox9 import (
FOX9IE,
FOX9NewsIE,
)
from .foxgay import FoxgayIE from .foxgay import FoxgayIE
from .foxnews import ( from .foxnews import (
FoxNewsIE, FoxNewsIE,
@ -428,7 +431,6 @@ from .googlesearch import GoogleSearchIE
from .goshgay import GoshgayIE from .goshgay import GoshgayIE
from .gputechconf import GPUTechConfIE from .gputechconf import GPUTechConfIE
from .groupon import GrouponIE from .groupon import GrouponIE
from .hark import HarkIE
from .hbo import HBOIE from .hbo import HBOIE
from .hearthisat import HearThisAtIE from .hearthisat import HearThisAtIE
from .heise import HeiseIE from .heise import HeiseIE
@ -546,7 +548,6 @@ from .lcp import (
LcpPlayIE, LcpPlayIE,
LcpIE, LcpIE,
) )
from .learnr import LearnrIE
from .lecture2go import Lecture2GoIE from .lecture2go import Lecture2GoIE
from .lecturio import ( from .lecturio import (
LecturioIE, LecturioIE,
@ -598,13 +599,11 @@ from .lynda import (
LyndaCourseIE LyndaCourseIE
) )
from .m6 import M6IE from .m6 import M6IE
from .macgamestore import MacGameStoreIE
from .mailru import ( from .mailru import (
MailRuIE, MailRuIE,
MailRuMusicIE, MailRuMusicIE,
MailRuMusicSearchIE, MailRuMusicSearchIE,
) )
from .makertv import MakerTVIE
from .malltv import MallTVIE from .malltv import MallTVIE
from .mangomolo import ( from .mangomolo import (
MangomoloVideoIE, MangomoloVideoIE,
@ -670,7 +669,7 @@ from .mtv import (
MTVVideoIE, MTVVideoIE,
MTVServicesEmbeddedIE, MTVServicesEmbeddedIE,
MTVDEIE, MTVDEIE,
MTV81IE, MTVJapanIE,
) )
from .muenchentv import MuenchenTVIE from .muenchentv import MuenchenTVIE
from .musicplayon import MusicPlayOnIE from .musicplayon import MusicPlayOnIE
@ -1033,6 +1032,7 @@ from .snotr import SnotrIE
from .sohu import SohuIE from .sohu import SohuIE
from .sonyliv import SonyLIVIE from .sonyliv import SonyLIVIE
from .soundcloud import ( from .soundcloud import (
SoundcloudEmbedIE,
SoundcloudIE, SoundcloudIE,
SoundcloudSetIE, SoundcloudSetIE,
SoundcloudUserIE, SoundcloudUserIE,
@ -1132,6 +1132,7 @@ from .telequebec import (
from .teletask import TeleTaskIE from .teletask import TeleTaskIE
from .telewebion import TelewebionIE from .telewebion import TelewebionIE
from .tennistv import TennisTVIE from .tennistv import TennisTVIE
from .tenplay import TenPlayIE
from .testurl import TestURLIE from .testurl import TestURLIE
from .tf1 import TF1IE from .tf1 import TF1IE
from .tfo import TFOIE from .tfo import TFOIE
@ -1184,7 +1185,6 @@ from .tunein import (
) )
from .tunepk import TunePkIE from .tunepk import TunePkIE
from .turbo import TurboIE from .turbo import TurboIE
from .tutv import TutvIE
from .tv2 import ( from .tv2 import (
TV2IE, TV2IE,
TV2ArticleIE, TV2ArticleIE,

View File

@ -1,13 +1,23 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
from .anvato import AnvatoIE from .common import InfoExtractor
class FOX9IE(AnvatoIE): class FOX9IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?fox9\.com/(?:[^/]+/)+(?P<id>\d+)-story' _VALID_URL = r'https?://(?:www\.)?fox9\.com/video/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.fox9.com/news/215123287-story', def _real_extract(self, url):
video_id = self._match_id(url)
return self.url_result(
'anvato:anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b:' + video_id,
'Anvato', video_id)
class FOX9NewsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?fox9\.com/news/(?P<id>[^/?&#]+)'
_TEST = {
'url': 'https://www.fox9.com/news/black-bear-in-tree-draws-crowd-in-downtown-duluth-minnesota',
'md5': 'd6e1b2572c3bab8a849c9103615dd243', 'md5': 'd6e1b2572c3bab8a849c9103615dd243',
'info_dict': { 'info_dict': {
'id': '314473', 'id': '314473',
@ -21,22 +31,11 @@ class FOX9IE(AnvatoIE):
'categories': ['News', 'Sports'], 'categories': ['News', 'Sports'],
'tags': ['news', 'video'], 'tags': ['news', 'video'],
}, },
}, { }
'url': 'http://www.fox9.com/news/investigators/214070684-story',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
webpage = self._download_webpage(url, video_id) anvato_id = self._search_regex(
r'anvatoId\s*:\s*[\'"](\d+)', webpage, 'anvato id')
video_id = self._parse_json( return self.url_result('https://www.fox9.com/video/' + anvato_id, 'FOX9')
self._search_regex(
r"this\.videosJson\s*=\s*'(\[.+?\])';",
webpage, 'anvato playlist'),
video_id)[0]['video']
return self._get_anvato_videos(
'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b',
video_id)

View File

@ -80,7 +80,7 @@ from .theplatform import ThePlatformIE
from .kaltura import KalturaIE from .kaltura import KalturaIE
from .eagleplatform import EaglePlatformIE from .eagleplatform import EaglePlatformIE
from .facebook import FacebookIE from .facebook import FacebookIE
from .soundcloud import SoundcloudIE from .soundcloud import SoundcloudEmbedIE
from .tunein import TuneInBaseIE from .tunein import TuneInBaseIE
from .vbox7 import Vbox7IE from .vbox7 import Vbox7IE
from .dbtv import DBTVIE from .dbtv import DBTVIE
@ -2749,9 +2749,9 @@ class GenericIE(InfoExtractor):
return self.url_result(myvi_url) return self.url_result(myvi_url)
# Look for embedded soundcloud player # Look for embedded soundcloud player
soundcloud_urls = SoundcloudIE._extract_urls(webpage) soundcloud_urls = SoundcloudEmbedIE._extract_urls(webpage)
if soundcloud_urls: if soundcloud_urls:
return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key()) return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML)
# Look for tunein player # Look for tunein player
tunein_urls = TuneInBaseIE._extract_urls(webpage) tunein_urls = TuneInBaseIE._extract_urls(webpage)

View File

@ -40,8 +40,17 @@ class GoIE(AdobePassIE):
'resource_id': 'Disney', 'resource_id': 'Disney',
} }
} }
_VALID_URL = r'https?://(?:(?:(?P<sub_domain>%s)\.)?go|(?P<sub_domain_2>disneynow))\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\ _VALID_URL = r'''(?x)
% '|'.join(list(_SITE_INFO.keys()) + ['disneynow']) https?://
(?:
(?:(?P<sub_domain>%s)\.)?go|
(?P<sub_domain_2>abc|freeform|disneynow)
)\.com/
(?:
(?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
(?:[^/]+/)*(?P<display_id>[^/?\#]+)
)
''' % '|'.join(list(_SITE_INFO.keys()))
_TESTS = [{ _TESTS = [{
'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643', 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
'info_dict': { 'info_dict': {
@ -54,6 +63,7 @@ class GoIE(AdobePassIE):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'skip': 'This content is no longer available.',
}, { }, {
'url': 'http://watchdisneyxd.go.com/doraemon', 'url': 'http://watchdisneyxd.go.com/doraemon',
'info_dict': { 'info_dict': {
@ -61,6 +71,34 @@ class GoIE(AdobePassIE):
'id': 'SH55574025', 'id': 'SH55574025',
}, },
'playlist_mincount': 51, 'playlist_mincount': 51,
}, {
'url': 'http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood',
'info_dict': {
'id': 'VDKA3609139',
'ext': 'mp4',
'title': 'This Guilty Blood',
'description': 'md5:f18e79ad1c613798d95fdabfe96cd292',
'age_limit': 14,
},
'params': {
'geo_bypass_ip_block': '3.244.239.0/24',
# m3u8 download
'skip_download': True,
},
}, {
'url': 'https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet',
'info_dict': {
'id': 'VDKA13435179',
'ext': 'mp4',
'title': 'The Bet',
'description': 'md5:c66de8ba2e92c6c5c113c3ade84ab404',
'age_limit': 14,
},
'params': {
'geo_bypass_ip_block': '3.244.239.0/24',
# m3u8 download
'skip_download': True,
},
}, { }, {
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding', 'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
'only_matching': True, 'only_matching': True,
@ -95,10 +133,13 @@ class GoIE(AdobePassIE):
if not video_id or not site_info: if not video_id or not site_info:
webpage = self._download_webpage(url, display_id or video_id) webpage = self._download_webpage(url, display_id or video_id)
video_id = self._search_regex( video_id = self._search_regex(
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'" (
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood # There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
default=video_id) r'data-video-id=["\']*(VDKA\w+)',
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
), webpage, 'video id', default=video_id)
if not site_info: if not site_info:
brand = self._search_regex( brand = self._search_regex(
(r'data-brand=\s*["\']\s*(\d+)', (r'data-brand=\s*["\']\s*(\d+)',

View File

@ -1,33 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
# Extractor for hark.com clip pages. The clip id is the shortest run of
# characters after "/clips/" that is followed by a hyphen (see _VALID_URL).
class HarkIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?hark\.com/clips/(?P<id>.+?)-.+'
    _TEST = {
        'url': 'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
        'md5': '6783a58491b47b92c7c1af5a77d4cbee',
        'info_dict': {
            'id': 'mmbzyhkgny',
            'ext': 'mp3',
            'title': 'Obama: \'Beyond The Afghan Theater, We Only Target Al Qaeda\' on May 23, 2013',
            'description': 'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.',
            'duration': 11,
        }
    }

    def _real_extract(self, url):
        # Build the info dict from the clip's JSON document, which is
        # requested at /clips/<id>.json on the same host.
        clip_id = self._match_id(url)
        json_url = 'http://www.hark.com/clips/%s.json' % clip_id
        clip = self._download_json(json_url, clip_id)

        # 'url' and 'name' are looked up with [] and therefore must be
        # present; a missing key raises KeyError.
        info = {
            'id': clip_id,
            'url': clip['url'],
            'title': clip['name'],
        }
        # The remaining fields are optional and default to None.
        optional_fields = (
            ('description', 'description'),
            ('thumbnail', 'image_original'),
            ('duration', 'duration'),
        )
        for info_key, clip_key in optional_fields:
            info[info_key] = clip.get(clip_key)
        return info

View File

@ -1,15 +1,13 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_parse_qs, compat_parse_qs,
compat_urlparse, compat_urlparse,
) )
from ..utils import (
determine_ext,
int_or_none,
xpath_text,
)
class InternetVideoArchiveIE(InfoExtractor): class InternetVideoArchiveIE(InfoExtractor):
@ -20,7 +18,7 @@ class InternetVideoArchiveIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '194487', 'id': '194487',
'ext': 'mp4', 'ext': 'mp4',
'title': 'KICK-ASS 2', 'title': 'Kick-Ass 2',
'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a', 'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
}, },
'params': { 'params': {
@ -33,68 +31,34 @@ class InternetVideoArchiveIE(InfoExtractor):
def _build_json_url(query): def _build_json_url(query):
return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query
@staticmethod
def _build_xml_url(query):
return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query
def _real_extract(self, url): def _real_extract(self, url):
query = compat_urlparse.urlparse(url).query query = compat_parse_qs(compat_urlparse.urlparse(url).query)
query_dic = compat_parse_qs(query) video_id = query['publishedid'][0]
video_id = query_dic['publishedid'][0] data = self._download_json(
'https://video.internetvideoarchive.net/videojs7/videojs7.ivasettings.ashx',
if '/player/' in url: video_id, data=json.dumps({
configuration = self._download_json(url, video_id) 'customerid': query['customerid'][0],
'publishedid': video_id,
# There are multiple videos in the playlist whlie only the first one }).encode())
# matches the video played in browsers title = data['Title']
video_info = configuration['playlist'][0] formats = self._extract_m3u8_formats(
title = video_info['title'] data['VideoUrl'], video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False)
formats = [] file_url = formats[0]['url']
for source in video_info['sources']: if '.ism/' in file_url:
file_url = source['file'] replace_url = lambda x: re.sub(r'\.ism/[^?]+', '.ism/' + x, file_url)
if determine_ext(file_url) == 'm3u8': formats.extend(self._extract_f4m_formats(
m3u8_formats = self._extract_m3u8_formats( replace_url('.f4m'), video_id, f4m_id='hds', fatal=False))
file_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) formats.extend(self._extract_mpd_formats(
if m3u8_formats: replace_url('.mpd'), video_id, mpd_id='dash', fatal=False))
formats.extend(m3u8_formats) formats.extend(self._extract_ism_formats(
file_url = m3u8_formats[0]['url'] replace_url('Manifest'), video_id, ism_id='mss', fatal=False))
formats.extend(self._extract_f4m_formats( self._sort_formats(formats)
file_url.replace('.m3u8', '.f4m'),
video_id, f4m_id='hds', fatal=False))
formats.extend(self._extract_mpd_formats(
file_url.replace('.m3u8', '.mpd'),
video_id, mpd_id='dash', fatal=False))
else:
a_format = {
'url': file_url,
}
if source.get('label') and source['label'][-4:] == ' kbs':
tbr = int_or_none(source['label'][:-4])
a_format.update({
'tbr': tbr,
'format_id': 'http-%d' % tbr,
})
formats.append(a_format)
self._sort_formats(formats)
description = video_info.get('description')
thumbnail = video_info.get('image')
else:
configuration = self._download_xml(url, video_id)
formats = [{
'url': xpath_text(configuration, './file', 'file URL', fatal=True),
}]
thumbnail = xpath_text(configuration, './image', 'thumbnail')
title = 'InternetVideoArchive video %s' % video_id
description = None
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'formats': formats, 'formats': formats,
'thumbnail': thumbnail, 'thumbnail': data.get('PosterUrl'),
'description': description, 'description': data.get('Description'),
} }

View File

@ -20,7 +20,7 @@ class LA7IE(InfoExtractor):
'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722', 'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722',
'md5': '8b613ffc0c4bf9b9e377169fc19c214c', 'md5': '8b613ffc0c4bf9b9e377169fc19c214c',
'info_dict': { 'info_dict': {
'id': 'inccool8-02-10-2015-163722', 'id': '0_42j6wd36',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Inc.Cool8', 'title': 'Inc.Cool8',
'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico', 'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico',
@ -57,7 +57,7 @@ class LA7IE(InfoExtractor):
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': smuggle_url('kaltura:103:%s' % player_data['vid'], { 'url': smuggle_url('kaltura:103:%s' % player_data['vid'], {
'service_url': 'http://kdam.iltrovatore.it', 'service_url': 'http://nkdam.iltrovatore.it',
}), }),
'id': video_id, 'id': video_id,
'title': player_data['title'], 'title': player_data['title'],

View File

@ -1,33 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class LearnrIE(InfoExtractor):
    """Extractor for learnr.pro video pages.

    The page is only scraped for the embedded YouTube video id; the actual
    extraction is delegated through a ``url_transparent`` result.
    """

    _VALID_URL = r'https?://(?:www\.)?learnr\.pro/view/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.learnr.pro/view/video/51624-web-development-tutorial-for-beginners-1-how-to-build-webpages-with-html-css-javascript',
        'md5': '3719fdf0a68397f49899e82c308a89de',
        'info_dict': {
            'id': '51624',
            'ext': 'mp4',
            'title': 'Web Development Tutorial for Beginners (#1) - How to build webpages with HTML, CSS, Javascript',
            'description': 'md5:b36dbfa92350176cdf12b4d388485503',
            'uploader': 'LearnCode.academy',
            'uploader_id': 'learncodeacademy',
            'upload_date': '20131021',
        },
        'add_ie': ['Youtube'],
    }

    def _real_extract(self, url):
        """Return a url_transparent result for the video embedded in *url*.

        The numeric id from the page URL is kept as the result id, while the
        ``videoId`` value found in the page source becomes the result URL.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # The embedded player is configured inline as: videoId: '<id>'
        embedded_id = self._search_regex(
            r"videoId\s*:\s*'([^']+)'", page, 'youtube id')

        return {
            '_type': 'url_transparent',
            'url': embedded_id,
            'id': video_id,
        }

View File

@ -1,42 +0,0 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import ExtractorError
class MacGameStoreIE(InfoExtractor):
    """Extractor for trailers served by the MacGameStore media viewer."""

    IE_NAME = 'macgamestore'
    IE_DESC = 'MacGameStore trailers'
    _VALID_URL = r'https?://(?:www\.)?macgamestore\.com/mediaviewer\.php\?trailer=(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450',
        'md5': '8649b8ea684b6666b4c5be736ecddc61',
        'info_dict': {
            'id': '2450',
            'ext': 'm4v',
            'title': 'Crow',
        }
    }

    def _real_extract(self, url):
        """Scrape the trailer page for its title and direct media URL.

        Raises:
            ExtractorError: (expected) when the page reports missing media.
        """
        trailer_id = self._match_id(url)
        page = self._download_webpage(
            url, trailer_id, 'Downloading trailer page')

        # The site answers with a regular page containing this marker when
        # the requested trailer is not available.
        trailer_missing = '>Missing Media<' in page
        if trailer_missing:
            raise ExtractorError(
                'Trailer %s does not exist' % trailer_id, expected=True)

        # Title is looked up before the media URL, as in the page order.
        title = self._html_search_regex(
            r'<title>MacGameStore: (.*?) Trailer</title>', page, 'title')
        media_url = self._html_search_regex(
            r'(?s)<div\s+id="video-player".*?href="([^"]+)"\s*>',
            page, 'video URL')

        info = {
            'id': trailer_id,
            'url': media_url,
            'title': title,
        }
        return info

View File

@ -1,32 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class MakerTVIE(InfoExtractor):
    """Extractor for maker.tv video pages and makerplayer.com embeds.

    The page is scraped for a JW Platform id and extraction is then handed
    over to the ``JWPlatform`` extractor.
    """

    _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer\.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
    _TEST = {
        'url': 'http://www.maker.tv/video/Fh3QgymL9gsc',
        'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
        'info_dict': {
            'id': 'Fh3QgymL9gsc',
            'ext': 'mp4',
            'title': 'Maze Runner: The Scorch Trials Official Movie Review',
            'description': 'md5:11ff3362d7ef1d679fdb649f6413975a',
            'upload_date': '20150918',
            'timestamp': 1442549540,
        }
    }

    def _real_extract(self, url):
        """Return a url_transparent result targeting the JWPlatform extractor."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # The attribute appears as either jwid="..." or jw_id="..."
        jw_id = self._search_regex(
            r'jw_?id="([^"]+)"', page, 'jwplatform id')
        target_url = 'jwplatform:%s' % jw_id

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': target_url,
            'ie_key': 'JWPlatform',
        }

View File

@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
@ -349,33 +350,29 @@ class MTVIE(MTVServicesInfoExtractor):
}] }]
class MTV81IE(InfoExtractor): class MTVJapanIE(MTVServicesInfoExtractor):
IE_NAME = 'mtv81' IE_NAME = 'mtvjapan'
_VALID_URL = r'https?://(?:www\.)?mtv81\.com/videos/(?P<id>[^/?#.]+)' _VALID_URL = r'https?://(?:www\.)?mtvjapan\.com/videos/(?P<id>[0-9a-z]+)'
_TEST = { _TEST = {
'url': 'http://www.mtv81.com/videos/artist-to-watch/the-godfather-of-japanese-hip-hop-segment-1/', 'url': 'http://www.mtvjapan.com/videos/prayht/fresh-info-cadillac-escalade',
'md5': '1edbcdf1e7628e414a8c5dcebca3d32b',
'info_dict': { 'info_dict': {
'id': '5e14040d-18a4-47c4-a582-43ff602de88e', 'id': 'bc01da03-6fe5-4284-8880-f291f4e368f5',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer', 'title': '【Fresh Info】Cadillac ESCALADE Sport Edition',
'description': '"Unlocking the Truth" premieres August 17th at 11/10c.', },
'timestamp': 1468846800, 'params': {
'upload_date': '20160718', 'skip_download': True,
}, },
} }
_GEO_COUNTRIES = ['JP']
_FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
def _extract_mgid(self, webpage): def _get_feed_query(self, uri):
return self._search_regex( return {
r'getTheVideo\((["\'])(?P<id>mgid:.+?)\1', webpage, 'arcEp': 'mtvjapan.com',
'mgid', group='id') 'mgid': uri,
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
mgid = self._extract_mgid(webpage)
return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid)
class MTVVideoIE(MTVServicesInfoExtractor): class MTVVideoIE(MTVServicesInfoExtractor):
@ -425,14 +422,14 @@ class MTVVideoIE(MTVServicesInfoExtractor):
class MTVDEIE(MTVServicesInfoExtractor): class MTVDEIE(MTVServicesInfoExtractor):
IE_NAME = 'mtv.de' IE_NAME = 'mtv.de'
_VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:artists|shows|news)/(?:[^/]+/)*(?P<id>\d+)-[^/#?]+/*(?:[#?].*)?$' _VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:musik/videoclips|folgen|news)/(?P<id>[0-9a-z]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum', 'url': 'http://www.mtv.de/musik/videoclips/2gpnv7/Traum',
'info_dict': { 'info_dict': {
'id': 'music_video-a50bc5f0b3aa4b3190aa', 'id': 'd5d472bc-f5b7-11e5-bffd-a4badb20dab5',
'ext': 'flv', 'ext': 'mp4',
'title': 'MusicVideo_cro-traum', 'title': 'Traum',
'description': 'Cro - Traum', 'description': 'Traum',
}, },
'params': { 'params': {
# rtmp download # rtmp download
@ -441,11 +438,12 @@ class MTVDEIE(MTVServicesInfoExtractor):
'skip': 'Blocked at Travis CI', 'skip': 'Blocked at Travis CI',
}, { }, {
# mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97) # mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97)
'url': 'http://www.mtv.de/shows/933-teen-mom-2/staffeln/5353/folgen/63565-enthullungen', 'url': 'http://www.mtv.de/folgen/6b1ylu/teen-mom-2-enthuellungen-S5-F1',
'info_dict': { 'info_dict': {
'id': 'local_playlist-f5ae778b9832cc837189', 'id': '1e5a878b-31c5-11e7-a442-0e40cf2fc285',
'ext': 'flv', 'ext': 'mp4',
'title': 'Episode_teen-mom-2_shows_season-5_episode-1_full-episode_part1', 'title': 'Teen Mom 2',
'description': 'md5:dc65e357ef7e1085ed53e9e9d83146a7',
}, },
'params': { 'params': {
# rtmp download # rtmp download
@ -453,7 +451,7 @@ class MTVDEIE(MTVServicesInfoExtractor):
}, },
'skip': 'Blocked at Travis CI', 'skip': 'Blocked at Travis CI',
}, { }, {
'url': 'http://www.mtv.de/news/77491-mtv-movies-spotlight-pixels-teil-3', 'url': 'http://www.mtv.de/news/glolix/77491-mtv-movies-spotlight--pixels--teil-3',
'info_dict': { 'info_dict': {
'id': 'local_playlist-4e760566473c4c8c5344', 'id': 'local_playlist-4e760566473c4c8c5344',
'ext': 'mp4', 'ext': 'mp4',
@ -466,25 +464,11 @@ class MTVDEIE(MTVServicesInfoExtractor):
}, },
'skip': 'Das Video kann zur Zeit nicht abgespielt werden.', 'skip': 'Das Video kann zur Zeit nicht abgespielt werden.',
}] }]
_GEO_COUNTRIES = ['DE']
_FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
def _real_extract(self, url): def _get_feed_query(self, uri):
video_id = self._match_id(url) return {
'arcEp': 'mtv.de',
webpage = self._download_webpage(url, video_id) 'mgid': uri,
}
playlist = self._parse_json(
self._search_regex(
r'window\.pagePlaylist\s*=\s*(\[.+?\]);\n', webpage, 'page playlist'),
video_id)
def _mrss_url(item):
return item['mrss'] + item.get('mrssvars', '')
# news pages contain single video in playlist with different id
if len(playlist) == 1:
return self._get_videos_info_from_url(_mrss_url(playlist[0]), video_id)
for item in playlist:
item_id = item.get('id')
if item_id and compat_str(item_id) == video_id:
return self._get_videos_info_from_url(_mrss_url(item), video_id)

View File

@ -9,9 +9,13 @@ from .theplatform import ThePlatformIE
from .adobepass import AdobePassIE from .adobepass import AdobePassIE
from ..compat import compat_urllib_parse_unquote from ..compat import compat_urllib_parse_unquote
from ..utils import ( from ..utils import (
smuggle_url,
update_url_query,
int_or_none, int_or_none,
js_to_json,
parse_duration,
smuggle_url,
try_get,
unified_timestamp,
update_url_query,
) )
@ -285,13 +289,12 @@ class NBCNewsIE(ThePlatformIE):
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880', 'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
'md5': 'af1adfa51312291a017720403826bb64', 'md5': 'cf4bc9e6ce0130f00f545d80ecedd4bf',
'info_dict': { 'info_dict': {
'id': '269389891880', 'id': '269389891880',
'ext': 'mp4', 'ext': 'mp4',
'title': 'How Twitter Reacted To The Snowden Interview', 'title': 'How Twitter Reacted To The Snowden Interview',
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64', 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
'uploader': 'NBCU-NEWS',
'timestamp': 1401363060, 'timestamp': 1401363060,
'upload_date': '20140529', 'upload_date': '20140529',
}, },
@ -309,28 +312,26 @@ class NBCNewsIE(ThePlatformIE):
}, },
{ {
'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844', 'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
'md5': '73135a2e0ef819107bbb55a5a9b2a802', 'md5': '8eb831eca25bfa7d25ddd83e85946548',
'info_dict': { 'info_dict': {
'id': '394064451844', 'id': '394064451844',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)', 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5', 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
'timestamp': 1423104900, 'timestamp': 1423104900,
'uploader': 'NBCU-NEWS',
'upload_date': '20150205', 'upload_date': '20150205',
}, },
}, },
{ {
'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456', 'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
'md5': 'a49e173825e5fcd15c13fc297fced39d', 'md5': '4a8c4cec9e1ded51060bdda36ff0a5c0',
'info_dict': { 'info_dict': {
'id': '529953347624', 'id': 'n431456',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up', 'title': "Volkswagen U.S. Chief: We 'Totally Screwed Up'",
'description': 'md5:c8be487b2d80ff0594c005add88d8351', 'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
'upload_date': '20150922', 'upload_date': '20150922',
'timestamp': 1442917800, 'timestamp': 1442917800,
'uploader': 'NBCU-NEWS',
}, },
}, },
{ {
@ -343,7 +344,6 @@ class NBCNewsIE(ThePlatformIE):
'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1', 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
'upload_date': '20160420', 'upload_date': '20160420',
'timestamp': 1461152093, 'timestamp': 1461152093,
'uploader': 'NBCU-NEWS',
}, },
}, },
{ {
@ -357,7 +357,6 @@ class NBCNewsIE(ThePlatformIE):
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1406937606, 'timestamp': 1406937606,
'upload_date': '20140802', 'upload_date': '20140802',
'uploader': 'NBCU-NEWS',
}, },
}, },
{ {
@ -373,20 +372,61 @@ class NBCNewsIE(ThePlatformIE):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
if not video_id.isdigit(): webpage = self._download_webpage(url, video_id)
webpage = self._download_webpage(url, video_id)
data = self._parse_json(self._search_regex( data = self._parse_json(self._search_regex(
r'window\.__data\s*=\s*({.+});', webpage, r'window\.__data\s*=\s*({.+});', webpage,
'bootstrap json'), video_id) 'bootstrap json'), video_id, js_to_json)
video_id = data['article']['content'][0]['primaryMedia']['video']['mpxMetadata']['id'] video_data = try_get(data, lambda x: x['video']['current'], dict)
if not video_data:
video_data = data['article']['content'][0]['primaryMedia']['video']
title = video_data['headline']['primary']
formats = []
for va in video_data.get('videoAssets', []):
public_url = va.get('publicUrl')
if not public_url:
continue
if '://link.theplatform.com/' in public_url:
public_url = update_url_query(public_url, {'format': 'redirect'})
format_id = va.get('format')
if format_id == 'M3U':
formats.extend(self._extract_m3u8_formats(
public_url, video_id, 'mp4', 'm3u8_native',
m3u8_id=format_id, fatal=False))
continue
tbr = int_or_none(va.get('bitrate'), 1000)
if tbr:
format_id += '-%d' % tbr
formats.append({
'format_id': format_id,
'url': public_url,
'width': int_or_none(va.get('width')),
'height': int_or_none(va.get('height')),
'tbr': tbr,
'ext': 'mp4',
})
self._sort_formats(formats)
subtitles = {}
closed_captioning = video_data.get('closedCaptioning')
if closed_captioning:
for cc_url in closed_captioning.values():
if not cc_url:
continue
subtitles.setdefault('en', []).append({
'url': cc_url,
})
return { return {
'_type': 'url_transparent',
'id': video_id, 'id': video_id,
# http://feed.theplatform.com/f/2E2eJC/nbcnews also works 'title': title,
'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {'byId': video_id}), 'description': try_get(video_data, lambda x: x['description']['primary']),
'ie_key': 'ThePlatformFeed', 'thumbnail': try_get(video_data, lambda x: x['primaryImage']['url']['primary']),
'duration': parse_duration(video_data.get('duration')),
'timestamp': unified_timestamp(video_data.get('datePublished')),
'formats': formats,
'subtitles': subtitles,
} }

View File

@ -20,6 +20,8 @@ from ..utils import (
class OnetBaseIE(InfoExtractor): class OnetBaseIE(InfoExtractor):
_URL_BASE_RE = r'https?://(?:(?:www\.)?onet\.tv|onet100\.vod\.pl)/[a-z]/'
def _search_mvp_id(self, webpage): def _search_mvp_id(self, webpage):
return self._search_regex( return self._search_regex(
r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id') r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')
@ -45,7 +47,7 @@ class OnetBaseIE(InfoExtractor):
video = response['result'].get('0') video = response['result'].get('0')
formats = [] formats = []
for _, formats_dict in video['formats'].items(): for format_type, formats_dict in video['formats'].items():
if not isinstance(formats_dict, dict): if not isinstance(formats_dict, dict):
continue continue
for format_id, format_list in formats_dict.items(): for format_id, format_list in formats_dict.items():
@ -56,21 +58,31 @@ class OnetBaseIE(InfoExtractor):
if not video_url: if not video_url:
continue continue
ext = determine_ext(video_url) ext = determine_ext(video_url)
if format_id == 'ism': if format_id.startswith('ism'):
formats.extend(self._extract_ism_formats( formats.extend(self._extract_ism_formats(
video_url, video_id, 'mss', fatal=False)) video_url, video_id, 'mss', fatal=False))
elif ext == 'mpd': elif ext == 'mpd':
formats.extend(self._extract_mpd_formats( formats.extend(self._extract_mpd_formats(
video_url, video_id, mpd_id='dash', fatal=False)) video_url, video_id, mpd_id='dash', fatal=False))
elif format_id.startswith('hls'):
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
else: else:
formats.append({ http_f = {
'url': video_url, 'url': video_url,
'format_id': format_id, 'format_id': format_id,
'height': int_or_none(f.get('vertical_resolution')),
'width': int_or_none(f.get('horizontal_resolution')),
'abr': float_or_none(f.get('audio_bitrate')), 'abr': float_or_none(f.get('audio_bitrate')),
'vbr': float_or_none(f.get('video_bitrate')), }
}) if format_type == 'audio':
http_f['vcodec'] = 'none'
else:
http_f.update({
'height': int_or_none(f.get('vertical_resolution')),
'width': int_or_none(f.get('horizontal_resolution')),
'vbr': float_or_none(f.get('video_bitrate')),
})
formats.append(http_f)
self._sort_formats(formats) self._sort_formats(formats)
meta = video.get('meta', {}) meta = video.get('meta', {})
@ -105,12 +117,12 @@ class OnetMVPIE(OnetBaseIE):
class OnetIE(OnetBaseIE): class OnetIE(OnetBaseIE):
_VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)' _VALID_URL = OnetBaseIE._URL_BASE_RE + r'[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'
IE_NAME = 'onet.tv' IE_NAME = 'onet.tv'
_TEST = { _TESTS = [{
'url': 'http://onet.tv/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc', 'url': 'http://onet.tv/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
'md5': 'e3ffbf47590032ac3f27249204173d50', 'md5': '436102770fb095c75b8bb0392d3da9ff',
'info_dict': { 'info_dict': {
'id': 'qbpyqc', 'id': 'qbpyqc',
'display_id': 'open-er-festival-2016-najdziwniejsze-wymagania-gwiazd', 'display_id': 'open-er-festival-2016-najdziwniejsze-wymagania-gwiazd',
@ -120,7 +132,10 @@ class OnetIE(OnetBaseIE):
'upload_date': '20160705', 'upload_date': '20160705',
'timestamp': 1467721580, 'timestamp': 1467721580,
}, },
} }, {
'url': 'https://onet100.vod.pl/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@ -140,18 +155,21 @@ class OnetIE(OnetBaseIE):
class OnetChannelIE(OnetBaseIE): class OnetChannelIE(OnetBaseIE):
_VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/(?P<id>[a-z]+)(?:[?#]|$)' _VALID_URL = OnetBaseIE._URL_BASE_RE + r'(?P<id>[a-z]+)(?:[?#]|$)'
IE_NAME = 'onet.tv:channel' IE_NAME = 'onet.tv:channel'
_TEST = { _TESTS = [{
'url': 'http://onet.tv/k/openerfestival', 'url': 'http://onet.tv/k/openerfestival',
'info_dict': { 'info_dict': {
'id': 'openerfestival', 'id': 'openerfestival',
'title': 'Open\'er Festival Live', 'title': "Open'er Festival",
'description': 'Dziękujemy, że oglądaliście transmisje. Zobaczcie nasze relacje i wywiady z artystami.', 'description': "Tak było na Open'er Festival 2016! Oglądaj nasze reportaże i wywiady z artystami.",
}, },
'playlist_mincount': 46, 'playlist_mincount': 35,
} }, {
'url': 'https://onet100.vod.pl/k/openerfestival',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
channel_id = self._match_id(url) channel_id = self._match_id(url)
@ -173,7 +191,7 @@ class OnetChannelIE(OnetBaseIE):
'Downloading channel %s - add --no-playlist to just download video %s' % ( 'Downloading channel %s - add --no-playlist to just download video %s' % (
channel_id, video_name)) channel_id, video_name))
matches = re.findall( matches = re.findall(
r'<a[^>]+href=[\'"](https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/[0-9a-z-]+/[0-9a-z]+)', r'<a[^>]+href=[\'"](%s[a-z]+/[0-9a-z-]+/[0-9a-z]+)' % self._URL_BASE_RE,
webpage) webpage)
entries = [ entries = [
self.url_result(video_link, OnetIE.ie_key()) self.url_result(video_link, OnetIE.ie_key())

View File

@ -11,14 +11,13 @@ from .common import (
from ..compat import ( from ..compat import (
compat_str, compat_str,
compat_urlparse, compat_urlparse,
compat_urllib_parse_urlencode,
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,
HEADRequest,
int_or_none, int_or_none,
KNOWN_EXTENSIONS, KNOWN_EXTENSIONS,
merge_dicts,
mimetype2ext, mimetype2ext,
str_or_none, str_or_none,
try_get, try_get,
@ -28,6 +27,20 @@ from ..utils import (
) )
class SoundcloudEmbedIE(InfoExtractor):
    """Extractor for SoundCloud player (widget) URLs.

    A player URL carries the real track/set URL in its ``url`` query
    parameter; extraction simply redirects to that URL.
    """

    _VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?url=(?P<id>.*)'

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` of every SoundCloud player iframe in *webpage*."""
        iframe_re = r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1'
        player_urls = []
        for match in re.finditer(iframe_re, webpage):
            player_urls.append(match.group('url'))
        return player_urls

    def _real_extract(self, url):
        """Delegate to whichever extractor handles the wrapped ``url`` value."""
        query_string = compat_urlparse.urlparse(url).query
        query_params = compat_urlparse.parse_qs(query_string)
        # parse_qs maps each key to a list of values; the first one is used.
        wrapped_url = query_params['url'][0]
        return self.url_result(wrapped_url)
class SoundcloudIE(InfoExtractor): class SoundcloudIE(InfoExtractor):
"""Information extractor for soundcloud.com """Information extractor for soundcloud.com
To access the media, the uid of the song and a stream token To access the media, the uid of the song and a stream token
@ -44,9 +57,8 @@ class SoundcloudIE(InfoExtractor):
(?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#])) (?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
(?P<title>[\w\d-]+)/? (?P<title>[\w\d-]+)/?
(?P<token>[^?]+?)?(?:[?].*)?$) (?P<token>[^?]+?)?(?:[?].*)?$)
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+) |(?:api(?:-v2)?\.soundcloud\.com/tracks/(?P<track_id>\d+)
(?:/?\?secret_token=(?P<secret_token>[^&]+))?) (?:/?\?secret_token=(?P<secret_token>[^&]+))?)
|(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
) )
''' '''
IE_NAME = 'soundcloud' IE_NAME = 'soundcloud'
@ -60,6 +72,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d', 'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
'uploader': 'E.T. ExTerrestrial Music', 'uploader': 'E.T. ExTerrestrial Music',
'uploader_id': '1571244',
'timestamp': 1349920598, 'timestamp': 1349920598,
'upload_date': '20121011', 'upload_date': '20121011',
'duration': 143.216, 'duration': 143.216,
@ -79,6 +92,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Goldrushed', 'title': 'Goldrushed',
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com', 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
'uploader': 'The Royal Concept', 'uploader': 'The Royal Concept',
'uploader_id': '9615865',
'timestamp': 1337635207, 'timestamp': 1337635207,
'upload_date': '20120521', 'upload_date': '20120521',
'duration': 30, 'duration': 30,
@ -92,6 +106,7 @@ class SoundcloudIE(InfoExtractor):
# rtmp # rtmp
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Preview',
}, },
# private link # private link
{ {
@ -103,6 +118,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Youtube - Dl Test Video \'\' Ä↭', 'title': 'Youtube - Dl Test Video \'\' Ä↭',
'description': 'test chars: \"\'/\\ä↭', 'description': 'test chars: \"\'/\\ä↭',
'uploader': 'jaimeMF', 'uploader': 'jaimeMF',
'uploader_id': '69767071',
'timestamp': 1386604920, 'timestamp': 1386604920,
'upload_date': '20131209', 'upload_date': '20131209',
'duration': 9.927, 'duration': 9.927,
@ -123,6 +139,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Youtube - Dl Test Video \'\' Ä↭', 'title': 'Youtube - Dl Test Video \'\' Ä↭',
'description': 'test chars: \"\'/\\ä↭', 'description': 'test chars: \"\'/\\ä↭',
'uploader': 'jaimeMF', 'uploader': 'jaimeMF',
'uploader_id': '69767071',
'timestamp': 1386604920, 'timestamp': 1386604920,
'upload_date': '20131209', 'upload_date': '20131209',
'duration': 9.927, 'duration': 9.927,
@ -143,6 +160,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Bus Brakes', 'title': 'Bus Brakes',
'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66', 'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
'uploader': 'oddsamples', 'uploader': 'oddsamples',
'uploader_id': '73680509',
'timestamp': 1389232924, 'timestamp': 1389232924,
'upload_date': '20140109', 'upload_date': '20140109',
'duration': 17.346, 'duration': 17.346,
@ -163,6 +181,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]', 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366', 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
'uploader': 'Ori Uplift Music', 'uploader': 'Ori Uplift Music',
'uploader_id': '12563093',
'timestamp': 1504206263, 'timestamp': 1504206263,
'upload_date': '20170831', 'upload_date': '20170831',
'duration': 7449.096, 'duration': 7449.096,
@ -183,6 +202,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Sideways (Prod. Mad Real)', 'title': 'Sideways (Prod. Mad Real)',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'uploader': 'garyvee', 'uploader': 'garyvee',
'uploader_id': '2366352',
'timestamp': 1488152409, 'timestamp': 1488152409,
'upload_date': '20170226', 'upload_date': '20170226',
'duration': 207.012, 'duration': 207.012,
@ -207,6 +227,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Mezzo Valzer', 'title': 'Mezzo Valzer',
'description': 'md5:4138d582f81866a530317bae316e8b61', 'description': 'md5:4138d582f81866a530317bae316e8b61',
'uploader': 'Giovanni Sarani', 'uploader': 'Giovanni Sarani',
'uploader_id': '3352531',
'timestamp': 1551394171, 'timestamp': 1551394171,
'upload_date': '20190228', 'upload_date': '20190228',
'duration': 180.157, 'duration': 180.157,
@ -221,114 +242,81 @@ class SoundcloudIE(InfoExtractor):
} }
] ]
_API_BASE = 'https://api.soundcloud.com/'
_API_V2_BASE = 'https://api-v2.soundcloud.com/'
_BASE_URL = 'https://soundcloud.com/'
_CLIENT_ID = 'BeGVhOrGmfboy1LtiHTQF6Ejpt9ULJCI' _CLIENT_ID = 'BeGVhOrGmfboy1LtiHTQF6Ejpt9ULJCI'
_IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
@staticmethod _ARTWORK_MAP = {
def _extract_urls(webpage): 'mini': 16,
return [m.group('url') for m in re.finditer( 'tiny': 20,
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1', 'small': 32,
webpage)] 'badge': 47,
't67x67': 67,
'large': 100,
't300x300': 300,
'crop': 400,
't500x500': 500,
'original': 0,
}
@classmethod @classmethod
def _resolv_url(cls, url): def _resolv_url(cls, url):
return 'https://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url + '&client_id=' + cls._CLIENT_ID
def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None): def _extract_info_dict(self, info, full_title=None, secret_token=None, version=2):
track_id = compat_str(info['id']) track_id = compat_str(info['id'])
title = info['title'] title = info['title']
name = full_title or track_id track_base_url = self._API_BASE + 'tracks/%s' % track_id
if quiet:
self.report_extraction(name)
thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url')
if isinstance(thumbnail, compat_str):
thumbnail = thumbnail.replace('-large', '-t500x500')
username = try_get(info, lambda x: x['user']['username'], compat_str)
def extract_count(key):
return int_or_none(info.get('%s_count' % key))
like_count = extract_count('favoritings')
if like_count is None:
like_count = extract_count('likes')
result = {
'id': track_id,
'uploader': username,
'timestamp': unified_timestamp(info.get('created_at')),
'title': title,
'description': info.get('description'),
'thumbnail': thumbnail,
'duration': float_or_none(info.get('duration'), 1000),
'webpage_url': info.get('permalink_url'),
'license': info.get('license'),
'view_count': extract_count('playback'),
'like_count': like_count,
'comment_count': extract_count('comment'),
'repost_count': extract_count('reposts'),
'genre': info.get('genre'),
}
format_urls = set() format_urls = set()
formats = [] formats = []
query = {'client_id': self._CLIENT_ID} query = {'client_id': self._CLIENT_ID}
if secret_token is not None: if secret_token:
query['secret_token'] = secret_token query['secret_token'] = secret_token
if info.get('downloadable', False):
# We can build a direct link to the song if info.get('downloadable'):
format_url = update_url_query( format_url = update_url_query(
'https://api.soundcloud.com/tracks/%s/download' % track_id, query) info.get('download_url') or track_base_url + '/download', query)
format_urls.add(format_url) format_urls.add(format_url)
if version == 2:
v1_info = self._download_json(
track_base_url, track_id, query=query, fatal=False) or {}
else:
v1_info = info
formats.append({ formats.append({
'format_id': 'download', 'format_id': 'download',
'ext': info.get('original_format', 'mp3'), 'ext': v1_info.get('original_format') or 'mp3',
'filesize': int_or_none(v1_info.get('original_content_size')),
'url': format_url, 'url': format_url,
'vcodec': 'none',
'preference': 10, 'preference': 10,
}) })
# Old API, does not work for some tracks (e.g. def invalid_url(url):
# https://soundcloud.com/giovannisarani/mezzo-valzer) return not url or url in format_urls or re.search(r'/(?:preview|playlist)/0/30/', url)
format_dict = self._download_json(
'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
track_id, 'Downloading track url', query=query, fatal=False)
if format_dict: def add_format(f, protocol):
for key, stream_url in format_dict.items(): mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', stream_url)
if stream_url in format_urls: if mobj:
continue for k, v in mobj.groupdict().items():
format_urls.add(stream_url) if not f.get(k):
ext, abr = 'mp3', None f[k] = v
mobj = re.search(r'_([^_]+)_(\d+)_url', key) format_id_list = []
if mobj: if protocol:
ext, abr = mobj.groups() format_id_list.append(protocol)
abr = int(abr) for k in ('ext', 'abr'):
if key.startswith('http'): v = f.get(k)
stream_formats = [{ if v:
'format_id': key, format_id_list.append(v)
'ext': ext, abr = f.get('abr')
'url': stream_url, if abr:
}] f['abr'] = int(abr)
elif key.startswith('rtmp'): f.update({
# The url doesn't have an rtmp app, we have to extract the playpath 'format_id': '_'.join(format_id_list),
url, path = stream_url.split('mp3:', 1) 'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
stream_formats = [{ })
'format_id': key, formats.append(f)
'url': url,
'play_path': 'mp3:' + path,
'ext': 'flv',
}]
elif key.startswith('hls'):
stream_formats = self._extract_m3u8_formats(
stream_url, track_id, ext, entry_protocol='m3u8_native',
m3u8_id=key, fatal=False)
else:
continue
if abr:
for f in stream_formats:
f['abr'] = abr
formats.extend(stream_formats)
# New API # New API
transcodings = try_get( transcodings = try_get(
@ -337,129 +325,165 @@ class SoundcloudIE(InfoExtractor):
if not isinstance(t, dict): if not isinstance(t, dict):
continue continue
format_url = url_or_none(t.get('url')) format_url = url_or_none(t.get('url'))
if not format_url: if not format_url or t.get('snipped') or '/preview/' in format_url:
continue continue
stream = self._download_json( stream = self._download_json(
update_url_query(format_url, query), track_id, fatal=False) format_url, track_id, query=query, fatal=False)
if not isinstance(stream, dict): if not isinstance(stream, dict):
continue continue
stream_url = url_or_none(stream.get('url')) stream_url = url_or_none(stream.get('url'))
if not stream_url: if invalid_url(stream_url):
continue
if stream_url in format_urls:
continue continue
format_urls.add(stream_url) format_urls.add(stream_url)
protocol = try_get(t, lambda x: x['format']['protocol'], compat_str) stream_format = t.get('format') or {}
protocol = stream_format.get('protocol')
if protocol != 'hls' and '/hls' in format_url: if protocol != 'hls' and '/hls' in format_url:
protocol = 'hls' protocol = 'hls'
ext = None ext = None
preset = str_or_none(t.get('preset')) preset = str_or_none(t.get('preset'))
if preset: if preset:
ext = preset.split('_')[0] ext = preset.split('_')[0]
if ext not in KNOWN_EXTENSIONS: if ext not in KNOWN_EXTENSIONS:
mimetype = try_get( ext = mimetype2ext(stream_format.get('mime_type'))
t, lambda x: x['format']['mime_type'], compat_str) add_format({
ext = mimetype2ext(mimetype) or 'mp3'
format_id_list = []
if protocol:
format_id_list.append(protocol)
format_id_list.append(ext)
format_id = '_'.join(format_id_list)
formats.append({
'url': stream_url, 'url': stream_url,
'format_id': format_id,
'ext': ext, 'ext': ext,
'protocol': 'm3u8_native' if protocol == 'hls' else 'http', }, 'http' if protocol == 'progressive' else protocol)
})
if not formats:
# Old API, does not work for some tracks (e.g.
# https://soundcloud.com/giovannisarani/mezzo-valzer)
# and might serve preview URLs (e.g.
# http://www.soundcloud.com/snbrn/ele)
format_dict = self._download_json(
track_base_url + '/streams', track_id,
'Downloading track url', query=query, fatal=False) or {}
for key, stream_url in format_dict.items():
if invalid_url(stream_url):
continue
format_urls.add(stream_url)
mobj = re.search(r'(http|hls)_([^_]+)_(\d+)_url', key)
if mobj:
protocol, ext, abr = mobj.groups()
add_format({
'abr': abr,
'ext': ext,
'url': stream_url,
}, protocol)
if not formats: if not formats:
# We fallback to the stream_url in the original info, this # We fallback to the stream_url in the original info, this
# cannot be always used, sometimes it can give an HTTP 404 error # cannot be always used, sometimes it can give an HTTP 404 error
formats.append({ urlh = self._request_webpage(
'format_id': 'fallback', HEADRequest(info.get('stream_url') or track_base_url + '/stream'),
'url': update_url_query(info['stream_url'], query), track_id, query=query, fatal=False)
'ext': 'mp3', if urlh:
}) stream_url = urlh.geturl()
self._check_formats(formats, track_id) if not invalid_url(stream_url):
add_format({'url': stream_url}, 'http')
for f in formats: for f in formats:
f['vcodec'] = 'none' f['vcodec'] = 'none'
self._sort_formats(formats) self._sort_formats(formats)
result['formats'] = formats
return result user = info.get('user') or {}
thumbnails = []
artwork_url = info.get('artwork_url')
thumbnail = artwork_url or user.get('avatar_url')
if isinstance(thumbnail, compat_str):
if re.search(self._IMAGE_REPL_RE, thumbnail):
for image_id, size in self._ARTWORK_MAP.items():
i = {
'id': image_id,
'url': re.sub(self._IMAGE_REPL_RE, '-%s.jpg' % image_id, thumbnail),
}
if image_id == 'tiny' and not artwork_url:
size = 18
elif image_id == 'original':
i['preference'] = 10
if size:
i.update({
'width': size,
'height': size,
})
thumbnails.append(i)
else:
thumbnails = [{'url': thumbnail}]
def extract_count(key):
return int_or_none(info.get('%s_count' % key))
return {
'id': track_id,
'uploader': user.get('username'),
'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
'uploader_url': user.get('permalink_url'),
'timestamp': unified_timestamp(info.get('created_at')),
'title': title,
'description': info.get('description'),
'thumbnails': thumbnails,
'duration': float_or_none(info.get('duration'), 1000),
'webpage_url': info.get('permalink_url'),
'license': info.get('license'),
'view_count': extract_count('playback'),
'like_count': extract_count('favoritings') or extract_count('likes'),
'comment_count': extract_count('comment'),
'repost_count': extract_count('reposts'),
'genre': info.get('genre'),
'formats': formats
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError('Invalid URL: %s' % url)
track_id = mobj.group('track_id') track_id = mobj.group('track_id')
new_info = {}
if track_id is not None: query = {
info_json_url = 'https://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID 'client_id': self._CLIENT_ID,
}
if track_id:
info_json_url = self._API_V2_BASE + 'tracks/' + track_id
full_title = track_id full_title = track_id
token = mobj.group('secret_token') token = mobj.group('secret_token')
if token: if token:
info_json_url += '&secret_token=' + token query['secret_token'] = token
elif mobj.group('player'):
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
real_url = query['url'][0]
# If the token is in the query of the original url we have to
# manually add it
if 'secret_token' in query:
real_url += '?secret_token=' + query['secret_token'][0]
return self.url_result(real_url)
else: else:
# extract uploader (which is in the url) full_title = resolve_title = '%s/%s' % mobj.group('uploader', 'title')
uploader = mobj.group('uploader')
# extract simple title (uploader + slug of song title)
slug_title = mobj.group('title')
token = mobj.group('token') token = mobj.group('token')
full_title = resolve_title = '%s/%s' % (uploader, slug_title)
if token: if token:
resolve_title += '/%s' % token resolve_title += '/%s' % token
info_json_url = self._resolv_url(self._BASE_URL + resolve_title)
webpage = self._download_webpage(url, full_title, fatal=False) version = 2
if webpage:
entries = self._parse_json(
self._search_regex(
r'var\s+c\s*=\s*(\[.+?\])\s*,\s*o\s*=Date\b', webpage,
'data', default='[]'), full_title, fatal=False)
if entries:
for e in entries:
if not isinstance(e, dict):
continue
if e.get('id') != 67:
continue
data = try_get(e, lambda x: x['data'][0], dict)
if data:
new_info = data
break
info_json_url = self._resolv_url(
'https://soundcloud.com/%s' % resolve_title)
# Contains some additional info missing from new_info
info = self._download_json( info = self._download_json(
info_json_url, full_title, 'Downloading info JSON') info_json_url, full_title, 'Downloading info JSON', query=query, fatal=False)
if not info:
info = self._download_json(
info_json_url.replace(self._API_V2_BASE, self._API_BASE),
full_title, 'Downloading info JSON', query=query)
version = 1
return self._extract_info_dict( return self._extract_info_dict(info, full_title, token, version)
merge_dicts(info, new_info), full_title, secret_token=token)
class SoundcloudPlaylistBaseIE(SoundcloudIE): class SoundcloudPlaylistBaseIE(SoundcloudIE):
@staticmethod def _extract_track_entries(self, tracks, token=None):
def _extract_id(e): entries = []
return compat_str(e['id']) if e.get('id') else None for track in tracks:
track_id = str_or_none(track.get('id'))
def _extract_track_entries(self, tracks): url = track.get('permalink_url')
return [ if not url:
self.url_result( if not track_id:
track['permalink_url'], SoundcloudIE.ie_key(), continue
video_id=self._extract_id(track)) url = self._API_V2_BASE + 'tracks/' + track_id
for track in tracks if track.get('permalink_url')] if token:
url += '?secret_token=' + token
entries.append(self.url_result(
url, SoundcloudIE.ie_key(), track_id))
return entries
class SoundcloudSetIE(SoundcloudPlaylistBaseIE): class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
@ -480,41 +504,28 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
# extract uploader (which is in the url) full_title = '%s/sets/%s' % mobj.group('uploader', 'slug_title')
uploader = mobj.group('uploader')
# extract simple title (uploader + slug of song title)
slug_title = mobj.group('slug_title')
full_title = '%s/sets/%s' % (uploader, slug_title)
url = 'https://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
token = mobj.group('token') token = mobj.group('token')
if token: if token:
full_title += '/' + token full_title += '/' + token
url += '/' + token
resolv_url = self._resolv_url(url) info = self._download_json(self._resolv_url(
info = self._download_json(resolv_url, full_title) self._BASE_URL + full_title), full_title)
if 'errors' in info: if 'errors' in info:
msgs = (compat_str(err['error_message']) for err in info['errors']) msgs = (compat_str(err['error_message']) for err in info['errors'])
raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs)) raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs))
entries = self._extract_track_entries(info['tracks']) entries = self._extract_track_entries(info['tracks'], token)
return { return self.playlist_result(
'_type': 'playlist', entries, str_or_none(info.get('id')), info.get('title'))
'entries': entries,
'id': '%s' % info['id'],
'title': info['title'],
}
class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
_API_V2_BASE = 'https://api-v2.soundcloud.com'
def _extract_playlist(self, base_url, playlist_id, playlist_title): def _extract_playlist(self, base_url, playlist_id, playlist_title):
COMMON_QUERY = { COMMON_QUERY = {
'limit': 50, 'limit': 2000000000,
'client_id': self._CLIENT_ID, 'client_id': self._CLIENT_ID,
'linked_partitioning': '1', 'linked_partitioning': '1',
} }
@ -522,12 +533,13 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
query = COMMON_QUERY.copy() query = COMMON_QUERY.copy()
query['offset'] = 0 query['offset'] = 0
next_href = base_url + '?' + compat_urllib_parse_urlencode(query) next_href = base_url
entries = [] entries = []
for i in itertools.count(): for i in itertools.count():
response = self._download_json( response = self._download_json(
next_href, playlist_id, 'Downloading track page %s' % (i + 1)) next_href, playlist_id,
'Downloading track page %s' % (i + 1), query=query)
collection = response['collection'] collection = response['collection']
@ -546,9 +558,8 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
continue continue
return self.url_result( return self.url_result(
permalink_url, permalink_url,
ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None, SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
video_id=self._extract_id(cand), str_or_none(cand.get('id')), cand.get('title'))
video_title=cand.get('title'))
for e in collection: for e in collection:
entry = resolve_entry((e, e.get('track'), e.get('playlist'))) entry = resolve_entry((e, e.get('track'), e.get('playlist')))
@ -559,11 +570,10 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
if not next_href: if not next_href:
break break
parsed_next_href = compat_urlparse.urlparse(response['next_href']) next_href = response['next_href']
qs = compat_urlparse.parse_qs(parsed_next_href.query) parsed_next_href = compat_urlparse.urlparse(next_href)
qs.update(COMMON_QUERY) query = compat_urlparse.parse_qs(parsed_next_href.query)
next_href = compat_urlparse.urlunparse( query.update(COMMON_QUERY)
parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True)))
return { return {
'_type': 'playlist', '_type': 'playlist',
@ -609,7 +619,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
'url': 'https://soundcloud.com/jcv246/sets', 'url': 'https://soundcloud.com/jcv246/sets',
'info_dict': { 'info_dict': {
'id': '12982173', 'id': '12982173',
'title': 'Jordi / cv (Playlists)', 'title': 'Jordi / cv (Sets)',
}, },
'playlist_mincount': 2, 'playlist_mincount': 2,
}, { }, {
@ -636,39 +646,29 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
}] }]
_BASE_URL_MAP = { _BASE_URL_MAP = {
'all': '%s/stream/users/%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, 'all': 'stream/users/%s',
'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, 'tracks': 'users/%s/tracks',
'albums': '%s/users/%%s/albums' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, 'albums': 'users/%s/albums',
'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, 'sets': 'users/%s/playlists',
'reposts': '%s/stream/users/%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, 'reposts': 'stream/users/%s/reposts',
'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, 'likes': 'users/%s/likes',
'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, 'spotlight': 'users/%s/spotlight',
}
_TITLE_MAP = {
'all': 'All',
'tracks': 'Tracks',
'albums': 'Albums',
'sets': 'Playlists',
'reposts': 'Reposts',
'likes': 'Likes',
'spotlight': 'Spotlight',
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
uploader = mobj.group('user') uploader = mobj.group('user')
url = 'https://soundcloud.com/%s/' % uploader
resolv_url = self._resolv_url(url)
user = self._download_json( user = self._download_json(
resolv_url, uploader, 'Downloading user info') self._resolv_url(self._BASE_URL + uploader),
uploader, 'Downloading user info')
resource = mobj.group('rsrc') or 'all' resource = mobj.group('rsrc') or 'all'
return self._extract_playlist( return self._extract_playlist(
self._BASE_URL_MAP[resource] % user['id'], compat_str(user['id']), self._API_V2_BASE + self._BASE_URL_MAP[resource] % user['id'],
'%s (%s)' % (user['username'], self._TITLE_MAP[resource])) str_or_none(user.get('id')),
'%s (%s)' % (user['username'], resource.capitalize()))
class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE): class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
@ -678,7 +678,7 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
'url': 'https://soundcloud.com/stations/track/officialsundial/your-text', 'url': 'https://soundcloud.com/stations/track/officialsundial/your-text',
'info_dict': { 'info_dict': {
'id': '286017854', 'id': '286017854',
'title': 'Track station: your-text', 'title': 'Track station: your text',
}, },
'playlist_mincount': 47, 'playlist_mincount': 47,
}] }]
@ -686,19 +686,17 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
track_name = self._match_id(url) track_name = self._match_id(url)
webpage = self._download_webpage(url, track_name) track = self._download_json(self._resolv_url(url), track_name)
track_id = self._search_regex( track_id = self._search_regex(
r'soundcloud:track-stations:(\d+)', webpage, 'track id') r'soundcloud:track-stations:(\d+)', track['id'], 'track id')
return self._extract_playlist( return self._extract_playlist(
'%s/stations/soundcloud:track-stations:%s/tracks' self._API_V2_BASE + 'stations/%s/tracks' % track['id'],
% (self._API_V2_BASE, track_id), track_id, 'Track station: %s' % track['title'])
track_id, 'Track station: %s' % track_name)
class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
_VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$' _VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
IE_NAME = 'soundcloud:playlist' IE_NAME = 'soundcloud:playlist'
_TESTS = [{ _TESTS = [{
'url': 'https://api.soundcloud.com/playlists/4110309', 'url': 'https://api.soundcloud.com/playlists/4110309',
@ -713,29 +711,22 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id') playlist_id = mobj.group('id')
base_url = '%s//api.soundcloud.com/playlists/%s.json?' % (self.http_scheme(), playlist_id)
data_dict = { query = {
'client_id': self._CLIENT_ID, 'client_id': self._CLIENT_ID,
} }
token = mobj.group('token') token = mobj.group('token')
if token: if token:
data_dict['secret_token'] = token query['secret_token'] = token
data = compat_urllib_parse_urlencode(data_dict)
data = self._download_json( data = self._download_json(
base_url + data, playlist_id, 'Downloading playlist') self._API_V2_BASE + 'playlists/' + playlist_id,
playlist_id, 'Downloading playlist', query=query)
entries = self._extract_track_entries(data['tracks']) entries = self._extract_track_entries(data['tracks'], token)
return { return self.playlist_result(
'_type': 'playlist', entries, playlist_id, data.get('title'), data.get('description'))
'id': playlist_id,
'title': data.get('title'),
'description': data.get('description'),
'entries': entries,
}
class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
@ -753,18 +744,18 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
_SEARCH_KEY = 'scsearch' _SEARCH_KEY = 'scsearch'
_MAX_RESULTS_PER_PAGE = 200 _MAX_RESULTS_PER_PAGE = 200
_DEFAULT_RESULTS_PER_PAGE = 50 _DEFAULT_RESULTS_PER_PAGE = 50
_API_V2_BASE = 'https://api-v2.soundcloud.com'
def _get_collection(self, endpoint, collection_id, **query): def _get_collection(self, endpoint, collection_id, **query):
limit = min( limit = min(
query.get('limit', self._DEFAULT_RESULTS_PER_PAGE), query.get('limit', self._DEFAULT_RESULTS_PER_PAGE),
self._MAX_RESULTS_PER_PAGE) self._MAX_RESULTS_PER_PAGE)
query['limit'] = limit query.update({
query['client_id'] = self._CLIENT_ID 'limit': limit,
query['linked_partitioning'] = '1' 'client_id': self._CLIENT_ID,
query['offset'] = 0 'linked_partitioning': 1,
data = compat_urllib_parse_urlencode(query) 'offset': 0,
next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data) })
next_url = update_url_query(self._API_V2_BASE + endpoint, query)
collected_results = 0 collected_results = 0
@ -791,5 +782,5 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
break break
def _get_n_results(self, query, n): def _get_n_results(self, query, n):
tracks = self._get_collection('/search/tracks', query, limit=n, q=query) tracks = self._get_collection('search/tracks', query, limit=n, q=query)
return self.playlist_result(tracks, playlist_title=query) return self.playlist_result(tracks, playlist_title=query)

View File

@ -0,0 +1,55 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
parse_age_limit,
parse_iso8601,
smuggle_url,
)
class TenPlayIE(InfoExtractor):
    # Episode pages on 10play.com.au; the captured id is "tpv" followed by
    # six digits and five lowercase letters.
    _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/[^/]+/episodes/[^/]+/[^/]+/(?P<id>tpv\d{6}[a-z]{5})'
    _TEST = {
        'url': 'https://10play.com.au/masterchef/episodes/season-1/masterchef-s1-ep-1/tpv190718kwzga',
        'info_dict': {
            'id': '6060533435001',
            'ext': 'mp4',
            'title': 'MasterChef - S1 Ep. 1',
            'description': 'md5:4fe7b78e28af8f2d900cd20d900ef95c',
            'age_limit': 10,
            'timestamp': 1240828200,
            'upload_date': '20090427',
            'uploader_id': '2199827728001',
        },
        'params': {
            'format': 'bestvideo',
            'skip_download': True,
        }
    }
    # Brightcove player page; %s is filled with the Brightcove video id.
    BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/2199827728001/cN6vRtRQt_default/index.html?videoId=%s'

    def _real_extract(self, url):
        # Query the site's JSON API for the page's video and metadata records,
        # then delegate the actual media extraction to the Brightcove
        # extractor via a url_transparent result.
        content_id = self._match_id(url)
        api_response = self._download_json(
            'https://10play.com.au/api/video/' + content_id, content_id)

        # Either section may be absent or null; fall back to empty dicts so
        # the lookups below stay simple.
        video_info = api_response.get('video') or {}
        page_meta = api_response.get('metaData') or {}

        # The metadata lookup is a hard subscript on purpose: without any
        # Brightcove id there is nothing to extract.
        brightcove_id = video_info.get('videoId') or page_meta['showContentVideoId']
        player_url = smuggle_url(
            self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
            {'geo_countries': ['AU']})

        title = (
            video_info.get('title')
            or page_meta.get('pageContentName')
            or page_meta.get('showContentName'))
        classification = (
            video_info.get('showRatingClassification')
            or page_meta.get('showProgramClassification'))
        published = (
            page_meta.get('contentPublishDate')
            or page_meta.get('pageContentPublishDate'))

        return {
            '_type': 'url_transparent',
            'url': player_url,
            'id': content_id,
            'title': title,
            'description': video_info.get('description'),
            'age_limit': parse_age_limit(classification),
            'series': page_meta.get('showName'),
            'season': page_meta.get('showContentSeason'),
            'timestamp': parse_iso8601(published),
            'ie_key': 'BrightcoveNew',
        }

View File

@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from .ooyala import OoyalaIE from ..utils import extract_attributes
class TheSunIE(InfoExtractor): class TheSunIE(InfoExtractor):
@ -16,6 +16,7 @@ class TheSunIE(InfoExtractor):
}, },
'playlist_count': 2, 'playlist_count': 2,
} }
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
def _real_extract(self, url): def _real_extract(self, url):
article_id = self._match_id(url) article_id = self._match_id(url)
@ -23,10 +24,15 @@ class TheSunIE(InfoExtractor):
webpage = self._download_webpage(url, article_id) webpage = self._download_webpage(url, article_id)
entries = [] entries = []
for ooyala_id in re.findall( for video in re.findall(
r'<[^>]+\b(?:id\s*=\s*"thesun-ooyala-player-|data-content-id\s*=\s*")([^"]+)', r'<video[^>]+data-video-id-pending=[^>]+>',
webpage): webpage):
entries.append(OoyalaIE._build_url_result(ooyala_id)) attrs = extract_attributes(video)
video_id = attrs['data-video-id-pending']
account_id = attrs.get('data-account', '5067014667001')
entries.append(self.url_result(
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id),
'BrightcoveNew', video_id))
return self.playlist_result( return self.playlist_result(
entries, article_id, self._og_search_title(webpage, fatal=False)) entries, article_id, self._og_search_title(webpage, fatal=False))

View File

@ -1,36 +0,0 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_b64decode,
compat_parse_qs,
)
class TutvIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
    _TEST = {
        'url': 'http://tu.tv/videos/robots-futbolistas',
        'md5': '0cd9e28ad270488911b0d2a72323395d',
        'info_dict': {
            'id': '2973058',
            'ext': 'mp4',
            'title': 'Robots futbolistas',
        },
    }

    def _real_extract(self, url):
        # The URL only carries a slug; the numeric id used by the site's
        # video-info endpoint has to be scraped from the page itself.
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)
        internal_id = self._search_regex(r'codVideo=([0-9]+)', page, 'internal video ID')

        # The endpoint answers with a query-string style body whose "kpt"
        # field is the base64-encoded media URL.
        flv_info = self._download_webpage(
            'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, display_id, 'Downloading video info')
        encoded_url = compat_parse_qs(flv_info)['kpt'][0]
        video_url = compat_b64decode(encoded_url).decode('utf-8')

        return {
            'id': internal_id,
            'url': video_url,
            'title': self._og_search_title(page),
        }

View File

@ -1,7 +1,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urlparse
from .internetvideoarchive import InternetVideoArchiveIE from .internetvideoarchive import InternetVideoArchiveIE
@ -13,7 +12,7 @@ class VideoDetectiveIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '194487', 'id': '194487',
'ext': 'mp4', 'ext': 'mp4',
'title': 'KICK-ASS 2', 'title': 'Kick-Ass 2',
'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a', 'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
}, },
'params': { 'params': {
@ -24,7 +23,7 @@ class VideoDetectiveIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) query = 'customerid=69249&publishedid=' + video_id
og_video = self._og_search_video_url(webpage) return self.url_result(
query = compat_urlparse.urlparse(og_video).query InternetVideoArchiveIE._build_json_url(query),
return self.url_result(InternetVideoArchiveIE._build_json_url(query), ie=InternetVideoArchiveIE.ie_key()) ie=InternetVideoArchiveIE.ie_key())

View File

@ -4979,7 +4979,7 @@ class ISO3166Utils(object):
class GeoUtils(object): class GeoUtils(object):
# Major IPv4 address blocks per country # Major IPv4 address blocks per country
_country_ip_map = { _country_ip_map = {
'AD': '85.94.160.0/19', 'AD': '46.172.224.0/19',
'AE': '94.200.0.0/13', 'AE': '94.200.0.0/13',
'AF': '149.54.0.0/17', 'AF': '149.54.0.0/17',
'AG': '209.59.64.0/18', 'AG': '209.59.64.0/18',
@ -4987,28 +4987,30 @@ class GeoUtils(object):
'AL': '46.99.0.0/16', 'AL': '46.99.0.0/16',
'AM': '46.70.0.0/15', 'AM': '46.70.0.0/15',
'AO': '105.168.0.0/13', 'AO': '105.168.0.0/13',
'AP': '159.117.192.0/21', 'AP': '182.50.184.0/21',
'AQ': '23.154.160.0/24',
'AR': '181.0.0.0/12', 'AR': '181.0.0.0/12',
'AS': '202.70.112.0/20', 'AS': '202.70.112.0/20',
'AT': '84.112.0.0/13', 'AT': '77.116.0.0/14',
'AU': '1.128.0.0/11', 'AU': '1.128.0.0/11',
'AW': '181.41.0.0/18', 'AW': '181.41.0.0/18',
'AZ': '5.191.0.0/16', 'AX': '185.217.4.0/22',
'AZ': '5.197.0.0/16',
'BA': '31.176.128.0/17', 'BA': '31.176.128.0/17',
'BB': '65.48.128.0/17', 'BB': '65.48.128.0/17',
'BD': '114.130.0.0/16', 'BD': '114.130.0.0/16',
'BE': '57.0.0.0/8', 'BE': '57.0.0.0/8',
'BF': '129.45.128.0/17', 'BF': '102.178.0.0/15',
'BG': '95.42.0.0/15', 'BG': '95.42.0.0/15',
'BH': '37.131.0.0/17', 'BH': '37.131.0.0/17',
'BI': '154.117.192.0/18', 'BI': '154.117.192.0/18',
'BJ': '137.255.0.0/16', 'BJ': '137.255.0.0/16',
'BL': '192.131.134.0/24', 'BL': '185.212.72.0/23',
'BM': '196.12.64.0/18', 'BM': '196.12.64.0/18',
'BN': '156.31.0.0/16', 'BN': '156.31.0.0/16',
'BO': '161.56.0.0/16', 'BO': '161.56.0.0/16',
'BQ': '161.0.80.0/20', 'BQ': '161.0.80.0/20',
'BR': '152.240.0.0/12', 'BR': '191.128.0.0/12',
'BS': '24.51.64.0/18', 'BS': '24.51.64.0/18',
'BT': '119.2.96.0/19', 'BT': '119.2.96.0/19',
'BW': '168.167.0.0/16', 'BW': '168.167.0.0/16',
@ -5016,20 +5018,20 @@ class GeoUtils(object):
'BZ': '179.42.192.0/18', 'BZ': '179.42.192.0/18',
'CA': '99.224.0.0/11', 'CA': '99.224.0.0/11',
'CD': '41.243.0.0/16', 'CD': '41.243.0.0/16',
'CF': '196.32.200.0/21', 'CF': '197.242.176.0/21',
'CG': '197.214.128.0/17', 'CG': '160.113.0.0/16',
'CH': '85.0.0.0/13', 'CH': '85.0.0.0/13',
'CI': '154.232.0.0/14', 'CI': '102.136.0.0/14',
'CK': '202.65.32.0/19', 'CK': '202.65.32.0/19',
'CL': '152.172.0.0/14', 'CL': '152.172.0.0/14',
'CM': '165.210.0.0/15', 'CM': '102.244.0.0/14',
'CN': '36.128.0.0/10', 'CN': '36.128.0.0/10',
'CO': '181.240.0.0/12', 'CO': '181.240.0.0/12',
'CR': '201.192.0.0/12', 'CR': '201.192.0.0/12',
'CU': '152.206.0.0/15', 'CU': '152.206.0.0/15',
'CV': '165.90.96.0/19', 'CV': '165.90.96.0/19',
'CW': '190.88.128.0/17', 'CW': '190.88.128.0/17',
'CY': '46.198.0.0/15', 'CY': '31.153.0.0/16',
'CZ': '88.100.0.0/14', 'CZ': '88.100.0.0/14',
'DE': '53.0.0.0/8', 'DE': '53.0.0.0/8',
'DJ': '197.241.0.0/17', 'DJ': '197.241.0.0/17',
@ -5046,6 +5048,7 @@ class GeoUtils(object):
'EU': '2.16.0.0/13', 'EU': '2.16.0.0/13',
'FI': '91.152.0.0/13', 'FI': '91.152.0.0/13',
'FJ': '144.120.0.0/16', 'FJ': '144.120.0.0/16',
'FK': '80.73.208.0/21',
'FM': '119.252.112.0/20', 'FM': '119.252.112.0/20',
'FO': '88.85.32.0/19', 'FO': '88.85.32.0/19',
'FR': '90.0.0.0/9', 'FR': '90.0.0.0/9',
@ -5055,8 +5058,8 @@ class GeoUtils(object):
'GE': '31.146.0.0/16', 'GE': '31.146.0.0/16',
'GF': '161.22.64.0/18', 'GF': '161.22.64.0/18',
'GG': '62.68.160.0/19', 'GG': '62.68.160.0/19',
'GH': '45.208.0.0/14', 'GH': '154.160.0.0/12',
'GI': '85.115.128.0/19', 'GI': '95.164.0.0/16',
'GL': '88.83.0.0/19', 'GL': '88.83.0.0/19',
'GM': '160.182.0.0/15', 'GM': '160.182.0.0/15',
'GN': '197.149.192.0/18', 'GN': '197.149.192.0/18',
@ -5085,13 +5088,13 @@ class GeoUtils(object):
'JE': '87.244.64.0/18', 'JE': '87.244.64.0/18',
'JM': '72.27.0.0/17', 'JM': '72.27.0.0/17',
'JO': '176.29.0.0/16', 'JO': '176.29.0.0/16',
'JP': '126.0.0.0/8', 'JP': '133.0.0.0/8',
'KE': '105.48.0.0/12', 'KE': '105.48.0.0/12',
'KG': '158.181.128.0/17', 'KG': '158.181.128.0/17',
'KH': '36.37.128.0/17', 'KH': '36.37.128.0/17',
'KI': '103.25.140.0/22', 'KI': '103.25.140.0/22',
'KM': '197.255.224.0/20', 'KM': '197.255.224.0/20',
'KN': '198.32.32.0/19', 'KN': '198.167.192.0/19',
'KP': '175.45.176.0/22', 'KP': '175.45.176.0/22',
'KR': '175.192.0.0/10', 'KR': '175.192.0.0/10',
'KW': '37.36.0.0/14', 'KW': '37.36.0.0/14',
@ -5099,10 +5102,10 @@ class GeoUtils(object):
'KZ': '2.72.0.0/13', 'KZ': '2.72.0.0/13',
'LA': '115.84.64.0/18', 'LA': '115.84.64.0/18',
'LB': '178.135.0.0/16', 'LB': '178.135.0.0/16',
'LC': '192.147.231.0/24', 'LC': '24.92.144.0/20',
'LI': '82.117.0.0/19', 'LI': '82.117.0.0/19',
'LK': '112.134.0.0/15', 'LK': '112.134.0.0/15',
'LR': '41.86.0.0/19', 'LR': '102.183.0.0/16',
'LS': '129.232.0.0/17', 'LS': '129.232.0.0/17',
'LT': '78.56.0.0/13', 'LT': '78.56.0.0/13',
'LU': '188.42.0.0/16', 'LU': '188.42.0.0/16',
@ -5127,7 +5130,7 @@ class GeoUtils(object):
'MT': '46.11.0.0/16', 'MT': '46.11.0.0/16',
'MU': '105.16.0.0/12', 'MU': '105.16.0.0/12',
'MV': '27.114.128.0/18', 'MV': '27.114.128.0/18',
'MW': '105.234.0.0/16', 'MW': '102.70.0.0/15',
'MX': '187.192.0.0/11', 'MX': '187.192.0.0/11',
'MY': '175.136.0.0/13', 'MY': '175.136.0.0/13',
'MZ': '197.218.0.0/15', 'MZ': '197.218.0.0/15',
@ -5158,23 +5161,23 @@ class GeoUtils(object):
'PW': '202.124.224.0/20', 'PW': '202.124.224.0/20',
'PY': '181.120.0.0/14', 'PY': '181.120.0.0/14',
'QA': '37.210.0.0/15', 'QA': '37.210.0.0/15',
'RE': '139.26.0.0/16', 'RE': '102.35.0.0/16',
'RO': '79.112.0.0/13', 'RO': '79.112.0.0/13',
'RS': '178.220.0.0/14', 'RS': '93.86.0.0/15',
'RU': '5.136.0.0/13', 'RU': '5.136.0.0/13',
'RW': '105.178.0.0/15', 'RW': '41.186.0.0/16',
'SA': '188.48.0.0/13', 'SA': '188.48.0.0/13',
'SB': '202.1.160.0/19', 'SB': '202.1.160.0/19',
'SC': '154.192.0.0/11', 'SC': '154.192.0.0/11',
'SD': '154.96.0.0/13', 'SD': '102.120.0.0/13',
'SE': '78.64.0.0/12', 'SE': '78.64.0.0/12',
'SG': '152.56.0.0/14', 'SG': '8.128.0.0/10',
'SI': '188.196.0.0/14', 'SI': '188.196.0.0/14',
'SK': '78.98.0.0/15', 'SK': '78.98.0.0/15',
'SL': '197.215.0.0/17', 'SL': '102.143.0.0/17',
'SM': '89.186.32.0/19', 'SM': '89.186.32.0/19',
'SN': '41.82.0.0/15', 'SN': '41.82.0.0/15',
'SO': '197.220.64.0/19', 'SO': '154.115.192.0/18',
'SR': '186.179.128.0/17', 'SR': '186.179.128.0/17',
'SS': '105.235.208.0/21', 'SS': '105.235.208.0/21',
'ST': '197.159.160.0/19', 'ST': '197.159.160.0/19',
@ -5197,15 +5200,15 @@ class GeoUtils(object):
'TV': '202.2.96.0/19', 'TV': '202.2.96.0/19',
'TW': '120.96.0.0/11', 'TW': '120.96.0.0/11',
'TZ': '156.156.0.0/14', 'TZ': '156.156.0.0/14',
'UA': '93.72.0.0/13', 'UA': '37.52.0.0/14',
'UG': '154.224.0.0/13', 'UG': '102.80.0.0/13',
'US': '3.0.0.0/8', 'US': '6.0.0.0/8',
'UY': '167.56.0.0/13', 'UY': '167.56.0.0/13',
'UZ': '82.215.64.0/18', 'UZ': '84.54.64.0/18',
'VA': '212.77.0.0/19', 'VA': '212.77.0.0/19',
'VC': '24.92.144.0/20', 'VC': '207.191.240.0/21',
'VE': '186.88.0.0/13', 'VE': '186.88.0.0/13',
'VG': '172.103.64.0/18', 'VG': '66.81.192.0/20',
'VI': '146.226.0.0/16', 'VI': '146.226.0.0/16',
'VN': '14.160.0.0/11', 'VN': '14.160.0.0/11',
'VU': '202.80.32.0/20', 'VU': '202.80.32.0/20',
@ -5214,8 +5217,8 @@ class GeoUtils(object):
'YE': '134.35.0.0/16', 'YE': '134.35.0.0/16',
'YT': '41.242.116.0/22', 'YT': '41.242.116.0/22',
'ZA': '41.0.0.0/11', 'ZA': '41.0.0.0/11',
'ZM': '165.56.0.0/13', 'ZM': '102.144.0.0/13',
'ZW': '41.85.192.0/19', 'ZW': '102.177.192.0/18',
} }
@classmethod @classmethod

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2019.10.22' __version__ = '2019.10.29'