diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 6bc7d0366..145c3ff83 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.01.14*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.01.14** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.01.21*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.01.21** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.01.14 +[debug] youtube-dl version 2018.01.21 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index bfafaca6a..65a01fcc7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,31 @@ +version 2018.01.21 + +Core +* [extractor/common] Improve jwplayer DASH formats extraction (#9242, #15187) +* [utils] Improve scientific notation handling in js_to_json (#14789) + +Extractors ++ [southparkdk] Add support for southparkstudios.nu ++ [southpark] Add support for collections (#14803) +* [franceinter] Fix upload date extraction (#14996) ++ [rtvs] Add support for rtvs.sk (#9242, #15187) +* [restudy] Fix extraction and extend URL regular expression (#15347) +* [youtube:live] Improve live detection (#15365) ++ [springboardplatform] Add support for springboardplatform.com +* [prosiebensat1] Add another clip id regular expression (#15290) +- [ringtv] Remove extractor (#15345) + + +version 2018.01.18 + +Extractors +* [soundcloud] Update client id (#15306) +- [kamcord] Remove extractor (#15322) ++ [spiegel] Add support for nexx videos (#15285) +* [twitch] Fix authentication and error capture (#14090, #15264) +* [vk] Detect more errors due to copyright complaints (#15259) + + version 2018.01.14 Extractors diff --git a/docs/supportedsites.md b/docs/supportedsites.md index c04a75b88..b0825c58b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -383,7 +383,6 @@ - **JWPlatform** - **Kakao** - **Kaltura** - - **Kamcord** - **KanalPlay**: Kanal 5/9/11 Play - **Kankan** - **Karaoketv** @@ -683,7 +682,6 @@ - **revision** - **revision3:embed** - **RICE** - - **RingTV** - **RMCDecouverte** - **RockstarGames** - **RoosterTeeth** @@ -704,6 +702,7 @@ - **rtve.es:live**: RTVE.es live streams - **rtve.es:television** - **RTVNH** + - **RTVS** - **Rudo** - **RUHD** - **RulePorn** @@ -774,6 +773,7 @@ - **Sport5** - **SportBoxEmbed** - **SportDeutschland** + - **SpringboardPlatform** - **Sprout** - **sr:mediathek**: Saarländischer Rundfunk - **SRGSSR** diff --git a/test/test_download.py b/test/test_download.py index 209f5f6d6..ebe820dfc 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -92,8 +92,8 @@ class TestDownload(unittest.TestCase): def generator(test_case, tname): def test_template(self): - ie = youtube_dl.extractor.get_info_extractor(test_case['name']) - other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])] + ie = youtube_dl.extractor.get_info_extractor(test_case['name'])() + other_ies = [get_info_extractor(ie_key)() for ie_key in test_case.get('add_ie', [])] is_playlist = any(k.startswith('playlist') for k in test_case) test_cases = test_case.get( 'playlist', [] if is_playlist else [test_case]) diff --git a/test/test_utils.py b/test/test_utils.py index 0857c0fc0..fdf6031f7 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -814,6 +814,9 @@ class TestUtil(unittest.TestCase): inp = '''{"duration": "00:01:07"}''' self.assertEqual(js_to_json(inp), '''{"duration": "00:01:07"}''') + inp = '''{segments: [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''' + self.assertEqual(js_to_json(inp), '''{"segments": [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''') + def test_js_to_json_edgecases(self): on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) @@ -885,6 +888,13 @@ class TestUtil(unittest.TestCase): on = js_to_json('{/*comment\n*/42/*comment\n*/:/*comment\n*/42/*comment\n*/}') self.assertEqual(json.loads(on), {'42': 42}) + on = js_to_json('{42:4.2e1}') + self.assertEqual(json.loads(on), {'42': 42.0}) + + def test_js_to_json_malformed(self): + self.assertEqual(js_to_json('42a1'), '42"a1"') + self.assertEqual(js_to_json('42a-1'), '42"a"-1') + def test_extract_attributes(self): self.assertEqual(extract_attributes(''), {'x': 'y'}) self.assertEqual(extract_attributes(""), {'x': 'y'}) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b9b8fe1a9..92da0ce3c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2420,7 +2420,7 @@ class InfoExtractor(object): formats.extend(self._extract_m3u8_formats( source_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=m3u8_id, fatal=False)) - elif ext == 'mpd': + elif source_type == 'dash' or ext == 'mpd': formats.extend(self._extract_mpd_formats( source_url, video_id, mpd_id=mpd_id, fatal=False)) elif ext == 'smil': diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c82614bf9..57e74ba62 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -490,7 +490,6 @@ from .jwplatform import JWPlatformIE from .jpopsukitv import JpopsukiIE from .kakao import KakaoIE from .kaltura import KalturaIE -from .kamcord import KamcordIE from .kanalplay import KanalPlayIE from .kankan import KankanIE from .karaoketv import KaraoketvIE @@ -882,7 +881,6 @@ from .revision3 import ( Revision3IE, ) from .rice import RICEIE -from .ringtv import RingTVIE from .rmcdecouverte import RMCDecouverteIE from .ro220 import Ro220IE from .rockstargames import RockstarGamesIE @@ -902,6 +900,7 @@ from .rtp import RTPIE from .rts import RTSIE from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE from .rtvnh import RTVNHIE +from .rtvs import RTVSIE from .rudo import RudoIE from .ruhd import RUHDIE from .ruleporn import RulePornIE @@ -991,6 +990,7 @@ from .stitcher import StitcherIE from .sport5 import Sport5IE from .sportbox import SportBoxEmbedIE from .sportdeutschland import SportDeutschlandIE +from .springboardplatform import SpringboardPlatformIE from .sprout import SproutIE from .srgssr import ( SRGSSRIE, @@ -1046,7 +1046,6 @@ from .theplatform import ( ThePlatformFeedIE, ) from .thescene import TheSceneIE -from .thesixtyone import TheSixtyOneIE from .thestar import TheStarIE from .thesun import TheSunIE from .theweatherchannel import TheWeatherChannelIE diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py index 707b9e00d..05806895c 100644 --- a/youtube_dl/extractor/franceinter.py +++ b/youtube_dl/extractor/franceinter.py @@ -33,7 +33,7 @@ class FranceInterIE(InfoExtractor): description = self._og_search_description(webpage) upload_date_str = self._search_regex( - r'class=["\']cover-emission-period["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<', + r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<', webpage, 'upload date', fatal=False) if upload_date_str: upload_date_list = upload_date_str.split() diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9b0cd004f..1d9da8115 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -101,6 +101,7 @@ from .vzaar import VzaarIE from .channel9 import Channel9IE from .vshare import VShareIE from .mediasite import MediasiteIE +from .springboardplatform import SpringboardPlatformIE class GenericIE(InfoExtractor): @@ -1938,6 +1939,21 @@ class GenericIE(InfoExtractor): 'timestamp': 1474354800, 'upload_date': '20160920', } + }, + { + 'url': 'http://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton', + 'info_dict': { + 'id': '1731611', + 'ext': 'mp4', + 'title': 'Official Trailer | TROLLS: THE BEAT GOES ON!', + 'description': 'md5:eb5f23826a027ba95277d105f248b825', + 'timestamp': 1516100691, + 'upload_date': '20180116', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [SpringboardPlatformIE.ie_key()], } # { # # TODO: find another test @@ -2906,6 +2922,12 @@ class GenericIE(InfoExtractor): for mediasite_url in mediasite_urls] return self.playlist_result(entries, video_id, video_title) + springboardplatform_urls = SpringboardPlatformIE._extract_urls(webpage) + if springboardplatform_urls: + return self.playlist_from_matches( + springboardplatform_urls, video_id, video_title, + ie=SpringboardPlatformIE.ie_key()) + def merge_dicts(dict1, dict2): merged = {} for k, v in dict1.items(): diff --git a/youtube_dl/extractor/kamcord.py b/youtube_dl/extractor/kamcord.py deleted file mode 100644 index b50120d98..000000000 --- a/youtube_dl/extractor/kamcord.py +++ /dev/null @@ -1,71 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - int_or_none, - qualities, -) - - -class KamcordIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?kamcord\.com/v/(?P[^/?#&]+)' - _TEST = { - 'url': 'https://www.kamcord.com/v/hNYRduDgWb4', - 'md5': 'c3180e8a9cfac2e86e1b88cb8751b54c', - 'info_dict': { - 'id': 'hNYRduDgWb4', - 'ext': 'mp4', - 'title': 'Drinking Madness', - 'uploader': 'jacksfilms', - 'uploader_id': '3044562', - 'view_count': int, - 'like_count': int, - 'comment_count': int, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - video = self._parse_json( - self._search_regex( - r'window\.__props\s*=\s*({.+?});?(?:\n|\s*\s*(.+?)', diff --git a/youtube_dl/extractor/restudy.py b/youtube_dl/extractor/restudy.py index fd50065d4..d47fb45ca 100644 --- a/youtube_dl/extractor/restudy.py +++ b/youtube_dl/extractor/restudy.py @@ -5,8 +5,8 @@ from .common import InfoExtractor class RestudyIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?restudy\.dk/video/play/id/(?P[0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:(?:www|portal)\.)?restudy\.dk/video/[^/]+/id/(?P[0-9]+)' + _TESTS = [{ 'url': 'https://www.restudy.dk/video/play/id/1637', 'info_dict': { 'id': '1637', @@ -18,7 +18,10 @@ class RestudyIE(InfoExtractor): # rtmp download 'skip_download': True, } - } + }, { + 'url': 'https://portal.restudy.dk/video/leiden-frosteffekt/id/1637', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -29,7 +32,7 @@ class RestudyIE(InfoExtractor): description = self._og_search_description(webpage).strip() formats = self._extract_smil_formats( - 'https://www.restudy.dk/awsmedia/SmilDirectory/video_%s.xml' % video_id, + 'https://cdn.portal.restudy.dk/dynamic/themes/front/awsmedia/SmilDirectory/video_%s.xml' % video_id, video_id) self._sort_formats(formats) diff --git a/youtube_dl/extractor/ringtv.py b/youtube_dl/extractor/ringtv.py deleted file mode 100644 index 2c2c707bd..000000000 --- a/youtube_dl/extractor/ringtv.py +++ /dev/null @@ -1,44 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class RingTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ringtv\.craveonline\.com/(?Pnews|videos/video)/(?P[^/?#]+)' - _TEST = { - 'url': 'http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30', - 'md5': 'd25945f5df41cdca2d2587165ac28720', - 'info_dict': { - 'id': '857645', - 'ext': 'mp4', - 'title': 'Video: Luis Collazo says Victor Ortiz "better not quit on Jan. 30" - Ring TV', - 'description': 'Luis Collazo is excited about his Jan. 30 showdown with fellow former welterweight titleholder Victor Ortiz at Barclays Center in his hometown of Brooklyn. The SuperBowl week fight headlines a Golden Boy Live! card on Fox Sports 1.', - } - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id').split('-')[0] - webpage = self._download_webpage(url, video_id) - - if mobj.group('type') == 'news': - video_id = self._search_regex( - r'''(?x)]+src="http://cms\.springboardplatform\.com/ - embed_iframe/[0-9]+/video/([0-9]+)/''', - webpage, 'real video ID') - title = self._og_search_title(webpage) - description = self._html_search_regex( - r'addthis:description="([^"]+)"', - webpage, 'description', fatal=False) - final_url = 'http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/conversion/%s.mp4' % video_id - thumbnail_url = 'http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/snapshots/%s.jpg' % video_id - - return { - 'id': video_id, - 'url': final_url, - 'title': title, - 'thumbnail': thumbnail_url, - 'description': description, - } diff --git a/youtube_dl/extractor/rtvs.py b/youtube_dl/extractor/rtvs.py new file mode 100644 index 000000000..6573b260d --- /dev/null +++ b/youtube_dl/extractor/rtvs.py @@ -0,0 +1,47 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class RTVSIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rtvs\.sk/(?:radio|televizia)/archiv/\d+/(?P\d+)' + _TESTS = [{ + # radio archive + 'url': 'http://www.rtvs.sk/radio/archiv/11224/414872', + 'md5': '134d5d6debdeddf8a5d761cbc9edacb8', + 'info_dict': { + 'id': '414872', + 'ext': 'mp3', + 'title': 'Ostrov pokladov 1 časť.mp3' + }, + 'params': { + 'skip_download': True, + } + }, { + # tv archive + 'url': 'http://www.rtvs.sk/televizia/archiv/8249/63118', + 'md5': '85e2c55cf988403b70cac24f5c086dc6', + 'info_dict': { + 'id': '63118', + 'ext': 'mp4', + 'title': 'Amaro Džives - Náš deň', + 'description': 'Galavečer pri príležitosti Medzinárodného dňa Rómov.' + }, + 'params': { + 'skip_download': True, + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + playlist_url = self._search_regex( + r'playlist["\']?\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, + 'playlist url', group='url') + + data = self._download_json( + playlist_url, video_id, 'Downloading playlist')[0] + return self._parse_jwplayer_data(data, video_id=video_id) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 6c9816eef..97ff422f0 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -157,7 +157,7 @@ class SoundcloudIE(InfoExtractor): }, ] - _CLIENT_ID = 'c6CU49JDMapyrQo06UxU9xouB9ZVzqCn' + _CLIENT_ID = 'DQskPX1pntALRzMp4HSxya3Mc0AO66Ro' _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf' @staticmethod diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index d8ce416fc..da75a43a7 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -6,7 +6,7 @@ from .mtv import MTVServicesInfoExtractor class SouthParkIE(MTVServicesInfoExtractor): IE_NAME = 'southpark.cc.com' - _VALID_URL = r'https?://(?:www\.)?(?Psouthpark\.cc\.com/(?:clips|(?:full-)?episodes)/(?P.+?)(\?|#|$))' + _VALID_URL = r'https?://(?:www\.)?(?Psouthpark\.cc\.com/(?:clips|(?:full-)?episodes|collections)/(?P.+?)(\?|#|$))' _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' @@ -20,6 +20,9 @@ class SouthParkIE(MTVServicesInfoExtractor): 'timestamp': 1112760000, 'upload_date': '20050406', }, + }, { + 'url': 'http://southpark.cc.com/collections/7758/fan-favorites/1', + 'only_matching': True, }] @@ -41,7 +44,7 @@ class SouthParkEsIE(SouthParkIE): class SouthParkDeIE(SouthParkIE): IE_NAME = 'southpark.de' - _VALID_URL = r'https?://(?:www\.)?(?Psouthpark\.de/(?:clips|alle-episoden)/(?P.+?)(\?|#|$))' + _VALID_URL = r'https?://(?:www\.)?(?Psouthpark\.de/(?:clips|alle-episoden|collections)/(?P.+?)(\?|#|$))' _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/' _TESTS = [{ @@ -70,12 +73,15 @@ class SouthParkDeIE(SouthParkIE): 'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.', }, 'playlist_count': 3, + }, { + 'url': 'http://www.southpark.de/collections/2476/superhero-showdown/1', + 'only_matching': True, }] class SouthParkNlIE(SouthParkIE): IE_NAME = 'southpark.nl' - _VALID_URL = r'https?://(?:www\.)?(?Psouthpark\.nl/(?:clips|(?:full-)?episodes)/(?P.+?)(\?|#|$))' + _VALID_URL = r'https?://(?:www\.)?(?Psouthpark\.nl/(?:clips|(?:full-)?episodes|collections)/(?P.+?)(\?|#|$))' _FEED_URL = 'http://www.southpark.nl/feeds/video-player/mrss/' _TESTS = [{ @@ -90,7 +96,7 @@ class SouthParkNlIE(SouthParkIE): class SouthParkDkIE(SouthParkIE): IE_NAME = 'southparkstudios.dk' - _VALID_URL = r'https?://(?:www\.)?(?Psouthparkstudios\.dk/(?:clips|full-episodes)/(?P.+?)(\?|#|$))' + _VALID_URL = r'https?://(?:www\.)?(?Psouthparkstudios\.(?:dk|nu)/(?:clips|full-episodes|collections)/(?P.+?)(\?|#|$))' _FEED_URL = 'http://www.southparkstudios.dk/feeds/video-player/mrss/' _TESTS = [{ @@ -100,4 +106,10 @@ class SouthParkDkIE(SouthParkIE): 'description': 'Butters is convinced he\'s living in a virtual reality.', }, 'playlist_mincount': 3, + }, { + 'url': 'http://www.southparkstudios.dk/collections/2476/superhero-showdown/1', + 'only_matching': True, + }, { + 'url': 'http://www.southparkstudios.nu/collections/2476/superhero-showdown/1', + 'only_matching': True, }] diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index 84298fee4..fc995e8c1 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from .nexx import NexxEmbedIE +from .nexx import ( + NexxIE, + NexxEmbedIE, +) from .spiegeltv import SpiegeltvIE from ..compat import compat_urlparse from ..utils import ( @@ -51,6 +54,10 @@ class SpiegelIE(InfoExtractor): }, { 'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html', 'only_matching': True, + }, { + # nexx video + 'url': 'http://www.spiegel.de/video/spiegel-tv-magazin-ueber-guellekrise-in-schleswig-holstein-video-99012776.html', + 'only_matching': True, }] def _real_extract(self, url): @@ -61,6 +68,14 @@ class SpiegelIE(InfoExtractor): if SpiegeltvIE.suitable(handle.geturl()): return self.url_result(handle.geturl(), 'Spiegeltv') + nexx_id = self._search_regex( + r'nexxOmniaId\s*:\s*(\d+)', webpage, 'nexx id', default=None) + if nexx_id: + domain_id = NexxIE._extract_domain_id(webpage) or '748' + return self.url_result( + 'nexx:%s:%s' % (domain_id, nexx_id), ie=NexxIE.ie_key(), + video_id=nexx_id) + video_data = extract_attributes(self._search_regex(r'(]+id="spVideoElements"[^>]+>)', webpage, 'video element', default='')) title = video_data.get('data-video-title') or get_element_by_attribute('class', 'module-title', webpage) diff --git a/youtube_dl/extractor/springboardplatform.py b/youtube_dl/extractor/springboardplatform.py new file mode 100644 index 000000000..07d99b579 --- /dev/null +++ b/youtube_dl/extractor/springboardplatform.py @@ -0,0 +1,125 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + xpath_attr, + xpath_text, + xpath_element, + unescapeHTML, + unified_timestamp, +) + + +class SpringboardPlatformIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + cms\.springboardplatform\.com/ + (?: + (?:previews|embed_iframe)/(?P\d+)/video/(?P\d+)| + xml_feeds_advanced/index/(?P\d+)/rss3/(?P\d+) + ) + ''' + _TESTS = [{ + 'url': 'http://cms.springboardplatform.com/previews/159/video/981017/0/0/1', + 'md5': '5c3cb7b5c55740d482561099e920f192', + 'info_dict': { + 'id': '981017', + 'ext': 'mp4', + 'title': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX', + 'description': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1409132328, + 'upload_date': '20140827', + 'duration': 193, + }, + }, { + 'url': 'http://cms.springboardplatform.com/embed_iframe/159/video/981017/rab007/rapbasement.com/1/1', + 'only_matching': True, + }, { + 'url': 'http://cms.springboardplatform.com/embed_iframe/20/video/1731611/ki055/kidzworld.com/10', + 'only_matching': True, + }, { + 'url': 'http://cms.springboardplatform.com/xml_feeds_advanced/index/159/rss3/981017/0/0/1/', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage): + return [ + mobj.group('url') + for mobj in re.finditer( + r']+\bsrc=(["\'])(?P(?:https?:)?//cms\.springboardplatform\.com/embed_iframe/\d+/video/\d+.*?)\1', + webpage)] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') or mobj.group('id_2') + index = mobj.group('index') or mobj.group('index_2') + + video = self._download_xml( + 'http://cms.springboardplatform.com/xml_feeds_advanced/index/%s/rss3/%s' + % (index, video_id), video_id) + + item = xpath_element(video, './/item', 'item', fatal=True) + + content = xpath_element( + item, './{http://search.yahoo.com/mrss/}content', 'content', + fatal=True) + title = unescapeHTML(xpath_text(item, './title', 'title', fatal=True)) + + video_url = content.attrib['url'] + + if 'error_video.mp4' in video_url: + raise ExtractorError( + 'Video %s no longer exists' % video_id, expected=True) + + duration = int_or_none(content.get('duration')) + tbr = int_or_none(content.get('bitrate')) + filesize = int_or_none(content.get('fileSize')) + width = int_or_none(content.get('width')) + height = int_or_none(content.get('height')) + + description = unescapeHTML(xpath_text( + item, './description', 'description')) + thumbnail = xpath_attr( + item, './{http://search.yahoo.com/mrss/}thumbnail', 'url', + 'thumbnail') + + timestamp = unified_timestamp(xpath_text( + item, './{http://cms.springboardplatform.com/namespaces.html}created', + 'timestamp')) + + formats = [{ + 'url': video_url, + 'format_id': 'http', + 'tbr': tbr, + 'filesize': filesize, + 'width': width, + 'height': height, + }] + + m3u8_format = formats[0].copy() + m3u8_format.update({ + 'url': re.sub(r'(https?://)cdn\.', r'\1hls.', video_url) + '.m3u8', + 'ext': 'mp4', + 'format_id': 'hls', + 'protocol': 'm3u8_native', + }) + formats.append(m3u8_format) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + 'formats': formats, + } diff --git a/youtube_dl/extractor/thesixtyone.py b/youtube_dl/extractor/thesixtyone.py deleted file mode 100644 index d63aef5de..000000000 --- a/youtube_dl/extractor/thesixtyone.py +++ /dev/null @@ -1,106 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import unified_strdate - - -class TheSixtyOneIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://(?:www\.)?thesixtyone\.com/ - (?:.*?/)* - (?: - s| - song/comments/list| - song - )/(?:[^/]+/)?(?P[A-Za-z0-9]+)/?$''' - _SONG_URL_TEMPLATE = 'http://thesixtyone.com/s/{0:}' - _SONG_FILE_URL_TEMPLATE = 'http://{audio_server:}/thesixtyone_production/audio/{0:}_stream' - _THUMBNAIL_URL_TEMPLATE = '{photo_base_url:}_desktop' - _TESTS = [ - { - 'url': 'http://www.thesixtyone.com/s/SrE3zD7s1jt/', - 'md5': '821cc43b0530d3222e3e2b70bb4622ea', - 'info_dict': { - 'id': 'SrE3zD7s1jt', - 'ext': 'mp3', - 'title': 'CASIO - Unicorn War Mixtape', - 'thumbnail': 're:^https?://.*_desktop$', - 'upload_date': '20071217', - 'duration': 3208, - } - }, - { - 'url': 'http://www.thesixtyone.com/song/comments/list/SrE3zD7s1jt', - 'only_matching': True, - }, - { - 'url': 'http://www.thesixtyone.com/s/ULoiyjuJWli#/s/SrE3zD7s1jt/', - 'only_matching': True, - }, - { - 'url': 'http://www.thesixtyone.com/#/s/SrE3zD7s1jt/', - 'only_matching': True, - }, - { - 'url': 'http://www.thesixtyone.com/song/SrE3zD7s1jt/', - 'only_matching': True, - }, - { - 'url': 'http://www.thesixtyone.com/maryatmidnight/song/StrawberriesandCream/yvWtLp0c4GQ/', - 'only_matching': True, - }, - ] - - _DECODE_MAP = { - 'x': 'a', - 'm': 'b', - 'w': 'c', - 'q': 'd', - 'n': 'e', - 'p': 'f', - 'a': '0', - 'h': '1', - 'e': '2', - 'u': '3', - 's': '4', - 'i': '5', - 'o': '6', - 'y': '7', - 'r': '8', - 'c': '9' - } - - def _real_extract(self, url): - song_id = self._match_id(url) - - webpage = self._download_webpage( - self._SONG_URL_TEMPLATE.format(song_id), song_id) - - song_data = self._parse_json(self._search_regex( - r'"%s":\s(\{.*?\})' % song_id, webpage, 'song_data'), song_id) - - if self._search_regex(r'(t61\.s3_audio_load\s*=\s*1\.0;)', webpage, 's3_audio_load marker', default=None): - song_data['audio_server'] = 's3.amazonaws.com' - else: - song_data['audio_server'] = song_data['audio_server'] + '.thesixtyone.com' - - keys = [self._DECODE_MAP.get(s, s) for s in song_data['key']] - url = self._SONG_FILE_URL_TEMPLATE.format( - "".join(reversed(keys)), **song_data) - - formats = [{ - 'format_id': 'sd', - 'url': url, - 'ext': 'mp3', - }] - - return { - 'id': song_id, - 'title': '{artist:} - {name:}'.format(**song_data), - 'formats': formats, - 'comment_count': song_data.get('comments_count'), - 'duration': song_data.get('play_time'), - 'like_count': song_data.get('score'), - 'thumbnail': self._THUMBNAIL_URL_TEMPLATE.format(**song_data), - 'upload_date': unified_strdate(song_data.get('publish_date')), - } diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index f9164af09..1981b4d4a 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -85,10 +85,15 @@ class TwitchBaseIE(InfoExtractor): if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: response = self._parse_json( e.cause.read().decode('utf-8'), None) - fail(response['message']) + fail(response.get('message') or response['errors'][0]) raise - redirect_url = urljoin(post_url, response['redirect']) + if 'Authenticated successfully' in response.get('message', ''): + return None, None + + redirect_url = urljoin( + post_url, + response.get('redirect') or response['redirect_path']) return self._download_webpage_handle( redirect_url, None, 'Downloading login redirect page', headers=headers) @@ -106,6 +111,10 @@ class TwitchBaseIE(InfoExtractor): 'password': password, }) + # Successful login + if not redirect_page: + return + if re.search(r'(?i)]+id="two-factor-submit"', redirect_page) is not None: # TODO: Add mechanism to request an SMS or phone call tfa_token = self._get_tfa_info('two-factor authentication token') diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index d4838b3e5..b8ea50362 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -318,9 +318,14 @@ class VKIE(VKBaseIE): 'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.', expected=True) + ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.' + ERRORS = { r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<': - 'Video %s has been removed from public access due to rightholder complaint.', + ERROR_COPYRIGHT, + + r'>The video .*? was removed from public access by request of the copyright holder.<': + ERROR_COPYRIGHT, r'Please log in or <': 'Video %s is only available for registered users, ' diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index a01ec1436..f698a5627 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2530,10 +2530,11 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor): webpage = self._download_webpage(url, channel_id, fatal=False) if webpage: page_type = self._og_search_property( - 'type', webpage, 'page type', default=None) + 'type', webpage, 'page type', default='') video_id = self._html_search_meta( 'videoId', webpage, 'video id', default=None) - if page_type == 'video' and video_id and re.match(r'^[0-9A-Za-z_-]{11}$', video_id): + if page_type.startswith('video') and video_id and re.match( + r'^[0-9A-Za-z_-]{11}$', video_id): return self.url_result(video_id, YoutubeIE.ie_key()) return self.url_result(base_url) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 386897a85..2fe9cf585 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2267,7 +2267,7 @@ def js_to_json(code): "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| {comment}|,(?={skip}[\]}}])| - [a-zA-Z_][.a-zA-Z_0-9]*| + (?:(?