From 62b3e5b7acc6ee3c90d53f5a911db9aead32e3f8 Mon Sep 17 00:00:00 2001 From: Martin Trigaux Date: Tue, 29 Mar 2016 14:18:44 +0200 Subject: [PATCH 1/2] screencast.com: support missing www The "www." part of the URL is not mandatory --- youtube_dl/extractor/screencast.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py index dfd897ba3..d5111c629 100644 --- a/youtube_dl/extractor/screencast.py +++ b/youtube_dl/extractor/screencast.py @@ -12,7 +12,7 @@ from ..utils import ( class ScreencastIE(InfoExtractor): - _VALID_URL = r'https?://www\.screencast\.com/t/(?P<id>[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?screencast\.com/t/(?P<id>[a-zA-Z0-9]+)' _TESTS = [{ 'url': 'http://www.screencast.com/t/3ZEjQXlT', 'md5': '917df1c13798a3e96211dd1561fded83', @@ -34,7 +34,7 @@ class ScreencastIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', } }, { - 'url': 'http://www.screencast.com/t/aAB3iowa', + 'url': 'http://screencast.com/t/aAB3iowa', 'md5': 'dedb2734ed00c9755761ccaee88527cd', 'info_dict': { 'id': 'aAB3iowa', From 52392831b87fd52b92d0ab04e562b9507ed8c9b4 Mon Sep 17 00:00:00 2001 From: Martin Trigaux Date: Tue, 29 Mar 2016 14:34:58 +0200 Subject: [PATCH 2/2] screencast.com: fallback on page title When determining the title of the page, use the <title> tag of the page --- youtube_dl/extractor/screencast.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py index d5111c629..99d2c96ab 100644 --- a/youtube_dl/extractor/screencast.py +++ b/youtube_dl/extractor/screencast.py @@ -95,7 +95,8 @@ class ScreencastIE(InfoExtractor): if title is None: title = self._html_search_regex( [r'<b>Title:</b> ([^<]*)</div>', - r'class="tabSeperator">></span><span class="tabText">(.*?)<'], + r'class="tabSeperator">></span><span class="tabText">(.*?)<', + r'<title>([^<]*)'], webpage, 'title') thumbnail = 
self._og_search_thumbnail(webpage) description = self._og_search_description(webpage, default=None)