From b2401d8e663ad307e27580df62d0000d0e2dcfd9 Mon Sep 17 00:00:00 2001 From: gcmalloc Date: Thu, 25 Oct 2012 17:15:56 +0200 Subject: [PATCH 1/3] adding youtube user --- test/test_download.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/test/test_download.py b/test/test_download.py index d1d6b119b..584e0d7c0 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -10,6 +10,7 @@ from youtube_dl.InfoExtractors import MetacafeIE, BlipTVIE from youtube_dl.InfoExtractors import XVideosIE, VimeoIE from youtube_dl.InfoExtractors import SoundcloudIE, StanfordOpenClassroomIE from youtube_dl.InfoExtractors import CollegeHumorIE, XNXXIE +from youtube_dl.InfoExtractors import YoutubeUserIE class DownloadTest(unittest.TestCase): @@ -21,6 +22,8 @@ class DownloadTest(unittest.TestCase): YOUTUBE_URL = "http://www.youtube.com/watch?v=BaW_jenozKc" YOUTUBE_FILE = "BaW_jenozKc.mp4" + YOUTUBEUSER_URL = "http://www.youtube.com/user/phihag" + DAILYMOTION_MD5 = "d363a50e9eb4f22ce90d08d15695bb47" DAILYMOTION_URL = "http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech" DAILYMOTION_FILE = "x33vw9.mp4" @@ -70,6 +73,15 @@ class DownloadTest(unittest.TestCase): self.assertTrue(os.path.exists(DownloadTest.YOUTUBE_FILE)) self.assertEqual(os.path.getsize(DownloadTest.YOUTUBE_FILE), DownloadTest.YOUTUBE_SIZE) + def test_youtubeuser(self): + with open(DownloadTest.PARAMETERS_FILE) as f: + fd = FileDownloader(json.load(f)) + fd.add_info_extractor(YoutubeUserIE()) + fd.add_info_extractor(YoutubeIE()) + fd.download([DownloadTest.YOUTUBEUSER_URL]) + self.assertTrue(os.path.exists(DownloadTest.YOUTUBE_FILE)) + self.assertEqual(os.path.getsize(DownloadTest.YOUTUBE_FILE), DownloadTest.YOUTUBE_SIZE) + def test_dailymotion(self): with open(DownloadTest.PARAMETERS_FILE) as f: fd = FileDownloader(json.load(f)) @@ -120,7 +132,6 @@ class DownloadTest(unittest.TestCase): def test_vimeo2(self): #skipped for the moment produce an error - return with open(DownloadTest.PARAMETERS_FILE) as f: fd = FileDownloader(json.load(f)) fd.add_info_extractor(VimeoIE()) From 00953b54b356ebd9e751b01c88a958158bae7d5f Mon Sep 17 00:00:00 2001 From: gcmalloc Date: Wed, 28 Nov 2012 14:25:40 +0100 Subject: [PATCH 2/3] removing college humour as it is in a non working state --- youtube_dl/InfoExtractors.py | 114 ++++++++--------------------------- youtube_dl/__init__.py | 1 - 2 files changed, 25 insertions(+), 90 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 13b04ab5b..12aa93d23 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -152,7 +152,7 @@ class YoutubeIE(InfoExtractor): '44': '480x854', '45': '720x1280', '46': '1080x1920', - } + } IE_NAME = u'youtube' def suitable(self, url): @@ -380,7 +380,7 @@ class YoutubeIE(InfoExtractor): video_description = get_element_by_id("eow-description", video_webpage.decode('utf8')) if video_description: video_description = clean_html(video_description) else: video_description = '' - + # closed captions video_subtitles = None if self._downloader.params.get('writesubtitles', False): @@ -1074,7 +1074,7 @@ class VimeoIE(InfoExtractor): except: self._downloader.trouble(u'ERROR: unable to extract info section') return - + # Extract title video_title = config["video"]["title"] @@ -1161,7 +1161,7 @@ class GenericIE(InfoExtractor): def report_following_redirect(self, new_url): """Report information extraction.""" self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url) - + def _test_redirect(self, url): """Check if it is a redirect, like url shorteners, in case restart chain.""" class HeadRequest(urllib2.Request): @@ -1170,38 +1170,38 @@ class GenericIE(InfoExtractor): class HEADRedirectHandler(urllib2.HTTPRedirectHandler): """ - Subclass the HTTPRedirectHandler to make it use our + Subclass the HTTPRedirectHandler to make it use our HeadRequest also on the redirected URL """ - def redirect_request(self, req, fp, code, msg, headers, newurl): + def redirect_request(self, req, fp, code, msg, headers, newurl): if code in (301, 302, 303, 307): - newurl = newurl.replace(' ', '%20') + newurl = newurl.replace(' ', '%20') newheaders = dict((k,v) for k,v in req.headers.items() if k.lower() not in ("content-length", "content-type")) - return HeadRequest(newurl, + return HeadRequest(newurl, headers=newheaders, - origin_req_host=req.get_origin_req_host(), - unverifiable=True) - else: - raise urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp) + origin_req_host=req.get_origin_req_host(), + unverifiable=True) + else: + raise urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp) class HTTPMethodFallback(urllib2.BaseHandler): """ Fallback to GET if HEAD is not allowed (405 HTTP error) """ - def http_error_405(self, req, fp, code, msg, headers): + def http_error_405(self, req, fp, code, msg, headers): fp.read() fp.close() newheaders = dict((k,v) for k,v in req.headers.items() if k.lower() not in ("content-length", "content-type")) - return self.parent.open(urllib2.Request(req.get_full_url(), - headers=newheaders, - origin_req_host=req.get_origin_req_host(), + return self.parent.open(urllib2.Request(req.get_full_url(), + headers=newheaders, + origin_req_host=req.get_origin_req_host(), unverifiable=True)) # Build our opener - opener = urllib2.OpenerDirector() + opener = urllib2.OpenerDirector() for handler in [urllib2.HTTPHandler, urllib2.HTTPDefaultErrorHandler, HTTPMethodFallback, HEADRedirectHandler, urllib2.HTTPErrorProcessor, urllib2.HTTPSHandler]: @@ -1209,9 +1209,9 @@ class GenericIE(InfoExtractor): response = opener.open(HeadRequest(url)) new_url = response.geturl() - + if url == new_url: return False - + self.report_following_redirect(new_url) self._downloader.download([new_url]) return True @@ -2195,7 +2195,7 @@ class MyVideoIE(InfoExtractor): def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) - + def report_download_webpage(self, video_id): """Report webpage download.""" self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id) @@ -2343,7 +2343,7 @@ class ComedyCentralIE(InfoExtractor): return else: mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])] - + playerUrl_raw = mMovieParams[0][0] self.report_player_url(epTitle) try: @@ -2392,7 +2392,7 @@ class ComedyCentralIE(InfoExtractor): if len(turls) == 0: self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found') continue - + if self._downloader.params.get('listformats', None): self._print_formats([i[0] for i in turls]) return @@ -2432,7 +2432,7 @@ class ComedyCentralIE(InfoExtractor): } results.append(info) - + return results @@ -2510,70 +2510,6 @@ class EscapistIE(InfoExtractor): return [info] -class CollegeHumorIE(InfoExtractor): - """Information extractor for collegehumor.com""" - - _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P[0-9]+)/(?P.*)$' - IE_NAME = u'collegehumor' - - def report_webpage(self, video_id): - """Report information extraction.""" - self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id)) - - def report_extraction(self, video_id): - """Report information extraction.""" - self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - if mobj is None: - self._downloader.trouble(u'ERROR: invalid URL: %s' % url) - return - video_id = mobj.group('videoid') - - self.report_webpage(video_id) - request = urllib2.Request(url) - try: - webpage = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) - return - - m = re.search(r'id="video:(?P[0-9]+)"', webpage) - if m is None: - self._downloader.trouble(u'ERROR: Cannot extract internal video ID') - return - internal_video_id = m.group('internalvideoid') - - info = { - 'id': video_id, - 'internal_id': internal_video_id, - } - - self.report_extraction(video_id) - xmlUrl = 'http://www.collegehumor.com/moogaloop/video:' + internal_video_id - try: - metaXml = urllib2.urlopen(xmlUrl).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % compat_str(err)) - return - - mdoc = xml.etree.ElementTree.fromstring(metaXml) - try: - videoNode = mdoc.findall('./video')[0] - info['description'] = videoNode.findall('./description')[0].text - info['title'] = videoNode.findall('./caption')[0].text - info['url'] = videoNode.findall('./file')[0].text - info['thumbnail'] = videoNode.findall('./thumbnail')[0].text - info['ext'] = info['url'].rpartition('.')[2] - info['format'] = info['ext'] - except IndexError: - self._downloader.trouble(u'\nERROR: Invalid metadata XML file') - return - - return [info] - - class XVideosIE(InfoExtractor): """Information extractor for xvideos.com""" @@ -3005,7 +2941,7 @@ class StanfordOpenClassroomIE(InfoExtractor): assert entry['type'] == 'reference' results += self.extract(entry['url']) return results - + else: # Root page info = { 'id': 'Stanford OpenClassroom', @@ -3077,7 +3013,7 @@ class MTVIE(InfoExtractor): self._downloader.trouble(u'ERROR: unable to extract performer') return performer = unescapeHTML(mobj.group(1).decode('iso-8859-1')) - video_title = performer + ' - ' + song_name + video_title = performer + ' - ' + song_name mobj = re.search(r'', webpage) if mobj is None: diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 5fc39184a..a596ad9a4 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -352,7 +352,6 @@ def gen_extractors(): MyVideoIE(), ComedyCentralIE(), EscapistIE(), - CollegeHumorIE(), XVideosIE(), SoundcloudIE(), InfoQIE(), From c95a5148842814349ba33f7213cfc8e9407290c2 Mon Sep 17 00:00:00 2001 From: gcmalloc Date: Wed, 28 Nov 2012 14:28:32 +0100 Subject: [PATCH 3/3] removing corresponding test --- test/test_download.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/test/test_download.py b/test/test_download.py index 584e0d7c0..19799589d 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -56,10 +56,6 @@ class DownloadTest(unittest.TestCase): STANDFORD_URL = "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100" STANDFORD_FILE = "PracticalUnix_intro-environment.mp4" - COLLEGEHUMOR_MD5 = "" - COLLEGEHUMOR_URL = "http://www.collegehumor.com/video/6830834/mitt-romney-style-gangnam-style-parody" - COLLEGEHUMOR_FILE = "" - XNXX_MD5 = "5f0469c8d1dfd1bc38c8e6deb5e0a21d" XNXX_URL = "http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_" XNXX_FILE = "1135332.flv" @@ -158,15 +154,6 @@ class DownloadTest(unittest.TestCase): md5_down_file = md5_for_file(DownloadTest.STANDFORD_FILE) self.assertEqual(md5_down_file, DownloadTest.STANDFORD_MD5) - def test_collegehumor(self): - with open(DownloadTest.PARAMETERS_FILE) as f: - fd = FileDownloader(json.load(f)) - fd.add_info_extractor(CollegeHumorIE()) - fd.download([DownloadTest.COLLEGEHUMOR_URL]) - self.assertTrue(os.path.exists(DownloadTest.COLLEGEHUMOR_FILE)) - md5_down_file = md5_for_file(DownloadTest.COLLEGEHUMOR_FILE) - self.assertEqual(md5_down_file, DownloadTest.COLLEGEHUMOR_MD5) - def test_xnxx(self): with open(DownloadTest.PARAMETERS_FILE) as f: fd = FileDownloader(json.load(f))