From fafea72fda19b3f9bf7a5564c733dfcbff716e0a Mon Sep 17 00:00:00 2001 From: Ravi Date: Sat, 9 Jul 2011 16:08:28 -0400 Subject: [PATCH 1/7] Added parallel mode to download multiple videos concurrently Changed youtube playlist regex to match the new format of playlist url's --- youtube-dl | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 3ac27a857..3101ea819 100755 --- a/youtube-dl +++ b/youtube-dl @@ -30,6 +30,8 @@ import time import urllib import urllib2 import zlib +import threading +import Queue # parse_qs was moved from the cgi module to the urlparse module recently. try: @@ -47,6 +49,8 @@ std_headers = { simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii') +downloadqueue=Queue.Queue() + def preferredencoding(): """Get preferred encoding. @@ -303,6 +307,7 @@ class FileDownloader(object): self._num_downloads = 0 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self.params = params + self.queue=Queue.Queue @staticmethod def pmkdir(filename): @@ -651,8 +656,17 @@ class FileDownloader(object): else: self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval) return False - + def _do_download(self, filename, url, player_url): + if self.params.get('parallel') > 0: + downloadqueue.put({'filename':filename,'url':url,'player_url':player_url,'params':self.params}) + return False + else: + self._do_real_download(filename, url, player_url) + + + + def _do_real_download(self, filename, url, player_url): # Check file already present if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False): self.report_file_already_downloaded(filename) @@ -783,6 +797,27 @@ class FileDownloader(object): self.try_utime(filename, data.info().get('last-modified', None)) return True + + +class FileDownloadHelper(FileDownloader,threading.Thread): + """File Downloader that does 
threaded download if needed. + Download parameters are added to downloadqueue in FileDownloader class, + which each thread waits on and calls FileDownloader._do_real_download . + Individual threads are created in main function. + """ + + def __init__(self): + threading.Thread.__init__(self) + + + def run(self): + while True: + d=downloadqueue.get() + self.params=d['params'] + super(FileDownloadHelper,self).__init__(d['params']) + self._do_real_download(d['filename'],d['url'],d['player_url']) + downloadqueue.task_done() + class InfoExtractor(object): """Information Extractor class. @@ -2097,7 +2132,7 @@ class YahooSearchIE(InfoExtractor): class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" - _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' + _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|playlist|my_playlists|artist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' @@ -2746,6 +2781,8 @@ if __name__ == '__main__': parser.add_option('--dump-user-agent', action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False) + parser.add_option('-P','--parallel', + type="int",dest='parallel',help='Number of parallel downloads',default=0) authentication = optparse.OptionGroup(parser, 'Authentication Options') authentication.add_option('-u', '--username', @@ -2949,6 +2986,7 @@ if __name__ == '__main__': 'consoletitle': opts.consoletitle, 'nopart': opts.nopart, 'updatetime': opts.updatetime, + 'parallel': opts.parallel, }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) @@ -2975,6 +3013,14 @@ if __name__ == '__main__': 
# Update version if opts.update_self: update_self(fd, sys.argv[0]) + + #create downloader threads that wait for url's + downloadparallel=opts.parallel + if downloadparallel > 0: + for threadcount in xrange(downloadparallel): + d=FileDownloadHelper() + d.setDaemon(True) + d.start() # Maybe do nothing if len(all_urls) < 1: @@ -2983,6 +3029,14 @@ if __name__ == '__main__': else: sys.exit() retcode = fd.download(all_urls) + + #wait for download threads to terminate + if downloadparallel > 0: + while True: + if downloadqueue.empty(): + break + time.sleep(10) #otherwise, join won't let main thread catch keyboard interrupt + # Dump cookie jar if requested if opts.cookiefile is not None: From fd58277adf036b5142576808ca6c0ce974bf800f Mon Sep 17 00:00:00 2001 From: Ravi Date: Sun, 10 Jul 2011 22:38:19 -0400 Subject: [PATCH 2/7] Add playlist feature Refactor threading code and change how shutdown is handled --- youtube-dl | 47 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 8 deletions(-) diff --git a/youtube-dl b/youtube-dl index 3101ea819..f26612e73 100755 --- a/youtube-dl +++ b/youtube-dl @@ -607,6 +607,11 @@ class FileDownloader(object): # Extract information from URL and process it ie.extract(url) + + #parallel downloader needs dummy at the end to signal end of queue + #for the thread to exit + for i in xrange(self.params.get('parallel')): + downloadqueue.put({'filename':None } ) # Suitable InfoExtractor had been found; go to next URL break @@ -658,6 +663,11 @@ class FileDownloader(object): return False def _do_download(self, filename, url, player_url): + if ( self.params.get('playlistfile') != None ): + self.params.get('playlistfile').write(filename+"\n") + self.params.get('playlistfile').flush() + + if self.params.get('parallel') > 0: downloadqueue.put({'filename':filename,'url':url,'player_url':player_url,'params':self.params}) return False @@ -799,11 +809,11 @@ class FileDownloader(object): return True -class 
FileDownloadHelper(FileDownloader,threading.Thread): +class FileDownloadHelper(threading.Thread): """File Downloader that does threaded download if needed. Download parameters are added to downloadqueue in FileDownloader class, - which each thread waits on and calls FileDownloader._do_real_download . - Individual threads are created in main function. + which each thread waits on and calls FileDownloader._do_real_download + Individual threads are created in main function. """ def __init__(self): @@ -813,9 +823,11 @@ class FileDownloadHelper(FileDownloader,threading.Thread): def run(self): while True: d=downloadqueue.get() + if ( d['filename'] == None): + break self.params=d['params'] - super(FileDownloadHelper,self).__init__(d['params']) - self._do_real_download(d['filename'],d['url'],d['player_url']) + fd=FileDownloader(d['params']) + fd._do_real_download(d['filename'],d['url'],d['player_url']) downloadqueue.task_done() @@ -2783,6 +2795,10 @@ if __name__ == '__main__': help='display the current browser identification', default=False) parser.add_option('-P','--parallel', type="int",dest='parallel',help='Number of parallel downloads',default=0) + parser.add_option('-s', '--save-playlist', + action='store_true', dest='saveplaylist', help='do not create playlist file for playlists') + + authentication = optparse.OptionGroup(parser, 'Authentication Options') authentication.add_option('-u', '--username', @@ -2950,6 +2966,10 @@ if __name__ == '__main__': facebook_ie = FacebookIE() generic_ie = GenericIE() + playlistfile=None + if ( opts.saveplaylist): + playlistfile=open("playlist.m3u","w") + # File downloader fd = FileDownloader({ 'usenetrc': opts.usenetrc, @@ -2987,6 +3007,7 @@ if __name__ == '__main__': 'nopart': opts.nopart, 'updatetime': opts.updatetime, 'parallel': opts.parallel, + 'playlistfile':playlistfile }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) @@ -3014,13 +3035,15 @@ if __name__ == '__main__': if opts.update_self: 
update_self(fd, sys.argv[0]) - #create downloader threads that wait for url's + #create downloader threads that wait for URLs downloadparallel=opts.parallel + threads=[] if downloadparallel > 0: for threadcount in xrange(downloadparallel): d=FileDownloadHelper() d.setDaemon(True) d.start() + threads.append(d) # Maybe do nothing if len(all_urls) < 1: @@ -3033,9 +3056,15 @@ if __name__ == '__main__': #wait for download threads to terminate if downloadparallel > 0: while True: - if downloadqueue.empty(): + if( not threads[0].isAlive()): break - time.sleep(10) #otherwise, join won't let main thread catch keyboard interrupt + time.sleep(1) + for threadcount in xrange(downloadparallel): + threads[threadcount].join() + # while True: + # if downloadqueue.empty(): + # break + # time.sleep(1) #otherwise, join won't let main thread catch keyboard interrupt # Dump cookie jar if requested @@ -3045,6 +3074,8 @@ if __name__ == '__main__': except (IOError, OSError), err: sys.exit(u'ERROR: unable to save cookie jar') + if ( opts.saveplaylist): + playlistfile.close() sys.exit(retcode) except DownloadError: From c82a911fc961d1c3464c9d4b8d455baf6f9ecad7 Mon Sep 17 00:00:00 2001 From: Ravi Date: Sun, 10 Jul 2011 22:44:19 -0400 Subject: [PATCH 3/7] remove unused code --- youtube-dl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/youtube-dl b/youtube-dl index f26612e73..3e1d9cf6e 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3061,10 +3061,6 @@ if __name__ == '__main__': time.sleep(1) for threadcount in xrange(downloadparallel): threads[threadcount].join() - # while True: - # if downloadqueue.empty(): - # break - # time.sleep(1) #otherwise, join won't let main thread catch keyboard interrupt # Dump cookie jar if requested From 9ad8976d748f4dc5869f159eed57e8d568d93f33 Mon Sep 17 00:00:00 2001 From: Ravi Date: Sun, 10 Jul 2011 22:57:19 -0400 Subject: [PATCH 4/7] fix typo --- youtube-dl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube-dl b/youtube-dl index 
3e1d9cf6e..abe5a8018 100755 --- a/youtube-dl +++ b/youtube-dl @@ -608,8 +608,8 @@ class FileDownloader(object): # Extract information from URL and process it ie.extract(url) - #parallel downloader needs dummy at the end to signal end of queue - #for the thread to exit + #parallel downloader needs dummy at the end to signal end of queue + #for the thread to exit for i in xrange(self.params.get('parallel')): downloadqueue.put({'filename':None } ) @@ -2796,7 +2796,7 @@ if __name__ == '__main__': parser.add_option('-P','--parallel', type="int",dest='parallel',help='Number of parallel downloads',default=0) parser.add_option('-s', '--save-playlist', - action='store_true', dest='saveplaylist', help='do not create playlist file for playlists') + action='store_true', dest='saveplaylist', help='Save file list to a playlist file') From 23d09bfa5d3494ac0dacc1acfe0978c6ca6874ba Mon Sep 17 00:00:00 2001 From: Ravi Date: Mon, 11 Jul 2011 00:20:47 -0400 Subject: [PATCH 5/7] check isAlive for all the threads before calling join --- youtube-dl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube-dl b/youtube-dl index abe5a8018..92ebb8655 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3055,10 +3055,11 @@ if __name__ == '__main__': #wait for download threads to terminate if downloadparallel > 0: - while True: - if( not threads[0].isAlive()): - break - time.sleep(1) + for threadcount in xrange(downloadparallel): + while True: + if( not threads[threadcount].isAlive()): + break + time.sleep(1) for threadcount in xrange(downloadparallel): threads[threadcount].join() From 167b0a90253287894eb000e03540e1c092894718 Mon Sep 17 00:00:00 2001 From: Ravi Date: Tue, 12 Jul 2011 20:52:57 -0400 Subject: [PATCH 6/7] accept playlist filename as commandline argument; refactor threading code --- youtube-dl | 75 ++++++++++++++++++++++-------------------------------- 1 file changed, 30 insertions(+), 45 deletions(-) diff --git a/youtube-dl b/youtube-dl index 
92ebb8655..ba1f2df36 100755 --- a/youtube-dl +++ b/youtube-dl @@ -49,7 +49,7 @@ std_headers = { simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii') -downloadqueue=Queue.Queue() +downloadqueue = Queue.Queue() def preferredencoding(): """Get preferred encoding. @@ -307,7 +307,6 @@ class FileDownloader(object): self._num_downloads = 0 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self.params = params - self.queue=Queue.Queue @staticmethod def pmkdir(filename): @@ -607,11 +606,10 @@ class FileDownloader(object): # Extract information from URL and process it ie.extract(url) - - #parallel downloader needs dummy at the end to signal end of queue - #for the thread to exit + #parallel downloader needs dummy at the end to signal end of queue + #for the thread to exit for i in xrange(self.params.get('parallel')): - downloadqueue.put({'filename':None } ) + downloadqueue.put({'filename':None }) # Suitable InfoExtractor had been found; go to next URL break @@ -663,19 +661,15 @@ class FileDownloader(object): return False def _do_download(self, filename, url, player_url): - if ( self.params.get('playlistfile') != None ): + if (self.params.get('playlistfile') != None): self.params.get('playlistfile').write(filename+"\n") self.params.get('playlistfile').flush() - - if self.params.get('parallel') > 0: downloadqueue.put({'filename':filename,'url':url,'player_url':player_url,'params':self.params}) return False else: self._do_real_download(filename, url, player_url) - - def _do_real_download(self, filename, url, player_url): # Check file already present if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False): @@ -809,26 +803,19 @@ class FileDownloader(object): return True -class FileDownloadHelper(threading.Thread): +def threadedFileDownloader(): """File Downloader that does threaded download if needed. 
Download parameters are added to downloadqueue in FileDownloader class, which each thread waits on and calls FileDownloader._do_real_download Individual threads are created in main function. """ - - def __init__(self): - threading.Thread.__init__(self) - - - def run(self): - while True: - d=downloadqueue.get() - if ( d['filename'] == None): - break - self.params=d['params'] - fd=FileDownloader(d['params']) - fd._do_real_download(d['filename'],d['url'],d['player_url']) - downloadqueue.task_done() + while True: + d = downloadqueue.get() + if (d['filename'] == None): + break + fd=FileDownloader(d['params']) + fd._do_real_download(d['filename'],d['url'],d['player_url']) + downloadqueue.task_done() class InfoExtractor(object): @@ -2796,7 +2783,7 @@ if __name__ == '__main__': parser.add_option('-P','--parallel', type="int",dest='parallel',help='Number of parallel downloads',default=0) parser.add_option('-s', '--save-playlist', - action='store_true', dest='saveplaylist', help='Save file list to a playlist file') + action='store', dest='saveplaylist', help='Save file list to a playlist file') @@ -2966,9 +2953,13 @@ if __name__ == '__main__': facebook_ie = FacebookIE() generic_ie = GenericIE() - playlistfile=None - if ( opts.saveplaylist): - playlistfile=open("playlist.m3u","w") + playlistfile = None + if (opts.saveplaylist != None): + if(opts.saveplaylist.find(".") == -1 ): + playlist_filename = opts.saveplaylist + ".m3u" + else: + playlist_filename = opts.saveplaylist + playlistfile=open(playlist_filename,"w") # File downloader fd = FileDownloader({ @@ -3007,7 +2998,7 @@ if __name__ == '__main__': 'nopart': opts.nopart, 'updatetime': opts.updatetime, 'parallel': opts.parallel, - 'playlistfile':playlistfile + 'playlistfile': playlistfile }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) @@ -3036,14 +3027,14 @@ if __name__ == '__main__': update_self(fd, sys.argv[0]) #create downloader threads that wait for URLs - downloadparallel=opts.parallel 
- threads=[] + downloadparallel = opts.parallel + threads = [] if downloadparallel > 0: for threadcount in xrange(downloadparallel): - d=FileDownloadHelper() - d.setDaemon(True) - d.start() - threads.append(d) + t = threading.Thread(target=threadedFileDownloader) + t.setDaemon(True) + t.start() + threads.append(t) # Maybe do nothing if len(all_urls) < 1: @@ -3055,14 +3046,8 @@ if __name__ == '__main__': #wait for download threads to terminate if downloadparallel > 0: - for threadcount in xrange(downloadparallel): - while True: - if( not threads[threadcount].isAlive()): - break - time.sleep(1) - for threadcount in xrange(downloadparallel): - threads[threadcount].join() - + for t in threads: + t.join(2**32) # Dump cookie jar if requested if opts.cookiefile is not None: From f382bc28d75bb6dd12924c1f35716329d123ee45 Mon Sep 17 00:00:00 2001 From: Ravi Date: Fri, 22 Jul 2011 20:11:10 -0400 Subject: [PATCH 7/7] Get rid of global parameter for queue and playlist file handle in options --- youtube-dl | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/youtube-dl b/youtube-dl index ba1f2df36..2f10f5f5f 100755 --- a/youtube-dl +++ b/youtube-dl @@ -49,7 +49,6 @@ std_headers = { simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii') -downloadqueue = Queue.Queue() def preferredencoding(): """Get preferred encoding. 
@@ -606,10 +605,6 @@ class FileDownloader(object): # Extract information from URL and process it ie.extract(url) - #parallel downloader needs dummy at the end to signal end of queue - #for the thread to exit - for i in xrange(self.params.get('parallel')): - downloadqueue.put({'filename':None }) # Suitable InfoExtractor had been found; go to next URL break @@ -617,6 +612,11 @@ class FileDownloader(object): if not suitable_found: self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url) + #parallel downloader needs dummy at the end to signal end of queue + #for the thread to exit + for i in xrange(self.params.get('parallel')): + FileDownloader.downloadqueue.put({'filename':None }) + return self._download_retcode def post_process(self, filename, ie_info): @@ -661,11 +661,11 @@ class FileDownloader(object): return False def _do_download(self, filename, url, player_url): - if (self.params.get('playlistfile') != None): - self.params.get('playlistfile').write(filename+"\n") - self.params.get('playlistfile').flush() + if (FileDownloader.playlistfile != None): + FileDownloader.playlistfile.write(filename+"\n") + FileDownloader.playlistfile.flush() if self.params.get('parallel') > 0: - downloadqueue.put({'filename':filename,'url':url,'player_url':player_url,'params':self.params}) + FileDownloader.downloadqueue.put({'filename':filename,'url':url,'player_url':player_url,'params':self.params}) return False else: self._do_real_download(filename, url, player_url) @@ -810,12 +810,12 @@ def threadedFileDownloader(): Individual threads are created in main function. 
""" while True: - d = downloadqueue.get() - if (d['filename'] == None): + d = FileDownloader.downloadqueue.get() + if (d['filename'] is None): break fd=FileDownloader(d['params']) fd._do_real_download(d['filename'],d['url'],d['player_url']) - downloadqueue.task_done() + FileDownloader.downloadqueue.task_done() class InfoExtractor(object): @@ -2953,13 +2953,10 @@ if __name__ == '__main__': facebook_ie = FacebookIE() generic_ie = GenericIE() - playlistfile = None if (opts.saveplaylist != None): - if(opts.saveplaylist.find(".") == -1 ): - playlist_filename = opts.saveplaylist + ".m3u" - else: - playlist_filename = opts.saveplaylist - playlistfile=open(playlist_filename,"w") + FileDownloader.playlistfile = open(opts.saveplaylist, "w") + else: + FileDownloader.playlistfile = None # File downloader fd = FileDownloader({ @@ -2998,7 +2995,6 @@ if __name__ == '__main__': 'nopart': opts.nopart, 'updatetime': opts.updatetime, 'parallel': opts.parallel, - 'playlistfile': playlistfile }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) @@ -3030,6 +3026,7 @@ if __name__ == '__main__': downloadparallel = opts.parallel threads = [] if downloadparallel > 0: + FileDownloader.downloadqueue = Queue.Queue() for threadcount in xrange(downloadparallel): t = threading.Thread(target=threadedFileDownloader) t.setDaemon(True) @@ -3046,8 +3043,11 @@ if __name__ == '__main__': #wait for download threads to terminate if downloadparallel > 0: - for t in threads: - t.join(2**32) + while True: + for t in threads: + t.join(2**32) + if all(not t.isAlive() for t in threads): + break # Dump cookie jar if requested if opts.cookiefile is not None: @@ -3057,7 +3057,7 @@ if __name__ == '__main__': sys.exit(u'ERROR: unable to save cookie jar') if ( opts.saveplaylist): - playlistfile.close() + FileDownloader.playlistfile.close() sys.exit(retcode) except DownloadError: