From 6ffe8e171373fa78c067f8c38747971ccf91f6a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bart=C5=82omiej=20Dach?= Date: Tue, 6 Feb 2024 14:48:49 +0100 Subject: [PATCH] Use staggered exponential backoff when retrying in `PersistentEndpointClientConnector` There are suspicions that the straight 5s retry could have caused a situation a few days ago for `osu-server-spectator` wherein it was getting hammered by constant retry requests. This should make that a little less likely to happen. Numbers chosen are arbitrary, but mostly follow stable's bancho retry intervals because why not. Stable also skips the exponential backoff in case of errors it considers transient, but I decided not to bother for now. Starts off from 3 seconds, then ramps up to a maximum of 2 minutes. Added stagger factor is 25% of duration, either direction. The stagger factor helps given that if spectator server is dead, each client has three separate connections to it which it will retry on (one to each hub). --- .../PersistentEndpointClientConnector.cs | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/osu.Game/Online/PersistentEndpointClientConnector.cs b/osu.Game/Online/PersistentEndpointClientConnector.cs index 024a0fea73..9e7543ce2b 100644 --- a/osu.Game/Online/PersistentEndpointClientConnector.cs +++ b/osu.Game/Online/PersistentEndpointClientConnector.cs @@ -7,6 +7,7 @@ using System.Threading.Tasks; using osu.Framework.Bindables; using osu.Framework.Extensions.TypeExtensions; using osu.Framework.Logging; +using osu.Framework.Utils; using osu.Game.Online.API; namespace osu.Game.Online @@ -31,6 +32,12 @@ namespace osu.Game.Online private CancellationTokenSource connectCancelSource = new CancellationTokenSource(); private bool started; + /// + /// How much to delay before attempting to connect again, in milliseconds. + /// Subject to exponential back-off. + /// + private int retryDelay = 3000; + /// /// Constructs a new . 
/// @@ -78,6 +85,8 @@ namespace osu.Game.Online private async Task connect() { cancelExistingConnect(); + // reset retry delay to default. + retryDelay = 3000; if (!await connectionLock.WaitAsync(10000).ConfigureAwait(false)) throw new TimeoutException("Could not obtain a lock to connect. A previous attempt is likely stuck."); @@ -134,8 +143,15 @@ namespace osu.Game.Online /// private async Task handleErrorAndDelay(Exception exception, CancellationToken cancellationToken) { - Logger.Log($"{ClientName} connect attempt failed: {exception.Message}", LoggingTarget.Network); - await Task.Delay(5000, cancellationToken).ConfigureAwait(false); + // random stagger factor to avoid mass incidental synchronisation + // compare: https://github.com/peppy/osu-stable-reference/blob/013c3010a9d495e3471a9c59518de17006f9ad89/osu!/Online/BanchoClient.cs#L331 + int thisDelay = (int)(retryDelay * RNG.NextDouble(0.75, 1.25)); + // exponential backoff with upper limit + // compare: https://github.com/peppy/osu-stable-reference/blob/013c3010a9d495e3471a9c59518de17006f9ad89/osu!/Online/BanchoClient.cs#L539 + retryDelay = Math.Min(120000, (int)(retryDelay * 1.5)); + + Logger.Log($"{ClientName} connect attempt failed: {exception.Message}. Next attempt in {thisDelay / 1000:N0} seconds.", LoggingTarget.Network); + await Task.Delay(thisDelay, cancellationToken).ConfigureAwait(false); } ///