From d145f484e827f2354d27fe1935afe65799e4e6b2 Mon Sep 17 00:00:00 2001 From: Jordan Dominion Date: Sat, 18 Nov 2023 21:44:21 -0500 Subject: [PATCH 01/21] Disallow custom chat commands while the server is rebooting Also a bunch of cleanups in `ChatManager.ProcessMessage` --- .../Components/Chat/ChatManager.cs | 107 ++++++------------ .../Components/Session/ISessionController.cs | 5 - .../Components/Session/SessionController.cs | 8 +- .../Components/Watchdog/BasicWatchdog.cs | 2 - 4 files changed, 41 insertions(+), 81 deletions(-) diff --git a/src/Tgstation.Server.Host/Components/Chat/ChatManager.cs b/src/Tgstation.Server.Host/Components/Chat/ChatManager.cs index ecb0fa6498c..fb1b6176259 100644 --- a/src/Tgstation.Server.Host/Components/Chat/ChatManager.cs +++ b/src/Tgstation.Server.Host/Components/Chat/ChatManager.cs @@ -154,7 +154,7 @@ public ChatManager( synchronizationLock = new object(); - builtinCommands = new Dictionary(); + builtinCommands = new Dictionary(StringComparer.OrdinalIgnoreCase); providers = new Dictionary(); mappedChannels = new Dictionary(); trackingContexts = new List(); @@ -713,6 +713,18 @@ async ValueTask ProcessMessage(IProvider provider, Message message, bool recurse return; } + ValueTask TextReply(string reply) => SendMessage( + new List + { + message.User.Channel.RealId, + }, + message, + new MessageContent + { + Text = reply, + }, + cancellationToken); + if (message.User.Channel.IsPrivateChannel) lock (mappedChannels) if (!mappedChannel.HasValue) @@ -753,17 +765,7 @@ async ValueTask ProcessMessage(IProvider provider, Message message, bool recurse logger.LogTrace("message: {messageJson}", JsonConvert.SerializeObject(message)); lock (mappedChannels) logger.LogTrace("mappedChannels: {mappedChannelsJson}", JsonConvert.SerializeObject(mappedChannels)); - await SendMessage( - new List - { - message.User.Channel.RealId, - }, - message, - new MessageContent - { - Text = "TGS: Processing error, check logs!", - }, - cancellationToken); + await TextReply("TGS: Processing error, check logs!"); return; } @@ -805,41 +807,29 @@ await SendMessage( if (splits.Count == 0) { // just a mention - await SendMessage( - new List - { - message.User.Channel.RealId, - }, - message, - new MessageContent - { - Text = "Hi!", - }, - cancellationToken); + await TextReply("Hi!"); return; } - var command = splits[0].ToUpperInvariant(); + var command = splits[0]; splits.RemoveAt(0); var arguments = String.Join(" ", splits); - ICommand GetCommand(string commandName) + Tuple GetCommand() { - if (!builtinCommands.TryGetValue(commandName, out var handler)) - { - handler = trackingContexts - .Where(x => x.CustomCommands != null) - .SelectMany(x => x.CustomCommands) - .Where(x => x.Name.ToUpperInvariant() == commandName) + if (!builtinCommands.TryGetValue(command, out var handler)) + return trackingContexts + .Where(trackingContext => trackingContext.Active) + .SelectMany(trackingContext => trackingContext.CustomCommands.Select(customCommand => Tuple.Create(customCommand, trackingContext))) + .Where(tuple => tuple.Item1.Name.Equals(command, StringComparison.OrdinalIgnoreCase)) .FirstOrDefault(); - } - return handler; + return Tuple.Create(handler, null); } - const string UnknownCommandMessage = "Unknown command! Type '?' or 'help' for available commands."; + const string UnknownCommandMessage = "TGS: Unknown command! Type '?' or 'help' for available commands."; - if (command == "HELP" || command == "?") + if (command.Equals("help", StringComparison.OrdinalIgnoreCase) || command == "?") { string helpText; if (splits.Count == 0) @@ -847,56 +837,40 @@ ICommand GetCommand(string commandName) var allCommands = builtinCommands.Select(x => x.Value).ToList(); allCommands.AddRange( trackingContexts - .Where(x => x.CustomCommands != null) .SelectMany( x => x.CustomCommands)); helpText = String.Format(CultureInfo.InvariantCulture, "Available commands (Type '?' or 'help' and then a command name for more details): {0}", String.Join(", ", allCommands.Select(x => x.Name))); } else { - var helpHandler = GetCommand(splits[0].ToUpperInvariant()); + var (helpHandler, _) = GetCommand(); if (helpHandler != default) helpText = String.Format(CultureInfo.InvariantCulture, "{0}: {1}{2}", helpHandler.Name, helpHandler.HelpText, helpHandler.AdminOnly ? " - May only be used in admin channels" : String.Empty); else helpText = UnknownCommandMessage; } - await SendMessage( - new List { message.User.Channel.RealId }, - message, - new MessageContent - { - Text = helpText, - }, - cancellationToken); + await TextReply(helpText); return; } - var commandHandler = GetCommand(command); + var (commandHandler, trackingContext) = GetCommand(); if (commandHandler == default) { - await SendMessage( - new List { message.User.Channel.RealId }, - message, - new MessageContent - { - Text = UnknownCommandMessage, - }, - cancellationToken); + await TextReply(UnknownCommandMessage); + return; + } + + if (trackingContext?.Active == false) + { + await TextReply("TGS: The server is rebooting, please try again later"); return; } if (commandHandler.AdminOnly && !message.User.Channel.IsAdminChannel) { - await SendMessage( - new List { message.User.Channel.RealId }, - message, - new MessageContent - { - Text = "Use this command in an admin channel!", - }, - cancellationToken); + await TextReply("TGS: Use this command in an admin channel!"); return; } @@ -912,14 +886,7 @@ await SendMessage( { // error bc custom commands should reply about why it failed logger.LogError(e, "Error processing chat command"); - await SendMessage( - new List { message.User.Channel.RealId }, - message, - new MessageContent - { - Text = "TGS: Internal error processing command! Check server logs!", - }, - cancellationToken); + await TextReply("TGS: Internal error processing command! Check server logs!"); } finally { diff --git a/src/Tgstation.Server.Host/Components/Session/ISessionController.cs b/src/Tgstation.Server.Host/Components/Session/ISessionController.cs index 72e66c2066e..0372a32ec0a 100644 --- a/src/Tgstation.Server.Host/Components/Session/ISessionController.cs +++ b/src/Tgstation.Server.Host/Components/Session/ISessionController.cs @@ -119,11 +119,6 @@ interface ISessionController : IProcessBase, IRenameNotifyee, IAsyncDisposable /// void ResetRebootState(); - /// - /// Enables the reading of custom chat commands from the . - /// - void EnableCustomChatCommands(); - /// /// Replace the in use with a given , disposing the old one. /// diff --git a/src/Tgstation.Server.Host/Components/Session/SessionController.cs b/src/Tgstation.Server.Host/Components/Session/SessionController.cs index 93e05417a3e..04a34af9aa8 100644 --- a/src/Tgstation.Server.Host/Components/Session/SessionController.cs +++ b/src/Tgstation.Server.Host/Components/Session/SessionController.cs @@ -401,9 +401,6 @@ public async ValueTask ProcessBridgeRequest(BridgeParameters par } } - /// - public void EnableCustomChatCommands() => chatTrackingContext.Active = DMApiAvailable; - /// public ValueTask Release() { @@ -751,6 +748,7 @@ async ValueTask ProcessBridgeCommand(BridgeParameters parameters break; case BridgeCommandType.Kill: Logger.LogInformation("Bridge requested process termination!"); + chatTrackingContext.Active = false; TerminationWasRequested = true; process.Terminate(); break; @@ -839,15 +837,17 @@ async ValueTask ProcessBridgeCommand(BridgeParameters parameters // Load custom commands chatTrackingContext.CustomCommands = parameters.CustomCommands; + chatTrackingContext.Active = true; Interlocked.Exchange(ref startupTcs, new TaskCompletionSource()).SetResult(); break; case BridgeCommandType.Reboot: Interlocked.Increment(ref rebootBridgeRequestsProcessing); try { + chatTrackingContext.Active = false; + if (ClosePortOnReboot) { - chatTrackingContext.Active = false; response.NewPort = 0; portClosedForReboot = true; } diff --git a/src/Tgstation.Server.Host/Components/Watchdog/BasicWatchdog.cs b/src/Tgstation.Server.Host/Components/Watchdog/BasicWatchdog.cs index d915a8b9356..ee110646fad 100644 --- a/src/Tgstation.Server.Host/Components/Watchdog/BasicWatchdog.cs +++ b/src/Tgstation.Server.Host/Components/Watchdog/BasicWatchdog.cs @@ -256,8 +256,6 @@ await ReattachFailure( await SessionStartupPersist(cancellationToken); await CheckLaunchResult(Server, "Server", cancellationToken); - - Server.EnableCustomChatCommands(); } catch (Exception ex) { From b1b5d71da3a6c99eb2d93c681d28c11308f51e38 Mon Sep 17 00:00:00 2001 From: Jordan Dominion Date: Sat, 18 Nov 2023 22:00:14 -0500 Subject: [PATCH 02/21] Cancel pending topic requests when the server dies or reboots --- .../Components/Session/SessionController.cs | 49 ++++++++++++++++--- 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/src/Tgstation.Server.Host/Components/Session/SessionController.cs b/src/Tgstation.Server.Host/Components/Session/SessionController.cs index 04a34af9aa8..16f0661852f 100644 --- a/src/Tgstation.Server.Host/Components/Session/SessionController.cs +++ b/src/Tgstation.Server.Host/Components/Session/SessionController.cs @@ -432,10 +432,38 @@ public async ValueTask SendCommand(TopicParameters parameters, Ca return null; } + using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); + var combinedCancellationToken = cts.Token; + async ValueTask CancelIfLifetimeElapses() + { + try + { + var lifetime = Lifetime; + var completed = await Task.WhenAny(lifetime, RebootGate).WaitAsync(combinedCancellationToken); + + Logger.LogDebug( + "Server {action}, cancelling pending command: {commandType}", + completed == lifetime + ? "process ended" + : "rebooting", + parameters.CommandType); + cts.Cancel(); + } + catch (OperationCanceledException) + { + // expected, not even worth tracing + } + catch (Exception ex) + { + Logger.LogError(ex, "Error in CancelIfLifetimeElapses!"); + } + } + TopicResponse fullResponse = null; + var lifetimeWatchingTask = CancelIfLifetimeElapses(); try { - var combinedResponse = await SendTopicRequest(parameters, cancellationToken); + var combinedResponse = await SendTopicRequest(parameters, combinedCancellationToken); void LogCombinedResponse() { @@ -453,7 +481,7 @@ void LogCombinedResponse() do { var nextRequest = await ProcessChunk( - (completedResponse, cancellationToken) => + (completedResponse, _) => { fullResponse = completedResponse; return ValueTask.FromResult(null); @@ -464,12 +492,12 @@ void LogCombinedResponse() return null; }, combinedResponse?.InteropResponse?.Chunk, - cancellationToken); + combinedCancellationToken); if (nextRequest != null) { nextRequest.PayloadId = nextChunk.PayloadId; - combinedResponse = await SendTopicRequest(nextRequest, cancellationToken); + combinedResponse = await SendTopicRequest(nextRequest, combinedCancellationToken); LogCombinedResponse(); nextChunk = combinedResponse?.InteropResponse?.Chunk; } @@ -486,11 +514,20 @@ void LogCombinedResponse() Logger.LogDebug( ex, "Topic request {cancellationType}!", - cancellationToken.IsCancellationRequested - ? "aborted" + combinedCancellationToken.IsCancellationRequested + ? cancellationToken.IsCancellationRequested + ? "cancelled" + : "aborted" : "timed out"); + + // throw only if the original token was the trigger cancellationToken.ThrowIfCancellationRequested(); } + finally + { + cts.Cancel(); + await lifetimeWatchingTask; + } if (fullResponse?.ErrorMessage != null) Logger.LogWarning( From d744504fa88b16a84beb7b2effe894f63c283d83 Mon Sep 17 00:00:00 2001 From: Jordan Dominion Date: Sat, 18 Nov 2023 22:15:57 -0500 Subject: [PATCH 03/21] Minor documentation update for `IChatTrackingContext` --- .../Components/Chat/IChatTrackingContext.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Tgstation.Server.Host/Components/Chat/IChatTrackingContext.cs b/src/Tgstation.Server.Host/Components/Chat/IChatTrackingContext.cs index ae291c2424d..37e1da9584b 100644 --- a/src/Tgstation.Server.Host/Components/Chat/IChatTrackingContext.cs +++ b/src/Tgstation.Server.Host/Components/Chat/IChatTrackingContext.cs @@ -13,6 +13,7 @@ public interface IChatTrackingContext : IChannelSink, IDisposable /// /// If the should be used. /// + /// This should only be set by the that sets the . bool Active { get; set; } /// From 882e08f0e13201c414d15c0ebf75b93949792e1f Mon Sep 17 00:00:00 2001 From: Jordan Dominion Date: Sat, 18 Nov 2023 22:16:34 -0500 Subject: [PATCH 04/21] More guards against sending topic requests while the server is starting or rebooting --- .../Components/Session/SessionController.cs | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/Tgstation.Server.Host/Components/Session/SessionController.cs b/src/Tgstation.Server.Host/Components/Session/SessionController.cs index 16f0661852f..e6db00c5459 100644 --- a/src/Tgstation.Server.Host/Components/Session/SessionController.cs +++ b/src/Tgstation.Server.Host/Components/Session/SessionController.cs @@ -432,18 +432,32 @@ public async ValueTask SendCommand(TopicParameters parameters, Ca return null; } + var rebootGate = RebootGate; + var launchResult = await LaunchResult.WaitAsync(cancellationToken); + if (launchResult.ExitCode.HasValue) + { + Logger.LogDebug("Not sending topic request {commandType} to server that failed to launch!", parameters.CommandType); + return null; + } + + // meh, this is kind of a hack, but it works + if (!chatTrackingContext.Active) + { + Logger.LogDebug("Not sending topic request {commandType} to server that is rebooting/starting.", parameters.CommandType); + return null; + } + using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); var combinedCancellationToken = cts.Token; async ValueTask CancelIfLifetimeElapses() { try { - var lifetime = Lifetime; - var completed = await Task.WhenAny(lifetime, RebootGate).WaitAsync(combinedCancellationToken); + var completed = await Task.WhenAny(Lifetime, rebootGate).WaitAsync(combinedCancellationToken); Logger.LogDebug( "Server {action}, cancelling pending command: {commandType}", - completed == lifetime + completed != rebootGate ? "process ended" : "rebooting", parameters.CommandType); From 0e7cf63d747eb2905d67bac8e7614aa6dc95ed9c Mon Sep 17 00:00:00 2001 From: Jordan Dominion Date: Sat, 18 Nov 2023 22:20:11 -0500 Subject: [PATCH 05/21] Remove redundant checks in `SendRawTopic` --- .../Components/Session/SessionController.cs | 35 ++++++------------- 1 file changed, 11 insertions(+), 24 deletions(-) diff --git a/src/Tgstation.Server.Host/Components/Session/SessionController.cs b/src/Tgstation.Server.Host/Components/Session/SessionController.cs index e6db00c5459..474b684a93e 100644 --- a/src/Tgstation.Server.Host/Components/Session/SessionController.cs +++ b/src/Tgstation.Server.Host/Components/Session/SessionController.cs @@ -1097,7 +1097,6 @@ async ValueTask SendRawTopic(string queryString, bool pri } var targetPort = ReattachInformation.Port; - var killedOrRebootedTask = Task.WhenAny(Lifetime, OnReboot); global::Byond.TopicSender.TopicResponse byondResponse = null; var firstSend = true; @@ -1109,43 +1108,31 @@ async ValueTask SendRawTopic(string queryString, bool pri try { firstSend = false; - if (!killedOrRebootedTask.IsCompleted) - { - Logger.LogTrace("Begin topic request"); - byondResponse = await byondTopicSender.SendTopic( - endpoint, - queryString, - cancellationToken); - Logger.LogTrace("End topic request"); - } + Logger.LogTrace("Begin topic request"); + byondResponse = await byondTopicSender.SendTopic( + endpoint, + queryString, + cancellationToken); + Logger.LogTrace("End topic request"); break; } - catch (Exception ex) when (!cancellationToken.IsCancellationRequested) + catch (Exception ex) when (ex is not OperationCanceledException) { Logger.LogWarning(ex, "SendTopic exception!{retryDetails}", priority ? $" {i} attempts remaining." : String.Empty); if (priority && i > 0) - { - var delayTask = asyncDelayer.Delay(TimeSpan.FromSeconds(2), cancellationToken); - await Task.WhenAny(killedOrRebootedTask, delayTask); - } + await asyncDelayer.Delay(TimeSpan.FromSeconds(2), cancellationToken); } } if (byondResponse == null) { if (priority) - if (killedOrRebootedTask.IsCompleted) - Logger.LogWarning( - "Unable to send priority topic \"{queryString}\" DreamDaemon {stateClearAction}!", - queryString, - Lifetime.IsCompleted ? "process ended" : "rebooted"); - else - Logger.LogError( - "Unable to send priority topic \"{queryString}\"!", - queryString); + Logger.LogError( + "Unable to send priority topic \"{queryString}\"!", + queryString); return null; } From 8b9ff88578546a366fc9233fbc5b08733e39c7e9 Mon Sep 17 00:00:00 2001 From: Jordan Dominion Date: Sat, 18 Nov 2023 22:27:48 -0500 Subject: [PATCH 06/21] Make jobs hub test start/stop ordering deterministic Prevents rare race condition --- .../Live/Instance/JobsHubTests.cs | 36 ++++++++++--------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/tests/Tgstation.Server.Tests/Live/Instance/JobsHubTests.cs b/tests/Tgstation.Server.Tests/Live/Instance/JobsHubTests.cs index 330df08e6b6..493e2d1d4f1 100644 --- a/tests/Tgstation.Server.Tests/Live/Instance/JobsHubTests.cs +++ b/tests/Tgstation.Server.Tests/Live/Instance/JobsHubTests.cs @@ -28,7 +28,7 @@ sealed class JobsHubTests : IJobsHub readonly HashSet permlessSeenJobs; - HubConnection conn1, conn2; + HubConnection permedConn, permlessConn; bool permlessIsPermed; long? permlessPsId; @@ -98,26 +98,26 @@ public async Task Run(CancellationToken cancellationToken) }, }; - await using (conn1 = (HubConnection)await permedUser.SubscribeToJobUpdates( + await using (permedConn = (HubConnection)await permedUser.SubscribeToJobUpdates( this, null, null, cancellationToken)) - await using (conn2 = (HubConnection)await permlessUser.SubscribeToJobUpdates( + await using (permlessConn = (HubConnection)await permlessUser.SubscribeToJobUpdates( neverReceiver, null, null, cancellationToken)) { - Console.WriteLine($"Initial conn1: {conn1.ConnectionId}"); - Console.WriteLine($"Initial conn2: {conn2.ConnectionId}"); + Console.WriteLine($"Initial conn1: {permedConn.ConnectionId}"); + Console.WriteLine($"Initial conn2: {permlessConn.ConnectionId}"); - conn1.Reconnected += (newId) => + permedConn.Reconnected += (newId) => { Console.WriteLine($"conn1 reconnected: {newId}"); return Task.CompletedTask; }; - conn2.Reconnected += (newId) => + permlessConn.Reconnected += (newId) => { Console.WriteLine($"conn1 reconnected: {newId}"); return Task.CompletedTask; @@ -215,16 +215,17 @@ static DateTimeOffset PerformDBTruncation(DateTimeOffset original) public void ExpectShutdown() { - Assert.AreEqual(HubConnectionState.Connected, conn1.State); - Assert.AreEqual(HubConnectionState.Connected, conn2.State); + Assert.AreEqual(HubConnectionState.Connected, permedConn.State); + Assert.AreEqual(HubConnectionState.Connected, permlessConn.State); } public async ValueTask WaitForReconnect(CancellationToken cancellationToken) { - await Task.WhenAll(conn1.StopAsync(cancellationToken), conn2.StopAsync(cancellationToken)); + await permlessConn.StopAsync(cancellationToken); + await permedConn.StopAsync(cancellationToken); - Assert.AreEqual(HubConnectionState.Disconnected, conn1.State); - Assert.AreEqual(HubConnectionState.Disconnected, conn2.State); + Assert.AreEqual(HubConnectionState.Disconnected, permedConn.State); + Assert.AreEqual(HubConnectionState.Disconnected, permlessConn.State); // force token refreshs await Task.WhenAll(permedUser.Administration.Read(cancellationToken).AsTask(), permlessUser.Instances.List(null, cancellationToken).AsTask()); @@ -261,12 +262,13 @@ await ic.PermissionSets.Delete(new InstancePermissionSetRequest }, cancellationToken); })); - await Task.WhenAll(conn1.StartAsync(cancellationToken), conn2.StartAsync(cancellationToken)); + await permedConn.StartAsync(cancellationToken); + await permlessConn.StartAsync(cancellationToken); - Assert.AreEqual(HubConnectionState.Connected, conn1.State); - Assert.AreEqual(HubConnectionState.Connected, conn2.State); - Console.WriteLine($"New conn1: {conn1.ConnectionId}"); - Console.WriteLine($"New conn2: {conn2.ConnectionId}"); + Assert.AreEqual(HubConnectionState.Connected, permedConn.State); + Assert.AreEqual(HubConnectionState.Connected, permlessConn.State); + Console.WriteLine($"New conn1: {permedConn.ConnectionId}"); + Console.WriteLine($"New conn2: {permlessConn.ConnectionId}"); } public void CompleteNow() => finishTcs.TrySetResult(); From 6a051f417800488c9b6996f5904bade86817c59b Mon Sep 17 00:00:00 2001 From: Jordan Dominion Date: Sat, 18 Nov 2023 22:51:32 -0500 Subject: [PATCH 07/21] Fix `NullReferenceException` in `SessionController` --- .../Components/Session/SessionController.cs | 288 +++++++++--------- 1 file changed, 151 insertions(+), 137 deletions(-) diff --git a/src/Tgstation.Server.Host/Components/Session/SessionController.cs b/src/Tgstation.Server.Host/Components/Session/SessionController.cs index 474b684a93e..fa908bed544 100644 --- a/src/Tgstation.Server.Host/Components/Session/SessionController.cs +++ b/src/Tgstation.Server.Host/Components/Session/SessionController.cs @@ -414,143 +414,8 @@ public ValueTask Release() } /// - public async ValueTask SendCommand(TopicParameters parameters, CancellationToken cancellationToken) - { - ArgumentNullException.ThrowIfNull(parameters); - - if (Lifetime.IsCompleted || disposed) - { - Logger.LogWarning( - "Attempted to send a command to an inactive SessionController: {commandType}", - parameters.CommandType); - return null; - } - - if (!DMApiAvailable) - { - Logger.LogTrace("Not sending topic request {commandType} to server without/with incompatible DMAPI!", parameters.CommandType); - return null; - } - - var rebootGate = RebootGate; - var launchResult = await LaunchResult.WaitAsync(cancellationToken); - if (launchResult.ExitCode.HasValue) - { - Logger.LogDebug("Not sending topic request {commandType} to server that failed to launch!", parameters.CommandType); - return null; - } - - // meh, this is kind of a hack, but it works - if (!chatTrackingContext.Active) - { - Logger.LogDebug("Not sending topic request {commandType} to server that is rebooting/starting.", parameters.CommandType); - return null; - } - - using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); - var combinedCancellationToken = cts.Token; - async ValueTask CancelIfLifetimeElapses() - { - try - { - var completed = await Task.WhenAny(Lifetime, rebootGate).WaitAsync(combinedCancellationToken); - - Logger.LogDebug( - "Server {action}, cancelling pending command: {commandType}", - completed != rebootGate - ? "process ended" - : "rebooting", - parameters.CommandType); - cts.Cancel(); - } - catch (OperationCanceledException) - { - // expected, not even worth tracing - } - catch (Exception ex) - { - Logger.LogError(ex, "Error in CancelIfLifetimeElapses!"); - } - } - - TopicResponse fullResponse = null; - var lifetimeWatchingTask = CancelIfLifetimeElapses(); - try - { - var combinedResponse = await SendTopicRequest(parameters, combinedCancellationToken); - - void LogCombinedResponse() - { - if (LogTopicRequests && combinedResponse != null) - Logger.LogTrace("Topic response: {topicString}", combinedResponse.ByondTopicResponse.StringData ?? "(NO STRING DATA)"); - } - - LogCombinedResponse(); - - if (combinedResponse?.InteropResponse?.Chunk != null) - { - Logger.LogTrace("Topic response is chunked..."); - - ChunkData nextChunk = combinedResponse.InteropResponse.Chunk; - do - { - var nextRequest = await ProcessChunk( - (completedResponse, _) => - { - fullResponse = completedResponse; - return ValueTask.FromResult(null); - }, - error => - { - Logger.LogWarning("Topic response chunking error: {message}", error); - return null; - }, - combinedResponse?.InteropResponse?.Chunk, - combinedCancellationToken); - - if (nextRequest != null) - { - nextRequest.PayloadId = nextChunk.PayloadId; - combinedResponse = await SendTopicRequest(nextRequest, combinedCancellationToken); - LogCombinedResponse(); - nextChunk = combinedResponse?.InteropResponse?.Chunk; - } - else - nextChunk = null; - } - while (nextChunk != null); - } - else - fullResponse = combinedResponse?.InteropResponse; - } - catch (OperationCanceledException ex) - { - Logger.LogDebug( - ex, - "Topic request {cancellationType}!", - combinedCancellationToken.IsCancellationRequested - ? cancellationToken.IsCancellationRequested - ? "cancelled" - : "aborted" - : "timed out"); - - // throw only if the original token was the trigger - cancellationToken.ThrowIfCancellationRequested(); - } - finally - { - cts.Cancel(); - await lifetimeWatchingTask; - } - - if (fullResponse?.ErrorMessage != null) - Logger.LogWarning( - "Errored topic response for command {commandType}: {errorMessage}", - parameters.CommandType, - fullResponse.ErrorMessage); - - return fullResponse; - } + public ValueTask SendCommand(TopicParameters parameters, CancellationToken cancellationToken) + => SendCommand(parameters, false, cancellationToken); /// public Task SetPort(ushort port, CancellationToken cancellationToken) @@ -697,6 +562,7 @@ async Task GetLaunchResult( new TopicParameters( assemblyInformationProvider.Version, ReattachInformation.RuntimeInformation.ServerPort), + true, reattachTopicCts.Token); if (reattachResponse != null) @@ -1152,5 +1018,153 @@ async ValueTask SendRawTopic(string queryString, bool pri return new CombinedTopicResponse(byondResponse, interopResponse); } + + /// + /// Sends a command to DreamDaemon through /world/Topic(). + /// + /// The to send. + /// If waiting for the should be bypassed. + /// The for the operation. + /// A resulting in the of /world/Topic(). + async ValueTask SendCommand(TopicParameters parameters, bool bypassLaunchResult, CancellationToken cancellationToken) + { + ArgumentNullException.ThrowIfNull(parameters); + + if (Lifetime.IsCompleted || disposed) + { + Logger.LogWarning( + "Attempted to send a command to an inactive SessionController: {commandType}", + parameters.CommandType); + return null; + } + + if (!DMApiAvailable) + { + Logger.LogTrace("Not sending topic request {commandType} to server without/with incompatible DMAPI!", parameters.CommandType); + return null; + } + + var rebootGate = RebootGate; + if (!bypassLaunchResult) + { + var launchResult = await LaunchResult.WaitAsync(cancellationToken); + if (launchResult.ExitCode.HasValue) + { + Logger.LogDebug("Not sending topic request {commandType} to server that failed to launch!", parameters.CommandType); + return null; + } + } + + // meh, this is kind of a hack, but it works + if (!chatTrackingContext.Active) + { + Logger.LogDebug("Not sending topic request {commandType} to server that is rebooting/starting.", parameters.CommandType); + return null; + } + + using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); + var combinedCancellationToken = cts.Token; + async ValueTask CancelIfLifetimeElapses() + { + try + { + var completed = await Task.WhenAny(Lifetime, rebootGate).WaitAsync(combinedCancellationToken); + + Logger.LogDebug( + "Server {action}, cancelling pending command: {commandType}", + completed != rebootGate + ? "process ended" + : "rebooting", + parameters.CommandType); + cts.Cancel(); + } + catch (OperationCanceledException) + { + // expected, not even worth tracing + } + catch (Exception ex) + { + Logger.LogError(ex, "Error in CancelIfLifetimeElapses!"); + } + } + + TopicResponse fullResponse = null; + var lifetimeWatchingTask = CancelIfLifetimeElapses(); + try + { + var combinedResponse = await SendTopicRequest(parameters, combinedCancellationToken); + + void LogCombinedResponse() + { + if (LogTopicRequests && combinedResponse != null) + Logger.LogTrace("Topic response: {topicString}", combinedResponse.ByondTopicResponse.StringData ?? "(NO STRING DATA)"); + } + + LogCombinedResponse(); + + if (combinedResponse?.InteropResponse?.Chunk != null) + { + Logger.LogTrace("Topic response is chunked..."); + + ChunkData nextChunk = combinedResponse.InteropResponse.Chunk; + do + { + var nextRequest = await ProcessChunk( + (completedResponse, _) => + { + fullResponse = completedResponse; + return ValueTask.FromResult(null); + }, + error => + { + Logger.LogWarning("Topic response chunking error: {message}", error); + return null; + }, + combinedResponse?.InteropResponse?.Chunk, + combinedCancellationToken); + + if (nextRequest != null) + { + nextRequest.PayloadId = nextChunk.PayloadId; + combinedResponse = await SendTopicRequest(nextRequest, combinedCancellationToken); + LogCombinedResponse(); + nextChunk = combinedResponse?.InteropResponse?.Chunk; + } + else + nextChunk = null; + } + while (nextChunk != null); + } + else + fullResponse = combinedResponse?.InteropResponse; + } + catch (OperationCanceledException ex) + { + Logger.LogDebug( + ex, + "Topic request {cancellationType}!", + combinedCancellationToken.IsCancellationRequested + ? cancellationToken.IsCancellationRequested + ? "cancelled" + : "aborted" + : "timed out"); + + // throw only if the original token was the trigger + cancellationToken.ThrowIfCancellationRequested(); + } + finally + { + cts.Cancel(); + await lifetimeWatchingTask; + } + + if (fullResponse?.ErrorMessage != null) + Logger.LogWarning( + "Errored topic response for command {commandType}: {errorMessage}", + parameters.CommandType, + fullResponse.ErrorMessage); + + return fullResponse; + } } } From 421733ba296d552f9e922fa6a197c8f60b23c6c7 Mon Sep 17 00:00:00 2001 From: Jordan Dominion Date: Sat, 18 Nov 2023 22:52:59 -0500 Subject: [PATCH 08/21] Add another test logging exception for EFCore cancellation errors --- tests/Tgstation.Server.Tests/Live/HardFailLogger.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/Tgstation.Server.Tests/Live/HardFailLogger.cs b/tests/Tgstation.Server.Tests/Live/HardFailLogger.cs index af42b145649..41bb831cc8d 100644 --- a/tests/Tgstation.Server.Tests/Live/HardFailLogger.cs +++ b/tests/Tgstation.Server.Tests/Live/HardFailLogger.cs @@ -27,6 +27,7 @@ public void Log(LogLevel logLevel, EventId eventId, TState state, Except && !((exception is BadHttpRequestException) && logMessage.Contains("Unexpected end of request content.")) // canceled request && !logMessage.StartsWith("Error disconnecting connection ") && !(logMessage.StartsWith("An exception occurred while iterating over the results of a query for context type") && (exception is OperationCanceledException || exception?.InnerException is OperationCanceledException)) + && !(logMessage.StartsWith("An error occurred using the connection to database ") && (exception is OperationCanceledException || exception?.InnerException is OperationCanceledException)) && !(logMessage.StartsWith("An exception occurred in the database while saving changes for context type") && (exception is OperationCanceledException || exception?.InnerException is OperationCanceledException))) || (logLevel == LogLevel.Critical && logMessage != "DropDatabase configuration option set! Dropping any existing database...")) { From 0a874c6657fad05876cb72f26edfd687df012cd1 Mon Sep 17 00:00:00 2001 From: Jordan Dominion Date: Sat, 18 Nov 2023 22:55:46 -0500 Subject: [PATCH 09/21] Add another error log exception to tests --- tests/Tgstation.Server.Tests/Live/HardFailLogger.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/Tgstation.Server.Tests/Live/HardFailLogger.cs b/tests/Tgstation.Server.Tests/Live/HardFailLogger.cs index 41bb831cc8d..2fc1d32da01 100644 --- a/tests/Tgstation.Server.Tests/Live/HardFailLogger.cs +++ b/tests/Tgstation.Server.Tests/Live/HardFailLogger.cs @@ -28,6 +28,7 @@ public void Log(LogLevel logLevel, EventId eventId, TState state, Except && !logMessage.StartsWith("Error disconnecting connection ") && !(logMessage.StartsWith("An exception occurred while iterating over the results of a query for context type") && (exception is OperationCanceledException || exception?.InnerException is OperationCanceledException)) && !(logMessage.StartsWith("An error occurred using the connection to database ") && (exception is OperationCanceledException || exception?.InnerException is OperationCanceledException)) + && !(logMessage.StartsWith("Error when dispatching 'OnConnectedAsync' on hub") && (exception is OperationCanceledException || exception?.InnerException is OperationCanceledException)) && !(logMessage.StartsWith("An exception occurred in the database while saving changes for context type") && (exception is OperationCanceledException || exception?.InnerException is OperationCanceledException))) || (logLevel == LogLevel.Critical && logMessage != "DropDatabase configuration option set! Dropping any existing database...")) { From cc4b4de7d241530378ec247acaa44fee8aa22180 Mon Sep 17 00:00:00 2001 From: Jordan Dominion Date: Sat, 18 Nov 2023 23:31:16 -0500 Subject: [PATCH 10/21] Wait for DD proccesses to exit after killing --- tests/Tgstation.Server.Tests/Live/TestLiveServer.cs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/Tgstation.Server.Tests/Live/TestLiveServer.cs b/tests/Tgstation.Server.Tests/Live/TestLiveServer.cs index d35435d4ae4..1afece3b7ab 100644 --- a/tests/Tgstation.Server.Tests/Live/TestLiveServer.cs +++ b/tests/Tgstation.Server.Tests/Live/TestLiveServer.cs @@ -104,7 +104,10 @@ static void TerminateAllDDs() { foreach (var proc in GetDDProcessesOnPort(null)) using (proc) + { proc.Kill(); + proc.WaitForExit(); + } } static ushort FreeTcpPort(params ushort[] usedPorts) From 13749149db6cb74d6c8bf6aed1bd5fb32f9e53db Mon Sep 17 00:00:00 2001 From: Jordan Dominion Date: Sat, 18 Nov 2023 23:45:45 -0500 Subject: [PATCH 11/21] This seems to be a fairly reproducible topic hang, which is nice to have --- .../Live/TestLiveServer.cs | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/tests/Tgstation.Server.Tests/Live/TestLiveServer.cs b/tests/Tgstation.Server.Tests/Live/TestLiveServer.cs index 1afece3b7ab..cef619e6790 100644 --- a/tests/Tgstation.Server.Tests/Live/TestLiveServer.cs +++ b/tests/Tgstation.Server.Tests/Live/TestLiveServer.cs @@ -1475,11 +1475,24 @@ await FailFast( var chatReadTask = instanceClient.ChatBots.List(null, cancellationToken); // Check the DMAPI got the channels again https://github.com/tgstation/tgstation-server/issues/1490 - topicRequestResult = await WatchdogTest.StaticTopicClient.SendTopic( - IPAddress.Loopback, - $"tgs_integration_test_tactics7=1", - mainDDPort, - cancellationToken); + var tries = 3; + while (true) + try + { + // HEY I THINK I FOUND A WAY TO RELIABLY REPRODUCE THE TOPIC HANG + // JUST SET A BREAKPOINT IN THE CATCH + // Maybe, with enough investigation, either I can figure out the cause or get Lummox to tell me + topicRequestResult = await WatchdogTest.StaticTopicClient.SendTopic( + IPAddress.Loopback, + $"tgs_integration_test_tactics7=1", + mainDDPort, + cancellationToken); + break; + } + catch (OperationCanceledException) when (--tries > 0) + { + await Task.Delay(TimeSpan.FromSeconds(5), cancellationToken); + } Assert.IsNotNull(topicRequestResult); if(!Int32.TryParse(topicRequestResult.StringData, out var channelsPresent)) From 54ec0a7c98efe3a2a1a1f6ed654abb04caaf204e Mon Sep 17 00:00:00 2001 From: Jordan Dominion Date: Sun, 19 Nov 2023 00:06:35 -0500 Subject: [PATCH 12/21] More `Assert`s, more better --- tests/Tgstation.Server.Tests/Live/Instance/WatchdogTest.cs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/Tgstation.Server.Tests/Live/Instance/WatchdogTest.cs b/tests/Tgstation.Server.Tests/Live/Instance/WatchdogTest.cs index b352bd28d6b..9104a8201cd 100644 --- a/tests/Tgstation.Server.Tests/Live/Instance/WatchdogTest.cs +++ b/tests/Tgstation.Server.Tests/Live/Instance/WatchdogTest.cs @@ -497,13 +497,14 @@ async Task TestDMApiFreeDeploy(CancellationToken cancellationToken) Assert.AreEqual(false, daemonStatus.SoftRestart); Assert.AreEqual(false, daemonStatus.SoftShutdown); Assert.AreEqual(string.Empty, daemonStatus.AdditionalParameters); + Assert.AreEqual(false, daemonStatus.SoftRestart); var initialCompileJob = daemonStatus.ActiveCompileJob; await CheckDMApiFail(daemonStatus.ActiveCompileJob, cancellationToken); daemonStatus = await DeployTestDme("BasicOperation/basic_operation_test", DreamDaemonSecurity.Trusted, true, cancellationToken); Assert.AreEqual(WatchdogStatus.Online, daemonStatus.Status.Value); - + Assert.AreEqual(false, daemonStatus.SoftRestart); // dme name change triggered, instant reboot Assert.AreEqual(initialCompileJob.Id, daemonStatus.ActiveCompileJob.Id); var newerCompileJob = daemonStatus.StagedCompileJob; @@ -1063,6 +1064,7 @@ async Task RunLongRunningTestThenUpdateWithNewDme(CancellationToken cancellation Assert.IsNull(daemonStatus.StagedCompileJob); Assert.AreEqual(DMApiConstants.InteropVersion, daemonStatus.ActiveCompileJob.DMApiVersion); Assert.AreEqual(DreamDaemonSecurity.Safe, daemonStatus.ActiveCompileJob.MinimumSecurityLevel); + Assert.AreEqual(false, daemonStatus.SoftRestart); var startJob = await StartDD(cancellationToken); @@ -1072,6 +1074,7 @@ async Task RunLongRunningTestThenUpdateWithNewDme(CancellationToken cancellation Assert.AreEqual(WatchdogStatus.Online, daemonStatus.Status.Value); + Assert.AreEqual(true, daemonStatus.SoftRestart); CheckDDPriority(); Assert.AreEqual(initialCompileJob.Id, daemonStatus.ActiveCompileJob.Id); @@ -1085,6 +1088,7 @@ async Task RunLongRunningTestThenUpdateWithNewDme(CancellationToken cancellation daemonStatus = await TellWorldToReboot(cancellationToken); Assert.AreNotEqual(initialCompileJob.Id, daemonStatus.ActiveCompileJob.Id); + Assert.AreEqual(false, daemonStatus.SoftRestart); Assert.IsNull(daemonStatus.StagedCompileJob); await instanceClient.DreamDaemon.Shutdown(cancellationToken); From 2cfdf211b8b3172c92f6fc8a9bae971770486763 Mon Sep 17 00:00:00 2001 From: Jordan Dominion Date: Sun, 19 Nov 2023 00:38:01 -0500 Subject: [PATCH 13/21] Fix `ChatManager` `NullReferenceException` --- .../Components/Chat/ChatManager.cs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/Tgstation.Server.Host/Components/Chat/ChatManager.cs b/src/Tgstation.Server.Host/Components/Chat/ChatManager.cs index fb1b6176259..6f85282cf35 100644 --- a/src/Tgstation.Server.Host/Components/Chat/ChatManager.cs +++ b/src/Tgstation.Server.Host/Components/Chat/ChatManager.cs @@ -843,9 +843,12 @@ Tuple GetCommand() } else { - var (helpHandler, _) = GetCommand(); - if (helpHandler != default) + var helpTuple = GetCommand(); + if (helpTuple != default) + { + var (helpHandler, _) = helpTuple; helpText = String.Format(CultureInfo.InvariantCulture, "{0}: {1}{2}", helpHandler.Name, helpHandler.HelpText, helpHandler.AdminOnly ? " - May only be used in admin channels" : String.Empty); + } else helpText = UnknownCommandMessage; } @@ -854,14 +857,16 @@ Tuple GetCommand() return; } - var (commandHandler, trackingContext) = GetCommand(); + var tuple = GetCommand(); - if (commandHandler == default) + if (tuple == default) { await TextReply(UnknownCommandMessage); return; } + var (commandHandler, trackingContext) = tuple; + if (trackingContext?.Active == false) { await TextReply("TGS: The server is rebooting, please try again later"); From 03de22e0b487045245c8dd5a94609c0280facfc2 Mon Sep 17 00:00:00 2001 From: Jordan Dominion Date: Sun, 19 Nov 2023 08:54:58 -0500 Subject: [PATCH 14/21] Remove potential GetGuildAsync spam when mapping Discord channels --- .../Chat/Providers/DiscordProvider.cs | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/src/Tgstation.Server.Host/Components/Chat/Providers/DiscordProvider.cs b/src/Tgstation.Server.Host/Components/Chat/Providers/DiscordProvider.cs index de804479533..42c849b276b 100644 --- a/src/Tgstation.Server.Host/Components/Chat/Providers/DiscordProvider.cs +++ b/src/Tgstation.Server.Host/Components/Chat/Providers/DiscordProvider.cs @@ -1,4 +1,5 @@ using System; +using System.Collections.Concurrent; using System.Collections.Generic; using System.Drawing; using System.Globalization; @@ -719,6 +720,7 @@ protected override async ValueTask DisconnectImpl(CancellationToken cancellation var remapRequired = false; var guildsClient = serviceProvider.GetRequiredService(); + var guildTasks = new ConcurrentDictionary>>(); async ValueTask>> GetModelChannelFromDBChannel(Models.ChatChannel channelFromDB) { @@ -750,17 +752,28 @@ protected override async ValueTask DisconnectImpl(CancellationToken cancellation var guildId = discordChannelResponse.Entity.GuildID.Value; - var guildsResponse = await guildsClient.GetGuildAsync( + var added = false; + var guildsResponse = await guildTasks.GetOrAdd( guildId, - false, - cancellationToken); + localGuildId => + { + added = true; + return guildsClient.GetGuildAsync( + localGuildId, + false, + cancellationToken); + }); if (!guildsResponse.IsSuccess) { - Logger.LogWarning( - "Error retrieving discord guild {guildID}: {result}", - guildId, - guildsResponse.LogFormat()); - remapRequired |= true; + if (added) + { + Logger.LogWarning( + "Error retrieving discord guild {guildID}: {result}", + guildId, + guildsResponse.LogFormat()); + remapRequired |= true; + } + return null; } From 7f3888dee83b2901452cb535b3024c435e3995ad Mon Sep 17 00:00:00 2001 From: Jordan Dominion Date: Sun, 19 Nov 2023 08:56:59 -0500 Subject: [PATCH 15/21] Add live tests helper for UseBasicWatchdog --- tests/Tgstation.Server.Tests/Live/TestLiveServer.cs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/Tgstation.Server.Tests/Live/TestLiveServer.cs b/tests/Tgstation.Server.Tests/Live/TestLiveServer.cs index cef619e6790..60d455c77c9 100644 --- a/tests/Tgstation.Server.Tests/Live/TestLiveServer.cs +++ b/tests/Tgstation.Server.Tests/Live/TestLiveServer.cs @@ -1235,6 +1235,9 @@ async Task TestTgsInternal(CancellationToken hardCancellationToken) // uncomment to force this test to run with DummyChatProviders // missingChatVarsCount = TotalChatVars; + // uncomment to force this test to run with pasic watchdog + // Environment.SetEnvironmentVariable("General__UseBasicWatchdog", "true"); + if (missingChatVarsCount != 0) { if (missingChatVarsCount != TotalChatVars) From ee8aab06da25223c340e719045165c733e815f96 Mon Sep 17 00:00:00 2001 From: Jordan Dominion Date: Sun, 19 Nov 2023 09:20:32 -0500 Subject: [PATCH 16/21] Fix race condition in `ChatManager.ChangeChannels` --- .../Components/Chat/ChatManager.cs | 203 ++++++++++-------- 1 file changed, 115 insertions(+), 88 deletions(-) diff --git a/src/Tgstation.Server.Host/Components/Chat/ChatManager.cs b/src/Tgstation.Server.Host/Components/Chat/ChatManager.cs index 6f85282cf35..680318d0877 100644 --- a/src/Tgstation.Server.Host/Components/Chat/ChatManager.cs +++ b/src/Tgstation.Server.Host/Components/Chat/ChatManager.cs @@ -1,4 +1,5 @@ using System; +using System.Collections.Concurrent; using System.Collections.Generic; using System.Globalization; using System.Linq; @@ -59,33 +60,38 @@ sealed class ChatManager : IChatManager, IRestartHandler /// /// Unchanging s in the mapped by . /// - readonly IDictionary builtinCommands; + readonly Dictionary builtinCommands; /// /// Map of s in use, keyed by . /// - readonly IDictionary providers; + readonly Dictionary providers; /// - /// Map of s to s. + /// Map of s used to guard concurrent access to , keyed by . /// - readonly IDictionary mappedChannels; + readonly ConcurrentDictionary changeChannelSemaphores; /// - /// The active s for the . + /// Map of s to s. /// - readonly IList trackingContexts; + readonly Dictionary mappedChannels; /// - /// The for . + /// The active s for the . /// - readonly CancellationTokenSource handlerCts; + readonly List trackingContexts; /// /// The active for the . /// readonly List activeChatBots; + /// + /// The for . + /// + readonly CancellationTokenSource handlerCts; + /// /// Used for various lock statements throughout this . /// @@ -156,6 +162,7 @@ public ChatManager( builtinCommands = new Dictionary(StringComparer.OrdinalIgnoreCase); providers = new Dictionary(); + changeChannelSemaphores = new ConcurrentDictionary(); mappedChannels = new Dictionary(); trackingContexts = new List(); handlerCts = new CancellationTokenSource(); @@ -174,6 +181,9 @@ public async ValueTask DisposeAsync() foreach (var providerKvp in providers) await providerKvp.Value.DisposeAsync(); + foreach (var providerKvp in changeChannelSemaphores) + providerKvp.Value.Dispose(); + await messageSendTask; } @@ -183,80 +193,88 @@ public async ValueTask ChangeChannels(long connectionId, IEnumerable { - logger.LogDebug("Cannot map channels, provider {providerId} disconnected!", connectionId); - return; - } - - var results = await provider.MapChannels(newChannels, cancellationToken); - try + logger.LogTrace("Creating ChangeChannels semaphore for connection ID {connectionId}...", connectionId); + return new SemaphoreSlim(1); + }); + using (await SemaphoreSlimContext.Lock(semaphore, cancellationToken)) { - lock (activeChatBots) - { - var botToUpdate = activeChatBots.FirstOrDefault(bot => bot.Id == connectionId); - if (botToUpdate != null) - botToUpdate.Channels = newChannels - .Select(apiModel => new Models.ChatChannel - { - DiscordChannelId = apiModel.DiscordChannelId, - IrcChannel = apiModel.IrcChannel, - IsAdminChannel = apiModel.IsAdminChannel, - IsUpdatesChannel = apiModel.IsUpdatesChannel, - IsSystemChannel = apiModel.IsSystemChannel, - IsWatchdogChannel = apiModel.IsWatchdogChannel, - Tag = apiModel.Tag, - }) - .ToList(); - } + var provider = await RemoveProviderChannels(connectionId, false, cancellationToken); + if (provider == null) + return; - var newMappings = results.SelectMany( - kvp => kvp.Value.Select( - channelRepresentation => new ChannelMapping - { - IsWatchdogChannel = kvp.Key.IsWatchdogChannel == true, - IsUpdatesChannel = kvp.Key.IsUpdatesChannel == true, - IsAdminChannel = kvp.Key.IsAdminChannel == true, - IsSystemChannel = kvp.Key.IsSystemChannel == true, - ProviderChannelId = channelRepresentation.RealId, - ProviderId = connectionId, - Channel = channelRepresentation, - })); - - ulong baseId; - lock (synchronizationLock) + if (!provider.Connected) { - baseId = channelIdCounter; - channelIdCounter += (ulong)results.Count; + logger.LogDebug("Cannot map channels, provider {providerId} disconnected!", connectionId); + return; } - lock (mappedChannels) + var results = await provider.MapChannels(newChannels, cancellationToken); + try { - lock (providers) - if (!providers.TryGetValue(connectionId, out IProvider verify) || verify != provider) // aborted again - return; - foreach (var newMapping in newMappings) + lock (activeChatBots) { - var newId = baseId++; - logger.LogTrace("Mapping channel {connectionName}:{channelFriendlyName} as {newId}", newMapping.Channel.ConnectionName, newMapping.Channel.FriendlyName, newId); - mappedChannels.Add(newId, newMapping); - newMapping.Channel.RealId = newId; + var botToUpdate = activeChatBots.FirstOrDefault(bot => bot.Id == connectionId); + if (botToUpdate != null) + botToUpdate.Channels = newChannels + .Select(apiModel => new Models.ChatChannel + { + DiscordChannelId = apiModel.DiscordChannelId, + IrcChannel = apiModel.IrcChannel, + IsAdminChannel = apiModel.IsAdminChannel, + IsUpdatesChannel = apiModel.IsUpdatesChannel, + IsSystemChannel = apiModel.IsSystemChannel, + IsWatchdogChannel = apiModel.IsWatchdogChannel, + Tag = apiModel.Tag, + }) + .ToList(); } - } - // we only want to update contexts if everything at startup has connected once already - // otherwise we could send an incomplete channel set to the DMAPI, which will then spout all its queued messages into it instead of all relevant chatbots - // The watchdog can call this if it needs to after starting up - if (initialProviderConnectionsTask.IsCompleted) - await UpdateTrackingContexts(cancellationToken); - } - finally - { - provider.InitialMappingComplete(); + var newMappings = results.SelectMany( + kvp => kvp.Value.Select( + channelRepresentation => new ChannelMapping + { + IsWatchdogChannel = kvp.Key.IsWatchdogChannel == true, + IsUpdatesChannel = kvp.Key.IsUpdatesChannel == true, + IsAdminChannel = kvp.Key.IsAdminChannel == true, + IsSystemChannel = kvp.Key.IsSystemChannel == true, + ProviderChannelId = channelRepresentation.RealId, + ProviderId = connectionId, + Channel = channelRepresentation, + })); + + ulong baseId; + lock (synchronizationLock) + { + baseId = channelIdCounter; + channelIdCounter += (ulong)results.Count; + } + + lock (mappedChannels) + { + lock (providers) + if (!providers.TryGetValue(connectionId, out IProvider verify) || verify != provider) // aborted again + return; + foreach (var newMapping in newMappings) + { + var newId = baseId++; + logger.LogTrace("Mapping channel {connectionName}:{channelFriendlyName} as {newId}", newMapping.Channel.ConnectionName, newMapping.Channel.FriendlyName, newId); + mappedChannels.Add(newId, newMapping); + newMapping.Channel.RealId = newId; + } + } + + // we only want to update contexts if everything at startup has connected once already + // otherwise we could send an incomplete channel set to the DMAPI, which will then spout all its queued messages into it instead of all relevant chatbots + // The watchdog can call this if it needs to after starting up + if (initialProviderConnectionsTask.IsCompleted) + await UpdateTrackingContexts(cancellationToken); + } + finally + { + provider.InitialMappingComplete(); + } } } @@ -538,26 +556,35 @@ public void RegisterCommandHandler(ICustomCommandHandler customCommandHandler) public async Task DeleteConnection(long connectionId, CancellationToken cancellationToken) { logger.LogTrace("DeleteConnection {connectionId}", connectionId); - var provider = await RemoveProviderChannels(connectionId, true, cancellationToken); - if (provider != null) + var hasSemaphore = changeChannelSemaphores.TryRemove(connectionId, out var semaphore); + using (hasSemaphore + ? semaphore + : null) + using (hasSemaphore + ? await SemaphoreSlimContext.Lock(semaphore, cancellationToken) + : null) { - var startTime = DateTimeOffset.UtcNow; - try + var provider = await RemoveProviderChannels(connectionId, true, cancellationToken); + if (provider != null) { - await provider.Disconnect(cancellationToken); - } - catch (Exception ex) - { - logger.LogError(ex, "Error disconnecting connection {connectionId}!", connectionId); - } + var startTime = DateTimeOffset.UtcNow; + try + { + await provider.Disconnect(cancellationToken); + } + catch (Exception ex) + { + logger.LogError(ex, "Error disconnecting connection {connectionId}!", connectionId); + } - await provider.DisposeAsync(); - var duration = DateTimeOffset.UtcNow - startTime; - if (duration.TotalSeconds > 3) - logger.LogWarning("Disconnecting a {providerType} took {totalSeconds}s!", provider.GetType().Name, duration.TotalSeconds); + await provider.DisposeAsync(); + var duration = DateTimeOffset.UtcNow - startTime; + if (duration.TotalSeconds > 3) + logger.LogWarning("Disconnecting a {providerType} took {totalSeconds}s!", provider.GetType().Name, duration.TotalSeconds); + } + else + logger.LogTrace("DeleteConnection: ID {connectionId} doesn't exist!", connectionId); } - else - logger.LogTrace("DeleteConnection: ID {connectionId} doesn't exist!", connectionId); } /// From c649af9e481bdc07b3f16ca75d1cc76ef916db67 Mon Sep 17 00:00:00 2001 From: Jordan Dominion Date: Sun, 19 Nov 2023 09:20:50 -0500 Subject: [PATCH 17/21] Version bump to 5.18.2 --- build/Version.props | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/Version.props b/build/Version.props index 28f2e724a5e..a93190cea38 100644 --- a/build/Version.props +++ b/build/Version.props @@ -3,7 +3,7 @@ - 5.18.1 + 5.18.2 4.7.1 9.14.0 7.0.0 From 669cc83992d69515d7d8268e2a9dccc1d7540856 Mon Sep 17 00:00:00 2001 From: Jordan Dominion Date: Sun, 19 Nov 2023 10:23:24 -0500 Subject: [PATCH 18/21] Fix using the wrong task when awaiting reboots for topic requests --- .../Components/Session/SessionController.cs | 6 ++--- .../Live/TestLiveServer.cs | 23 ++++--------------- 2 files changed, 8 insertions(+), 21 deletions(-) diff --git a/src/Tgstation.Server.Host/Components/Session/SessionController.cs b/src/Tgstation.Server.Host/Components/Session/SessionController.cs index fa908bed544..d57db476104 100644 --- a/src/Tgstation.Server.Host/Components/Session/SessionController.cs +++ b/src/Tgstation.Server.Host/Components/Session/SessionController.cs @@ -1044,7 +1044,7 @@ async ValueTask SendCommand(TopicParameters parameters, bool bypa return null; } - var rebootGate = RebootGate; + var reboot = OnReboot; if (!bypassLaunchResult) { var launchResult = await LaunchResult.WaitAsync(cancellationToken); @@ -1068,11 +1068,11 @@ async ValueTask CancelIfLifetimeElapses() { try { - var completed = await Task.WhenAny(Lifetime, rebootGate).WaitAsync(combinedCancellationToken); + var completed = await Task.WhenAny(Lifetime, reboot).WaitAsync(combinedCancellationToken); Logger.LogDebug( "Server {action}, cancelling pending command: {commandType}", - completed != rebootGate + completed != reboot ? "process ended" : "rebooting", parameters.CommandType); diff --git a/tests/Tgstation.Server.Tests/Live/TestLiveServer.cs b/tests/Tgstation.Server.Tests/Live/TestLiveServer.cs index 60d455c77c9..e24fda6f2dd 100644 --- a/tests/Tgstation.Server.Tests/Live/TestLiveServer.cs +++ b/tests/Tgstation.Server.Tests/Live/TestLiveServer.cs @@ -1478,24 +1478,11 @@ await FailFast( var chatReadTask = instanceClient.ChatBots.List(null, cancellationToken); // Check the DMAPI got the channels again https://github.com/tgstation/tgstation-server/issues/1490 - var tries = 3; - while (true) - try - { - // HEY I THINK I FOUND A WAY TO RELIABLY REPRODUCE THE TOPIC HANG - // JUST SET A BREAKPOINT IN THE CATCH - // Maybe, with enough investigation, either I can figure out the cause or get Lummox to tell me - topicRequestResult = await WatchdogTest.StaticTopicClient.SendTopic( - IPAddress.Loopback, - $"tgs_integration_test_tactics7=1", - mainDDPort, - cancellationToken); - break; - } - catch (OperationCanceledException) when (--tries > 0) - { - await Task.Delay(TimeSpan.FromSeconds(5), cancellationToken); - } + topicRequestResult = await WatchdogTest.StaticTopicClient.SendTopic( + IPAddress.Loopback, + $"tgs_integration_test_tactics7=1", + mainDDPort, + cancellationToken); Assert.IsNotNull(topicRequestResult); if(!Int32.TryParse(topicRequestResult.StringData, out var channelsPresent)) From a8c2d49608515e7b18461871a9bc710914bdcbcf Mon Sep 17 00:00:00 2001 From: Jordan Dominion Date: Sun, 19 Nov 2023 10:24:09 -0500 Subject: [PATCH 19/21] Use a float here cause why not? --- tests/DMAPI/LongRunning/Test.dm | 2 +- tests/Tgstation.Server.Tests/Live/TestLiveServer.cs | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/DMAPI/LongRunning/Test.dm b/tests/DMAPI/LongRunning/Test.dm index 57b23805bc9..52e8f73c8cf 100644 --- a/tests/DMAPI/LongRunning/Test.dm +++ b/tests/DMAPI/LongRunning/Test.dm @@ -168,7 +168,7 @@ var/run_bridge_test var/tactics7 = data["tgs_integration_test_tactics7"] if(tactics7) var/list/channels = TgsChatChannelInfo() - return "[length(channels)]" + return length(channels) var/tactics8 = data["tgs_integration_test_tactics8"] if(tactics8) diff --git a/tests/Tgstation.Server.Tests/Live/TestLiveServer.cs b/tests/Tgstation.Server.Tests/Live/TestLiveServer.cs index e24fda6f2dd..696512cb2ae 100644 --- a/tests/Tgstation.Server.Tests/Live/TestLiveServer.cs +++ b/tests/Tgstation.Server.Tests/Live/TestLiveServer.cs @@ -1485,15 +1485,12 @@ await FailFast( cancellationToken); Assert.IsNotNull(topicRequestResult); - if(!Int32.TryParse(topicRequestResult.StringData, out var channelsPresent)) - { - Assert.Fail("Expected DD to send us an int!"); - } + Assert.IsTrue(topicRequestResult.FloatData.HasValue); var currentChatBots = await chatReadTask; var connectedChannelCount = currentChatBots.Where(x => x.Enabled.Value).SelectMany(x => x.Channels).Count(); - Assert.AreEqual(connectedChannelCount, channelsPresent); + Assert.AreEqual(connectedChannelCount, topicRequestResult.FloatData.Value); await WatchdogTest.TellWorldToReboot2(instanceClient, WatchdogTest.StaticTopicClient, mainDDPort, cancellationToken); From 70edd0a01f2cfc0fecac50a5eaaa43d3b7343a50 Mon Sep 17 00:00:00 2001 From: Jordan Dominion Date: Sun, 19 Nov 2023 12:10:36 -0500 Subject: [PATCH 20/21] Fix downloading configuration files locking at the transfer level --- .../Components/StaticFiles/Configuration.cs | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/Tgstation.Server.Host/Components/StaticFiles/Configuration.cs b/src/Tgstation.Server.Host/Components/StaticFiles/Configuration.cs index 0a9a356e771..a9406e0e8d8 100644 --- a/src/Tgstation.Server.Host/Components/StaticFiles/Configuration.cs +++ b/src/Tgstation.Server.Host/Components/StaticFiles/Configuration.cs @@ -326,16 +326,10 @@ void GetFileStream() result = ioManager.GetFileStream(path, false); } - using (SemaphoreSlimContext.TryLock(semaphore, out var locked)) - { - if (!locked) - return null; - - if (systemIdentity == null) - await Task.Factory.StartNew(GetFileStream, cancellationToken, DefaultIOManager.BlockingTaskCreationOptions, TaskScheduler.Current); - else - await systemIdentity.RunImpersonated(GetFileStream, cancellationToken); - } + if (systemIdentity == null) + await Task.Factory.StartNew(GetFileStream, cancellationToken, DefaultIOManager.BlockingTaskCreationOptions, TaskScheduler.Current); + else + await systemIdentity.RunImpersonated(GetFileStream, cancellationToken); return result; }, From f4a4e0868faf6f608b08c61de533b25c71154332 Mon Sep 17 00:00:00 2001 From: Jordan Dominion Date: Sun, 19 Nov 2023 17:15:37 -0500 Subject: [PATCH 21/21] Correct overzealous watchdog restarts --- .../Components/Watchdog/BasicWatchdog.cs | 6 +----- src/Tgstation.Server.Host/Components/Watchdog/IWatchdog.cs | 1 + 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/Tgstation.Server.Host/Components/Watchdog/BasicWatchdog.cs b/src/Tgstation.Server.Host/Components/Watchdog/BasicWatchdog.cs index ee110646fad..7483760afb2 100644 --- a/src/Tgstation.Server.Host/Components/Watchdog/BasicWatchdog.cs +++ b/src/Tgstation.Server.Host/Components/Watchdog/BasicWatchdog.cs @@ -289,11 +289,7 @@ protected virtual ValueTask SessionStartupPersist(CancellationToken cancellation /// The for the operation. /// A resulting in the to take. protected virtual ValueTask HandleNormalReboot(CancellationToken cancellationToken) - { - var settingsUpdatePending = ActiveLaunchParameters != LastLaunchParameters; - var result = settingsUpdatePending ? MonitorAction.Restart : MonitorAction.Continue; - return ValueTask.FromResult(result); - } + => ValueTask.FromResult(MonitorAction.Continue); /// /// Handler for . diff --git a/src/Tgstation.Server.Host/Components/Watchdog/IWatchdog.cs b/src/Tgstation.Server.Host/Components/Watchdog/IWatchdog.cs index 2d8d0f5d240..39600383f3a 100644 --- a/src/Tgstation.Server.Host/Components/Watchdog/IWatchdog.cs +++ b/src/Tgstation.Server.Host/Components/Watchdog/IWatchdog.cs @@ -37,6 +37,7 @@ public interface IWatchdog : IComponentService, IAsyncDisposable, IEventConsumer /// /// The the active server is using. /// + /// This may not be the exact same as but still be associated with the same session. DreamDaemonLaunchParameters LastLaunchParameters { get; } ///