From a26e03e614f71a514bdb301df84ea3b71ba4161f Mon Sep 17 00:00:00 2001
From: Matt Toohey
Date: Thu, 6 Jun 2024 09:58:21 +1000
Subject: [PATCH] fix: handle async call not being able to be called (#1663)

Fixes case where async call's early failure skipped cleanup and retries.

Issue steps:
- Deploy a module
- Deployment and related rows get added to db, but no runner has been assigned yet
- Add an async call that calls that module
- A controller picks up the async call and attempts it by calling `callWithRequest(...)`
- This gets the schema by calling `getActiveSchema` which gets the schema filtering by `minReplicas > 0 and r.assigned = 'assigned'` [(link)](https://github.com/TBD54566975/ftl/blob/matt2e%2Fasync-call-dead-end/backend/controller/sql/queries.sql#L150)
- `callWithRequest(...)` fails because this new module is not in the fetched schema
- `executeAsyncCalls(...)` returns early due to this error, not updating the state of the async call
- the lease gets released rather than timing out (due to the defer in `executeAsyncCalls(...)`)
- we end up with an async call with state = `executing` and no lease

So this async call gets stuck forever. For pubsub this means the subscription stops forever.
--- backend/controller/controller.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/backend/controller/controller.go b/backend/controller/controller.go index d2c503c6f8..14f5ec7c5f 100644 --- a/backend/controller/controller.go +++ b/backend/controller/controller.go @@ -1219,19 +1219,21 @@ func (s *Service) executeAsyncCalls(ctx context.Context) (time.Duration, error) Body: call.Request, } resp, err := s.callWithRequest(ctx, connect.NewRequest(req), optional.None[model.RequestKey](), s.config.Advertise.String()) - if err != nil { - return 0, fmt.Errorf("async call failed: %w", err) - } var callResult either.Either[[]byte, string] - if perr := resp.Msg.GetError(); perr != nil { + failed := false + if err != nil { + logger.Warnf("Async call could not be called: %v", err) + callResult = either.RightOf[[]byte](err.Error()) + failed = true + } else if perr := resp.Msg.GetError(); perr != nil { logger.Warnf("Async call failed: %s", perr.Message) callResult = either.RightOf[[]byte](perr.Message) + failed = true } else { logger.Debugf("Async call succeeded") callResult = either.LeftOf[string](resp.Msg.GetBody()) } err = s.dal.CompleteAsyncCall(ctx, call, callResult, func(tx *dal.Tx) error { - failed := resp.Msg.GetError() != nil if failed && call.RemainingAttempts > 0 { // Will retry, do not propagate failure yet. return nil