From aed9777ab2694ccdd0e1ab2d5196fe1615f3ca2f Mon Sep 17 00:00:00 2001 From: Safeer Jiwan Date: Fri, 16 Aug 2024 15:03:24 -0700 Subject: [PATCH] feat: cron using async --- Justfile | 2 +- backend/controller/controller.go | 17 +- backend/controller/cronjobs/cronjobs.go | 474 +-- .../cronjobs/cronjobs_integration_test.go | 65 - backend/controller/cronjobs/cronjobs_test.go | 128 - .../cronjobs/cronjobs_utils_test.go | 263 -- backend/controller/cronjobs/dal.go | 108 + backend/controller/cronjobs/dal/dal.go | 101 - backend/controller/cronjobs/sql/conn.go | 73 + backend/controller/cronjobs/sql/models.go | 3 +- backend/controller/cronjobs/sql/querier.go | 117 +- backend/controller/cronjobs/sql/queries.sql | 77 +- .../controller/cronjobs/sql/queries.sql.go | 2745 ++++++++++++++++- backend/controller/cronjobs/state.go | 83 - backend/controller/dal/async_calls.go | 15 +- backend/controller/sql/models.go | 3 +- backend/controller/sql/querier.go | 8 +- backend/controller/sql/queries.sql | 8 +- backend/controller/sql/queries.sql.go | 332 +- ...0240815164808_async_calls_cron_job_key.sql | 22 + common/configuration/sql/models.go | 3 +- internal/model/cron_job.go | 10 +- sqlc.yaml | 126 +- 23 files changed, 3359 insertions(+), 1424 deletions(-) delete mode 100644 backend/controller/cronjobs/cronjobs_integration_test.go delete mode 100644 backend/controller/cronjobs/cronjobs_test.go delete mode 100644 backend/controller/cronjobs/cronjobs_utils_test.go create mode 100644 backend/controller/cronjobs/dal.go delete mode 100644 backend/controller/cronjobs/dal/dal.go delete mode 100644 backend/controller/cronjobs/state.go create mode 100644 backend/controller/sql/schema/20240815164808_async_calls_cron_job_key.sql diff --git a/Justfile b/Justfile index 5e9df9ca4f..6b3134d91a 100644 --- a/Justfile +++ b/Justfile @@ -78,7 +78,7 @@ init-db: # Regenerate SQLC code (requires init-db to be run first) build-sqlc: - @mk backend/controller/sql/{db.go,models.go,querier.go,queries.sql.go} 
backend/controller/{cronjobs}/sql/{db.go,models.go,querier.go,queries.sql.go} common/configuration/sql/{db.go,models.go,querier.go,queries.sql.go} : backend/controller/sql/queries.sql backend/controller/{cronjobs}/sql/queries.sql common/configuration/sql/queries.sql backend/controller/sql/schema sqlc.yaml -- "just init-db && sqlc generate" + @mk backend/controller/sql/{db.go,models.go,querier.go,queries.sql.go} backend/controller/cronjobs/sql/{db.go,models.go,querier.go,queries.sql.go} common/configuration/sql/{db.go,models.go,querier.go,queries.sql.go} : backend/controller/sql/queries.sql backend/controller/cronjobs/sql/queries.sql common/configuration/sql/queries.sql backend/controller/sql/schema sqlc.yaml -- "just init-db && sqlc generate" # Build the ZIP files that are embedded in the FTL release binaries build-zips: build-kt-runtime diff --git a/backend/controller/controller.go b/backend/controller/controller.go index 84177fc65c..b19bb610ba 100644 --- a/backend/controller/controller.go +++ b/backend/controller/controller.go @@ -248,9 +248,8 @@ func New(ctx context.Context, conn *sql.DB, config Config, runnerScaling scaling svc.routes.Store(map[string][]dal.Route{}) svc.schema.Store(&schema.Schema{}) - cronSvc := cronjobs.New(ctx, key, svc.config.Advertise.Host, cronjobs.Config{Timeout: config.CronJobTimeout}, conn, svc.tasks, svc.callWithRequest) + cronSvc := cronjobs.New(ctx, key, svc.config.Advertise.Host, conn) svc.cronJobs = cronSvc - svc.controllerListListeners = append(svc.controllerListListeners, cronSvc) pubSub := pubsub.New(ctx, db, svc.tasks, svc) svc.pubSub = pubSub @@ -540,7 +539,7 @@ func (s *Service) ReplaceDeploy(ctx context.Context, c *connect.Request[ftlv1.Re } } - s.cronJobs.CreatedOrReplacedDeloyment(ctx, newDeploymentKey) + s.cronJobs.CreatedOrReplacedDeloyment(ctx) return connect.NewResponse(&ftlv1.ReplaceDeployResponse{}), nil } @@ -1554,6 +1553,9 @@ func (s *Service) catchAsyncCall(ctx context.Context, logger *log.Logger, call * func 
metadataForAsyncCall(call *dal.AsyncCall) *ftlv1.Metadata { switch origin := call.Origin.(type) { + case dal.AsyncOriginCron: + return &ftlv1.Metadata{} + case dal.AsyncOriginFSM: return &ftlv1.Metadata{ Values: []*ftlv1.Metadata_Pair{ @@ -1581,6 +1583,15 @@ func (s *Service) finaliseAsyncCall(ctx context.Context, tx *dal.Tx, call *dal.A // Allow for handling of completion based on origin switch origin := call.Origin.(type) { + case dal.AsyncOriginCron: + cjk, err := model.ParseCronJobKey(origin.CronJobKey) + if err != nil { + return fmt.Errorf("failed to parse cron job key: %w", err) + } + if err := s.cronJobs.OnJobCompletion(ctx, cjk, failed); err != nil { + return fmt.Errorf("failed to finalize cron async call: %w", err) + } + case dal.AsyncOriginFSM: if err := s.onAsyncFSMCallCompletion(ctx, tx, origin, failed, isFinalResult); err != nil { return fmt.Errorf("failed to finalize FSM async call: %w", err) diff --git a/backend/controller/cronjobs/cronjobs.go b/backend/controller/cronjobs/cronjobs.go index c892c7000f..d6b51cb074 100644 --- a/backend/controller/cronjobs/cronjobs.go +++ b/backend/controller/cronjobs/cronjobs.go @@ -3,126 +3,44 @@ package cronjobs import ( "context" "database/sql" - "encoding/json" "errors" "fmt" "time" - "connectrpc.com/connect" - "github.com/alecthomas/atomic" "github.com/alecthomas/types/optional" - "github.com/alecthomas/types/pubsub" "github.com/benbjohnson/clock" - "github.com/jpillora/backoff" - "github.com/serialx/hashring" - "github.com/TBD54566975/ftl/backend/controller/cronjobs/dal" - parentdal "github.com/TBD54566975/ftl/backend/controller/dal" - "github.com/TBD54566975/ftl/backend/controller/observability" - "github.com/TBD54566975/ftl/backend/controller/scheduledtask" - ftlv1 "github.com/TBD54566975/ftl/backend/protos/xyz/block/ftl/v1" + cronsql "github.com/TBD54566975/ftl/backend/controller/cronjobs/sql" schemapb "github.com/TBD54566975/ftl/backend/protos/xyz/block/ftl/v1/schema" 
"github.com/TBD54566975/ftl/backend/schema" "github.com/TBD54566975/ftl/internal/cron" "github.com/TBD54566975/ftl/internal/log" "github.com/TBD54566975/ftl/internal/model" - "github.com/TBD54566975/ftl/internal/slices" ) -const ( - controllersPerJob = 2 - jobResetInterval = time.Minute - newJobHashRingOverrideInterval = time.Minute + time.Second*20 -) - -type Config struct { - Timeout time.Duration -} - -//sumtype:decl -type event interface { - // cronJobEvent is a marker to ensure that all events implement the interface. - cronJobEvent() -} - -type syncEvent struct { - jobs []model.CronJob - addedDeploymentKey optional.Option[model.DeploymentKey] -} - -func (syncEvent) cronJobEvent() {} - -type endedJobsEvent struct { - jobs []model.CronJob -} - -func (endedJobsEvent) cronJobEvent() {} - -type updatedHashRingEvent struct{} - -func (updatedHashRingEvent) cronJobEvent() {} - -type hashRingState struct { - hashRing *hashring.HashRing - controllers []parentdal.Controller - idx int -} - -type DAL interface { - GetCronJobs(ctx context.Context) ([]model.CronJob, error) - StartCronJobs(ctx context.Context, jobs []model.CronJob) (attemptedJobs []dal.AttemptedCronJob, err error) - EndCronJob(ctx context.Context, job model.CronJob, next time.Time) (model.CronJob, error) - GetStaleCronJobs(ctx context.Context, duration time.Duration) ([]model.CronJob, error) -} - -type Scheduler interface { - Singleton(retry backoff.Backoff, job scheduledtask.Job) - Parallel(retry backoff.Backoff, job scheduledtask.Job) -} - -type ExecuteCallFunc func(context.Context, *connect.Request[ftlv1.CallRequest], optional.Option[model.RequestKey], optional.Option[model.RequestKey], string) (*connect.Response[ftlv1.CallResponse], error) - type Service struct { - config Config key model.ControllerKey requestSource string - - dal DAL - scheduler Scheduler - call ExecuteCallFunc - - clock clock.Clock - events *pubsub.Topic[event] - - hashRingState atomic.Value[*hashRingState] + dal DAL + clock 
clock.Clock } -func New(ctx context.Context, key model.ControllerKey, requestSource string, config Config, conn *sql.DB, scheduler Scheduler, call ExecuteCallFunc) *Service { - return NewForTesting(ctx, key, requestSource, config, dal.New(conn), scheduler, call, clock.New()) +func New(ctx context.Context, key model.ControllerKey, requestSource string, conn *sql.DB) *Service { + return NewForTesting(ctx, key, requestSource, *newDAL(conn), clock.New()) } -func NewForTesting(ctx context.Context, key model.ControllerKey, requestSource string, config Config, dal DAL, scheduler Scheduler, call ExecuteCallFunc, clock clock.Clock) *Service { +func NewForTesting(ctx context.Context, key model.ControllerKey, requestSource string, dal DAL, clock clock.Clock) *Service { svc := &Service{ - config: config, key: key, requestSource: requestSource, dal: dal, - scheduler: scheduler, - call: call, clock: clock, - events: pubsub.New[event](), } - svc.UpdatedControllerList(ctx, nil) - - svc.scheduler.Parallel(backoff.Backoff{Min: time.Second, Max: jobResetInterval}, svc.syncJobs) - svc.scheduler.Singleton(backoff.Backoff{Min: time.Second, Max: time.Minute}, svc.killOldJobs) - - go svc.watchForUpdates(ctx) - return svc } func (s *Service) NewCronJobsForModule(ctx context.Context, module *schemapb.Module) ([]model.CronJob, error) { + logger := log.FromContext(ctx).Scope("cron") start := s.clock.Now().UTC() newJobs := []model.CronJob{} merr := []error{} @@ -153,344 +71,132 @@ func (s *Service) NewCronJobsForModule(ctx context.Context, module *schemapb.Mod Schedule: cronStr, StartTime: start, NextExecution: next, - State: model.CronJobStateIdle, // DeploymentKey: Filled in by DAL }) } } + logger.Tracef("Found %d cron jobs", len(newJobs)) if len(merr) > 0 { return nil, errors.Join(merr...) } return newJobs, nil } -// CreatedOrReplacedDeloyment is only called by the responsible controller to its cron service, and will not be received by the other cron services. 
-// When a controller creates/replaces a deployment, its cron job service is responsible for -// the newly created cron jobs until other controllers have a chance to resync their list of jobs and start sharing responsibility of the new cron jobs. -func (s *Service) CreatedOrReplacedDeloyment(ctx context.Context, newDeploymentKey model.DeploymentKey) { - // Rather than finding old/new cron jobs and updating our state, we can just resync the list of jobs - _ = s.syncJobsWithNewDeploymentKey(ctx, optional.Some(newDeploymentKey)) //nolint:errcheck // TODO(matt2e) is this valid? -} - -// SyncJobs is run periodically via a scheduled task -func (s *Service) syncJobs(ctx context.Context) (time.Duration, error) { - err := s.syncJobsWithNewDeploymentKey(ctx, optional.None[model.DeploymentKey]()) - if err != nil { - return 0, err - } - return jobResetInterval, nil -} - -// syncJobsWithNewDeploymentKey resyncs the list of jobs and marks the deployment key as added so that it can overrule the hash ring for a short time. -func (s *Service) syncJobsWithNewDeploymentKey(ctx context.Context, deploymentKey optional.Option[model.DeploymentKey]) error { - logger := log.FromContext(ctx) - - jobs, err := s.dal.GetCronJobs(ctx) +// CreatedOrReplacedDeloyment is called by the responsible controller to its cron service, we can +// schedule all cron jobs here since the cron_jobs rows are locked within the transaction and the +// controllers won't step on each other. 
+func (s *Service) CreatedOrReplacedDeloyment(ctx context.Context) { + logger := log.FromContext(ctx).Scope("cron") + logger.Tracef("New deployment; scheduling cron jobs") + _, err := s.scheduleCronJobs(ctx) if err != nil { - logger.Errorf(err, "failed to get cron jobs") - return fmt.Errorf("failed to get cron jobs: %w", err) + logger.Errorf(err, "failed to schedule cron jobs: %v", err) } - s.events.Publish(syncEvent{ - jobs: jobs, - addedDeploymentKey: deploymentKey, - }) - return nil } -func (s *Service) executeJob(ctx context.Context, job model.CronJob) { - logger := log.FromContext(ctx) - requestBody := map[string]any{} - requestJSON, err := json.Marshal(requestBody) - if err != nil { - logger.Errorf(err, "could not build body for cron job: %v", job.Key) - observability.Cron.JobFailedStart(ctx, job) - return - } - - req := connect.NewRequest(&ftlv1.CallRequest{ - Verb: &schemapb.Ref{Module: job.Verb.Module, Name: job.Verb.Name}, - Body: requestJSON, - }) - - requestKey := model.NewRequestKey(model.OriginCron, fmt.Sprintf("%s-%s", job.Verb.Module, job.Verb.Name)) - - callCtx, cancel := context.WithTimeout(ctx, s.config.Timeout) - defer cancel() - observability.Cron.JobStarted(ctx, job) - _, err = s.call(callCtx, req, optional.Some(requestKey), optional.None[model.RequestKey](), s.requestSource) - - // Record execution success/failure metric now and leave post job-execution-action observability to logging - if err != nil { - logger.Errorf(err, "failed to execute cron job %v", job.Key) - observability.Cron.JobFailed(ctx, job) - // Do not return, continue to end the job and schedule the next execution - } else { - observability.Cron.JobSuccess(ctx, job) - } - - schedule, err := cron.Parse(job.Schedule) - if err != nil { - logger.Errorf(err, "failed to parse cron schedule %q", job.Schedule) - return - } - next, err := cron.NextAfter(schedule, s.clock.Now().UTC(), false) - if err != nil { - logger.Errorf(err, "failed to calculate next execution for cron job %v with 
schedule %q", job.Key, job.Schedule) - return - } +// scheduleCronJobs schedules all cron jobs that are not already scheduled. +func (s *Service) scheduleCronJobs(ctx context.Context) (dur time.Duration, err error) { + logger := log.FromContext(ctx).Scope("cron") + now := s.clock.Now().UTC() + next := now.Add(time.Hour) - updatedJob, err := s.dal.EndCronJob(ctx, job, next) + tx, err := s.dal.Begin(ctx) if err != nil { - logger.Errorf(err, "failed to end cron job %v", job.Key) - } else { - s.events.Publish(endedJobsEvent{ - jobs: []model.CronJob{updatedJob}, - }) + return 0, fmt.Errorf("failed to begin transaction: %w", err) } -} + defer tx.CommitOrRollback(ctx, &err) -// killOldJobs looks for jobs that have been executing for too long. -// A soft timeout should normally occur from the job's context timing out, but there are cases where this does not happen (eg: unresponsive or dead controller) -// In these cases we need a hard timout after an additional grace period. -// To do this, this function resets these job's state to idle and updates the next execution time in the db so the job can be picked up again next time. 
-func (s *Service) killOldJobs(ctx context.Context) (time.Duration, error) { - logger := log.FromContext(ctx) - staleJobs, err := s.dal.GetStaleCronJobs(ctx, s.config.Timeout+time.Minute) + jobs, err := tx.GetUnscheduledCronJobs(ctx, now) if err != nil { - return 0, err - } else if len(staleJobs) == 0 { - return time.Minute, nil + return 0, fmt.Errorf("failed to get unscheduled cron jobs: %w", err) } - - updatedJobs := []model.CronJob{} - for _, stale := range staleJobs { - start := s.clock.Now().UTC() - pattern, err := cron.Parse(stale.Schedule) - if err != nil { - logger.Errorf(err, "Could not kill stale cron job %q because schedule could not be parsed: %q", stale.Key, stale.Schedule) - continue - } - next, err := cron.NextAfter(pattern, start, false) - if err != nil { - logger.Errorf(err, "Could not kill stale cron job %q because next date could not be calculated: %q", stale.Key, stale.Schedule) - continue - } - - updated, err := s.dal.EndCronJob(ctx, stale, next) + logger.Tracef("Scheduling %d cron jobs", len(jobs)) + for _, job := range jobs { + err = s.scheduleCronJob(ctx, tx, job) if err != nil { - logger.Errorf(err, "Could not kill stale cron job %s because: %v", stale.Key, err) - continue + return 0, fmt.Errorf("failed to schedule cron job %q: %w", job.Key, err) } - logger.Warnf("Killed stale cron job %s", stale.Key) - observability.Cron.JobKilled(ctx, stale) - updatedJobs = append(updatedJobs, updated) } - s.events.Publish(endedJobsEvent{ - jobs: updatedJobs, - }) - - return time.Minute, nil + return next.Sub(now), nil } -// watchForUpdates is the centralized place that handles: -// - the list of known jobs and their state -// - executing jobs when they are due -// - reacting to events that change the list of jobs, deployments or hash ring -// -// State is private to this function to ensure thread safety. -func (s *Service) watchForUpdates(ctx context.Context) { +// OnJobCompletion is called by the controller when a cron job async call completes. 
We schedule +// the next execution of the cron job here. +func (s *Service) OnJobCompletion(ctx context.Context, key model.CronJobKey, failed bool) (err error) { logger := log.FromContext(ctx).Scope("cron") + logger.Tracef("Cron job %q completed with failed=%v", key, failed) - events := make(chan event, 128) - s.events.Subscribe(events) - defer s.events.Unsubscribe(events) - - state := &state{ - executing: map[string]bool{}, - newJobs: map[string]time.Time{}, - blockedUntil: s.clock.Now(), - } - - for { - now := s.clock.Now() - next := now.Add(time.Hour) // should never be reached, expect a different signal long beforehand - for _, j := range state.jobs { - if possibleNext, err := s.nextAttemptForJob(j, state, now, false); err == nil && possibleNext.Before(next) { - next = possibleNext - } - } - - if next.Before(state.blockedUntil) { - next = state.blockedUntil - logger.Tracef("loop blocked for %v", next.Sub(now)) - } else if next.Sub(now) < time.Second { - next = now.Add(time.Second) - logger.Tracef("loop while gated for 1s") - } else if next.Sub(now) > time.Minute*59 { - logger.Tracef("loop while idling") - } else { - logger.Tracef("loop with next %v, %d jobs", next.Sub(now), len(state.jobs)) - } - - select { - case <-ctx.Done(): - return - case <-s.clock.After(next.Sub(now)): - // Try starting jobs in db - // note that we use next here are the current time - // as if there is a pause of over a second we could miss jobs if we use the current time - // this is very unlikely to happen, but if it did it would be hard to diagnose - jobsToAttempt := slices.Filter(state.jobs, func(j model.CronJob) bool { - if n, err := s.nextAttemptForJob(j, state, next, true); err == nil { - return !n.After(s.clock.Now().UTC()) - } - return false - }) - jobResults, err := s.dal.StartCronJobs(ctx, jobsToAttempt) - if err != nil { - logger.Errorf(err, "failed to start cron jobs in db") - state.blockedUntil = s.clock.Now().Add(time.Second * 5) - continue - } - - // Start jobs that were 
successfully updated - updatedJobs := []model.CronJob{} - removedDeploymentKeys := map[string]model.DeploymentKey{} - - for _, job := range jobResults { - updatedJobs = append(updatedJobs, job.CronJob) - if !job.DidStartExecution { - continue - } - if !job.HasMinReplicas { - // We successfully updated the db to start this job but the deployment has min replicas set to 0 - // We need to update the db to end this job - removedDeploymentKeys[job.DeploymentKey.String()] = job.DeploymentKey - _, err := s.dal.EndCronJob(ctx, job.CronJob, next) - if err != nil { - logger.Errorf(err, "failed to end cron job %s", job.Key.String()) - } - continue - } - logger.Infof("executing job %v", job.Key) - state.startedExecutingJob(job.CronJob) - go s.executeJob(ctx, job.CronJob) - } - - // Update job list - state.updateJobs(updatedJobs) - for _, key := range removedDeploymentKeys { - state.removeDeploymentKey(key) - } - case e := <-events: - switch event := e.(type) { - case syncEvent: - logger.Tracef("syncing job list: %d jobs", len(event.jobs)) - state.sync(event.jobs, event.addedDeploymentKey) - case endedJobsEvent: - logger.Tracef("updating %d jobs", len(event.jobs)) - state.updateJobs(event.jobs) - case updatedHashRingEvent: - // do another cycle through the loop to see if new jobs need to be scheduled - } - } + tx, err := s.dal.Begin(ctx) + if err != nil { + return fmt.Errorf("failed to begin transaction: %w", err) } -} + defer tx.CommitOrRollback(ctx, &err) -func (s *Service) nextAttemptForJob(job model.CronJob, state *state, currentTime time.Time, allowsNow bool) (time.Time, error) { - currentTime = currentTime.UTC() - if !s.isResponsibleForJob(job, state) { - return s.clock.Now(), fmt.Errorf("controller is not responsible for job") + job, err := tx.GetCronJobByKey(ctx, key) + if err != nil { + return fmt.Errorf("failed to get cron job %q: %w", key, err) } - if job.State == model.CronJobStateExecuting { - if state.isExecutingInCurrentController(job) { - // no need to schedule 
this job until it finishes - return s.clock.Now(), fmt.Errorf("controller is already waiting for job to finish") - } - // We don't know when the other controller that is executing this job will finish it - // So we should optimistically attempt it when the next execution date is due assuming the job finishes - pattern, err := cron.Parse(job.Schedule) - if err != nil { - return s.clock.Now(), fmt.Errorf("failed to parse cron schedule %q", job.Schedule) - } - next, err := cron.NextAfter(pattern, currentTime, allowsNow) - if err == nil { - return next, nil - } + err = s.scheduleCronJob(ctx, tx, job) + if err != nil { + return fmt.Errorf("failed to schedule cron job %q: %w", key, err) } - return job.NextExecution, nil + return nil } -// UpdatedControllerList synchronises the hash ring with the active controllers. -func (s *Service) UpdatedControllerList(ctx context.Context, controllers []parentdal.Controller) { +// scheduleCronJob schedules the next execution of a single cron job. +func (s *Service) scheduleCronJob(ctx context.Context, tx *Tx, job model.CronJob) error { logger := log.FromContext(ctx).Scope("cron") - controllerIdx := -1 - for idx, controller := range controllers { - if controller.Key.String() == s.key.String() { - controllerIdx = idx - break - } - } - if controllerIdx == -1 { - logger.Tracef("controller %q not found in list of controllers", s.key) + now := s.clock.Now().UTC() + pending, err := tx.db.IsCronJobPending(ctx, job.Key, now) + if err != nil { + return fmt.Errorf("failed to check if cron job %q is pending: %w", job.Key, err) } - - oldState := s.hashRingState.Load() - if oldState != nil && len(oldState.controllers) == len(controllers) { - hasChanged := false - for idx, new := range controllers { - old := oldState.controllers[idx] - if new.Key.String() != old.Key.String() { - hasChanged = true - break - } - } - if !hasChanged { - return - } + if pending { + logger.Tracef("Attempt to schedule cron job %q which is already pending", job.Key) + 
return nil } - hashRing := hashring.New(slices.Map(controllers, func(c parentdal.Controller) string { return c.Key.String() })) - s.hashRingState.Store(&hashRingState{ - hashRing: hashRing, - controllers: controllers, - idx: controllerIdx, - }) - - s.events.Publish(updatedHashRingEvent{}) -} - -// isResponsibleForJob indicates whether a this service should be responsible for attempting jobs, -// or if enough other controllers will handle it. This allows us to spread the job load across controllers. -func (s *Service) isResponsibleForJob(job model.CronJob, state *state) bool { - if state.isJobTooNewForHashRing(job) { - return true + pattern, err := cron.Parse(job.Schedule) + if err != nil { + return fmt.Errorf("failed to parse cron schedule %q: %w", job.Schedule, err) } - hashringState := s.hashRingState.Load() - if hashringState == nil { - return true + originTime := job.StartTime + if t, ok := job.LastExecution.Get(); ok { + originTime = t } - initialKey, ok := hashringState.hashRing.GetNode(job.Key.String()) - if !ok { - return true + nextAttemptForJob, err := cron.NextAfter(pattern, originTime, false) + if err != nil { + return fmt.Errorf("failed to calculate next execution for cron job %q with schedule %q: %w", job.Key, job.Schedule, err) + } + if nextAttemptForJob.Before(now) { + nextAttemptForJob = now + } + + logger.Tracef("Scheduling cron job %q async_call execution at %s", job.Key, nextAttemptForJob) + _, err = tx.db.CreateAsyncCall(ctx, cronsql.CreateAsyncCallParams{ + Verb: schema.RefKey{Module: job.Verb.Module, Name: job.Verb.Name}, + Origin: fmt.Sprintf("cron:%s", job.Key), + Request: []byte(`{}`), + RemainingAttempts: 0, + Backoff: 0, + MaxBackoff: 0, + CronJobKey: optional.Some(job.Key), + ScheduledAt: nextAttemptForJob, + }) + if err != nil { + return fmt.Errorf("failed to create async call for job %q: %w", job.Key, err) } - - initialIdx := -1 - for idx, controller := range hashringState.controllers { - if controller.Key.String() == initialKey { - 
initialIdx = idx - break - } + futureAttemptForJob, err := cron.NextAfter(pattern, nextAttemptForJob, false) + if err != nil { + return fmt.Errorf("failed to calculate future execution for cron job %q with schedule %q: %w", job.Key, job.Schedule, err) } - if initialIdx == -1 { - return true + logger.Tracef("Updating cron job %q with last attempt at %s and next attempt at %s", job.Key, nextAttemptForJob, futureAttemptForJob) + err = tx.db.UpdateCronJobExecution(ctx, nextAttemptForJob, futureAttemptForJob, job.Key) + if err != nil { + return fmt.Errorf("failed to update cron job %q: %w", job.Key, err) } - if initialIdx+controllersPerJob > len(hashringState.controllers) { - // wraps around - return hashringState.idx >= initialIdx || hashringState.idx < (initialIdx+controllersPerJob)-len(hashringState.controllers) - } - return hashringState.idx >= initialIdx && hashringState.idx < initialIdx+controllersPerJob + return nil } diff --git a/backend/controller/cronjobs/cronjobs_integration_test.go b/backend/controller/cronjobs/cronjobs_integration_test.go deleted file mode 100644 index 5cbc009774..0000000000 --- a/backend/controller/cronjobs/cronjobs_integration_test.go +++ /dev/null @@ -1,65 +0,0 @@ -//go:build integration - -package cronjobs - -import ( - "context" - "os" - "path/filepath" - "testing" - "time" - - "github.com/alecthomas/assert/v2" - "github.com/alecthomas/types/optional" - "github.com/benbjohnson/clock" - - db "github.com/TBD54566975/ftl/backend/controller/cronjobs/dal" - parentdb "github.com/TBD54566975/ftl/backend/controller/dal" - "github.com/TBD54566975/ftl/backend/controller/sql/sqltest" - in "github.com/TBD54566975/ftl/integration" - "github.com/TBD54566975/ftl/internal/log" -) - -func TestServiceWithRealDal(t *testing.T) { - t.Parallel() - ctx := log.ContextWithNewDefaultLogger(context.Background()) - ctx, cancel := context.WithCancel(ctx) - t.Cleanup(cancel) - - conn := sqltest.OpenForTesting(ctx, t) - dal := db.New(conn) - parentDAL, err := 
parentdb.New(ctx, conn, optional.None[string]()) - assert.NoError(t, err) - - // Using a real clock because real db queries use db clock - // delay until we are on an odd second - clk := clock.New() - if clk.Now().Second()%2 == 0 { - time.Sleep(time.Second - time.Duration(clk.Now().Nanosecond())*time.Nanosecond) - } else { - time.Sleep(2*time.Second - time.Duration(clk.Now().Nanosecond())*time.Nanosecond) - } - - testServiceWithDal(ctx, t, dal, parentDAL, clk) -} - -func TestCron(t *testing.T) { - dir := t.TempDir() - // Due to some MacOS magic, /tmp differs between this test code and the - // executing module, so we need to pass the file path as an environment - // variable. - tmpFile := filepath.Join(dir, "cron.txt") - t.Setenv("DEST_FILE", tmpFile) - - t.Cleanup(func() { _ = os.Remove(tmpFile) }) - - in.Run(t, - in.WithLanguages("go", "java"), - in.CopyModule("cron"), - in.Deploy("cron"), - func(t testing.TB, ic in.TestContext) { - _, err := os.Stat(tmpFile) - assert.NoError(t, err) - }, - ) -} diff --git a/backend/controller/cronjobs/cronjobs_test.go b/backend/controller/cronjobs/cronjobs_test.go deleted file mode 100644 index 715476c932..0000000000 --- a/backend/controller/cronjobs/cronjobs_test.go +++ /dev/null @@ -1,128 +0,0 @@ -package cronjobs - -import ( - "context" - "sync" - "testing" - "time" - - "connectrpc.com/connect" - "github.com/alecthomas/assert/v2" - "github.com/alecthomas/types/optional" - "github.com/benbjohnson/clock" - xslices "golang.org/x/exp/slices" - - db "github.com/TBD54566975/ftl/backend/controller/dal" - "github.com/TBD54566975/ftl/backend/controller/sql/sqltest" - ftlv1 "github.com/TBD54566975/ftl/backend/protos/xyz/block/ftl/v1" - "github.com/TBD54566975/ftl/backend/schema" - "github.com/TBD54566975/ftl/internal/log" - "github.com/TBD54566975/ftl/internal/model" - "github.com/TBD54566975/ftl/internal/slices" -) - -func TestServiceWithMockDal(t *testing.T) { - t.Skip("TODO: sometimes blocks on CI. 
Discussion in issue #1368") - t.Parallel() - ctx := log.ContextWithNewDefaultLogger(context.Background()) - ctx, cancel := context.WithCancel(ctx) - t.Cleanup(cancel) - - clk := clock.NewMock() - clk.Add(time.Second) // half way between cron job executions - - mockDal := &mockDAL{ - clock: clk, - lock: sync.Mutex{}, - attemptCountMap: map[string]int{}, - } - conn := sqltest.OpenForTesting(ctx, t) - parentDAL, err := db.New(ctx, conn, optional.None[string]()) - assert.NoError(t, err) - - testServiceWithDal(ctx, t, mockDal, parentDAL, clk) -} - -func TestHashRing(t *testing.T) { - if testing.Short() { - t.SkipNow() - } - // This test uses multiple mock clocks to progress time for each controller individually - // This allows us to compare attempts for each cron job and know which controller attempted it - t.Parallel() - ctx := log.ContextWithNewDefaultLogger(context.Background()) - ctx, cancel := context.WithCancel(ctx) - t.Cleanup(cancel) - - mockDal := &mockDAL{ - clock: clock.NewMock(), - lock: sync.Mutex{}, - attemptCountMap: map[string]int{}, - } - moduleName := "initial" - jobsToCreate := newJobs(t, moduleName, "*/10 * * * * * *", mockDal.clock, 100) - - deploymentKey, err := mockDal.CreateDeployment(ctx, "go", &schema.Module{ - Name: moduleName, - }, []db.DeploymentArtefact{}, []db.IngressRoutingEntry{}, jobsToCreate) - assert.NoError(t, err) - - err = mockDal.ReplaceDeployment(ctx, deploymentKey, 1) - assert.NoError(t, err) - - controllers := newControllers(ctx, 20, mockDal, func() clock.Clock { return clock.NewMock() }, func(ctx context.Context, r *connect.Request[ftlv1.CallRequest], o optional.Option[model.RequestKey], p optional.Option[model.RequestKey], s string) (*connect.Response[ftlv1.CallResponse], error) { - return &connect.Response[ftlv1.CallResponse]{}, nil - }) - - // This should give time for each controller to start watching its own mock clock - // If we don;t wait here, we might hit a race condition outlined in issue #1368 - 
time.Sleep(time.Millisecond * 100) - - // progress time for each controller one at a time, noting which verbs got attempted each time - // to build a map of verb to controller keys - controllersForVerbs := map[string][]model.ControllerKey{} - for _, c := range controllers { - mockDal.lock.Lock() - beforeAttemptCount := map[string]int{} - for k, v := range mockDal.attemptCountMap { - beforeAttemptCount[k] = v - } - mockDal.lock.Unlock() - - c.mockClock.Add(time.Second * 15) - time.Sleep(time.Millisecond * 100) - - mockDal.lock.Lock() - for k, v := range mockDal.attemptCountMap { - if beforeAttemptCount[k] == v { - continue - } - controllersForVerbs[k] = append(controllersForVerbs[k], c.key) - } - mockDal.lock.Unlock() - } - - // Check if each job has the same key list - // Theoretically this is is possible for all jobs to have the same assigned controllers, but with 100 jobs and 20 controllers, this is unlikely - keys := []string{} - hasFoundNonMatchingKeys := false - for v, k := range controllersForVerbs { - assert.Equal(t, len(k), 2, "expected verb %s to be attempted by 2 controllers", v) - - kStrs := slices.Map(k, func(k model.ControllerKey) string { return k.String() }) - xslices.Sort(kStrs) - if len(keys) == 0 { - keys = kStrs - continue - } - - if hasFoundNonMatchingKeys == false { - for keyIdx, keyStr := range kStrs { - if keys[keyIdx] != keyStr { - hasFoundNonMatchingKeys = true - } - } - } - } - assert.True(t, hasFoundNonMatchingKeys, "expected at least one verb to have different controllers assigned") -} diff --git a/backend/controller/cronjobs/cronjobs_utils_test.go b/backend/controller/cronjobs/cronjobs_utils_test.go deleted file mode 100644 index 5e9d64bd59..0000000000 --- a/backend/controller/cronjobs/cronjobs_utils_test.go +++ /dev/null @@ -1,263 +0,0 @@ -package cronjobs - -import ( - "context" - "fmt" - "strconv" - "sync" - "testing" - "time" - - "connectrpc.com/connect" - "github.com/alecthomas/assert/v2" - "github.com/alecthomas/types/optional" - 
"github.com/benbjohnson/clock" - "github.com/jpillora/backoff" - - cronjobsdb "github.com/TBD54566975/ftl/backend/controller/cronjobs/dal" - parentdb "github.com/TBD54566975/ftl/backend/controller/dal" - "github.com/TBD54566975/ftl/backend/controller/scheduledtask" - ftlv1 "github.com/TBD54566975/ftl/backend/protos/xyz/block/ftl/v1" - "github.com/TBD54566975/ftl/backend/schema" - "github.com/TBD54566975/ftl/internal/cron" - "github.com/TBD54566975/ftl/internal/model" - "github.com/TBD54566975/ftl/internal/slices" -) - -type ParentDAL interface { - CreateDeployment(ctx context.Context, language string, moduleSchema *schema.Module, artefacts []parentdb.DeploymentArtefact, ingressRoutes []parentdb.IngressRoutingEntry, cronJobs []model.CronJob) (key model.DeploymentKey, err error) - ReplaceDeployment(ctx context.Context, newDeploymentKey model.DeploymentKey, minReplicas int) (err error) -} - -type mockDAL struct { - lock sync.Mutex - clock clock.Clock - jobs []model.CronJob - attemptCountMap map[string]int -} - -var _ ParentDAL = &mockDAL{} -var _ DAL = &mockDAL{} - -func (d *mockDAL) CreateDeployment(ctx context.Context, language string, moduleSchema *schema.Module, artefacts []parentdb.DeploymentArtefact, ingressRoutes []parentdb.IngressRoutingEntry, cronJobs []model.CronJob) (key model.DeploymentKey, err error) { - deploymentKey := model.NewDeploymentKey(moduleSchema.Name) - d.jobs = []model.CronJob{} - for _, job := range cronJobs { - job.DeploymentKey = deploymentKey - d.jobs = append(d.jobs, job) - } - return deploymentKey, nil -} - -func (d *mockDAL) ReplaceDeployment(ctx context.Context, newDeploymentKey model.DeploymentKey, minReplicas int) (err error) { - return nil -} - -func (d *mockDAL) GetCronJobs(ctx context.Context) ([]model.CronJob, error) { - d.lock.Lock() - defer d.lock.Unlock() - - return d.jobs, nil -} - -func (d *mockDAL) indexForJob(job model.CronJob) (int, error) { - for i, j := range d.jobs { - if j.Key.String() == job.Key.String() { - return 
i, nil - } - } - return -1, fmt.Errorf("job not found") -} - -func (d *mockDAL) StartCronJobs(ctx context.Context, jobs []model.CronJob) (attemptedJobs []cronjobsdb.AttemptedCronJob, err error) { - d.lock.Lock() - defer d.lock.Unlock() - - attemptedJobs = []cronjobsdb.AttemptedCronJob{} - now := d.clock.Now() - - for _, inputJob := range jobs { - i, err := d.indexForJob(inputJob) - if err != nil { - return nil, err - } - job := d.jobs[i] - if !job.NextExecution.After(now) && job.State == model.CronJobStateIdle { - job.State = model.CronJobStateExecuting - job.StartTime = d.clock.Now() - d.jobs[i] = job - attemptedJobs = append(attemptedJobs, cronjobsdb.AttemptedCronJob{ - CronJob: job, - DidStartExecution: true, - HasMinReplicas: true, - }) - } else { - attemptedJobs = append(attemptedJobs, cronjobsdb.AttemptedCronJob{ - CronJob: job, - DidStartExecution: false, - HasMinReplicas: true, - }) - } - d.attemptCountMap[job.Key.String()]++ - } - return attemptedJobs, nil -} - -func (d *mockDAL) EndCronJob(ctx context.Context, job model.CronJob, next time.Time) (model.CronJob, error) { - d.lock.Lock() - defer d.lock.Unlock() - - i, err := d.indexForJob(job) - if err != nil { - return model.CronJob{}, err - } - internalJob := d.jobs[i] - if internalJob.State != model.CronJobStateExecuting { - return model.CronJob{}, fmt.Errorf("job can not be stopped, it isnt running") - } - if internalJob.StartTime != job.StartTime { - return model.CronJob{}, fmt.Errorf("job can not be stopped, start time does not match") - } - internalJob.State = model.CronJobStateIdle - internalJob.NextExecution = next - d.jobs[i] = internalJob - return internalJob, nil -} - -func (d *mockDAL) GetStaleCronJobs(ctx context.Context, duration time.Duration) ([]model.CronJob, error) { - d.lock.Lock() - defer d.lock.Unlock() - - return slices.Filter(d.jobs, func(job model.CronJob) bool { - return d.clock.Now().After(job.StartTime.Add(duration)) - }), nil -} - -type mockScheduler struct { -} - -func (s 
*mockScheduler) Singleton(retry backoff.Backoff, job scheduledtask.Job) { - // do nothing -} - -func (s *mockScheduler) Parallel(retry backoff.Backoff, job scheduledtask.Job) { - // do nothing -} - -type controller struct { - key model.ControllerKey - dal DAL - clock clock.Clock - mockClock *clock.Mock // only set when clock is a mock - cronJobs *Service -} - -func newJobs(t *testing.T, moduleName string, cronPattern string, clock clock.Clock, count int) []model.CronJob { - t.Helper() - newJobs := []model.CronJob{} - for i := range count { - now := clock.Now() - pattern, err := cron.Parse(cronPattern) - assert.NoError(t, err) - next, err := cron.NextAfter(pattern, now, false) - assert.NoError(t, err) - newJobs = append(newJobs, model.CronJob{ - Key: model.NewCronJobKey(moduleName, fmt.Sprintf("verb%d", i)), - Verb: schema.Ref{Module: moduleName, Name: fmt.Sprintf("verb%d", i)}, - Schedule: pattern.String(), - StartTime: now, - NextExecution: next, - State: model.CronJobStateIdle, - }) - } - return newJobs -} - -func newControllers(ctx context.Context, count int, dal DAL, clockFactory func() clock.Clock, call ExecuteCallFunc) []*controller { - controllers := []*controller{} - for i := range count { - key := model.NewControllerKey("localhost", strconv.Itoa(8080+i)) - clk := clockFactory() - controller := &controller{ - key: key, - dal: dal, - clock: clk, - cronJobs: NewForTesting(ctx, - key, "test.com", - Config{Timeout: time.Minute * 5}, - dal, - &mockScheduler{}, - call, - clk), - } - if mockClock, ok := clk.(*clock.Mock); ok { - controller.mockClock = mockClock - } - controllers = append(controllers, controller) - } - - time.Sleep(time.Millisecond * 100) - - for _, c := range controllers { - s := c.cronJobs - go func() { - s.UpdatedControllerList(ctx, slices.Map(controllers, func(ctrl *controller) parentdb.Controller { - return parentdb.Controller{ - Key: ctrl.key, - } - })) - _, _ = s.syncJobs(ctx) //nolint:errcheck - }() - } - - time.Sleep(time.Millisecond * 
100) - - return controllers -} - -// should be called when clk is half way between cron job executions (ie on an odd second) -func testServiceWithDal(ctx context.Context, t *testing.T, dal DAL, parentDAL ParentDAL, clk clock.Clock) { - t.Helper() - - verbCallCount := map[string]int{} - verbCallCountLock := sync.Mutex{} - - moduleName := "initial" - jobsToCreate := newJobs(t, moduleName, "*/2 * * * * * *", clk, 20) - - deploymentKey, err := parentDAL.CreateDeployment(ctx, "go", &schema.Module{ - Name: moduleName, - }, []parentdb.DeploymentArtefact{}, []parentdb.IngressRoutingEntry{}, jobsToCreate) - assert.NoError(t, err) - - err = parentDAL.ReplaceDeployment(ctx, deploymentKey, 1) - assert.NoError(t, err) - - _ = newControllers(ctx, 5, dal, func() clock.Clock { return clk }, func(ctx context.Context, r *connect.Request[ftlv1.CallRequest], o optional.Option[model.RequestKey], p optional.Option[model.RequestKey], s string) (*connect.Response[ftlv1.CallResponse], error) { - verbRef := schema.RefFromProto(r.Msg.Verb) - - verbCallCountLock.Lock() - verbCallCount[verbRef.Name]++ - verbCallCountLock.Unlock() - - return &connect.Response[ftlv1.CallResponse]{}, nil - }) - - if mockClock, ok := clk.(*clock.Mock); ok { - // We don't need to wait in real-time - time.Sleep(time.Millisecond * 100) - for range 3 { - mockClock.Add(time.Second * 2) - time.Sleep(time.Millisecond * 100) - } - } else { - time.Sleep(time.Second * 2 * 3) - } - - verbCallCountLock.Lock() - for _, j := range jobsToCreate { - count := verbCallCount[j.Verb.Name] - assert.Equal(t, count, 3, "expected verb %s to be called 3 times", j.Verb.Name) - } - verbCallCountLock.Unlock() -} diff --git a/backend/controller/cronjobs/dal.go b/backend/controller/cronjobs/dal.go new file mode 100644 index 0000000000..c31a2b7783 --- /dev/null +++ b/backend/controller/cronjobs/dal.go @@ -0,0 +1,108 @@ +package cronjobs + +import ( + "context" + "fmt" + "time" + + "github.com/TBD54566975/ftl/backend/controller/cronjobs/sql" + 
dalerrs "github.com/TBD54566975/ftl/backend/dal" + "github.com/TBD54566975/ftl/backend/schema" + "github.com/TBD54566975/ftl/internal/model" + "github.com/TBD54566975/ftl/internal/slices" +) + +type DAL struct { + db sql.DBI +} + +func newDAL(conn sql.ConnI) *DAL { + return &DAL{db: sql.NewDB(conn)} +} + +type Tx struct { + *DAL +} + +func (d *DAL) Begin(ctx context.Context) (*Tx, error) { + tx, err := d.db.Begin(ctx) + if err != nil { + return nil, fmt.Errorf("failed to begin transaction: %w", dalerrs.TranslatePGError(err)) + } + return &Tx{DAL: &DAL{db: tx}}, nil +} + +func (t *Tx) CommitOrRollback(ctx context.Context, err *error) { + tx, ok := t.db.(*sql.Tx) + if !ok { + panic("inconceivable") + } + tx.CommitOrRollback(ctx, err) +} + +func (t *Tx) Commit(ctx context.Context) error { + tx, ok := t.db.(*sql.Tx) + if !ok { + panic("inconceivable") + } + err := tx.Commit(ctx) + if err != nil { + return fmt.Errorf("failed to commit transaction: %w", dalerrs.TranslatePGError(err)) + } + return nil +} + +func (t *Tx) Rollback(ctx context.Context) error { + tx, ok := t.db.(*sql.Tx) + if !ok { + panic("inconceivable") + } + err := tx.Rollback(ctx) + if err != nil { + return fmt.Errorf("failed to rollback transaction: %w", dalerrs.TranslatePGError(err)) + } + return nil +} + +func cronJobFromGetByKeyRow(row sql.GetCronJobByKeyRow) model.CronJob { + return model.CronJob{ + Key: row.Key, + DeploymentKey: row.DeploymentKey, + Verb: schema.Ref{Module: row.Module, Name: row.Verb}, + Schedule: row.Schedule, + StartTime: row.StartTime, + NextExecution: row.NextExecution, + LastExecution: row.LastExecution, + } +} + +func cronJobFromGetUnscheduledRow(row sql.GetUnscheduledCronJobsRow) model.CronJob { + return model.CronJob{ + Key: row.Key, + DeploymentKey: row.DeploymentKey, + Verb: schema.Ref{Module: row.Module, Name: row.Verb}, + Schedule: row.Schedule, + StartTime: row.StartTime, + NextExecution: row.NextExecution, + LastExecution: row.LastExecution, + } +} + +// 
GetUnscheduledCronJobs returns all cron jobs with start_time before provided startTime for +// deployments with min replicas > 0 with no async calls after last_execution +func (d *DAL) GetUnscheduledCronJobs(ctx context.Context, startTime time.Time) ([]model.CronJob, error) { + rows, err := d.db.GetUnscheduledCronJobs(ctx, startTime) + if err != nil { + return nil, fmt.Errorf("failed to get cron jobs: %w", dalerrs.TranslatePGError(err)) + } + return slices.Map(rows, cronJobFromGetUnscheduledRow), nil +} + +// GetCronJobByKey returns a cron job by its key +func (d *DAL) GetCronJobByKey(ctx context.Context, key model.CronJobKey) (model.CronJob, error) { + row, err := d.db.GetCronJobByKey(ctx, key) + if err != nil { + return model.CronJob{}, fmt.Errorf("failed to get cron job %q: %w", key, dalerrs.TranslatePGError(err)) + } + return cronJobFromGetByKeyRow(row), nil +} diff --git a/backend/controller/cronjobs/dal/dal.go b/backend/controller/cronjobs/dal/dal.go deleted file mode 100644 index 9499717fe6..0000000000 --- a/backend/controller/cronjobs/dal/dal.go +++ /dev/null @@ -1,101 +0,0 @@ -// Package dal provides a data abstraction layer for cron jobs -package dal - -import ( - "context" - "fmt" - "time" - - "github.com/TBD54566975/ftl/backend/controller/cronjobs/sql" - "github.com/TBD54566975/ftl/backend/controller/sql/sqltypes" - dalerrs "github.com/TBD54566975/ftl/backend/dal" - "github.com/TBD54566975/ftl/backend/schema" - "github.com/TBD54566975/ftl/internal/model" - "github.com/TBD54566975/ftl/internal/slices" -) - -type DAL struct { - db sql.DBI -} - -func New(conn sql.ConnI) *DAL { - return &DAL{db: sql.NewDB(conn)} -} - -func cronJobFromRow(row sql.GetCronJobsRow) model.CronJob { - return model.CronJob{ - Key: row.Key, - DeploymentKey: row.DeploymentKey, - Verb: schema.Ref{Module: row.Module, Name: row.Verb}, - Schedule: row.Schedule, - StartTime: row.StartTime, - NextExecution: row.NextExecution, - State: row.State, - } -} - -// GetCronJobs returns all cron 
jobs for deployments with min replicas > 0 -func (d *DAL) GetCronJobs(ctx context.Context) ([]model.CronJob, error) { - rows, err := d.db.GetCronJobs(ctx) - if err != nil { - return nil, fmt.Errorf("failed to get cron jobs: %w", dalerrs.TranslatePGError(err)) - } - return slices.Map(rows, cronJobFromRow), nil -} - -type AttemptedCronJob struct { - DidStartExecution bool - HasMinReplicas bool - model.CronJob -} - -// StartCronJobs returns a full list of results so that the caller can update their list of jobs whether or not they successfully updated the row -func (d *DAL) StartCronJobs(ctx context.Context, jobs []model.CronJob) (attemptedJobs []AttemptedCronJob, err error) { - if len(jobs) == 0 { - return nil, nil - } - rows, err := d.db.StartCronJobs(ctx, slices.Map(jobs, func(job model.CronJob) string { return job.Key.String() })) - if err != nil { - return nil, fmt.Errorf("failed to start cron jobs: %w", dalerrs.TranslatePGError(err)) - } - - attemptedJobs = []AttemptedCronJob{} - for _, row := range rows { - job := AttemptedCronJob{ - CronJob: model.CronJob{ - Key: row.Key, - DeploymentKey: row.DeploymentKey, - Verb: schema.Ref{Module: row.Module, Name: row.Verb}, - Schedule: row.Schedule, - StartTime: row.StartTime, - NextExecution: row.NextExecution, - State: row.State, - }, - DidStartExecution: row.Updated, - HasMinReplicas: row.HasMinReplicas, - } - attemptedJobs = append(attemptedJobs, job) - } - return attemptedJobs, nil -} - -// EndCronJob sets the status from executing to idle and updates the next execution time -// Can be called on the successful completion of a job, or if the job failed to execute (error or timeout) -func (d *DAL) EndCronJob(ctx context.Context, job model.CronJob, next time.Time) (model.CronJob, error) { - row, err := d.db.EndCronJob(ctx, next, job.Key, job.StartTime) - if err != nil { - return model.CronJob{}, fmt.Errorf("failed to end cron job: %w", dalerrs.TranslatePGError(err)) - } - return cronJobFromRow(sql.GetCronJobsRow(row)), 
nil -} - -// GetStaleCronJobs returns a list of cron jobs that have been executing longer than the duration -func (d *DAL) GetStaleCronJobs(ctx context.Context, duration time.Duration) ([]model.CronJob, error) { - rows, err := d.db.GetStaleCronJobs(ctx, sqltypes.Duration(duration)) - if err != nil { - return nil, fmt.Errorf("failed to get stale cron jobs: %w", dalerrs.TranslatePGError(err)) - } - return slices.Map(rows, func(row sql.GetStaleCronJobsRow) model.CronJob { - return cronJobFromRow(sql.GetCronJobsRow(row)) - }), nil -} diff --git a/backend/controller/cronjobs/sql/conn.go b/backend/controller/cronjobs/sql/conn.go index 065487cefa..62699e9ef6 100644 --- a/backend/controller/cronjobs/sql/conn.go +++ b/backend/controller/cronjobs/sql/conn.go @@ -1,12 +1,21 @@ package sql +import ( + "context" + "database/sql" + "errors" + "fmt" +) + type DBI interface { Querier Conn() ConnI + Begin(ctx context.Context) (*Tx, error) } type ConnI interface { DBTX + Begin() (*sql.Tx, error) } type DB struct { @@ -19,3 +28,67 @@ func NewDB(conn ConnI) *DB { } func (d *DB) Conn() ConnI { return d.conn } + +func (d *DB) Begin(ctx context.Context) (*Tx, error) { + tx, err := d.conn.Begin() + if err != nil { + return nil, fmt.Errorf("beginning transaction: %w", err) + } + return &Tx{tx: tx, Queries: New(tx)}, nil +} + +type noopSubConn struct { + DBTX +} + +func (noopSubConn) Begin() (*sql.Tx, error) { + return nil, errors.New("sql: not implemented") +} + +type Tx struct { + tx *sql.Tx + *Queries +} + +func (t *Tx) Conn() ConnI { return noopSubConn{t.tx} } + +func (t *Tx) Tx() *sql.Tx { return t.tx } + +func (t *Tx) Begin(ctx context.Context) (*Tx, error) { + return nil, fmt.Errorf("cannot nest transactions") +} + +func (t *Tx) Commit(ctx context.Context) error { + err := t.tx.Commit() + if err != nil { + return fmt.Errorf("committing transaction: %w", err) + } + + return nil +} + +func (t *Tx) Rollback(ctx context.Context) error { + err := t.tx.Rollback() + if err != nil { + return 
fmt.Errorf("rolling back transaction: %w", err) + } + + return nil +} + +// CommitOrRollback can be used in a defer statement to commit or rollback a +// transaction depending on whether the enclosing function returned an error. +// +// func myFunc() (err error) { +// tx, err := db.Begin(ctx) +// if err != nil { return err } +// defer tx.CommitOrRollback(ctx, &err) +// ... +// } +func (t *Tx) CommitOrRollback(ctx context.Context, err *error) { + if *err != nil { + *err = errors.Join(*err, t.Rollback(ctx)) + } else { + *err = t.Commit(ctx) + } +} diff --git a/backend/controller/cronjobs/sql/models.go b/backend/controller/cronjobs/sql/models.go index 0c59dbef5c..883d140976 100644 --- a/backend/controller/cronjobs/sql/models.go +++ b/backend/controller/cronjobs/sql/models.go @@ -388,6 +388,7 @@ type AsyncCall struct { Catching bool ParentRequestKey optional.Option[string] TraceContext pqtype.NullRawMessage + CronJobKey optional.Option[model.CronJobKey] } type Controller struct { @@ -407,8 +408,8 @@ type CronJob struct { Schedule string StartTime time.Time NextExecution time.Time - State model.CronJobState ModuleName string + LastExecution optional.Option[time.Time] } type Deployment struct { diff --git a/backend/controller/cronjobs/sql/querier.go b/backend/controller/cronjobs/sql/querier.go index 2bfa010982..19868fd5b9 100644 --- a/backend/controller/cronjobs/sql/querier.go +++ b/backend/controller/cronjobs/sql/querier.go @@ -6,18 +6,127 @@ package sql import ( "context" + "encoding/json" "time" + "github.com/TBD54566975/ftl/backend/controller/leases" "github.com/TBD54566975/ftl/backend/controller/sql/sqltypes" + "github.com/TBD54566975/ftl/backend/schema" "github.com/TBD54566975/ftl/internal/model" + "github.com/alecthomas/types/optional" + "github.com/google/uuid" + "github.com/sqlc-dev/pqtype" ) type Querier interface { + // Reserve a pending async call for execution, returning the associated lease + // reservation key and accompanying metadata. 
+ AcquireAsyncCall(ctx context.Context, ttl sqltypes.Duration) (AcquireAsyncCallRow, error) + AssociateArtefactWithDeployment(ctx context.Context, arg AssociateArtefactWithDeploymentParams) error + AsyncCallQueueDepth(ctx context.Context) (int64, error) + BeginConsumingTopicEvent(ctx context.Context, subscription model.SubscriptionKey, event model.TopicEventKey) error + CompleteEventForSubscription(ctx context.Context, name string, module string) error + // Create a new artefact and return the artefact ID. + CreateArtefact(ctx context.Context, digest []byte, content []byte) (int64, error) + CreateAsyncCall(ctx context.Context, arg CreateAsyncCallParams) (int64, error) CreateCronJob(ctx context.Context, arg CreateCronJobParams) error - EndCronJob(ctx context.Context, nextExecution time.Time, key model.CronJobKey, startTime time.Time) (EndCronJobRow, error) - GetCronJobs(ctx context.Context) ([]GetCronJobsRow, error) - GetStaleCronJobs(ctx context.Context, dollar_1 sqltypes.Duration) ([]GetStaleCronJobsRow, error) - StartCronJobs(ctx context.Context, keys []string) ([]StartCronJobsRow, error) + CreateDeployment(ctx context.Context, moduleName string, schema []byte, key model.DeploymentKey) error + CreateIngressRoute(ctx context.Context, arg CreateIngressRouteParams) error + CreateOnlyEncryptionKey(ctx context.Context, key []byte) error + CreateRequest(ctx context.Context, origin Origin, key model.RequestKey, sourceAddr string) error + DeleteOldTimelineEvents(ctx context.Context, timeout sqltypes.Duration, type_ EventType) (int64, error) + DeleteSubscribers(ctx context.Context, deployment model.DeploymentKey) ([]model.SubscriberKey, error) + DeleteSubscriptions(ctx context.Context, deployment model.DeploymentKey) ([]model.SubscriptionKey, error) + DeregisterRunner(ctx context.Context, key model.RunnerKey) (int64, error) + ExpireLeases(ctx context.Context) (int64, error) + ExpireRunnerReservations(ctx context.Context) (int64, error) + FailAsyncCall(ctx context.Context, 
error string, iD int64) (bool, error) + FailAsyncCallWithRetry(ctx context.Context, arg FailAsyncCallWithRetryParams) (bool, error) + FailFSMInstance(ctx context.Context, fsm schema.RefKey, key string) (bool, error) + // Mark an FSM transition as completed, updating the current state and clearing the async call ID. + FinishFSMTransition(ctx context.Context, fsm schema.RefKey, key string) (bool, error) + GetActiveControllers(ctx context.Context) ([]Controller, error) + GetActiveDeploymentSchemas(ctx context.Context) ([]GetActiveDeploymentSchemasRow, error) + GetActiveDeployments(ctx context.Context) ([]GetActiveDeploymentsRow, error) + GetActiveIngressRoutes(ctx context.Context) ([]GetActiveIngressRoutesRow, error) + GetActiveRunners(ctx context.Context) ([]GetActiveRunnersRow, error) + GetArtefactContentRange(ctx context.Context, start int32, count int32, iD int64) ([]byte, error) + // Return the digests that exist in the database. + GetArtefactDigests(ctx context.Context, digests [][]byte) ([]GetArtefactDigestsRow, error) + GetCronJobByKey(ctx context.Context, key model.CronJobKey) (GetCronJobByKeyRow, error) + GetDeployment(ctx context.Context, key model.DeploymentKey) (GetDeploymentRow, error) + // Get all artefacts matching the given digests. + GetDeploymentArtefacts(ctx context.Context, deploymentID int64) ([]GetDeploymentArtefactsRow, error) + GetDeploymentsByID(ctx context.Context, ids []int64) ([]Deployment, error) + // Get deployments that have a mismatch between the number of assigned and required replicas. + GetDeploymentsNeedingReconciliation(ctx context.Context) ([]GetDeploymentsNeedingReconciliationRow, error) + // Get all deployments that have artefacts matching the given digests. 
+ GetDeploymentsWithArtefacts(ctx context.Context, digests [][]byte, schema []byte, count int64) ([]GetDeploymentsWithArtefactsRow, error) + GetDeploymentsWithMinReplicas(ctx context.Context) ([]GetDeploymentsWithMinReplicasRow, error) + GetExistingDeploymentForModule(ctx context.Context, name string) (GetExistingDeploymentForModuleRow, error) + GetFSMInstance(ctx context.Context, fsm schema.RefKey, key string) (FsmInstance, error) + GetIdleRunners(ctx context.Context, labels json.RawMessage, limit int64) ([]Runner, error) + // Get the runner endpoints corresponding to the given ingress route. + GetIngressRoutes(ctx context.Context, method string) ([]GetIngressRoutesRow, error) + GetLeaseInfo(ctx context.Context, key leases.Key) (GetLeaseInfoRow, error) + GetModulesByID(ctx context.Context, ids []int64) ([]Module, error) + GetNextEventForSubscription(ctx context.Context, consumptionDelay sqltypes.Duration, topic model.TopicKey, cursor optional.Option[model.TopicEventKey]) (GetNextEventForSubscriptionRow, error) + GetOnlyEncryptionKey(ctx context.Context) ([]byte, error) + GetProcessList(ctx context.Context) ([]GetProcessListRow, error) + GetRandomSubscriber(ctx context.Context, key model.SubscriptionKey) (GetRandomSubscriberRow, error) + // Retrieve routing information for a runner. 
+ GetRouteForRunner(ctx context.Context, key model.RunnerKey) (GetRouteForRunnerRow, error) + GetRoutingTable(ctx context.Context, modules []string) ([]GetRoutingTableRow, error) + GetRunner(ctx context.Context, key model.RunnerKey) (GetRunnerRow, error) + GetRunnerState(ctx context.Context, key model.RunnerKey) (RunnerState, error) + GetRunnersForDeployment(ctx context.Context, key model.DeploymentKey) ([]GetRunnersForDeploymentRow, error) + GetSchemaForDeployment(ctx context.Context, key model.DeploymentKey) (*schema.Module, error) + GetSubscription(ctx context.Context, column1 string, column2 string) (TopicSubscription, error) + // Results may not be ready to be scheduled yet due to event consumption delay + // Sorting ensures that brand new events (that may not be ready for consumption) + // don't prevent older events from being consumed + GetSubscriptionsNeedingUpdate(ctx context.Context) ([]GetSubscriptionsNeedingUpdateRow, error) + GetTopic(ctx context.Context, dollar_1 int64) (Topic, error) + GetTopicEvent(ctx context.Context, dollar_1 int64) (TopicEvent, error) + GetUnscheduledCronJobs(ctx context.Context, startTime time.Time) ([]GetUnscheduledCronJobsRow, error) + InsertSubscriber(ctx context.Context, arg InsertSubscriberParams) error + InsertTimelineCallEvent(ctx context.Context, arg InsertTimelineCallEventParams) error + InsertTimelineDeploymentCreatedEvent(ctx context.Context, arg InsertTimelineDeploymentCreatedEventParams) error + InsertTimelineDeploymentUpdatedEvent(ctx context.Context, arg InsertTimelineDeploymentUpdatedEventParams) error + InsertTimelineEvent(ctx context.Context, arg InsertTimelineEventParams) error + InsertTimelineLogEvent(ctx context.Context, arg InsertTimelineLogEventParams) error + IsCronJobPending(ctx context.Context, key model.CronJobKey, startTime time.Time) (bool, error) + // Mark any controller entries that haven't been updated recently as dead. 
+ KillStaleControllers(ctx context.Context, timeout sqltypes.Duration) (int64, error) + KillStaleRunners(ctx context.Context, timeout sqltypes.Duration) (int64, error) + LoadAsyncCall(ctx context.Context, id int64) (AsyncCall, error) + NewLease(ctx context.Context, key leases.Key, ttl sqltypes.Duration, metadata pqtype.NullRawMessage) (uuid.UUID, error) + PopNextFSMEvent(ctx context.Context, fsm schema.RefKey, instanceKey string) (FsmNextEvent, error) + PublishEventForTopic(ctx context.Context, arg PublishEventForTopicParams) error + ReleaseLease(ctx context.Context, idempotencyKey uuid.UUID, key leases.Key) (bool, error) + RenewLease(ctx context.Context, ttl sqltypes.Duration, idempotencyKey uuid.UUID, key leases.Key) (bool, error) + // Find an idle runner and reserve it for the given deployment. + ReserveRunner(ctx context.Context, reservationTimeout time.Time, deploymentKey model.DeploymentKey, labels json.RawMessage) (Runner, error) + SetDeploymentDesiredReplicas(ctx context.Context, key model.DeploymentKey, minReplicas int32) error + SetNextFSMEvent(ctx context.Context, arg SetNextFSMEventParams) (int64, error) + SetSubscriptionCursor(ctx context.Context, column1 model.SubscriptionKey, column2 model.TopicEventKey) error + // Start a new FSM transition, populating the destination state and async call ID. + // + // "key" is the unique identifier for the FSM execution. 
+ StartFSMTransition(ctx context.Context, arg StartFSMTransitionParams) (FsmInstance, error) + SucceedAsyncCall(ctx context.Context, response []byte, iD int64) (bool, error) + SucceedFSMInstance(ctx context.Context, fsm schema.RefKey, key string) (bool, error) + UpdateCronJobExecution(ctx context.Context, lastExecution time.Time, nextExecution time.Time, key model.CronJobKey) error + UpsertController(ctx context.Context, key model.ControllerKey, endpoint string) (int64, error) + UpsertModule(ctx context.Context, language string, name string) (int64, error) + // Upsert a runner and return the deployment ID that it is assigned to, if any. + // If the deployment key is null, then deployment_rel.id will be null, + // otherwise we try to retrieve the deployments.id using the key. If + // there is no corresponding deployment, then the deployment ID is -1 + // and the parent statement will fail due to a foreign key constraint. + UpsertRunner(ctx context.Context, arg UpsertRunnerParams) (optional.Option[int64], error) + UpsertSubscription(ctx context.Context, arg UpsertSubscriptionParams) (UpsertSubscriptionRow, error) + UpsertTopic(ctx context.Context, arg UpsertTopicParams) error } var _ Querier = (*Queries)(nil) diff --git a/backend/controller/cronjobs/sql/queries.sql b/backend/controller/cronjobs/sql/queries.sql index b16589ff2c..f17aba697f 100644 --- a/backend/controller/cronjobs/sql/queries.sql +++ b/backend/controller/cronjobs/sql/queries.sql @@ -1,8 +1,36 @@ --- name: GetCronJobs :many -SELECT j.key as key, d.key as deployment_key, j.module_name as module, j.verb, j.schedule, j.start_time, j.next_execution, j.state +-- name: GetUnscheduledCronJobs :many +SELECT j.key as key, d.key as deployment_key, j.module_name as module, j.verb, j.schedule, j.start_time, j.next_execution, j.last_execution FROM cron_jobs j INNER JOIN deployments d on j.deployment_id = d.id -WHERE d.min_replicas > 0; +WHERE d.min_replicas > 0 + AND j.start_time < 
sqlc.arg('start_time')::TIMESTAMPTZ + AND ( + j.last_execution IS NULL + OR NOT EXISTS ( + SELECT 1 + FROM async_calls ac + WHERE + ac.cron_job_key = j.key + AND ac.scheduled_at > j.last_execution::TIMESTAMPTZ + ) + ) +FOR UPDATE SKIP LOCKED; + +-- name: IsCronJobPending :one +SELECT EXISTS ( + SELECT 1 + FROM async_calls ac + WHERE ac.cron_job_key = sqlc.arg('key')::cron_job_key + AND ac.scheduled_at > sqlc.arg('start_time')::TIMESTAMPTZ + AND ac.state = 'pending' +) AS pending; + +-- name: GetCronJobByKey :one +SELECT j.key as key, d.key as deployment_key, j.module_name as module, j.verb, j.schedule, j.start_time, j.next_execution, j.last_execution +FROM cron_jobs j + INNER JOIN deployments d on j.deployment_id = d.id +WHERE j.key = sqlc.arg('key')::cron_job_key +FOR UPDATE SKIP LOCKED; -- name: CreateCronJob :exec INSERT INTO cron_jobs (key, deployment_id, module_name, verb, schedule, start_time, next_execution) @@ -15,45 +43,8 @@ INSERT INTO cron_jobs (key, deployment_id, module_name, verb, schedule, start_ti sqlc.arg('start_time')::TIMESTAMPTZ, sqlc.arg('next_execution')::TIMESTAMPTZ); --- name: StartCronJobs :many -WITH updates AS ( - UPDATE cron_jobs - SET state = 'executing', - start_time = (NOW() AT TIME ZONE 'utc')::TIMESTAMPTZ - WHERE key = ANY (sqlc.arg('keys')) - AND state = 'idle' - AND start_time < next_execution - AND (next_execution AT TIME ZONE 'utc') < (NOW() AT TIME ZONE 'utc')::TIMESTAMPTZ - RETURNING id, key, state, start_time, next_execution) -SELECT j.key as key, d.key as deployment_key, j.module_name as module, j.verb, j.schedule, - COALESCE(u.start_time, j.start_time) as start_time, - COALESCE(u.next_execution, j.next_execution) as next_execution, - COALESCE(u.state, j.state) as state, - d.min_replicas > 0 as has_min_replicas, - CASE WHEN u.key IS NULL THEN FALSE ELSE TRUE END as updated -FROM cron_jobs j - INNER JOIN deployments d on j.deployment_id = d.id - LEFT JOIN updates u on j.id = u.id -WHERE j.key = ANY (sqlc.arg('keys')); - --- 
name: EndCronJob :one -WITH j AS ( +-- name: UpdateCronJobExecution :exec UPDATE cron_jobs - SET state = 'idle', + SET last_execution = sqlc.arg('last_execution')::TIMESTAMPTZ, next_execution = sqlc.arg('next_execution')::TIMESTAMPTZ - WHERE key = sqlc.arg('key')::cron_job_key - AND state = 'executing' - AND start_time = sqlc.arg('start_time')::TIMESTAMPTZ - RETURNING * -) -SELECT j.key as key, d.key as deployment_key, j.module_name as module, j.verb, j.schedule, j.start_time, j.next_execution, j.state - FROM j - INNER JOIN deployments d on j.deployment_id = d.id - LIMIT 1; - --- name: GetStaleCronJobs :many -SELECT j.key as key, d.key as deployment_key, j.module_name as module, j.verb, j.schedule, j.start_time, j.next_execution, j.state -FROM cron_jobs j - INNER JOIN deployments d on j.deployment_id = d.id -WHERE state = 'executing' - AND start_time < (NOW() AT TIME ZONE 'utc') - $1::INTERVAL; + WHERE key = sqlc.arg('key')::cron_job_key; \ No newline at end of file diff --git a/backend/controller/cronjobs/sql/queries.sql.go b/backend/controller/cronjobs/sql/queries.sql.go index 1064924f55..bc48f6fb4a 100644 --- a/backend/controller/cronjobs/sql/queries.sql.go +++ b/backend/controller/cronjobs/sql/queries.sql.go @@ -7,13 +7,250 @@ package sql import ( "context" + "encoding/json" "time" + "github.com/TBD54566975/ftl/backend/controller/leases" "github.com/TBD54566975/ftl/backend/controller/sql/sqltypes" + "github.com/TBD54566975/ftl/backend/schema" "github.com/TBD54566975/ftl/internal/model" + "github.com/alecthomas/types/optional" + "github.com/google/uuid" "github.com/lib/pq" + "github.com/sqlc-dev/pqtype" ) +const acquireAsyncCall = `-- name: AcquireAsyncCall :one +WITH pending_calls AS ( + SELECT id + FROM async_calls + WHERE state = 'pending' AND scheduled_at <= (NOW() AT TIME ZONE 'utc') + ORDER BY created_at +), async_call AS ( + SELECT id + FROM pending_calls + LIMIT 1 + FOR UPDATE SKIP LOCKED +), lease AS ( + INSERT INTO leases (idempotency_key, key, 
expires_at) + SELECT gen_random_uuid(), '/system/async_call/' || (SELECT id FROM async_call), (NOW() AT TIME ZONE 'utc') + $1::interval + WHERE (SELECT id FROM async_call) IS NOT NULL + RETURNING id, idempotency_key, key, created_at, expires_at, metadata +) +UPDATE async_calls +SET state = 'executing', lease_id = (SELECT id FROM lease) +WHERE id = (SELECT id FROM async_call) +RETURNING + id AS async_call_id, + (SELECT idempotency_key FROM lease) AS lease_idempotency_key, + (SELECT key FROM lease) AS lease_key, + (SELECT count(*) FROM pending_calls) AS queue_depth, + origin, + verb, + catch_verb, + request, + scheduled_at, + remaining_attempts, + error, + backoff, + max_backoff, + parent_request_key, + trace_context, + catching +` + +type AcquireAsyncCallRow struct { + AsyncCallID int64 + LeaseIdempotencyKey uuid.UUID + LeaseKey leases.Key + QueueDepth int64 + Origin string + Verb schema.RefKey + CatchVerb optional.Option[schema.RefKey] + Request []byte + ScheduledAt time.Time + RemainingAttempts int32 + Error optional.Option[string] + Backoff sqltypes.Duration + MaxBackoff sqltypes.Duration + ParentRequestKey optional.Option[string] + TraceContext pqtype.NullRawMessage + Catching bool +} + +// Reserve a pending async call for execution, returning the associated lease +// reservation key and accompanying metadata. 
+func (q *Queries) AcquireAsyncCall(ctx context.Context, ttl sqltypes.Duration) (AcquireAsyncCallRow, error) { + row := q.db.QueryRowContext(ctx, acquireAsyncCall, ttl) + var i AcquireAsyncCallRow + err := row.Scan( + &i.AsyncCallID, + &i.LeaseIdempotencyKey, + &i.LeaseKey, + &i.QueueDepth, + &i.Origin, + &i.Verb, + &i.CatchVerb, + &i.Request, + &i.ScheduledAt, + &i.RemainingAttempts, + &i.Error, + &i.Backoff, + &i.MaxBackoff, + &i.ParentRequestKey, + &i.TraceContext, + &i.Catching, + ) + return i, err +} + +const associateArtefactWithDeployment = `-- name: AssociateArtefactWithDeployment :exec +INSERT INTO deployment_artefacts (deployment_id, artefact_id, executable, path) +VALUES ((SELECT id FROM deployments WHERE key = $1::deployment_key), $2, $3, $4) +` + +type AssociateArtefactWithDeploymentParams struct { + Key model.DeploymentKey + ArtefactID int64 + Executable bool + Path string +} + +func (q *Queries) AssociateArtefactWithDeployment(ctx context.Context, arg AssociateArtefactWithDeploymentParams) error { + _, err := q.db.ExecContext(ctx, associateArtefactWithDeployment, + arg.Key, + arg.ArtefactID, + arg.Executable, + arg.Path, + ) + return err +} + +const asyncCallQueueDepth = `-- name: AsyncCallQueueDepth :one +SELECT count(*) +FROM async_calls +WHERE state = 'pending' AND scheduled_at <= (NOW() AT TIME ZONE 'utc') +` + +func (q *Queries) AsyncCallQueueDepth(ctx context.Context) (int64, error) { + row := q.db.QueryRowContext(ctx, asyncCallQueueDepth) + var count int64 + err := row.Scan(&count) + return count, err +} + +const beginConsumingTopicEvent = `-- name: BeginConsumingTopicEvent :exec +WITH event AS ( + SELECT id, created_at, key, topic_id, payload, caller, request_key, trace_context + FROM topic_events + WHERE "key" = $2::topic_event_key +) +UPDATE topic_subscriptions +SET state = 'executing', + cursor = (SELECT id FROM event) +WHERE key = $1::subscription_key +` + +func (q *Queries) BeginConsumingTopicEvent(ctx context.Context, subscription 
model.SubscriptionKey, event model.TopicEventKey) error { + _, err := q.db.ExecContext(ctx, beginConsumingTopicEvent, subscription, event) + return err +} + +const completeEventForSubscription = `-- name: CompleteEventForSubscription :exec +WITH module AS ( + SELECT id + FROM modules + WHERE name = $2::TEXT +) +UPDATE topic_subscriptions +SET state = 'idle' +WHERE name = $1::TEXT + AND module_id = (SELECT id FROM module) +` + +func (q *Queries) CompleteEventForSubscription(ctx context.Context, name string, module string) error { + _, err := q.db.ExecContext(ctx, completeEventForSubscription, name, module) + return err +} + +const createArtefact = `-- name: CreateArtefact :one +INSERT INTO artefacts (digest, content) +VALUES ($1, $2) +ON CONFLICT (digest) DO NOTHING +RETURNING id +` + +// Create a new artefact and return the artefact ID. +func (q *Queries) CreateArtefact(ctx context.Context, digest []byte, content []byte) (int64, error) { + row := q.db.QueryRowContext(ctx, createArtefact, digest, content) + var id int64 + err := row.Scan(&id) + return id, err +} + +const createAsyncCall = `-- name: CreateAsyncCall :one +INSERT INTO async_calls ( + scheduled_at, + verb, + origin, + request, + remaining_attempts, + backoff, + max_backoff, + catch_verb, + parent_request_key, + trace_context, + cron_job_key +) +VALUES ( + $1::TIMESTAMPTZ, + $2, + $3, + $4, + $5, + $6::interval, + $7::interval, + $8, + $9, + $10::jsonb, + $11 +) +RETURNING id +` + +type CreateAsyncCallParams struct { + ScheduledAt time.Time + Verb schema.RefKey + Origin string + Request []byte + RemainingAttempts int32 + Backoff sqltypes.Duration + MaxBackoff sqltypes.Duration + CatchVerb optional.Option[schema.RefKey] + ParentRequestKey optional.Option[string] + TraceContext json.RawMessage + CronJobKey optional.Option[model.CronJobKey] +} + +func (q *Queries) CreateAsyncCall(ctx context.Context, arg CreateAsyncCallParams) (int64, error) { + row := q.db.QueryRowContext(ctx, createAsyncCall, + 
arg.ScheduledAt, + arg.Verb, + arg.Origin, + arg.Request, + arg.RemainingAttempts, + arg.Backoff, + arg.MaxBackoff, + arg.CatchVerb, + arg.ParentRequestKey, + arg.TraceContext, + arg.CronJobKey, + ) + var id int64 + err := row.Scan(&id) + return id, err +} + const createCronJob = `-- name: CreateCronJob :exec INSERT INTO cron_jobs (key, deployment_id, module_name, verb, schedule, start_time, next_execution) VALUES ( @@ -49,85 +286,339 @@ func (q *Queries) CreateCronJob(ctx context.Context, arg CreateCronJobParams) er return err } -const endCronJob = `-- name: EndCronJob :one -WITH j AS ( -UPDATE cron_jobs - SET state = 'idle', - next_execution = $1::TIMESTAMPTZ - WHERE key = $2::cron_job_key - AND state = 'executing' - AND start_time = $3::TIMESTAMPTZ - RETURNING id, key, deployment_id, verb, schedule, start_time, next_execution, state, module_name -) -SELECT j.key as key, d.key as deployment_key, j.module_name as module, j.verb, j.schedule, j.start_time, j.next_execution, j.state - FROM j - INNER JOIN deployments d on j.deployment_id = d.id - LIMIT 1 +const createDeployment = `-- name: CreateDeployment :exec +INSERT INTO deployments (module_id, "schema", "key") +VALUES ((SELECT id FROM modules WHERE name = $1::TEXT LIMIT 1), $2::BYTEA, $3::deployment_key) ` -type EndCronJobRow struct { - Key model.CronJobKey - DeploymentKey model.DeploymentKey - Module string - Verb string - Schedule string - StartTime time.Time - NextExecution time.Time - State model.CronJobState +func (q *Queries) CreateDeployment(ctx context.Context, moduleName string, schema []byte, key model.DeploymentKey) error { + _, err := q.db.ExecContext(ctx, createDeployment, moduleName, schema, key) + return err } -func (q *Queries) EndCronJob(ctx context.Context, nextExecution time.Time, key model.CronJobKey, startTime time.Time) (EndCronJobRow, error) { - row := q.db.QueryRowContext(ctx, endCronJob, nextExecution, key, startTime) - var i EndCronJobRow - err := row.Scan( - &i.Key, - &i.DeploymentKey, 
- &i.Module, - &i.Verb, - &i.Schedule, - &i.StartTime, - &i.NextExecution, - &i.State, +const createIngressRoute = `-- name: CreateIngressRoute :exec +INSERT INTO ingress_routes (deployment_id, module, verb, method, path) +VALUES ((SELECT id FROM deployments WHERE key = $1::deployment_key LIMIT 1), $2, $3, $4, $5) +` + +type CreateIngressRouteParams struct { + Key model.DeploymentKey + Module string + Verb string + Method string + Path string +} + +func (q *Queries) CreateIngressRoute(ctx context.Context, arg CreateIngressRouteParams) error { + _, err := q.db.ExecContext(ctx, createIngressRoute, + arg.Key, + arg.Module, + arg.Verb, + arg.Method, + arg.Path, ) - return i, err + return err } -const getCronJobs = `-- name: GetCronJobs :many -SELECT j.key as key, d.key as deployment_key, j.module_name as module, j.verb, j.schedule, j.start_time, j.next_execution, j.state -FROM cron_jobs j - INNER JOIN deployments d on j.deployment_id = d.id -WHERE d.min_replicas > 0 +const createOnlyEncryptionKey = `-- name: CreateOnlyEncryptionKey :exec +INSERT INTO encryption_keys (id, key) +VALUES (1, $1) ` -type GetCronJobsRow struct { - Key model.CronJobKey - DeploymentKey model.DeploymentKey - Module string - Verb string - Schedule string - StartTime time.Time - NextExecution time.Time - State model.CronJobState +func (q *Queries) CreateOnlyEncryptionKey(ctx context.Context, key []byte) error { + _, err := q.db.ExecContext(ctx, createOnlyEncryptionKey, key) + return err +} + +const createRequest = `-- name: CreateRequest :exec +INSERT INTO requests (origin, "key", source_addr) +VALUES ($1, $2, $3) +` + +func (q *Queries) CreateRequest(ctx context.Context, origin Origin, key model.RequestKey, sourceAddr string) error { + _, err := q.db.ExecContext(ctx, createRequest, origin, key, sourceAddr) + return err +} + +const deleteOldTimelineEvents = `-- name: DeleteOldTimelineEvents :one +WITH deleted AS ( + DELETE FROM timeline + WHERE time_stamp < (NOW() AT TIME ZONE 'utc') - 
$1::INTERVAL + AND type = $2 + RETURNING 1 +) +SELECT COUNT(*) +FROM deleted +` + +func (q *Queries) DeleteOldTimelineEvents(ctx context.Context, timeout sqltypes.Duration, type_ EventType) (int64, error) { + row := q.db.QueryRowContext(ctx, deleteOldTimelineEvents, timeout, type_) + var count int64 + err := row.Scan(&count) + return count, err +} + +const deleteSubscribers = `-- name: DeleteSubscribers :many +DELETE FROM topic_subscribers +WHERE deployment_id IN ( + SELECT deployments.id + FROM deployments + WHERE deployments.key = $1::deployment_key +) +RETURNING topic_subscribers.key +` + +func (q *Queries) DeleteSubscribers(ctx context.Context, deployment model.DeploymentKey) ([]model.SubscriberKey, error) { + rows, err := q.db.QueryContext(ctx, deleteSubscribers, deployment) + if err != nil { + return nil, err + } + defer rows.Close() + var items []model.SubscriberKey + for rows.Next() { + var key model.SubscriberKey + if err := rows.Scan(&key); err != nil { + return nil, err + } + items = append(items, key) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const deleteSubscriptions = `-- name: DeleteSubscriptions :many +DELETE FROM topic_subscriptions +WHERE deployment_id IN ( + SELECT deployments.id + FROM deployments + WHERE deployments.key = $1::deployment_key +) +RETURNING topic_subscriptions.key +` + +func (q *Queries) DeleteSubscriptions(ctx context.Context, deployment model.DeploymentKey) ([]model.SubscriptionKey, error) { + rows, err := q.db.QueryContext(ctx, deleteSubscriptions, deployment) + if err != nil { + return nil, err + } + defer rows.Close() + var items []model.SubscriptionKey + for rows.Next() { + var key model.SubscriptionKey + if err := rows.Scan(&key); err != nil { + return nil, err + } + items = append(items, key) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + 
return items, nil +} + +const deregisterRunner = `-- name: DeregisterRunner :one +WITH matches AS ( + UPDATE runners + SET state = 'dead', + deployment_id = NULL + WHERE key = $1::runner_key + RETURNING 1) +SELECT COUNT(*) +FROM matches +` + +func (q *Queries) DeregisterRunner(ctx context.Context, key model.RunnerKey) (int64, error) { + row := q.db.QueryRowContext(ctx, deregisterRunner, key) + var count int64 + err := row.Scan(&count) + return count, err +} + +const expireLeases = `-- name: ExpireLeases :one +WITH expired AS ( + DELETE FROM leases + WHERE expires_at < NOW() AT TIME ZONE 'utc' + RETURNING 1 +) +SELECT COUNT(*) +FROM expired +` + +func (q *Queries) ExpireLeases(ctx context.Context) (int64, error) { + row := q.db.QueryRowContext(ctx, expireLeases) + var count int64 + err := row.Scan(&count) + return count, err +} + +const expireRunnerReservations = `-- name: ExpireRunnerReservations :one +WITH rows AS ( + UPDATE runners + SET state = 'idle', + deployment_id = NULL, + reservation_timeout = NULL + WHERE state = 'reserved' + AND reservation_timeout < (NOW() AT TIME ZONE 'utc') + RETURNING 1) +SELECT COUNT(*) +FROM rows +` + +func (q *Queries) ExpireRunnerReservations(ctx context.Context) (int64, error) { + row := q.db.QueryRowContext(ctx, expireRunnerReservations) + var count int64 + err := row.Scan(&count) + return count, err +} + +const failAsyncCall = `-- name: FailAsyncCall :one +UPDATE async_calls +SET + state = 'error'::async_call_state, + error = $1::TEXT +WHERE id = $2 +RETURNING true +` + +func (q *Queries) FailAsyncCall(ctx context.Context, error string, iD int64) (bool, error) { + row := q.db.QueryRowContext(ctx, failAsyncCall, error, iD) + var column_1 bool + err := row.Scan(&column_1) + return column_1, err +} + +const failAsyncCallWithRetry = `-- name: FailAsyncCallWithRetry :one +WITH updated AS ( + UPDATE async_calls + SET state = 'error'::async_call_state, + error = $7::TEXT + WHERE id = $8::BIGINT + RETURNING id, created_at, lease_id, 
verb, state, origin, scheduled_at, request, response, error, remaining_attempts, backoff, max_backoff, catch_verb, catching, parent_request_key, trace_context, cron_job_key +) +INSERT INTO async_calls ( + verb, + origin, + request, + catch_verb, + remaining_attempts, + backoff, + max_backoff, + scheduled_at, + catching, + error +) +SELECT + updated.verb, + updated.origin, + updated.request, + updated.catch_verb, + $1, + $2::interval, + $3::interval, + $4::TIMESTAMPTZ, + $5::bool, + $6 +FROM updated +RETURNING true +` + +type FailAsyncCallWithRetryParams struct { + RemainingAttempts int32 + Backoff sqltypes.Duration + MaxBackoff sqltypes.Duration + ScheduledAt time.Time + Catching bool + OriginalError optional.Option[string] + Error string + ID int64 +} + +func (q *Queries) FailAsyncCallWithRetry(ctx context.Context, arg FailAsyncCallWithRetryParams) (bool, error) { + row := q.db.QueryRowContext(ctx, failAsyncCallWithRetry, + arg.RemainingAttempts, + arg.Backoff, + arg.MaxBackoff, + arg.ScheduledAt, + arg.Catching, + arg.OriginalError, + arg.Error, + arg.ID, + ) + var column_1 bool + err := row.Scan(&column_1) + return column_1, err +} + +const failFSMInstance = `-- name: FailFSMInstance :one +UPDATE fsm_instances +SET + current_state = NULL, + async_call_id = NULL, + status = 'failed'::fsm_status, + updated_at = NOW() AT TIME ZONE 'utc' +WHERE + fsm = $1::schema_ref AND key = $2::TEXT +RETURNING true +` + +func (q *Queries) FailFSMInstance(ctx context.Context, fsm schema.RefKey, key string) (bool, error) { + row := q.db.QueryRowContext(ctx, failFSMInstance, fsm, key) + var column_1 bool + err := row.Scan(&column_1) + return column_1, err +} + +const finishFSMTransition = `-- name: FinishFSMTransition :one +UPDATE fsm_instances +SET + current_state = destination_state, + destination_state = NULL, + async_call_id = NULL, + updated_at = NOW() AT TIME ZONE 'utc' +WHERE + fsm = $1::schema_ref AND key = $2::TEXT +RETURNING true +` + +// Mark an FSM transition as 
completed, updating the current state and clearing the async call ID. +func (q *Queries) FinishFSMTransition(ctx context.Context, fsm schema.RefKey, key string) (bool, error) { + row := q.db.QueryRowContext(ctx, finishFSMTransition, fsm, key) + var column_1 bool + err := row.Scan(&column_1) + return column_1, err } -func (q *Queries) GetCronJobs(ctx context.Context) ([]GetCronJobsRow, error) { - rows, err := q.db.QueryContext(ctx, getCronJobs) +const getActiveControllers = `-- name: GetActiveControllers :many +SELECT id, key, created, last_seen, state, endpoint +FROM controller c +WHERE c.state <> 'dead' +ORDER BY c.key +` + +func (q *Queries) GetActiveControllers(ctx context.Context) ([]Controller, error) { + rows, err := q.db.QueryContext(ctx, getActiveControllers) if err != nil { return nil, err } defer rows.Close() - var items []GetCronJobsRow + var items []Controller for rows.Next() { - var i GetCronJobsRow + var i Controller if err := rows.Scan( + &i.ID, &i.Key, - &i.DeploymentKey, - &i.Module, - &i.Verb, - &i.Schedule, - &i.StartTime, - &i.NextExecution, + &i.Created, + &i.LastSeen, &i.State, + &i.Endpoint, ); err != nil { return nil, err } @@ -142,43 +633,119 @@ func (q *Queries) GetCronJobs(ctx context.Context) ([]GetCronJobsRow, error) { return items, nil } -const getStaleCronJobs = `-- name: GetStaleCronJobs :many -SELECT j.key as key, d.key as deployment_key, j.module_name as module, j.verb, j.schedule, j.start_time, j.next_execution, j.state -FROM cron_jobs j - INNER JOIN deployments d on j.deployment_id = d.id -WHERE state = 'executing' - AND start_time < (NOW() AT TIME ZONE 'utc') - $1::INTERVAL +const getActiveDeploymentSchemas = `-- name: GetActiveDeploymentSchemas :many +SELECT key, schema FROM deployments WHERE min_replicas > 0 ` -type GetStaleCronJobsRow struct { - Key model.CronJobKey +type GetActiveDeploymentSchemasRow struct { + Key model.DeploymentKey + Schema *schema.Module +} + +func (q *Queries) GetActiveDeploymentSchemas(ctx 
context.Context) ([]GetActiveDeploymentSchemasRow, error) { + rows, err := q.db.QueryContext(ctx, getActiveDeploymentSchemas) + if err != nil { + return nil, err + } + defer rows.Close() + var items []GetActiveDeploymentSchemasRow + for rows.Next() { + var i GetActiveDeploymentSchemasRow + if err := rows.Scan(&i.Key, &i.Schema); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const getActiveDeployments = `-- name: GetActiveDeployments :many +SELECT d.id, d.created_at, d.module_id, d.key, d.schema, d.labels, d.min_replicas, m.name AS module_name, m.language, COUNT(r.id) AS replicas +FROM deployments d + JOIN modules m ON d.module_id = m.id + JOIN runners r ON d.id = r.deployment_id +WHERE min_replicas > 0 AND r.state = 'assigned' +GROUP BY d.id, m.name, m.language +HAVING COUNT(r.id) > 0 +` + +type GetActiveDeploymentsRow struct { + Deployment Deployment + ModuleName string + Language string + Replicas int64 +} + +func (q *Queries) GetActiveDeployments(ctx context.Context) ([]GetActiveDeploymentsRow, error) { + rows, err := q.db.QueryContext(ctx, getActiveDeployments) + if err != nil { + return nil, err + } + defer rows.Close() + var items []GetActiveDeploymentsRow + for rows.Next() { + var i GetActiveDeploymentsRow + if err := rows.Scan( + &i.Deployment.ID, + &i.Deployment.CreatedAt, + &i.Deployment.ModuleID, + &i.Deployment.Key, + &i.Deployment.Schema, + &i.Deployment.Labels, + &i.Deployment.MinReplicas, + &i.ModuleName, + &i.Language, + &i.Replicas, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const getActiveIngressRoutes = `-- name: GetActiveIngressRoutes :many +SELECT d.key AS deployment_key, ir.module, ir.verb, ir.method, ir.path 
+FROM ingress_routes ir + INNER JOIN deployments d ON ir.deployment_id = d.id +WHERE d.min_replicas > 0 +` + +type GetActiveIngressRoutesRow struct { DeploymentKey model.DeploymentKey Module string Verb string - Schedule string - StartTime time.Time - NextExecution time.Time - State model.CronJobState + Method string + Path string } -func (q *Queries) GetStaleCronJobs(ctx context.Context, dollar_1 sqltypes.Duration) ([]GetStaleCronJobsRow, error) { - rows, err := q.db.QueryContext(ctx, getStaleCronJobs, dollar_1) +func (q *Queries) GetActiveIngressRoutes(ctx context.Context) ([]GetActiveIngressRoutesRow, error) { + rows, err := q.db.QueryContext(ctx, getActiveIngressRoutes) if err != nil { return nil, err } defer rows.Close() - var items []GetStaleCronJobsRow + var items []GetActiveIngressRoutesRow for rows.Next() { - var i GetStaleCronJobsRow + var i GetActiveIngressRoutesRow if err := rows.Scan( - &i.Key, &i.DeploymentKey, &i.Module, &i.Verb, - &i.Schedule, - &i.StartTime, - &i.NextExecution, - &i.State, + &i.Method, + &i.Path, ); err != nil { return nil, err } @@ -193,61 +760,49 @@ func (q *Queries) GetStaleCronJobs(ctx context.Context, dollar_1 sqltypes.Durati return items, nil } -const startCronJobs = `-- name: StartCronJobs :many -WITH updates AS ( - UPDATE cron_jobs - SET state = 'executing', - start_time = (NOW() AT TIME ZONE 'utc')::TIMESTAMPTZ - WHERE key = ANY ($1) - AND state = 'idle' - AND start_time < next_execution - AND (next_execution AT TIME ZONE 'utc') < (NOW() AT TIME ZONE 'utc')::TIMESTAMPTZ - RETURNING id, key, state, start_time, next_execution) -SELECT j.key as key, d.key as deployment_key, j.module_name as module, j.verb, j.schedule, - COALESCE(u.start_time, j.start_time) as start_time, - COALESCE(u.next_execution, j.next_execution) as next_execution, - COALESCE(u.state, j.state) as state, - d.min_replicas > 0 as has_min_replicas, - CASE WHEN u.key IS NULL THEN FALSE ELSE TRUE END as updated -FROM cron_jobs j - INNER JOIN deployments d on 
j.deployment_id = d.id - LEFT JOIN updates u on j.id = u.id -WHERE j.key = ANY ($1) +const getActiveRunners = `-- name: GetActiveRunners :many +SELECT DISTINCT ON (r.key) r.key AS runner_key, + r.endpoint, + r.state, + r.labels, + r.last_seen, + r.module_name, + COALESCE(CASE + WHEN r.deployment_id IS NOT NULL + THEN d.key END, NULL) AS deployment_key +FROM runners r + LEFT JOIN deployments d on d.id = r.deployment_id +WHERE r.state <> 'dead' +ORDER BY r.key ` -type StartCronJobsRow struct { - Key model.CronJobKey - DeploymentKey model.DeploymentKey - Module string - Verb string - Schedule string - StartTime time.Time - NextExecution time.Time - State model.CronJobState - HasMinReplicas bool - Updated bool +type GetActiveRunnersRow struct { + RunnerKey model.RunnerKey + Endpoint string + State RunnerState + Labels json.RawMessage + LastSeen time.Time + ModuleName optional.Option[string] + DeploymentKey optional.Option[string] } -func (q *Queries) StartCronJobs(ctx context.Context, keys []string) ([]StartCronJobsRow, error) { - rows, err := q.db.QueryContext(ctx, startCronJobs, pq.Array(keys)) +func (q *Queries) GetActiveRunners(ctx context.Context) ([]GetActiveRunnersRow, error) { + rows, err := q.db.QueryContext(ctx, getActiveRunners) if err != nil { return nil, err } defer rows.Close() - var items []StartCronJobsRow + var items []GetActiveRunnersRow for rows.Next() { - var i StartCronJobsRow + var i GetActiveRunnersRow if err := rows.Scan( - &i.Key, - &i.DeploymentKey, - &i.Module, - &i.Verb, - &i.Schedule, - &i.StartTime, - &i.NextExecution, + &i.RunnerKey, + &i.Endpoint, &i.State, - &i.HasMinReplicas, - &i.Updated, + &i.Labels, + &i.LastSeen, + &i.ModuleName, + &i.DeploymentKey, ); err != nil { return nil, err } @@ -261,3 +816,1943 @@ func (q *Queries) StartCronJobs(ctx context.Context, keys []string) ([]StartCron } return items, nil } + +const getArtefactContentRange = `-- name: GetArtefactContentRange :one +SELECT SUBSTRING(a.content FROM $1 FOR $2)::BYTEA AS 
content +FROM artefacts a +WHERE a.id = $3 +` + +func (q *Queries) GetArtefactContentRange(ctx context.Context, start int32, count int32, iD int64) ([]byte, error) { + row := q.db.QueryRowContext(ctx, getArtefactContentRange, start, count, iD) + var content []byte + err := row.Scan(&content) + return content, err +} + +const getArtefactDigests = `-- name: GetArtefactDigests :many +SELECT id, digest +FROM artefacts +WHERE digest = ANY ($1::bytea[]) +` + +type GetArtefactDigestsRow struct { + ID int64 + Digest []byte +} + +// Return the digests that exist in the database. +func (q *Queries) GetArtefactDigests(ctx context.Context, digests [][]byte) ([]GetArtefactDigestsRow, error) { + rows, err := q.db.QueryContext(ctx, getArtefactDigests, pq.Array(digests)) + if err != nil { + return nil, err + } + defer rows.Close() + var items []GetArtefactDigestsRow + for rows.Next() { + var i GetArtefactDigestsRow + if err := rows.Scan(&i.ID, &i.Digest); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const getCronJobByKey = `-- name: GetCronJobByKey :one +SELECT j.key as key, d.key as deployment_key, j.module_name as module, j.verb, j.schedule, j.start_time, j.next_execution, j.last_execution +FROM cron_jobs j + INNER JOIN deployments d on j.deployment_id = d.id +WHERE j.key = $1::cron_job_key +FOR UPDATE SKIP LOCKED +` + +type GetCronJobByKeyRow struct { + Key model.CronJobKey + DeploymentKey model.DeploymentKey + Module string + Verb string + Schedule string + StartTime time.Time + NextExecution time.Time + LastExecution optional.Option[time.Time] +} + +func (q *Queries) GetCronJobByKey(ctx context.Context, key model.CronJobKey) (GetCronJobByKeyRow, error) { + row := q.db.QueryRowContext(ctx, getCronJobByKey, key) + var i GetCronJobByKeyRow + err := row.Scan( + &i.Key, + &i.DeploymentKey, + &i.Module, + &i.Verb, + 
&i.Schedule, + &i.StartTime, + &i.NextExecution, + &i.LastExecution, + ) + return i, err +} + +const getDeployment = `-- name: GetDeployment :one +SELECT d.id, d.created_at, d.module_id, d.key, d.schema, d.labels, d.min_replicas, m.language, m.name AS module_name, d.min_replicas +FROM deployments d + INNER JOIN modules m ON m.id = d.module_id +WHERE d.key = $1::deployment_key +` + +type GetDeploymentRow struct { + Deployment Deployment + Language string + ModuleName string + MinReplicas int32 +} + +func (q *Queries) GetDeployment(ctx context.Context, key model.DeploymentKey) (GetDeploymentRow, error) { + row := q.db.QueryRowContext(ctx, getDeployment, key) + var i GetDeploymentRow + err := row.Scan( + &i.Deployment.ID, + &i.Deployment.CreatedAt, + &i.Deployment.ModuleID, + &i.Deployment.Key, + &i.Deployment.Schema, + &i.Deployment.Labels, + &i.Deployment.MinReplicas, + &i.Language, + &i.ModuleName, + &i.MinReplicas, + ) + return i, err +} + +const getDeploymentArtefacts = `-- name: GetDeploymentArtefacts :many +SELECT da.created_at, artefact_id AS id, executable, path, digest, executable +FROM deployment_artefacts da + INNER JOIN artefacts ON artefacts.id = da.artefact_id +WHERE deployment_id = $1 +` + +type GetDeploymentArtefactsRow struct { + CreatedAt time.Time + ID int64 + Executable bool + Path string + Digest []byte + Executable_2 bool +} + +// Get all artefacts matching the given digests. 
+func (q *Queries) GetDeploymentArtefacts(ctx context.Context, deploymentID int64) ([]GetDeploymentArtefactsRow, error) { + rows, err := q.db.QueryContext(ctx, getDeploymentArtefacts, deploymentID) + if err != nil { + return nil, err + } + defer rows.Close() + var items []GetDeploymentArtefactsRow + for rows.Next() { + var i GetDeploymentArtefactsRow + if err := rows.Scan( + &i.CreatedAt, + &i.ID, + &i.Executable, + &i.Path, + &i.Digest, + &i.Executable_2, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const getDeploymentsByID = `-- name: GetDeploymentsByID :many +SELECT id, created_at, module_id, key, schema, labels, min_replicas +FROM deployments +WHERE id = ANY ($1::BIGINT[]) +` + +func (q *Queries) GetDeploymentsByID(ctx context.Context, ids []int64) ([]Deployment, error) { + rows, err := q.db.QueryContext(ctx, getDeploymentsByID, pq.Array(ids)) + if err != nil { + return nil, err + } + defer rows.Close() + var items []Deployment + for rows.Next() { + var i Deployment + if err := rows.Scan( + &i.ID, + &i.CreatedAt, + &i.ModuleID, + &i.Key, + &i.Schema, + &i.Labels, + &i.MinReplicas, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const getDeploymentsNeedingReconciliation = `-- name: GetDeploymentsNeedingReconciliation :many +SELECT d.key AS deployment_key, + m.name AS module_name, + m.language AS language, + COUNT(r.id) AS assigned_runners_count, + d.min_replicas::BIGINT AS required_runners_count +FROM deployments d + LEFT JOIN runners r ON d.id = r.deployment_id AND r.state <> 'dead' + JOIN modules m ON d.module_id = m.id +GROUP BY d.key, d.min_replicas, m.name, m.language +HAVING COUNT(r.id) <> d.min_replicas +` + +type 
GetDeploymentsNeedingReconciliationRow struct { + DeploymentKey model.DeploymentKey + ModuleName string + Language string + AssignedRunnersCount int64 + RequiredRunnersCount int64 +} + +// Get deployments that have a mismatch between the number of assigned and required replicas. +func (q *Queries) GetDeploymentsNeedingReconciliation(ctx context.Context) ([]GetDeploymentsNeedingReconciliationRow, error) { + rows, err := q.db.QueryContext(ctx, getDeploymentsNeedingReconciliation) + if err != nil { + return nil, err + } + defer rows.Close() + var items []GetDeploymentsNeedingReconciliationRow + for rows.Next() { + var i GetDeploymentsNeedingReconciliationRow + if err := rows.Scan( + &i.DeploymentKey, + &i.ModuleName, + &i.Language, + &i.AssignedRunnersCount, + &i.RequiredRunnersCount, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const getDeploymentsWithArtefacts = `-- name: GetDeploymentsWithArtefacts :many +SELECT d.id, d.created_at, d.key as deployment_key, d.schema, m.name AS module_name +FROM deployments d + INNER JOIN modules m ON d.module_id = m.id +WHERE EXISTS (SELECT 1 + FROM deployment_artefacts da + INNER JOIN artefacts a ON da.artefact_id = a.id + WHERE a.digest = ANY ($1::bytea[]) + AND da.deployment_id = d.id + AND d.schema = $2::BYTEA + HAVING COUNT(*) = $3::BIGINT -- Number of unique digests provided +) +` + +type GetDeploymentsWithArtefactsRow struct { + ID int64 + CreatedAt time.Time + DeploymentKey model.DeploymentKey + Schema *schema.Module + ModuleName string +} + +// Get all deployments that have artefacts matching the given digests. 
+func (q *Queries) GetDeploymentsWithArtefacts(ctx context.Context, digests [][]byte, schema []byte, count int64) ([]GetDeploymentsWithArtefactsRow, error) { + rows, err := q.db.QueryContext(ctx, getDeploymentsWithArtefacts, pq.Array(digests), schema, count) + if err != nil { + return nil, err + } + defer rows.Close() + var items []GetDeploymentsWithArtefactsRow + for rows.Next() { + var i GetDeploymentsWithArtefactsRow + if err := rows.Scan( + &i.ID, + &i.CreatedAt, + &i.DeploymentKey, + &i.Schema, + &i.ModuleName, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const getDeploymentsWithMinReplicas = `-- name: GetDeploymentsWithMinReplicas :many +SELECT d.id, d.created_at, d.module_id, d.key, d.schema, d.labels, d.min_replicas, m.name AS module_name, m.language +FROM deployments d + INNER JOIN modules m on d.module_id = m.id +WHERE min_replicas > 0 +ORDER BY d.key +` + +type GetDeploymentsWithMinReplicasRow struct { + Deployment Deployment + ModuleName string + Language string +} + +func (q *Queries) GetDeploymentsWithMinReplicas(ctx context.Context) ([]GetDeploymentsWithMinReplicasRow, error) { + rows, err := q.db.QueryContext(ctx, getDeploymentsWithMinReplicas) + if err != nil { + return nil, err + } + defer rows.Close() + var items []GetDeploymentsWithMinReplicasRow + for rows.Next() { + var i GetDeploymentsWithMinReplicasRow + if err := rows.Scan( + &i.Deployment.ID, + &i.Deployment.CreatedAt, + &i.Deployment.ModuleID, + &i.Deployment.Key, + &i.Deployment.Schema, + &i.Deployment.Labels, + &i.Deployment.MinReplicas, + &i.ModuleName, + &i.Language, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const getExistingDeploymentForModule 
= `-- name: GetExistingDeploymentForModule :one +SELECT d.id, created_at, module_id, key, schema, labels, min_replicas, m.id, language, name +FROM deployments d + INNER JOIN modules m on d.module_id = m.id +WHERE m.name = $1 + AND min_replicas > 0 +LIMIT 1 +` + +type GetExistingDeploymentForModuleRow struct { + ID int64 + CreatedAt time.Time + ModuleID int64 + Key model.DeploymentKey + Schema *schema.Module + Labels json.RawMessage + MinReplicas int32 + ID_2 int64 + Language string + Name string +} + +func (q *Queries) GetExistingDeploymentForModule(ctx context.Context, name string) (GetExistingDeploymentForModuleRow, error) { + row := q.db.QueryRowContext(ctx, getExistingDeploymentForModule, name) + var i GetExistingDeploymentForModuleRow + err := row.Scan( + &i.ID, + &i.CreatedAt, + &i.ModuleID, + &i.Key, + &i.Schema, + &i.Labels, + &i.MinReplicas, + &i.ID_2, + &i.Language, + &i.Name, + ) + return i, err +} + +const getFSMInstance = `-- name: GetFSMInstance :one +SELECT id, created_at, fsm, key, status, current_state, destination_state, async_call_id, updated_at +FROM fsm_instances +WHERE fsm = $1::schema_ref AND key = $2 +` + +func (q *Queries) GetFSMInstance(ctx context.Context, fsm schema.RefKey, key string) (FsmInstance, error) { + row := q.db.QueryRowContext(ctx, getFSMInstance, fsm, key) + var i FsmInstance + err := row.Scan( + &i.ID, + &i.CreatedAt, + &i.Fsm, + &i.Key, + &i.Status, + &i.CurrentState, + &i.DestinationState, + &i.AsyncCallID, + &i.UpdatedAt, + ) + return i, err +} + +const getIdleRunners = `-- name: GetIdleRunners :many +SELECT id, key, created, last_seen, reservation_timeout, state, endpoint, module_name, deployment_id, labels +FROM runners +WHERE labels @> $1::jsonb + AND state = 'idle' +LIMIT $2 +` + +func (q *Queries) GetIdleRunners(ctx context.Context, labels json.RawMessage, limit int64) ([]Runner, error) { + rows, err := q.db.QueryContext(ctx, getIdleRunners, labels, limit) + if err != nil { + return nil, err + } + defer rows.Close() 
+ var items []Runner + for rows.Next() { + var i Runner + if err := rows.Scan( + &i.ID, + &i.Key, + &i.Created, + &i.LastSeen, + &i.ReservationTimeout, + &i.State, + &i.Endpoint, + &i.ModuleName, + &i.DeploymentID, + &i.Labels, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const getIngressRoutes = `-- name: GetIngressRoutes :many +SELECT r.key AS runner_key, d.key AS deployment_key, endpoint, ir.path, ir.module, ir.verb +FROM ingress_routes ir + INNER JOIN runners r ON ir.deployment_id = r.deployment_id + INNER JOIN deployments d ON ir.deployment_id = d.id +WHERE r.state = 'assigned' + AND ir.method = $1 +` + +type GetIngressRoutesRow struct { + RunnerKey model.RunnerKey + DeploymentKey model.DeploymentKey + Endpoint string + Path string + Module string + Verb string +} + +// Get the runner endpoints corresponding to the given ingress route. 
+func (q *Queries) GetIngressRoutes(ctx context.Context, method string) ([]GetIngressRoutesRow, error) { + rows, err := q.db.QueryContext(ctx, getIngressRoutes, method) + if err != nil { + return nil, err + } + defer rows.Close() + var items []GetIngressRoutesRow + for rows.Next() { + var i GetIngressRoutesRow + if err := rows.Scan( + &i.RunnerKey, + &i.DeploymentKey, + &i.Endpoint, + &i.Path, + &i.Module, + &i.Verb, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const getLeaseInfo = `-- name: GetLeaseInfo :one +SELECT expires_at, metadata FROM leases WHERE key = $1::lease_key +` + +type GetLeaseInfoRow struct { + ExpiresAt time.Time + Metadata pqtype.NullRawMessage +} + +func (q *Queries) GetLeaseInfo(ctx context.Context, key leases.Key) (GetLeaseInfoRow, error) { + row := q.db.QueryRowContext(ctx, getLeaseInfo, key) + var i GetLeaseInfoRow + err := row.Scan(&i.ExpiresAt, &i.Metadata) + return i, err +} + +const getModulesByID = `-- name: GetModulesByID :many +SELECT id, language, name +FROM modules +WHERE id = ANY ($1::BIGINT[]) +` + +func (q *Queries) GetModulesByID(ctx context.Context, ids []int64) ([]Module, error) { + rows, err := q.db.QueryContext(ctx, getModulesByID, pq.Array(ids)) + if err != nil { + return nil, err + } + defer rows.Close() + var items []Module + for rows.Next() { + var i Module + if err := rows.Scan(&i.ID, &i.Language, &i.Name); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const getNextEventForSubscription = `-- name: GetNextEventForSubscription :one +WITH cursor AS ( + SELECT + created_at, + id + FROM topic_events + WHERE "key" = $3::topic_event_key +) +SELECT events."key" as event, + events.payload, + 
events.created_at, + events.caller, + events.request_key, + events.trace_context, + NOW() - events.created_at >= $1::interval AS ready +FROM topics +LEFT JOIN topic_events as events ON events.topic_id = topics.id +WHERE topics.key = $2::topic_key + AND (events.created_at, events.id) > (SELECT COALESCE(MAX(cursor.created_at), '1900-01-01'), COALESCE(MAX(cursor.id), 0) FROM cursor) +ORDER BY events.created_at, events.id +LIMIT 1 +` + +type GetNextEventForSubscriptionRow struct { + Event optional.Option[model.TopicEventKey] + Payload []byte + CreatedAt optional.Option[time.Time] + Caller optional.Option[string] + RequestKey optional.Option[string] + TraceContext pqtype.NullRawMessage + Ready bool +} + +func (q *Queries) GetNextEventForSubscription(ctx context.Context, consumptionDelay sqltypes.Duration, topic model.TopicKey, cursor optional.Option[model.TopicEventKey]) (GetNextEventForSubscriptionRow, error) { + row := q.db.QueryRowContext(ctx, getNextEventForSubscription, consumptionDelay, topic, cursor) + var i GetNextEventForSubscriptionRow + err := row.Scan( + &i.Event, + &i.Payload, + &i.CreatedAt, + &i.Caller, + &i.RequestKey, + &i.TraceContext, + &i.Ready, + ) + return i, err +} + +const getOnlyEncryptionKey = `-- name: GetOnlyEncryptionKey :one +SELECT key +FROM encryption_keys +WHERE id = 1 +` + +func (q *Queries) GetOnlyEncryptionKey(ctx context.Context) ([]byte, error) { + row := q.db.QueryRowContext(ctx, getOnlyEncryptionKey) + var key []byte + err := row.Scan(&key) + return key, err +} + +const getProcessList = `-- name: GetProcessList :many +SELECT d.min_replicas, + d.key AS deployment_key, + d.labels deployment_labels, + r.key AS runner_key, + r.endpoint, + r.labels AS runner_labels +FROM deployments d + LEFT JOIN runners r on d.id = r.deployment_id AND r.state != 'dead' +WHERE d.min_replicas > 0 +ORDER BY d.key +` + +type GetProcessListRow struct { + MinReplicas int32 + DeploymentKey model.DeploymentKey + DeploymentLabels json.RawMessage + RunnerKey 
optional.Option[model.RunnerKey] + Endpoint optional.Option[string] + RunnerLabels pqtype.NullRawMessage +} + +func (q *Queries) GetProcessList(ctx context.Context) ([]GetProcessListRow, error) { + rows, err := q.db.QueryContext(ctx, getProcessList) + if err != nil { + return nil, err + } + defer rows.Close() + var items []GetProcessListRow + for rows.Next() { + var i GetProcessListRow + if err := rows.Scan( + &i.MinReplicas, + &i.DeploymentKey, + &i.DeploymentLabels, + &i.RunnerKey, + &i.Endpoint, + &i.RunnerLabels, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const getRandomSubscriber = `-- name: GetRandomSubscriber :one +SELECT + subscribers.sink as sink, + subscribers.retry_attempts as retry_attempts, + subscribers.backoff as backoff, + subscribers.max_backoff as max_backoff, + subscribers.catch_verb as catch_verb +FROM topic_subscribers as subscribers +JOIN topic_subscriptions ON subscribers.topic_subscriptions_id = topic_subscriptions.id +WHERE topic_subscriptions.key = $1::subscription_key +ORDER BY RANDOM() +LIMIT 1 +` + +type GetRandomSubscriberRow struct { + Sink schema.RefKey + RetryAttempts int32 + Backoff sqltypes.Duration + MaxBackoff sqltypes.Duration + CatchVerb optional.Option[schema.RefKey] +} + +func (q *Queries) GetRandomSubscriber(ctx context.Context, key model.SubscriptionKey) (GetRandomSubscriberRow, error) { + row := q.db.QueryRowContext(ctx, getRandomSubscriber, key) + var i GetRandomSubscriberRow + err := row.Scan( + &i.Sink, + &i.RetryAttempts, + &i.Backoff, + &i.MaxBackoff, + &i.CatchVerb, + ) + return i, err +} + +const getRouteForRunner = `-- name: GetRouteForRunner :one +SELECT endpoint, r.key AS runner_key, r.module_name, d.key deployment_key, r.state +FROM runners r + LEFT JOIN deployments d on r.deployment_id = d.id +WHERE r.key = $1::runner_key +` + +type 
GetRouteForRunnerRow struct { + Endpoint string + RunnerKey model.RunnerKey + ModuleName optional.Option[string] + DeploymentKey optional.Option[model.DeploymentKey] + State RunnerState +} + +// Retrieve routing information for a runner. +func (q *Queries) GetRouteForRunner(ctx context.Context, key model.RunnerKey) (GetRouteForRunnerRow, error) { + row := q.db.QueryRowContext(ctx, getRouteForRunner, key) + var i GetRouteForRunnerRow + err := row.Scan( + &i.Endpoint, + &i.RunnerKey, + &i.ModuleName, + &i.DeploymentKey, + &i.State, + ) + return i, err +} + +const getRoutingTable = `-- name: GetRoutingTable :many +SELECT endpoint, r.key AS runner_key, r.module_name, d.key deployment_key +FROM runners r + LEFT JOIN deployments d on r.deployment_id = d.id +WHERE state = 'assigned' + AND (COALESCE(cardinality($1::TEXT[]), 0) = 0 + OR module_name = ANY ($1::TEXT[])) +` + +type GetRoutingTableRow struct { + Endpoint string + RunnerKey model.RunnerKey + ModuleName optional.Option[string] + DeploymentKey optional.Option[model.DeploymentKey] +} + +func (q *Queries) GetRoutingTable(ctx context.Context, modules []string) ([]GetRoutingTableRow, error) { + rows, err := q.db.QueryContext(ctx, getRoutingTable, pq.Array(modules)) + if err != nil { + return nil, err + } + defer rows.Close() + var items []GetRoutingTableRow + for rows.Next() { + var i GetRoutingTableRow + if err := rows.Scan( + &i.Endpoint, + &i.RunnerKey, + &i.ModuleName, + &i.DeploymentKey, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const getRunner = `-- name: GetRunner :one +SELECT DISTINCT ON (r.key) r.key AS runner_key, + r.endpoint, + r.state, + r.labels, + r.last_seen, + r.module_name, + COALESCE(CASE + WHEN r.deployment_id IS NOT NULL + THEN d.key END, NULL) AS deployment_key +FROM runners r + LEFT JOIN deployments d on d.id = 
r.deployment_id OR r.deployment_id IS NULL +WHERE r.key = $1::runner_key +` + +type GetRunnerRow struct { + RunnerKey model.RunnerKey + Endpoint string + State RunnerState + Labels json.RawMessage + LastSeen time.Time + ModuleName optional.Option[string] + DeploymentKey optional.Option[string] +} + +func (q *Queries) GetRunner(ctx context.Context, key model.RunnerKey) (GetRunnerRow, error) { + row := q.db.QueryRowContext(ctx, getRunner, key) + var i GetRunnerRow + err := row.Scan( + &i.RunnerKey, + &i.Endpoint, + &i.State, + &i.Labels, + &i.LastSeen, + &i.ModuleName, + &i.DeploymentKey, + ) + return i, err +} + +const getRunnerState = `-- name: GetRunnerState :one +SELECT state +FROM runners +WHERE key = $1::runner_key +` + +func (q *Queries) GetRunnerState(ctx context.Context, key model.RunnerKey) (RunnerState, error) { + row := q.db.QueryRowContext(ctx, getRunnerState, key) + var state RunnerState + err := row.Scan(&state) + return state, err +} + +const getRunnersForDeployment = `-- name: GetRunnersForDeployment :many +SELECT r.id, r.key, created, last_seen, reservation_timeout, state, endpoint, module_name, deployment_id, r.labels, d.id, created_at, module_id, d.key, schema, d.labels, min_replicas +FROM runners r + INNER JOIN deployments d on r.deployment_id = d.id +WHERE state = 'assigned' + AND d.key = $1::deployment_key +` + +type GetRunnersForDeploymentRow struct { + ID int64 + Key model.RunnerKey + Created time.Time + LastSeen time.Time + ReservationTimeout optional.Option[time.Time] + State RunnerState + Endpoint string + ModuleName optional.Option[string] + DeploymentID optional.Option[int64] + Labels json.RawMessage + ID_2 int64 + CreatedAt time.Time + ModuleID int64 + Key_2 model.DeploymentKey + Schema *schema.Module + Labels_2 json.RawMessage + MinReplicas int32 +} + +func (q *Queries) GetRunnersForDeployment(ctx context.Context, key model.DeploymentKey) ([]GetRunnersForDeploymentRow, error) { + rows, err := q.db.QueryContext(ctx, 
getRunnersForDeployment, key) + if err != nil { + return nil, err + } + defer rows.Close() + var items []GetRunnersForDeploymentRow + for rows.Next() { + var i GetRunnersForDeploymentRow + if err := rows.Scan( + &i.ID, + &i.Key, + &i.Created, + &i.LastSeen, + &i.ReservationTimeout, + &i.State, + &i.Endpoint, + &i.ModuleName, + &i.DeploymentID, + &i.Labels, + &i.ID_2, + &i.CreatedAt, + &i.ModuleID, + &i.Key_2, + &i.Schema, + &i.Labels_2, + &i.MinReplicas, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const getSchemaForDeployment = `-- name: GetSchemaForDeployment :one +SELECT schema FROM deployments WHERE key = $1::deployment_key +` + +func (q *Queries) GetSchemaForDeployment(ctx context.Context, key model.DeploymentKey) (*schema.Module, error) { + row := q.db.QueryRowContext(ctx, getSchemaForDeployment, key) + var schema *schema.Module + err := row.Scan(&schema) + return schema, err +} + +const getSubscription = `-- name: GetSubscription :one +WITH module AS ( + SELECT id + FROM modules + WHERE name = $2::TEXT +) +SELECT id, key, created_at, topic_id, module_id, deployment_id, name, cursor, state +FROM topic_subscriptions +WHERE name = $1::TEXT + AND module_id = (SELECT id FROM module) +` + +func (q *Queries) GetSubscription(ctx context.Context, column1 string, column2 string) (TopicSubscription, error) { + row := q.db.QueryRowContext(ctx, getSubscription, column1, column2) + var i TopicSubscription + err := row.Scan( + &i.ID, + &i.Key, + &i.CreatedAt, + &i.TopicID, + &i.ModuleID, + &i.DeploymentID, + &i.Name, + &i.Cursor, + &i.State, + ) + return i, err +} + +const getSubscriptionsNeedingUpdate = `-- name: GetSubscriptionsNeedingUpdate :many +SELECT + subs.key::subscription_key as key, + curser.key as cursor, + topics.key::topic_key as topic, + subs.name +FROM topic_subscriptions subs +LEFT JOIN 
topics ON subs.topic_id = topics.id +LEFT JOIN topic_events curser ON subs.cursor = curser.id +WHERE subs.cursor IS DISTINCT FROM topics.head + AND subs.state = 'idle' +ORDER BY curser.created_at +LIMIT 3 +FOR UPDATE OF subs SKIP LOCKED +` + +type GetSubscriptionsNeedingUpdateRow struct { + Key model.SubscriptionKey + Cursor optional.Option[model.TopicEventKey] + Topic model.TopicKey + Name string +} + +// Results may not be ready to be scheduled yet due to event consumption delay +// Sorting ensures that brand new events (that may not be ready for consumption) +// don't prevent older events from being consumed +func (q *Queries) GetSubscriptionsNeedingUpdate(ctx context.Context) ([]GetSubscriptionsNeedingUpdateRow, error) { + rows, err := q.db.QueryContext(ctx, getSubscriptionsNeedingUpdate) + if err != nil { + return nil, err + } + defer rows.Close() + var items []GetSubscriptionsNeedingUpdateRow + for rows.Next() { + var i GetSubscriptionsNeedingUpdateRow + if err := rows.Scan( + &i.Key, + &i.Cursor, + &i.Topic, + &i.Name, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const getTopic = `-- name: GetTopic :one +SELECT id, key, created_at, module_id, name, type, head +FROM topics +WHERE id = $1::BIGINT +` + +func (q *Queries) GetTopic(ctx context.Context, dollar_1 int64) (Topic, error) { + row := q.db.QueryRowContext(ctx, getTopic, dollar_1) + var i Topic + err := row.Scan( + &i.ID, + &i.Key, + &i.CreatedAt, + &i.ModuleID, + &i.Name, + &i.Type, + &i.Head, + ) + return i, err +} + +const getTopicEvent = `-- name: GetTopicEvent :one +SELECT id, created_at, key, topic_id, payload, caller, request_key, trace_context +FROM topic_events +WHERE id = $1::BIGINT +` + +func (q *Queries) GetTopicEvent(ctx context.Context, dollar_1 int64) (TopicEvent, error) { + row := q.db.QueryRowContext(ctx, getTopicEvent, 
dollar_1) + var i TopicEvent + err := row.Scan( + &i.ID, + &i.CreatedAt, + &i.Key, + &i.TopicID, + &i.Payload, + &i.Caller, + &i.RequestKey, + &i.TraceContext, + ) + return i, err +} + +const getUnscheduledCronJobs = `-- name: GetUnscheduledCronJobs :many +SELECT j.key as key, d.key as deployment_key, j.module_name as module, j.verb, j.schedule, j.start_time, j.next_execution, j.last_execution +FROM cron_jobs j + INNER JOIN deployments d on j.deployment_id = d.id +WHERE d.min_replicas > 0 + AND j.start_time < $1::TIMESTAMPTZ + AND ( + j.last_execution IS NULL + OR NOT EXISTS ( + SELECT 1 + FROM async_calls ac + WHERE + ac.cron_job_key = j.key + AND ac.scheduled_at > j.last_execution::TIMESTAMPTZ + ) + ) +FOR UPDATE SKIP LOCKED +` + +type GetUnscheduledCronJobsRow struct { + Key model.CronJobKey + DeploymentKey model.DeploymentKey + Module string + Verb string + Schedule string + StartTime time.Time + NextExecution time.Time + LastExecution optional.Option[time.Time] +} + +func (q *Queries) GetUnscheduledCronJobs(ctx context.Context, startTime time.Time) ([]GetUnscheduledCronJobsRow, error) { + rows, err := q.db.QueryContext(ctx, getUnscheduledCronJobs, startTime) + if err != nil { + return nil, err + } + defer rows.Close() + var items []GetUnscheduledCronJobsRow + for rows.Next() { + var i GetUnscheduledCronJobsRow + if err := rows.Scan( + &i.Key, + &i.DeploymentKey, + &i.Module, + &i.Verb, + &i.Schedule, + &i.StartTime, + &i.NextExecution, + &i.LastExecution, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const insertSubscriber = `-- name: InsertSubscriber :exec +INSERT INTO topic_subscribers ( + key, + topic_subscriptions_id, + deployment_id, + sink, + retry_attempts, + backoff, + max_backoff, + catch_verb +) +VALUES ( + $1::subscriber_key, + ( + SELECT topic_subscriptions.id as id + FROM 
topic_subscriptions + INNER JOIN modules ON topic_subscriptions.module_id = modules.id + WHERE modules.name = $2::TEXT + AND topic_subscriptions.name = $3::TEXT + ), + (SELECT id FROM deployments WHERE key = $4::deployment_key), + $5, + $6, + $7::interval, + $8::interval, + $9 +) +` + +type InsertSubscriberParams struct { + Key model.SubscriberKey + Module string + SubscriptionName string + Deployment model.DeploymentKey + Sink schema.RefKey + RetryAttempts int32 + Backoff sqltypes.Duration + MaxBackoff sqltypes.Duration + CatchVerb optional.Option[schema.RefKey] +} + +func (q *Queries) InsertSubscriber(ctx context.Context, arg InsertSubscriberParams) error { + _, err := q.db.ExecContext(ctx, insertSubscriber, + arg.Key, + arg.Module, + arg.SubscriptionName, + arg.Deployment, + arg.Sink, + arg.RetryAttempts, + arg.Backoff, + arg.MaxBackoff, + arg.CatchVerb, + ) + return err +} + +const insertTimelineCallEvent = `-- name: InsertTimelineCallEvent :exec +INSERT INTO timeline ( + deployment_id, + request_id, + parent_request_id, + time_stamp, + type, + custom_key_1, + custom_key_2, + custom_key_3, + custom_key_4, + payload +) +VALUES ( + (SELECT id FROM deployments WHERE deployments.key = $1::deployment_key), + (CASE + WHEN $2::TEXT IS NULL THEN NULL + ELSE (SELECT id FROM requests ir WHERE ir.key = $2::TEXT) + END), + (CASE + WHEN $3::TEXT IS NULL THEN NULL + ELSE (SELECT id FROM requests ir WHERE ir.key = $3::TEXT) + END), + $4::TIMESTAMPTZ, + 'call', + $5::TEXT, + $6::TEXT, + $7::TEXT, + $8::TEXT, + $9 +) +` + +type InsertTimelineCallEventParams struct { + DeploymentKey model.DeploymentKey + RequestKey optional.Option[string] + ParentRequestKey optional.Option[string] + TimeStamp time.Time + SourceModule optional.Option[string] + SourceVerb optional.Option[string] + DestModule string + DestVerb string + Payload []byte +} + +func (q *Queries) InsertTimelineCallEvent(ctx context.Context, arg InsertTimelineCallEventParams) error { + _, err := q.db.ExecContext(ctx, 
insertTimelineCallEvent, + arg.DeploymentKey, + arg.RequestKey, + arg.ParentRequestKey, + arg.TimeStamp, + arg.SourceModule, + arg.SourceVerb, + arg.DestModule, + arg.DestVerb, + arg.Payload, + ) + return err +} + +const insertTimelineDeploymentCreatedEvent = `-- name: InsertTimelineDeploymentCreatedEvent :exec +INSERT INTO timeline ( + deployment_id, + type, + custom_key_1, + custom_key_2, + payload +) +VALUES ( + ( + SELECT id + FROM deployments + WHERE deployments.key = $1::deployment_key + ), + 'deployment_created', + $2::TEXT, + $3::TEXT, + $4 +) +` + +type InsertTimelineDeploymentCreatedEventParams struct { + DeploymentKey model.DeploymentKey + Language string + ModuleName string + Payload []byte +} + +func (q *Queries) InsertTimelineDeploymentCreatedEvent(ctx context.Context, arg InsertTimelineDeploymentCreatedEventParams) error { + _, err := q.db.ExecContext(ctx, insertTimelineDeploymentCreatedEvent, + arg.DeploymentKey, + arg.Language, + arg.ModuleName, + arg.Payload, + ) + return err +} + +const insertTimelineDeploymentUpdatedEvent = `-- name: InsertTimelineDeploymentUpdatedEvent :exec +INSERT INTO timeline ( + deployment_id, + type, + custom_key_1, + custom_key_2, + payload +) +VALUES ( + ( + SELECT id + FROM deployments + WHERE deployments.key = $1::deployment_key + ), + 'deployment_updated', + $2::TEXT, + $3::TEXT, + $4 +) +` + +type InsertTimelineDeploymentUpdatedEventParams struct { + DeploymentKey model.DeploymentKey + Language string + ModuleName string + Payload []byte +} + +func (q *Queries) InsertTimelineDeploymentUpdatedEvent(ctx context.Context, arg InsertTimelineDeploymentUpdatedEventParams) error { + _, err := q.db.ExecContext(ctx, insertTimelineDeploymentUpdatedEvent, + arg.DeploymentKey, + arg.Language, + arg.ModuleName, + arg.Payload, + ) + return err +} + +const insertTimelineEvent = `-- name: InsertTimelineEvent :exec +INSERT INTO timeline (deployment_id, request_id, parent_request_id, type, + custom_key_1, custom_key_2, custom_key_3, 
custom_key_4, + payload) +VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) +RETURNING id +` + +type InsertTimelineEventParams struct { + DeploymentID int64 + RequestID optional.Option[int64] + ParentRequestID optional.Option[string] + Type EventType + CustomKey1 optional.Option[string] + CustomKey2 optional.Option[string] + CustomKey3 optional.Option[string] + CustomKey4 optional.Option[string] + Payload []byte +} + +func (q *Queries) InsertTimelineEvent(ctx context.Context, arg InsertTimelineEventParams) error { + _, err := q.db.ExecContext(ctx, insertTimelineEvent, + arg.DeploymentID, + arg.RequestID, + arg.ParentRequestID, + arg.Type, + arg.CustomKey1, + arg.CustomKey2, + arg.CustomKey3, + arg.CustomKey4, + arg.Payload, + ) + return err +} + +const insertTimelineLogEvent = `-- name: InsertTimelineLogEvent :exec +INSERT INTO timeline ( + deployment_id, + request_id, + time_stamp, + custom_key_1, + type, + payload +) +VALUES ( + (SELECT id FROM deployments d WHERE d.key = $1::deployment_key LIMIT 1), + ( + CASE + WHEN $2::TEXT IS NULL THEN NULL + ELSE (SELECT id FROM requests ir WHERE ir.key = $2::TEXT LIMIT 1) + END + ), + $3::TIMESTAMPTZ, + $4::INT, + 'log', + $5 +) +` + +type InsertTimelineLogEventParams struct { + DeploymentKey model.DeploymentKey + RequestKey optional.Option[string] + TimeStamp time.Time + Level int32 + Payload []byte +} + +func (q *Queries) InsertTimelineLogEvent(ctx context.Context, arg InsertTimelineLogEventParams) error { + _, err := q.db.ExecContext(ctx, insertTimelineLogEvent, + arg.DeploymentKey, + arg.RequestKey, + arg.TimeStamp, + arg.Level, + arg.Payload, + ) + return err +} + +const isCronJobPending = `-- name: IsCronJobPending :one +SELECT EXISTS ( + SELECT 1 + FROM async_calls ac + WHERE ac.cron_job_key = $1::cron_job_key + AND ac.scheduled_at > $2::TIMESTAMPTZ + AND ac.state = 'pending' +) AS pending +` + +func (q *Queries) IsCronJobPending(ctx context.Context, key model.CronJobKey, startTime time.Time) (bool, error) { + row := 
q.db.QueryRowContext(ctx, isCronJobPending, key, startTime) + var pending bool + err := row.Scan(&pending) + return pending, err +} + +const killStaleControllers = `-- name: KillStaleControllers :one +WITH matches AS ( + UPDATE controller + SET state = 'dead' + WHERE state <> 'dead' AND last_seen < (NOW() AT TIME ZONE 'utc') - $1::INTERVAL + RETURNING 1) +SELECT COUNT(*) +FROM matches +` + +// Mark any controller entries that haven't been updated recently as dead. +func (q *Queries) KillStaleControllers(ctx context.Context, timeout sqltypes.Duration) (int64, error) { + row := q.db.QueryRowContext(ctx, killStaleControllers, timeout) + var count int64 + err := row.Scan(&count) + return count, err +} + +const killStaleRunners = `-- name: KillStaleRunners :one +WITH matches AS ( + UPDATE runners + SET state = 'dead', + deployment_id = NULL + WHERE state <> 'dead' AND last_seen < (NOW() AT TIME ZONE 'utc') - $1::INTERVAL + RETURNING 1) +SELECT COUNT(*) +FROM matches +` + +func (q *Queries) KillStaleRunners(ctx context.Context, timeout sqltypes.Duration) (int64, error) { + row := q.db.QueryRowContext(ctx, killStaleRunners, timeout) + var count int64 + err := row.Scan(&count) + return count, err +} + +const loadAsyncCall = `-- name: LoadAsyncCall :one +SELECT id, created_at, lease_id, verb, state, origin, scheduled_at, request, response, error, remaining_attempts, backoff, max_backoff, catch_verb, catching, parent_request_key, trace_context, cron_job_key +FROM async_calls +WHERE id = $1 +` + +func (q *Queries) LoadAsyncCall(ctx context.Context, id int64) (AsyncCall, error) { + row := q.db.QueryRowContext(ctx, loadAsyncCall, id) + var i AsyncCall + err := row.Scan( + &i.ID, + &i.CreatedAt, + &i.LeaseID, + &i.Verb, + &i.State, + &i.Origin, + &i.ScheduledAt, + &i.Request, + &i.Response, + &i.Error, + &i.RemainingAttempts, + &i.Backoff, + &i.MaxBackoff, + &i.CatchVerb, + &i.Catching, + &i.ParentRequestKey, + &i.TraceContext, + &i.CronJobKey, + ) + return i, err +} + +const 
newLease = `-- name: NewLease :one +INSERT INTO leases ( + idempotency_key, + key, + expires_at, + metadata +) +VALUES ( + gen_random_uuid(), + $1::lease_key, + (NOW() AT TIME ZONE 'utc') + $2::interval, + $3::JSONB +) +RETURNING idempotency_key +` + +func (q *Queries) NewLease(ctx context.Context, key leases.Key, ttl sqltypes.Duration, metadata pqtype.NullRawMessage) (uuid.UUID, error) { + row := q.db.QueryRowContext(ctx, newLease, key, ttl, metadata) + var idempotency_key uuid.UUID + err := row.Scan(&idempotency_key) + return idempotency_key, err +} + +const popNextFSMEvent = `-- name: PopNextFSMEvent :one +DELETE FROM fsm_next_event +WHERE fsm_instance_id = ( + SELECT id + FROM fsm_instances + WHERE fsm = $1::schema_ref AND key = $2 +) +RETURNING id, created_at, fsm_instance_id, next_state, request, request_type +` + +func (q *Queries) PopNextFSMEvent(ctx context.Context, fsm schema.RefKey, instanceKey string) (FsmNextEvent, error) { + row := q.db.QueryRowContext(ctx, popNextFSMEvent, fsm, instanceKey) + var i FsmNextEvent + err := row.Scan( + &i.ID, + &i.CreatedAt, + &i.FsmInstanceID, + &i.NextState, + &i.Request, + &i.RequestType, + ) + return i, err +} + +const publishEventForTopic = `-- name: PublishEventForTopic :exec +INSERT INTO topic_events ( + "key", + topic_id, + caller, + payload, + request_key, + trace_context + ) +VALUES ( + $1::topic_event_key, + ( + SELECT topics.id + FROM topics + INNER JOIN modules ON topics.module_id = modules.id + WHERE modules.name = $2::TEXT + AND topics.name = $3::TEXT + ), + $4::TEXT, + $5, + $6::TEXT, + $7::jsonb +) +` + +type PublishEventForTopicParams struct { + Key model.TopicEventKey + Module string + Topic string + Caller string + Payload []byte + RequestKey string + TraceContext json.RawMessage +} + +func (q *Queries) PublishEventForTopic(ctx context.Context, arg PublishEventForTopicParams) error { + _, err := q.db.ExecContext(ctx, publishEventForTopic, + arg.Key, + arg.Module, + arg.Topic, + arg.Caller, + 
arg.Payload, + arg.RequestKey, + arg.TraceContext, + ) + return err +} + +const releaseLease = `-- name: ReleaseLease :one +DELETE FROM leases +WHERE idempotency_key = $1 AND key = $2::lease_key +RETURNING true +` + +func (q *Queries) ReleaseLease(ctx context.Context, idempotencyKey uuid.UUID, key leases.Key) (bool, error) { + row := q.db.QueryRowContext(ctx, releaseLease, idempotencyKey, key) + var column_1 bool + err := row.Scan(&column_1) + return column_1, err +} + +const renewLease = `-- name: RenewLease :one +UPDATE leases +SET expires_at = (NOW() AT TIME ZONE 'utc') + $1::interval +WHERE idempotency_key = $2 AND key = $3::lease_key +RETURNING true +` + +func (q *Queries) RenewLease(ctx context.Context, ttl sqltypes.Duration, idempotencyKey uuid.UUID, key leases.Key) (bool, error) { + row := q.db.QueryRowContext(ctx, renewLease, ttl, idempotencyKey, key) + var column_1 bool + err := row.Scan(&column_1) + return column_1, err +} + +const reserveRunner = `-- name: ReserveRunner :one +UPDATE runners +SET state = 'reserved', + reservation_timeout = $1::timestamptz, + -- If a deployment is not found, then the deployment ID is -1 + -- and the update will fail due to a FK constraint. + deployment_id = COALESCE((SELECT id + FROM deployments d + WHERE d.key = $2::deployment_key + LIMIT 1), -1) +WHERE id = (SELECT id + FROM runners r + WHERE r.state = 'idle' + AND r.labels @> $3::jsonb + LIMIT 1 FOR UPDATE SKIP LOCKED) +RETURNING runners.id, runners.key, runners.created, runners.last_seen, runners.reservation_timeout, runners.state, runners.endpoint, runners.module_name, runners.deployment_id, runners.labels +` + +// Find an idle runner and reserve it for the given deployment. 
+func (q *Queries) ReserveRunner(ctx context.Context, reservationTimeout time.Time, deploymentKey model.DeploymentKey, labels json.RawMessage) (Runner, error) { + row := q.db.QueryRowContext(ctx, reserveRunner, reservationTimeout, deploymentKey, labels) + var i Runner + err := row.Scan( + &i.ID, + &i.Key, + &i.Created, + &i.LastSeen, + &i.ReservationTimeout, + &i.State, + &i.Endpoint, + &i.ModuleName, + &i.DeploymentID, + &i.Labels, + ) + return i, err +} + +const setDeploymentDesiredReplicas = `-- name: SetDeploymentDesiredReplicas :exec +UPDATE deployments +SET min_replicas = $2 +WHERE key = $1::deployment_key +RETURNING 1 +` + +func (q *Queries) SetDeploymentDesiredReplicas(ctx context.Context, key model.DeploymentKey, minReplicas int32) error { + _, err := q.db.ExecContext(ctx, setDeploymentDesiredReplicas, key, minReplicas) + return err +} + +const setNextFSMEvent = `-- name: SetNextFSMEvent :one +INSERT INTO fsm_next_event (fsm_instance_id, next_state, request, request_type) +VALUES ( + (SELECT id FROM fsm_instances WHERE fsm = $1::schema_ref AND key = $2), + $3, + $4, + $5::schema_type +) +RETURNING id +` + +type SetNextFSMEventParams struct { + Fsm schema.RefKey + InstanceKey string + Event schema.RefKey + Request []byte + RequestType Type +} + +func (q *Queries) SetNextFSMEvent(ctx context.Context, arg SetNextFSMEventParams) (int64, error) { + row := q.db.QueryRowContext(ctx, setNextFSMEvent, + arg.Fsm, + arg.InstanceKey, + arg.Event, + arg.Request, + arg.RequestType, + ) + var id int64 + err := row.Scan(&id) + return id, err +} + +const setSubscriptionCursor = `-- name: SetSubscriptionCursor :exec +WITH event AS ( + SELECT id, created_at, key, topic_id, payload + FROM topic_events + WHERE "key" = $2::topic_event_key +) +UPDATE topic_subscriptions +SET cursor = (SELECT id FROM event) +WHERE key = $1::subscription_key +` + +func (q *Queries) SetSubscriptionCursor(ctx context.Context, column1 model.SubscriptionKey, column2 model.TopicEventKey) error { + _, 
err := q.db.ExecContext(ctx, setSubscriptionCursor, column1, column2) + return err +} + +const startFSMTransition = `-- name: StartFSMTransition :one +INSERT INTO fsm_instances ( + fsm, + key, + destination_state, + async_call_id +) VALUES ( + $1, + $2, + $3::schema_ref, + $4::BIGINT +) +ON CONFLICT(fsm, key) DO +UPDATE SET + destination_state = $3::schema_ref, + async_call_id = $4::BIGINT, + updated_at = NOW() AT TIME ZONE 'utc' +WHERE + fsm_instances.async_call_id IS NULL + AND fsm_instances.destination_state IS NULL +RETURNING id, created_at, fsm, key, status, current_state, destination_state, async_call_id, updated_at +` + +type StartFSMTransitionParams struct { + Fsm schema.RefKey + Key string + DestinationState schema.RefKey + AsyncCallID int64 +} + +// Start a new FSM transition, populating the destination state and async call ID. +// +// "key" is the unique identifier for the FSM execution. +func (q *Queries) StartFSMTransition(ctx context.Context, arg StartFSMTransitionParams) (FsmInstance, error) { + row := q.db.QueryRowContext(ctx, startFSMTransition, + arg.Fsm, + arg.Key, + arg.DestinationState, + arg.AsyncCallID, + ) + var i FsmInstance + err := row.Scan( + &i.ID, + &i.CreatedAt, + &i.Fsm, + &i.Key, + &i.Status, + &i.CurrentState, + &i.DestinationState, + &i.AsyncCallID, + &i.UpdatedAt, + ) + return i, err +} + +const succeedAsyncCall = `-- name: SucceedAsyncCall :one +UPDATE async_calls +SET + state = 'success'::async_call_state, + response = $1, + error = null +WHERE id = $2 +RETURNING true +` + +func (q *Queries) SucceedAsyncCall(ctx context.Context, response []byte, iD int64) (bool, error) { + row := q.db.QueryRowContext(ctx, succeedAsyncCall, response, iD) + var column_1 bool + err := row.Scan(&column_1) + return column_1, err +} + +const succeedFSMInstance = `-- name: SucceedFSMInstance :one +UPDATE fsm_instances +SET + current_state = destination_state, + destination_state = NULL, + async_call_id = NULL, + status = 'completed'::fsm_status, + 
updated_at = NOW() AT TIME ZONE 'utc' +WHERE + fsm = $1::schema_ref AND key = $2::TEXT +RETURNING true +` + +func (q *Queries) SucceedFSMInstance(ctx context.Context, fsm schema.RefKey, key string) (bool, error) { + row := q.db.QueryRowContext(ctx, succeedFSMInstance, fsm, key) + var column_1 bool + err := row.Scan(&column_1) + return column_1, err +} + +const updateCronJobExecution = `-- name: UpdateCronJobExecution :exec +UPDATE cron_jobs + SET last_execution = $1::TIMESTAMPTZ, + next_execution = $2::TIMESTAMPTZ + WHERE key = $3::cron_job_key +` + +func (q *Queries) UpdateCronJobExecution(ctx context.Context, lastExecution time.Time, nextExecution time.Time, key model.CronJobKey) error { + _, err := q.db.ExecContext(ctx, updateCronJobExecution, lastExecution, nextExecution, key) + return err +} + +const upsertController = `-- name: UpsertController :one +INSERT INTO controller (key, endpoint) +VALUES ($1, $2) +ON CONFLICT (key) DO UPDATE SET state = 'live', + endpoint = $2, + last_seen = NOW() AT TIME ZONE 'utc' +RETURNING id +` + +func (q *Queries) UpsertController(ctx context.Context, key model.ControllerKey, endpoint string) (int64, error) { + row := q.db.QueryRowContext(ctx, upsertController, key, endpoint) + var id int64 + err := row.Scan(&id) + return id, err +} + +const upsertModule = `-- name: UpsertModule :one +INSERT INTO modules (language, name) +VALUES ($1, $2) +ON CONFLICT (name) DO UPDATE SET language = $1 +RETURNING id +` + +func (q *Queries) UpsertModule(ctx context.Context, language string, name string) (int64, error) { + row := q.db.QueryRowContext(ctx, upsertModule, language, name) + var id int64 + err := row.Scan(&id) + return id, err +} + +const upsertRunner = `-- name: UpsertRunner :one +WITH deployment_rel AS ( + SELECT CASE + WHEN $5::deployment_key IS NULL + THEN NULL + ELSE COALESCE((SELECT id + FROM deployments d + WHERE d.key = $5::deployment_key + LIMIT 1), -1) END AS id) +INSERT +INTO runners (key, endpoint, state, labels, 
deployment_id, last_seen) +VALUES ($1, + $2, + $3, + $4, + (SELECT id FROM deployment_rel), + NOW() AT TIME ZONE 'utc') +ON CONFLICT (key) DO UPDATE SET endpoint = $2, + state = $3, + labels = $4, + deployment_id = (SELECT id FROM deployment_rel), + last_seen = NOW() AT TIME ZONE 'utc' +RETURNING deployment_id +` + +type UpsertRunnerParams struct { + Key model.RunnerKey + Endpoint string + State RunnerState + Labels json.RawMessage + DeploymentKey optional.Option[model.DeploymentKey] +} + +// Upsert a runner and return the deployment ID that it is assigned to, if any. +// If the deployment key is null, then deployment_rel.id will be null, +// otherwise we try to retrieve the deployments.id using the key. If +// there is no corresponding deployment, then the deployment ID is -1 +// and the parent statement will fail due to a foreign key constraint. +func (q *Queries) UpsertRunner(ctx context.Context, arg UpsertRunnerParams) (optional.Option[int64], error) { + row := q.db.QueryRowContext(ctx, upsertRunner, + arg.Key, + arg.Endpoint, + arg.State, + arg.Labels, + arg.DeploymentKey, + ) + var deployment_id optional.Option[int64] + err := row.Scan(&deployment_id) + return deployment_id, err +} + +const upsertSubscription = `-- name: UpsertSubscription :one +INSERT INTO topic_subscriptions ( + key, + topic_id, + module_id, + deployment_id, + name) +VALUES ( + $1::subscription_key, + ( + SELECT topics.id as id + FROM topics + INNER JOIN modules ON topics.module_id = modules.id + WHERE modules.name = $2::TEXT + AND topics.name = $3::TEXT + ), + (SELECT id FROM modules WHERE name = $4::TEXT), + (SELECT id FROM deployments WHERE key = $5::deployment_key), + $6::TEXT +) +ON CONFLICT (name, module_id) DO +UPDATE SET + topic_id = excluded.topic_id, + deployment_id = (SELECT id FROM deployments WHERE key = $5::deployment_key) +RETURNING + id, + CASE + WHEN xmax = 0 THEN true + ELSE false + END AS inserted +` + +type UpsertSubscriptionParams struct { + Key model.SubscriptionKey + 
TopicModule string + TopicName string + Module string + Deployment model.DeploymentKey + Name string +} + +type UpsertSubscriptionRow struct { + ID int64 + Inserted bool +} + +func (q *Queries) UpsertSubscription(ctx context.Context, arg UpsertSubscriptionParams) (UpsertSubscriptionRow, error) { + row := q.db.QueryRowContext(ctx, upsertSubscription, + arg.Key, + arg.TopicModule, + arg.TopicName, + arg.Module, + arg.Deployment, + arg.Name, + ) + var i UpsertSubscriptionRow + err := row.Scan(&i.ID, &i.Inserted) + return i, err +} + +const upsertTopic = `-- name: UpsertTopic :exec +INSERT INTO topics (key, module_id, name, type) +VALUES ( + $1::topic_key, + (SELECT id FROM modules WHERE name = $2::TEXT LIMIT 1), + $3::TEXT, + $4::TEXT +) +ON CONFLICT (name, module_id) DO +UPDATE SET + type = $4::TEXT +RETURNING id +` + +type UpsertTopicParams struct { + Topic model.TopicKey + Module string + Name string + EventType string +} + +func (q *Queries) UpsertTopic(ctx context.Context, arg UpsertTopicParams) error { + _, err := q.db.ExecContext(ctx, upsertTopic, + arg.Topic, + arg.Module, + arg.Name, + arg.EventType, + ) + return err +} diff --git a/backend/controller/cronjobs/state.go b/backend/controller/cronjobs/state.go deleted file mode 100644 index cda09241d1..0000000000 --- a/backend/controller/cronjobs/state.go +++ /dev/null @@ -1,83 +0,0 @@ -package cronjobs - -import ( - "time" - - "github.com/alecthomas/types/optional" - - "github.com/TBD54566975/ftl/internal/model" - "github.com/TBD54566975/ftl/internal/slices" -) - -// state models the state of the cron job service's private state for scheduling jobs and reacting to events -type state struct { - jobs []model.CronJob - - // Used to determine if this controller is currently executing a job - executing map[string]bool - - // Newly created jobs should be attempted by the controller that created them until other controllers - // have a chance to resync their job lists and share responsibilities through the hash ring - 
newJobs map[string]time.Time - - // We delay any job attempts in case of db errors to avoid hammering the db in a tight loop - blockedUntil time.Time -} - -func (s *state) isExecutingInCurrentController(job model.CronJob) bool { - return s.executing[job.Key.String()] -} - -func (s *state) startedExecutingJob(job model.CronJob) { - s.executing[job.Key.String()] = true -} - -func (s *state) isJobTooNewForHashRing(job model.CronJob) bool { - if t, ok := s.newJobs[job.Key.String()]; ok { - if time.Since(t) < newJobHashRingOverrideInterval { - return true - } - delete(s.newJobs, job.Key.String()) - } - return false -} - -func (s *state) sync(jobs []model.CronJob, newDeploymentKey optional.Option[model.DeploymentKey]) { - s.jobs = make([]model.CronJob, len(jobs)) - copy(s.jobs, jobs) - for _, job := range s.jobs { - if job.State != model.CronJobStateExecuting { - delete(s.executing, job.Key.String()) - } - if newKey, ok := newDeploymentKey.Get(); ok && job.DeploymentKey.String() == newKey.String() { - // This job is new and should be attempted by the current controller - s.newJobs[job.Key.String()] = time.Now() - } - } -} - -func (s *state) updateJobs(jobs []model.CronJob) { - updatedJobMap := jobMap(jobs) - for idx, old := range s.jobs { - if updated, exists := updatedJobMap[old.Key.String()]; exists { - s.jobs[idx] = updated - if updated.State != model.CronJobStateExecuting { - delete(s.executing, updated.Key.String()) - } - } - } -} - -func (s *state) removeDeploymentKey(key model.DeploymentKey) { - s.jobs = slices.Filter(s.jobs, func(j model.CronJob) bool { - return j.DeploymentKey.String() != key.String() - }) -} - -func jobMap(jobs []model.CronJob) map[string]model.CronJob { - m := map[string]model.CronJob{} - for _, job := range jobs { - m[job.Key.String()] = job - } - return m -} diff --git a/backend/controller/dal/async_calls.go b/backend/controller/dal/async_calls.go index e4ab2f4688..277189e683 100644 --- a/backend/controller/dal/async_calls.go +++ 
b/backend/controller/dal/async_calls.go @@ -22,7 +22,7 @@ type asyncOriginParseRoot struct { } var asyncOriginParser = participle.MustBuild[asyncOriginParseRoot]( - participle.Union[AsyncOrigin](AsyncOriginFSM{}, AsyncOriginPubSub{}), + participle.Union[AsyncOrigin](AsyncOriginCron{}, AsyncOriginFSM{}, AsyncOriginPubSub{}), ) // AsyncOrigin is a sum type representing the originator of an async call. @@ -35,6 +35,19 @@ type AsyncOrigin interface { String() string } +// AsyncOriginCron represents the context for the originator of a cron async call. +// +// It is in the form cron:<cron_job_key> +type AsyncOriginCron struct { + CronJobKey string `parser:"'cron' ':' @(~EOF)+"` +} + +var _ AsyncOrigin = AsyncOriginCron{} + +func (AsyncOriginCron) asyncOrigin() {} +func (a AsyncOriginCron) Origin() string { return "cron" } +func (a AsyncOriginCron) String() string { return fmt.Sprintf("cron:%s", a.CronJobKey) } + // AsyncOriginFSM represents the context for the originator of an FSM async call. // // It is in the form fsm:<module>.<name>:<key> diff --git a/backend/controller/sql/models.go b/backend/controller/sql/models.go index 0c59dbef5c..883d140976 100644 --- a/backend/controller/sql/models.go +++ b/backend/controller/sql/models.go @@ -388,6 +388,7 @@ type AsyncCall struct { Catching bool ParentRequestKey optional.Option[string] TraceContext pqtype.NullRawMessage + CronJobKey optional.Option[model.CronJobKey] } type Controller struct { @@ -407,8 +408,8 @@ type CronJob struct { Schedule string StartTime time.Time NextExecution time.Time - State model.CronJobState ModuleName string + LastExecution optional.Option[time.Time] } type Deployment struct { diff --git a/backend/controller/sql/querier.go b/backend/controller/sql/querier.go index a48ae4034b..19868fd5b9 100644 --- a/backend/controller/sql/querier.go +++ b/backend/controller/sql/querier.go @@ -38,7 +38,6 @@ type Querier interface { DeleteSubscribers(ctx context.Context, deployment model.DeploymentKey) ([]model.SubscriberKey, error)
DeleteSubscriptions(ctx context.Context, deployment model.DeploymentKey) ([]model.SubscriptionKey, error) DeregisterRunner(ctx context.Context, key model.RunnerKey) (int64, error) - EndCronJob(ctx context.Context, nextExecution time.Time, key model.CronJobKey, startTime time.Time) (EndCronJobRow, error) ExpireLeases(ctx context.Context) (int64, error) ExpireRunnerReservations(ctx context.Context) (int64, error) FailAsyncCall(ctx context.Context, error string, iD int64) (bool, error) @@ -54,7 +53,7 @@ type Querier interface { GetArtefactContentRange(ctx context.Context, start int32, count int32, iD int64) ([]byte, error) // Return the digests that exist in the database. GetArtefactDigests(ctx context.Context, digests [][]byte) ([]GetArtefactDigestsRow, error) - GetCronJobs(ctx context.Context) ([]GetCronJobsRow, error) + GetCronJobByKey(ctx context.Context, key model.CronJobKey) (GetCronJobByKeyRow, error) GetDeployment(ctx context.Context, key model.DeploymentKey) (GetDeploymentRow, error) // Get all artefacts matching the given digests. 
GetDeploymentArtefacts(ctx context.Context, deploymentID int64) ([]GetDeploymentArtefactsRow, error) @@ -82,7 +81,6 @@ type Querier interface { GetRunnerState(ctx context.Context, key model.RunnerKey) (RunnerState, error) GetRunnersForDeployment(ctx context.Context, key model.DeploymentKey) ([]GetRunnersForDeploymentRow, error) GetSchemaForDeployment(ctx context.Context, key model.DeploymentKey) (*schema.Module, error) - GetStaleCronJobs(ctx context.Context, dollar_1 sqltypes.Duration) ([]GetStaleCronJobsRow, error) GetSubscription(ctx context.Context, column1 string, column2 string) (TopicSubscription, error) // Results may not be ready to be scheduled yet due to event consumption delay // Sorting ensures that brand new events (that may not be ready for consumption) @@ -90,12 +88,14 @@ type Querier interface { GetSubscriptionsNeedingUpdate(ctx context.Context) ([]GetSubscriptionsNeedingUpdateRow, error) GetTopic(ctx context.Context, dollar_1 int64) (Topic, error) GetTopicEvent(ctx context.Context, dollar_1 int64) (TopicEvent, error) + GetUnscheduledCronJobs(ctx context.Context, startTime time.Time) ([]GetUnscheduledCronJobsRow, error) InsertSubscriber(ctx context.Context, arg InsertSubscriberParams) error InsertTimelineCallEvent(ctx context.Context, arg InsertTimelineCallEventParams) error InsertTimelineDeploymentCreatedEvent(ctx context.Context, arg InsertTimelineDeploymentCreatedEventParams) error InsertTimelineDeploymentUpdatedEvent(ctx context.Context, arg InsertTimelineDeploymentUpdatedEventParams) error InsertTimelineEvent(ctx context.Context, arg InsertTimelineEventParams) error InsertTimelineLogEvent(ctx context.Context, arg InsertTimelineLogEventParams) error + IsCronJobPending(ctx context.Context, key model.CronJobKey, startTime time.Time) (bool, error) // Mark any controller entries that haven't been updated recently as dead. 
KillStaleControllers(ctx context.Context, timeout sqltypes.Duration) (int64, error) KillStaleRunners(ctx context.Context, timeout sqltypes.Duration) (int64, error) @@ -110,13 +110,13 @@ type Querier interface { SetDeploymentDesiredReplicas(ctx context.Context, key model.DeploymentKey, minReplicas int32) error SetNextFSMEvent(ctx context.Context, arg SetNextFSMEventParams) (int64, error) SetSubscriptionCursor(ctx context.Context, column1 model.SubscriptionKey, column2 model.TopicEventKey) error - StartCronJobs(ctx context.Context, keys []string) ([]StartCronJobsRow, error) // Start a new FSM transition, populating the destination state and async call ID. // // "key" is the unique identifier for the FSM execution. StartFSMTransition(ctx context.Context, arg StartFSMTransitionParams) (FsmInstance, error) SucceedAsyncCall(ctx context.Context, response []byte, iD int64) (bool, error) SucceedFSMInstance(ctx context.Context, fsm schema.RefKey, key string) (bool, error) + UpdateCronJobExecution(ctx context.Context, lastExecution time.Time, nextExecution time.Time, key model.CronJobKey) error UpsertController(ctx context.Context, key model.ControllerKey, endpoint string) (int64, error) UpsertModule(ctx context.Context, language string, name string) (int64, error) // Upsert a runner and return the deployment ID that it is assigned to, if any. 
diff --git a/backend/controller/sql/queries.sql b/backend/controller/sql/queries.sql index 705667f21c..81fdfd1f2f 100644 --- a/backend/controller/sql/queries.sql +++ b/backend/controller/sql/queries.sql @@ -471,6 +471,7 @@ SELECT expires_at, metadata FROM leases WHERE key = @key::lease_key; -- name: CreateAsyncCall :one INSERT INTO async_calls ( + scheduled_at, verb, origin, request, @@ -479,9 +480,11 @@ INSERT INTO async_calls ( max_backoff, catch_verb, parent_request_key, - trace_context + trace_context, + cron_job_key ) VALUES ( + @scheduled_at::TIMESTAMPTZ, @verb, @origin, @request, @@ -490,7 +493,8 @@ VALUES ( @max_backoff::interval, @catch_verb, @parent_request_key, - @trace_context::jsonb + @trace_context::jsonb, + @cron_job_key ) RETURNING id; diff --git a/backend/controller/sql/queries.sql.go b/backend/controller/sql/queries.sql.go index 84d5ca7f5d..bc48f6fb4a 100644 --- a/backend/controller/sql/queries.sql.go +++ b/backend/controller/sql/queries.sql.go @@ -190,6 +190,7 @@ func (q *Queries) CreateArtefact(ctx context.Context, digest []byte, content []b const createAsyncCall = `-- name: CreateAsyncCall :one INSERT INTO async_calls ( + scheduled_at, verb, origin, request, @@ -198,23 +199,27 @@ INSERT INTO async_calls ( max_backoff, catch_verb, parent_request_key, - trace_context + trace_context, + cron_job_key ) VALUES ( - $1, + $1::TIMESTAMPTZ, $2, $3, $4, - $5::interval, + $5, $6::interval, - $7, + $7::interval, $8, - $9::jsonb + $9, + $10::jsonb, + $11 ) RETURNING id ` type CreateAsyncCallParams struct { + ScheduledAt time.Time Verb schema.RefKey Origin string Request []byte @@ -224,10 +229,12 @@ type CreateAsyncCallParams struct { CatchVerb optional.Option[schema.RefKey] ParentRequestKey optional.Option[string] TraceContext json.RawMessage + CronJobKey optional.Option[model.CronJobKey] } func (q *Queries) CreateAsyncCall(ctx context.Context, arg CreateAsyncCallParams) (int64, error) { row := q.db.QueryRowContext(ctx, createAsyncCall, + arg.ScheduledAt, 
arg.Verb, arg.Origin, arg.Request, @@ -237,6 +244,7 @@ func (q *Queries) CreateAsyncCall(ctx context.Context, arg CreateAsyncCallParams arg.CatchVerb, arg.ParentRequestKey, arg.TraceContext, + arg.CronJobKey, ) var id int64 err := row.Scan(&id) @@ -434,49 +442,6 @@ func (q *Queries) DeregisterRunner(ctx context.Context, key model.RunnerKey) (in return count, err } -const endCronJob = `-- name: EndCronJob :one -WITH j AS ( -UPDATE cron_jobs - SET state = 'idle', - next_execution = $1::TIMESTAMPTZ - WHERE key = $2::cron_job_key - AND state = 'executing' - AND start_time = $3::TIMESTAMPTZ - RETURNING id, key, deployment_id, verb, schedule, start_time, next_execution, state, module_name -) -SELECT j.key as key, d.key as deployment_key, j.module_name as module, j.verb, j.schedule, j.start_time, j.next_execution, j.state - FROM j - INNER JOIN deployments d on j.deployment_id = d.id - LIMIT 1 -` - -type EndCronJobRow struct { - Key model.CronJobKey - DeploymentKey model.DeploymentKey - Module string - Verb string - Schedule string - StartTime time.Time - NextExecution time.Time - State model.CronJobState -} - -func (q *Queries) EndCronJob(ctx context.Context, nextExecution time.Time, key model.CronJobKey, startTime time.Time) (EndCronJobRow, error) { - row := q.db.QueryRowContext(ctx, endCronJob, nextExecution, key, startTime) - var i EndCronJobRow - err := row.Scan( - &i.Key, - &i.DeploymentKey, - &i.Module, - &i.Verb, - &i.Schedule, - &i.StartTime, - &i.NextExecution, - &i.State, - ) - return i, err -} - const expireLeases = `-- name: ExpireLeases :one WITH expired AS ( DELETE FROM leases @@ -536,7 +501,7 @@ WITH updated AS ( SET state = 'error'::async_call_state, error = $7::TEXT WHERE id = $8::BIGINT - RETURNING id, created_at, lease_id, verb, state, origin, scheduled_at, request, response, error, remaining_attempts, backoff, max_backoff, catch_verb, catching, parent_request_key, trace_context + RETURNING id, created_at, lease_id, verb, state, origin, scheduled_at, 
request, response, error, remaining_attempts, backoff, max_backoff, catch_verb, catching, parent_request_key, trace_context, cron_job_key ) INSERT INTO async_calls ( verb, @@ -900,14 +865,15 @@ func (q *Queries) GetArtefactDigests(ctx context.Context, digests [][]byte) ([]G return items, nil } -const getCronJobs = `-- name: GetCronJobs :many -SELECT j.key as key, d.key as deployment_key, j.module_name as module, j.verb, j.schedule, j.start_time, j.next_execution, j.state +const getCronJobByKey = `-- name: GetCronJobByKey :one +SELECT j.key as key, d.key as deployment_key, j.module_name as module, j.verb, j.schedule, j.start_time, j.next_execution, j.last_execution FROM cron_jobs j INNER JOIN deployments d on j.deployment_id = d.id -WHERE d.min_replicas > 0 +WHERE j.key = $1::cron_job_key +FOR UPDATE SKIP LOCKED ` -type GetCronJobsRow struct { +type GetCronJobByKeyRow struct { Key model.CronJobKey DeploymentKey model.DeploymentKey Module string @@ -915,39 +881,23 @@ type GetCronJobsRow struct { Schedule string StartTime time.Time NextExecution time.Time - State model.CronJobState + LastExecution optional.Option[time.Time] } -func (q *Queries) GetCronJobs(ctx context.Context) ([]GetCronJobsRow, error) { - rows, err := q.db.QueryContext(ctx, getCronJobs) - if err != nil { - return nil, err - } - defer rows.Close() - var items []GetCronJobsRow - for rows.Next() { - var i GetCronJobsRow - if err := rows.Scan( - &i.Key, - &i.DeploymentKey, - &i.Module, - &i.Verb, - &i.Schedule, - &i.StartTime, - &i.NextExecution, - &i.State, - ); err != nil { - return nil, err - } - items = append(items, i) - } - if err := rows.Close(); err != nil { - return nil, err - } - if err := rows.Err(); err != nil { - return nil, err - } - return items, nil +func (q *Queries) GetCronJobByKey(ctx context.Context, key model.CronJobKey) (GetCronJobByKeyRow, error) { + row := q.db.QueryRowContext(ctx, getCronJobByKey, key) + var i GetCronJobByKeyRow + err := row.Scan( + &i.Key, + &i.DeploymentKey, + 
&i.Module, + &i.Verb, + &i.Schedule, + &i.StartTime, + &i.NextExecution, + &i.LastExecution, + ) + return i, err } const getDeployment = `-- name: GetDeployment :one @@ -1769,57 +1719,6 @@ func (q *Queries) GetSchemaForDeployment(ctx context.Context, key model.Deployme return schema, err } -const getStaleCronJobs = `-- name: GetStaleCronJobs :many -SELECT j.key as key, d.key as deployment_key, j.module_name as module, j.verb, j.schedule, j.start_time, j.next_execution, j.state -FROM cron_jobs j - INNER JOIN deployments d on j.deployment_id = d.id -WHERE state = 'executing' - AND start_time < (NOW() AT TIME ZONE 'utc') - $1::INTERVAL -` - -type GetStaleCronJobsRow struct { - Key model.CronJobKey - DeploymentKey model.DeploymentKey - Module string - Verb string - Schedule string - StartTime time.Time - NextExecution time.Time - State model.CronJobState -} - -func (q *Queries) GetStaleCronJobs(ctx context.Context, dollar_1 sqltypes.Duration) ([]GetStaleCronJobsRow, error) { - rows, err := q.db.QueryContext(ctx, getStaleCronJobs, dollar_1) - if err != nil { - return nil, err - } - defer rows.Close() - var items []GetStaleCronJobsRow - for rows.Next() { - var i GetStaleCronJobsRow - if err := rows.Scan( - &i.Key, - &i.DeploymentKey, - &i.Module, - &i.Verb, - &i.Schedule, - &i.StartTime, - &i.NextExecution, - &i.State, - ); err != nil { - return nil, err - } - items = append(items, i) - } - if err := rows.Close(); err != nil { - return nil, err - } - if err := rows.Err(); err != nil { - return nil, err - } - return items, nil -} - const getSubscription = `-- name: GetSubscription :one WITH module AS ( SELECT id @@ -1946,6 +1845,68 @@ func (q *Queries) GetTopicEvent(ctx context.Context, dollar_1 int64) (TopicEvent return i, err } +const getUnscheduledCronJobs = `-- name: GetUnscheduledCronJobs :many +SELECT j.key as key, d.key as deployment_key, j.module_name as module, j.verb, j.schedule, j.start_time, j.next_execution, j.last_execution +FROM cron_jobs j + INNER JOIN 
deployments d on j.deployment_id = d.id +WHERE d.min_replicas > 0 + AND j.start_time < $1::TIMESTAMPTZ + AND ( + j.last_execution IS NULL + OR NOT EXISTS ( + SELECT 1 + FROM async_calls ac + WHERE + ac.cron_job_key = j.key + AND ac.scheduled_at > j.last_execution::TIMESTAMPTZ + ) + ) +FOR UPDATE SKIP LOCKED +` + +type GetUnscheduledCronJobsRow struct { + Key model.CronJobKey + DeploymentKey model.DeploymentKey + Module string + Verb string + Schedule string + StartTime time.Time + NextExecution time.Time + LastExecution optional.Option[time.Time] +} + +func (q *Queries) GetUnscheduledCronJobs(ctx context.Context, startTime time.Time) ([]GetUnscheduledCronJobsRow, error) { + rows, err := q.db.QueryContext(ctx, getUnscheduledCronJobs, startTime) + if err != nil { + return nil, err + } + defer rows.Close() + var items []GetUnscheduledCronJobsRow + for rows.Next() { + var i GetUnscheduledCronJobsRow + if err := rows.Scan( + &i.Key, + &i.DeploymentKey, + &i.Module, + &i.Verb, + &i.Schedule, + &i.StartTime, + &i.NextExecution, + &i.LastExecution, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + const insertSubscriber = `-- name: InsertSubscriber :exec INSERT INTO topic_subscribers ( key, @@ -2216,6 +2177,23 @@ func (q *Queries) InsertTimelineLogEvent(ctx context.Context, arg InsertTimeline return err } +const isCronJobPending = `-- name: IsCronJobPending :one +SELECT EXISTS ( + SELECT 1 + FROM async_calls ac + WHERE ac.cron_job_key = $1::cron_job_key + AND ac.scheduled_at > $2::TIMESTAMPTZ + AND ac.state = 'pending' +) AS pending +` + +func (q *Queries) IsCronJobPending(ctx context.Context, key model.CronJobKey, startTime time.Time) (bool, error) { + row := q.db.QueryRowContext(ctx, isCronJobPending, key, startTime) + var pending bool + err := row.Scan(&pending) + return pending, err +} + const 
killStaleControllers = `-- name: KillStaleControllers :one WITH matches AS ( UPDATE controller @@ -2253,7 +2231,7 @@ func (q *Queries) KillStaleRunners(ctx context.Context, timeout sqltypes.Duratio } const loadAsyncCall = `-- name: LoadAsyncCall :one -SELECT id, created_at, lease_id, verb, state, origin, scheduled_at, request, response, error, remaining_attempts, backoff, max_backoff, catch_verb, catching, parent_request_key, trace_context +SELECT id, created_at, lease_id, verb, state, origin, scheduled_at, request, response, error, remaining_attempts, backoff, max_backoff, catch_verb, catching, parent_request_key, trace_context, cron_job_key FROM async_calls WHERE id = $1 ` @@ -2279,6 +2257,7 @@ func (q *Queries) LoadAsyncCall(ctx context.Context, id int64) (AsyncCall, error &i.Catching, &i.ParentRequestKey, &i.TraceContext, + &i.CronJobKey, ) return i, err } @@ -2502,75 +2481,6 @@ func (q *Queries) SetSubscriptionCursor(ctx context.Context, column1 model.Subsc return err } -const startCronJobs = `-- name: StartCronJobs :many -WITH updates AS ( - UPDATE cron_jobs - SET state = 'executing', - start_time = (NOW() AT TIME ZONE 'utc')::TIMESTAMPTZ - WHERE key = ANY ($1) - AND state = 'idle' - AND start_time < next_execution - AND (next_execution AT TIME ZONE 'utc') < (NOW() AT TIME ZONE 'utc')::TIMESTAMPTZ - RETURNING id, key, state, start_time, next_execution) -SELECT j.key as key, d.key as deployment_key, j.module_name as module, j.verb, j.schedule, - COALESCE(u.start_time, j.start_time) as start_time, - COALESCE(u.next_execution, j.next_execution) as next_execution, - COALESCE(u.state, j.state) as state, - d.min_replicas > 0 as has_min_replicas, - CASE WHEN u.key IS NULL THEN FALSE ELSE TRUE END as updated -FROM cron_jobs j - INNER JOIN deployments d on j.deployment_id = d.id - LEFT JOIN updates u on j.id = u.id -WHERE j.key = ANY ($1) -` - -type StartCronJobsRow struct { - Key model.CronJobKey - DeploymentKey model.DeploymentKey - Module string - Verb string - 
Schedule string - StartTime time.Time - NextExecution time.Time - State model.CronJobState - HasMinReplicas bool - Updated bool -} - -func (q *Queries) StartCronJobs(ctx context.Context, keys []string) ([]StartCronJobsRow, error) { - rows, err := q.db.QueryContext(ctx, startCronJobs, pq.Array(keys)) - if err != nil { - return nil, err - } - defer rows.Close() - var items []StartCronJobsRow - for rows.Next() { - var i StartCronJobsRow - if err := rows.Scan( - &i.Key, - &i.DeploymentKey, - &i.Module, - &i.Verb, - &i.Schedule, - &i.StartTime, - &i.NextExecution, - &i.State, - &i.HasMinReplicas, - &i.Updated, - ); err != nil { - return nil, err - } - items = append(items, i) - } - if err := rows.Close(); err != nil { - return nil, err - } - if err := rows.Err(); err != nil { - return nil, err - } - return items, nil -} - const startFSMTransition = `-- name: StartFSMTransition :one INSERT INTO fsm_instances ( fsm, @@ -2663,6 +2573,18 @@ func (q *Queries) SucceedFSMInstance(ctx context.Context, fsm schema.RefKey, key return column_1, err } +const updateCronJobExecution = `-- name: UpdateCronJobExecution :exec +UPDATE cron_jobs + SET last_execution = $1::TIMESTAMPTZ, + next_execution = $2::TIMESTAMPTZ + WHERE key = $3::cron_job_key +` + +func (q *Queries) UpdateCronJobExecution(ctx context.Context, lastExecution time.Time, nextExecution time.Time, key model.CronJobKey) error { + _, err := q.db.ExecContext(ctx, updateCronJobExecution, lastExecution, nextExecution, key) + return err +} + const upsertController = `-- name: UpsertController :one INSERT INTO controller (key, endpoint) VALUES ($1, $2) diff --git a/backend/controller/sql/schema/20240815164808_async_calls_cron_job_key.sql b/backend/controller/sql/schema/20240815164808_async_calls_cron_job_key.sql new file mode 100644 index 0000000000..fe4bf470ba --- /dev/null +++ b/backend/controller/sql/schema/20240815164808_async_calls_cron_job_key.sql @@ -0,0 +1,22 @@ +-- migrate:up + +ALTER TABLE async_calls + ADD COLUMN 
cron_job_key cron_job_key; + +ALTER TABLE async_calls + ADD CONSTRAINT fk_async_calls_cron_job_key + FOREIGN KEY (cron_job_key) REFERENCES cron_jobs(key) + ON DELETE SET NULL; + +CREATE INDEX idx_async_calls_cron_job_key + ON async_calls (cron_job_key); + +CREATE INDEX idx_async_calls_cron_job_key_scheduled_at + ON async_calls (cron_job_key, scheduled_at); + +ALTER TABLE cron_jobs + DROP COLUMN state, + ADD COLUMN last_execution TIMESTAMPTZ; + +-- migrate:down + diff --git a/common/configuration/sql/models.go b/common/configuration/sql/models.go index 0c59dbef5c..883d140976 100644 --- a/common/configuration/sql/models.go +++ b/common/configuration/sql/models.go @@ -388,6 +388,7 @@ type AsyncCall struct { Catching bool ParentRequestKey optional.Option[string] TraceContext pqtype.NullRawMessage + CronJobKey optional.Option[model.CronJobKey] } type Controller struct { @@ -407,8 +408,8 @@ type CronJob struct { Schedule string StartTime time.Time NextExecution time.Time - State model.CronJobState ModuleName string + LastExecution optional.Option[time.Time] } type Deployment struct { diff --git a/internal/model/cron_job.go b/internal/model/cron_job.go index 8b190f437e..8420616d26 100644 --- a/internal/model/cron_job.go +++ b/internal/model/cron_job.go @@ -4,13 +4,7 @@ import ( "time" "github.com/TBD54566975/ftl/backend/schema" -) - -type CronJobState string - -const ( - CronJobStateIdle = "idle" - CronJobStateExecuting = "executing" + "github.com/alecthomas/types/optional" ) type CronJob struct { @@ -20,5 +14,5 @@ type CronJob struct { Schedule string StartTime time.Time NextExecution time.Time - State CronJobState + LastExecution optional.Option[time.Time] } diff --git a/sqlc.yaml b/sqlc.yaml index d1794ddf46..6daed08f5f 100644 --- a/sqlc.yaml +++ b/sqlc.yaml @@ -141,11 +141,135 @@ sql: # - postgresql-query-too-costly - postgresql-no-seq-scan - <<: *daldir - queries: "backend/controller/cronjobs/sql/queries.sql" + queries: + - backend/controller/cronjobs/sql/queries.sql 
+ # FIXME: We need to create async calls in the same transaction + - backend/controller/sql/queries.sql gen: go: <<: *gengo out: "backend/controller/cronjobs/sql" + overrides: + - db_type: "uuid" + go_type: "github.com/google/uuid.UUID" + - db_type: "uuid" + nullable: true + go_type: + type: "optional.Option[uuid.UUID]" + - db_type: "timestamptz" + go_type: "time.Time" + - db_type: "pg_catalog.interval" + go_type: "github.com/TBD54566975/ftl/backend/controller/sql/sqltypes.Duration" + - db_type: "pg_catalog.interval" + nullable: true + go_type: + type: "optional.Option[sqltypes.Duration]" + - db_type: "module_schema_pb" + go_type: "*github.com/TBD54566975/ftl/backend/schema.Module" + - db_type: "timestamptz" + nullable: true + go_type: + type: "optional.Option[time.Time]" + - db_type: "pg_catalog.varchar" + nullable: true + go_type: "github.com/alecthomas/types/optional.Option[string]" + - db_type: "runner_key" + go_type: "github.com/TBD54566975/ftl/internal/model.RunnerKey" + - db_type: "runner_key" + nullable: true + go_type: + type: "optional.Option[model.RunnerKey]" + - db_type: "schema_ref" + go_type: "github.com/TBD54566975/ftl/backend/schema.RefKey" + - db_type: "schema_ref" + nullable: true + go_type: + type: "optional.Option[schema.RefKey]" + - db_type: "schema_type" + go_type: + type: "Type" + - db_type: "schema_type" + nullable: true + go_type: + type: "optional.Option[Type]" + - db_type: "cron_job_key" + go_type: "github.com/TBD54566975/ftl/internal/model.CronJobKey" + - db_type: "cron_job_key" + nullable: true + go_type: + type: "optional.Option[model.CronJobKey]" + - db_type: "lease_key" + go_type: "github.com/TBD54566975/ftl/backend/controller/leases.Key" + - db_type: "lease_key" + nullable: true + go_type: + type: "optional.Option[leases.Key]" + - db_type: "deployment_key" + go_type: "github.com/TBD54566975/ftl/internal/model.DeploymentKey" + - db_type: "deployment_key" + nullable: true + go_type: + type: "optional.Option[model.DeploymentKey]" + - 
db_type: "cron_job_state" + go_type: "github.com/TBD54566975/ftl/internal/model.CronJobState" + - db_type: "controller_key" + go_type: "github.com/TBD54566975/ftl/internal/model.ControllerKey" + - db_type: "request_key" + go_type: "github.com/TBD54566975/ftl/internal/model.RequestKey" + - db_type: "request_key" + nullable: true + go_type: + type: "optional.Option[model.RequestKey]" + - db_type: "topic_key" + go_type: "github.com/TBD54566975/ftl/internal/model.TopicKey" + - db_type: "topic_key" + nullable: true + go_type: + type: "optional.Option[model.TopicKey]" + - db_type: "subscription_key" + go_type: "github.com/TBD54566975/ftl/internal/model.SubscriptionKey" + - db_type: "subscription_key" + nullable: true + go_type: + type: "optional.Option[model.SubscriptionKey]" + - db_type: "subscriber_key" + go_type: "github.com/TBD54566975/ftl/internal/model.SubscriberKey" + - db_type: "subscriber_key" + nullable: true + go_type: + type: "optional.Option[model.SubscriberKey]" + - db_type: "topic_event_key" + go_type: "github.com/TBD54566975/ftl/internal/model.TopicEventKey" + - db_type: "topic_event_key" + nullable: true + go_type: + type: "optional.Option[model.TopicEventKey]" + - db_type: "text" + go_type: "string" + - db_type: "text" + nullable: true + go_type: "github.com/alecthomas/types/optional.Option[string]" + - db_type: "bytes" + nullable: true + go_type: "github.com/alecthomas/types/optional.Option[[]byte]" + - db_type: "pg_catalog.int8" + nullable: true + go_type: "github.com/alecthomas/types/optional.Option[int64]" + - db_type: "bigint" + nullable: true + go_type: "github.com/alecthomas/types/optional.Option[int64]" + - db_type: "int" + nullable: true + go_type: "github.com/alecthomas/types/optional.Option[int32]" + - db_type: "bool" + nullable: true + go_type: "github.com/alecthomas/types/optional.Option[bool]" + - column: "controller.key" + go_type: "github.com/TBD54566975/ftl/internal/model.ControllerKey" + - column: "deployments.name" + go_type: 
"github.com/TBD54566975/ftl/internal/model.DeploymentKey" + - column: "events.payload" + go_type: "encoding/json.RawMessage" - <<: *daldir queries: "common/configuration/sql/queries.sql" gen: