Skip to content

Commit

Permalink
fix: case where a runner ends up reserved without a timeout (#393)
Browse files Browse the repository at this point in the history
I believe this was being triggered when a controller creates a
reservation transaction, issues the reservation to a runner, then dies
before being able to complete the transaction. This leaves the runner in
a state where it is telling controllers it's reserved, without having
set a reservation_timeout, because the timeout was previously only set
during the reservation transaction; the UpsertRunner calls did not
set it.

To work around this, and another potential issue, I've added two
triggers:

1. When a runner is set to the reserved state with a NULL
reservation_timeout, set it to a default of 2m.
2. Update runner.module_name whenever runner.deployment_id is set or
unset.

Fixes #392
  • Loading branch information
alecthomas authored Sep 18, 2023
1 parent 87345f8 commit a55de4a
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 19 deletions.
3 changes: 3 additions & 0 deletions backend/controller/internal/dal/dal.go
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,9 @@ func (d *DAL) DeregisterRunner(ctx context.Context, key model.RunnerKey) error {
return nil
}

// ReserveRunnerForDeployment reserves a runner for the given deployment.
//
// It returns a Reservation that must be committed or rolled back.
func (d *DAL) ReserveRunnerForDeployment(ctx context.Context, deployment model.DeploymentName, reservationTimeout time.Duration, labels model.Labels) (Reservation, error) {
jsonLabels, err := json.Marshal(labels)
if err != nil {
Expand Down
11 changes: 2 additions & 9 deletions backend/controller/internal/sql/queries.sql
Original file line number Diff line number Diff line change
Expand Up @@ -90,25 +90,18 @@ WITH deployment_rel AS (
ELSE COALESCE((SELECT id
FROM deployments d
WHERE d.name = sqlc.narg('deployment_name')
LIMIT 1), -1) END AS id),
module_rel AS (SELECT m.name
FROM modules m
INNER JOIN deployments d ON m.id = d.module_id
WHERE d.name = sqlc.narg('deployment_name')
LIMIT 1)
LIMIT 1), -1) END AS id)
INSERT
INTO runners (key, endpoint, state, labels, module_name, deployment_id, last_seen)
INTO runners (key, endpoint, state, labels, deployment_id, last_seen)
VALUES ($1,
$2,
$3,
$4,
(SELECT name FROM module_rel),
(SELECT id FROM deployment_rel),
NOW() AT TIME ZONE 'utc')
ON CONFLICT (key) DO UPDATE SET endpoint = $2,
state = $3,
labels = $4,
module_name = (SELECT name FROM module_rel),
deployment_id = (SELECT id FROM deployment_rel),
last_seen = NOW() AT TIME ZONE 'utc'
RETURNING deployment_id;
Expand Down
11 changes: 2 additions & 9 deletions backend/controller/internal/sql/queries.sql.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 43 additions & 1 deletion backend/controller/internal/sql/schema/001_init.sql
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,48 @@ CREATE TABLE runners
labels JSONB NOT NULL DEFAULT '{}'
);

-- Keep runners.module_name in sync with runners.deployment_id.
--
-- Row-level trigger function: clears module_name, then, if the row has a
-- deployment_id, looks up the name of the module that owns that deployment.
-- If the lookup finds no row, module_name stays NULL.
CREATE OR REPLACE FUNCTION runners_update_module_name() RETURNS TRIGGER AS
$$
BEGIN
    -- Default: no deployment means no module.
    NEW.module_name := NULL;

    IF NEW.deployment_id IS NOT NULL
    THEN
        SELECT module.name
        INTO NEW.module_name
        FROM deployments AS deployment
                 INNER JOIN modules AS module ON module.id = deployment.module_id
        WHERE deployment.id = NEW.deployment_id;
    END IF;

    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Fire runners_update_module_name() before each row is inserted into or
-- updated in runners, so the value it assigns to NEW is what gets stored.
CREATE TRIGGER runners_update_module_name
    BEFORE INSERT OR UPDATE ON runners
    FOR EACH ROW
EXECUTE PROCEDURE runners_update_module_name();

-- Ensure a reserved runner always has a reservation_timeout.
--
-- Row-level trigger function: whenever a row is written in the 'reserved'
-- state without a reservation_timeout, default the timeout to two minutes
-- from now (UTC). A timeout supplied by the writer is never overridden.
--
-- The check deliberately looks only at NEW, so that it also covers:
--   * INSERTs, where OLD is null/unassigned and a comparison against
--     OLD.state can never evaluate to true;
--   * rows that are already 'reserved' with a NULL timeout, which would
--     otherwise never be repaired by subsequent updates.
CREATE OR REPLACE FUNCTION runners_set_reservation_timeout() RETURNS TRIGGER AS
$$
BEGIN
    IF NEW.state = 'reserved' AND NEW.reservation_timeout IS NULL
    THEN
        -- Parenthesised for clarity: convert to UTC first, then add the interval.
        NEW.reservation_timeout = (NOW() AT TIME ZONE 'utc') + INTERVAL '2 minutes';
    END IF;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Fire runners_set_reservation_timeout() before each row is inserted into or
-- updated in runners, so the value it assigns to NEW is what gets stored.
CREATE TRIGGER runners_set_reservation_timeout
    BEFORE INSERT OR UPDATE ON runners
    FOR EACH ROW
EXECUTE PROCEDURE runners_set_reservation_timeout();

CREATE UNIQUE INDEX runners_key ON runners (key);
CREATE UNIQUE INDEX runners_endpoint_not_dead_idx ON runners (endpoint) WHERE state <> 'dead';
CREATE INDEX runners_module_name_idx ON runners (module_name);
Expand Down Expand Up @@ -150,7 +192,7 @@ CREATE TABLE requests
-- ingress: ingress-<method>-<path>-<hash> (eg. ingress-GET-foo-bar-<hash>)
-- cron: cron-<name>-<hash> (eg. cron-poll-news-sources-<hash>)
-- pubsub: pubsub-<subscription>-<hash> (eg. pubsub-articles-<hash>)
name VARCHAR UNIQUE NOT NULL,
name VARCHAR UNIQUE NOT NULL,
source_addr VARCHAR NOT NULL
);

Expand Down

0 comments on commit a55de4a

Please sign in to comment.