diff --git a/backend/controller/cronjobs/sql/models.go b/backend/controller/cronjobs/sql/models.go index ab3a9693ca..c8b93b436c 100644 --- a/backend/controller/cronjobs/sql/models.go +++ b/backend/controller/cronjobs/sql/models.go @@ -431,9 +431,11 @@ type DeploymentArtefact struct { } type EncryptionKey struct { - ID int64 - Key []byte - CreatedAt time.Time + ID int64 + Key []byte + CreatedAt time.Time + VerifyTimeline encryption.OptionalEncryptedTimelineColumn + VerifyAsync encryption.OptionalEncryptedAsyncColumn } type FsmInstance struct { diff --git a/backend/controller/dal/dal.go b/backend/controller/dal/dal.go index 2101d0ba7c..90dfd6670d 100644 --- a/backend/controller/dal/dal.go +++ b/backend/controller/dal/dal.go @@ -221,6 +221,9 @@ func New(ctx context.Context, conn *stdsql.DB, encryptionBuilder encryption.Buil return nil, fmt.Errorf("failed to setup encryptor: %w", err) } d.encryptor = encryptor + if err = d.verifyEncryptor(ctx); err != nil { + return nil, fmt.Errorf("failed to verify encryption: %w", err) + } return d, nil } diff --git a/backend/controller/dal/dal_test.go b/backend/controller/dal/dal_test.go index 10def9f50b..6dab71cf0b 100644 --- a/backend/controller/dal/dal_test.go +++ b/backend/controller/dal/dal_test.go @@ -542,3 +542,81 @@ func TestDeleteOldEvents(t *testing.T) { assert.Equal(t, int64(0), count) }) } + +func TestVerifyEncryption(t *testing.T) { + ctx := log.ContextWithNewDefaultLogger(context.Background()) + conn := sqltest.OpenForTesting(ctx, t) + uri := "fake-kms://CK6YwYkBElQKSAowdHlwZS5nb29nbGVhcGlzLmNvbS9nb29nbGUuY3J5cHRvLnRpbmsuQWVzR2NtS2V5EhIaEJy4TIQgfCuwxA3ZZgChp_wYARABGK6YwYkBIAE" + + t.Run("DeleteVerificationColumns", func(t *testing.T) { + dal, err := New(ctx, conn, encryption.NewBuilder().WithKMSURI(optional.Some(uri))) + assert.NoError(t, err) + + // check that there are columns set in encryption_keys + row, err := dal.db.GetOnlyEncryptionKey(ctx) + assert.NoError(t, err) + assert.NotZero(t, row.VerifyTimeline.Ok()) + assert.NotZero(t, row.VerifyAsync.Ok()) + + // delete the columns to see if they are recreated + err = dal.db.UpdateEncryptionVerification(ctx, optional.None[encryption.EncryptedTimelineColumn](), optional.None[encryption.EncryptedAsyncColumn]()) + assert.NoError(t, err) + + dal, err = New(ctx, conn, encryption.NewBuilder().WithKMSURI(optional.Some(uri))) + assert.NoError(t, err) + + row, err = dal.db.GetOnlyEncryptionKey(ctx) + assert.NoError(t, err) + assert.NotZero(t, row.VerifyTimeline.Ok()) + assert.NotZero(t, row.VerifyAsync.Ok()) + }) + + t.Run("DifferentKey", func(t *testing.T) { + _, err := New(ctx, conn, encryption.NewBuilder().WithKMSURI(optional.Some(uri))) + assert.NoError(t, err) + + differentKey := "fake-kms://CJP7ksIKElQKSAowdHlwZS5nb29nbGVhcGlzLmNvbS9nb29nbGUuY3J5cHRvLnRpbmsuQWVzR2NtS2V5EhIaEJWT3z-xdW23HO7hc9vF3YoYARABGJP7ksIKIAE" + _, err = New(ctx, conn, encryption.NewBuilder().WithKMSURI(optional.Some(differentKey))) + assert.Error(t, err) + assert.Contains(t, err.Error(), "decryption failed") + }) + + t.Run("SameKeyButWrongTimelineVerification", func(t *testing.T) { + dal, err := New(ctx, conn, encryption.NewBuilder().WithKMSURI(optional.Some(uri))) + assert.NoError(t, err) + + err = dal.db.UpdateEncryptionVerification(ctx, optional.Some[encryption.EncryptedTimelineColumn]([]byte("123")), optional.None[encryption.EncryptedAsyncColumn]()) + assert.NoError(t, err) + _, err = New(ctx, conn, encryption.NewBuilder().WithKMSURI(optional.Some(uri))) + assert.Error(t, err) + assert.Contains(t, err.Error(), "verification sanity") + assert.Contains(t, err.Error(), "verify timeline") + + err = dal.db.UpdateEncryptionVerification(ctx, optional.None[encryption.EncryptedTimelineColumn](), optional.Some[encryption.EncryptedAsyncColumn]([]byte("123"))) + assert.NoError(t, err) + _, err = New(ctx, conn, encryption.NewBuilder().WithKMSURI(optional.Some(uri))) + assert.Error(t, err) + assert.Contains(t, err.Error(), "verification sanity") + assert.Contains(t, err.Error(), "verify async") + }) + + t.Run("SameKeyButEncryptWrongPlainText", func(t *testing.T) { + result, err := conn.Exec("DELETE FROM encryption_keys") + assert.NoError(t, err) + affected, err := result.RowsAffected() + assert.NoError(t, err) + assert.Equal(t, int64(1), affected) + dal, err := New(ctx, conn, encryption.NewBuilder().WithKMSURI(optional.Some(uri))) + assert.NoError(t, err) + + encrypted := encryption.EncryptedColumn[encryption.TimelineSubKey]{} + err = dal.encrypt([]byte("123"), &encrypted) + assert.NoError(t, err) + + err = dal.db.UpdateEncryptionVerification(ctx, optional.Some(encrypted), optional.None[encryption.EncryptedAsyncColumn]()) + assert.NoError(t, err) + _, err = New(ctx, conn, encryption.NewBuilder().WithKMSURI(optional.Some(uri))) + assert.Error(t, err) + assert.Contains(t, err.Error(), "string does not match") + }) +} diff --git a/backend/controller/dal/encryption.go b/backend/controller/dal/encryption.go index 7106aebbd9..51af56ca0f 100644 --- a/backend/controller/dal/encryption.go +++ b/backend/controller/dal/encryption.go @@ -5,6 +5,8 @@ import ( "encoding/json" "fmt" + "github.com/alecthomas/types/optional" + "github.com/TBD54566975/ftl/backend/dal" "github.com/TBD54566975/ftl/internal/encryption" "github.com/TBD54566975/ftl/internal/log" @@ -66,10 +68,11 @@ func (d *DAL) EnsureKey(ctx context.Context, generateKey func() ([]byte, error)) } defer tx.CommitOrRollback(ctx, &err) - encryptedKey, err = tx.db.GetOnlyEncryptionKey(ctx) + var key []byte + row, err := tx.db.GetOnlyEncryptionKey(ctx) if err != nil && dal.IsNotFound(err) { logger.Debugf("No encryption key found, generating a new one") - key, err := generateKey() + key, err = generateKey() if err != nil { return nil, fmt.Errorf("failed to generate key: %w", err) } @@ -84,5 +87,85 @@ func (d *DAL) EnsureKey(ctx context.Context, generateKey func() ([]byte, error)) } logger.Debugf("Encryption key found, using it") - return encryptedKey, nil + + return row.Key, nil +} + +const verification = "FTL - Towards a 𝝺-calculus for large-scale systems" + +func (d *DAL) verifyEncryptor(ctx context.Context) (err error) { + var tx *Tx + tx, err = d.Begin(ctx) + if err != nil { + return fmt.Errorf("failed to begin transaction: %w", err) + } + defer tx.CommitOrRollback(ctx, &err) + + row, err := tx.db.GetOnlyEncryptionKey(ctx) + if err != nil { + if dal.IsNotFound(err) { + // No encryption key found, probably using noop. + return nil + } + return fmt.Errorf("failed to get encryption row from the db: %w", err) + } + + needsUpdate := false + newTimeline, err := verifySubkey(d.encryptor, row.VerifyTimeline) + if err != nil { + return fmt.Errorf("failed to verify timeline subkey: %w", err) + } + if newTimeline != nil { + needsUpdate = true + row.VerifyTimeline = optional.Some(newTimeline) + } + + newAsync, err := verifySubkey(d.encryptor, row.VerifyAsync) + if err != nil { + return fmt.Errorf("failed to verify async subkey: %w", err) + } + if newAsync != nil { + needsUpdate = true + row.VerifyAsync = optional.Some(newAsync) + } + + if !needsUpdate { + return nil + } + + if !row.VerifyTimeline.Ok() || !row.VerifyAsync.Ok() { + panic("should be unreachable. verifySubkey should have set the subkey") + } + + err = tx.db.UpdateEncryptionVerification(ctx, row.VerifyTimeline, row.VerifyAsync) + if err != nil { + return fmt.Errorf("failed to update encryption verification: %w", err) + } + + return nil +} + +// verifySubkey checks if the subkey is set and if not, sets it to a verification string. +// returns (nil, nil) if verified and not changed +func verifySubkey[SK encryption.SubKey](encryptor encryption.DataEncryptor, encrypted optional.Option[encryption.EncryptedColumn[SK]]) (encryption.EncryptedColumn[SK], error) { + verifyField, ok := encrypted.Get() + if !ok { + err := encryptor.Encrypt([]byte(verification), &verifyField) + if err != nil { + return nil, fmt.Errorf("failed to encrypt verification sanity string: %w", err) + } + return verifyField, nil + } + + decrypted, err := encryptor.Decrypt(&verifyField) + if err != nil { + return nil, fmt.Errorf("failed to decrypt verification sanity string: %w", err) + } + + if string(decrypted) != verification { + return nil, fmt.Errorf("decrypted verification string does not match expected value") + } + + // verified, no need to update + return nil, nil } diff --git a/backend/controller/sql/models.go b/backend/controller/sql/models.go index ab3a9693ca..c8b93b436c 100644 --- a/backend/controller/sql/models.go +++ b/backend/controller/sql/models.go @@ -431,9 +431,11 @@ type DeploymentArtefact struct { } type EncryptionKey struct { - ID int64 - Key []byte - CreatedAt time.Time + ID int64 + Key []byte + CreatedAt time.Time + VerifyTimeline encryption.OptionalEncryptedTimelineColumn + VerifyAsync encryption.OptionalEncryptedAsyncColumn } type FsmInstance struct { diff --git a/backend/controller/sql/querier.go b/backend/controller/sql/querier.go index 74b4582179..9affa69406 100644 --- a/backend/controller/sql/querier.go +++ b/backend/controller/sql/querier.go @@ -73,7 +73,7 @@ type Querier interface { GetLeaseInfo(ctx context.Context, key leases.Key) (GetLeaseInfoRow, error) GetModulesByID(ctx context.Context, ids []int64) ([]Module, error) GetNextEventForSubscription(ctx context.Context, consumptionDelay sqltypes.Duration, topic model.TopicKey, cursor optional.Option[model.TopicEventKey]) (GetNextEventForSubscriptionRow, error) - GetOnlyEncryptionKey(ctx context.Context) ([]byte, error) + GetOnlyEncryptionKey(ctx context.Context) (GetOnlyEncryptionKeyRow, error) GetProcessList(ctx context.Context) ([]GetProcessListRow, error) GetRandomSubscriber(ctx context.Context, key model.SubscriptionKey) (GetRandomSubscriberRow, error) // Retrieve routing information for a runner. @@ -118,6 +118,7 @@ type Querier interface { StartFSMTransition(ctx context.Context, arg StartFSMTransitionParams) (FsmInstance, error) SucceedAsyncCall(ctx context.Context, response encryption.OptionalEncryptedAsyncColumn, iD int64) (bool, error) SucceedFSMInstance(ctx context.Context, fsm schema.RefKey, key string) (bool, error) + UpdateEncryptionVerification(ctx context.Context, verifyTimeline encryption.OptionalEncryptedTimelineColumn, verifyAsync encryption.OptionalEncryptedAsyncColumn) error UpsertController(ctx context.Context, key model.ControllerKey, endpoint string) (int64, error) UpsertModule(ctx context.Context, language string, name string) (int64, error) // Upsert a runner and return the deployment ID that it is assigned to, if any. diff --git a/backend/controller/sql/queries.sql b/backend/controller/sql/queries.sql index 6cdbb71258..918f8a3cb2 100644 --- a/backend/controller/sql/queries.sql +++ b/backend/controller/sql/queries.sql @@ -901,10 +901,16 @@ FROM topic_events WHERE id = $1::BIGINT; -- name: GetOnlyEncryptionKey :one -SELECT key +SELECT key, verify_timeline, verify_async FROM encryption_keys WHERE id = 1; -- name: CreateOnlyEncryptionKey :exec INSERT INTO encryption_keys (id, key) VALUES (1, $1); + +-- name: UpdateEncryptionVerification :exec +UPDATE encryption_keys +SET verify_timeline = $1, + verify_async = $2 +WHERE id = 1; diff --git a/backend/controller/sql/queries.sql.go b/backend/controller/sql/queries.sql.go index d85ce0a2f8..b0810f039f 100644 --- a/backend/controller/sql/queries.sql.go +++ b/backend/controller/sql/queries.sql.go @@ -1466,16 +1466,22 @@ func (q *Queries) GetNextEventForSubscription(ctx context.Context, consumptionDe } const getOnlyEncryptionKey = `-- name: GetOnlyEncryptionKey :one -SELECT key +SELECT key, verify_timeline, verify_async FROM encryption_keys WHERE id = 1 ` -func (q *Queries) GetOnlyEncryptionKey(ctx context.Context) ([]byte, error) { +type GetOnlyEncryptionKeyRow struct { + Key []byte + VerifyTimeline encryption.OptionalEncryptedTimelineColumn + VerifyAsync encryption.OptionalEncryptedAsyncColumn +} + +func (q *Queries) GetOnlyEncryptionKey(ctx context.Context) (GetOnlyEncryptionKeyRow, error) { row := q.db.QueryRowContext(ctx, getOnlyEncryptionKey) - var key []byte - err := row.Scan(&key) - return key, err + var i GetOnlyEncryptionKeyRow + err := row.Scan(&i.Key, &i.VerifyTimeline, &i.VerifyAsync) + return i, err } const getProcessList = `-- name: GetProcessList :many @@ -2665,6 +2671,18 @@ func (q *Queries) SucceedFSMInstance(ctx context.Context, fsm schema.RefKey, key return column_1, err } +const updateEncryptionVerification = `-- name: UpdateEncryptionVerification :exec +UPDATE encryption_keys +SET verify_timeline = $1, + verify_async = $2 +WHERE id = 1 +` + +func (q *Queries) UpdateEncryptionVerification(ctx context.Context, verifyTimeline encryption.OptionalEncryptedTimelineColumn, verifyAsync encryption.OptionalEncryptedAsyncColumn) error { + _, err := q.db.ExecContext(ctx, updateEncryptionVerification, verifyTimeline, verifyAsync) + return err +} + const upsertController = `-- name: UpsertController :one INSERT INTO controller (key, endpoint) VALUES ($1, $2) diff --git a/backend/controller/sql/schema/20240820011612_encryption_verification.sql b/backend/controller/sql/schema/20240820011612_encryption_verification.sql new file mode 100644 index 0000000000..0295cef209 --- /dev/null +++ b/backend/controller/sql/schema/20240820011612_encryption_verification.sql @@ -0,0 +1,7 @@ +-- migrate:up + +ALTER TABLE encryption_keys + ADD COLUMN verify_timeline encrypted_timeline, + ADD COLUMN verify_async encrypted_async; + +-- migrate:down diff --git a/internal/configuration/sql/models.go b/internal/configuration/sql/models.go index ab3a9693ca..c8b93b436c 100644 --- a/internal/configuration/sql/models.go +++ b/internal/configuration/sql/models.go @@ -431,9 +431,11 @@ type DeploymentArtefact struct { } type EncryptionKey struct { - ID int64 - Key []byte - CreatedAt time.Time + ID int64 + Key []byte + CreatedAt time.Time + VerifyTimeline encryption.OptionalEncryptedTimelineColumn + VerifyAsync encryption.OptionalEncryptedAsyncColumn } type FsmInstance struct { diff --git a/internal/encryption/database.go b/internal/encryption/database.go index a1269f3d25..f51e9c38c4 100644 --- a/internal/encryption/database.go +++ b/internal/encryption/database.go @@ -50,7 +50,7 @@ type SubKey interface{ SubKey() string } // TimelineSubKey is a type that represents the subkey for logs. type TimelineSubKey struct{} -func (TimelineSubKey) SubKey() string { return "logs" } +func (TimelineSubKey) SubKey() string { return "timeline" } // AsyncSubKey is a type that represents the subkey for async. type AsyncSubKey struct{} diff --git a/internal/encryption/encryption.go b/internal/encryption/encryption.go index 5f0dd11c3b..3f56062d9b 100644 --- a/internal/encryption/encryption.go +++ b/internal/encryption/encryption.go @@ -5,6 +5,7 @@ import ( "context" "fmt" "strings" + "sync" "github.com/alecthomas/types/optional" awsv1kms "github.com/aws/aws-sdk-go/service/kms" @@ -99,6 +100,7 @@ type KMSEncryptor struct { root keyset.Handle kekAEAD tink.AEAD encryptedKeyset []byte + cachedDerivedMu sync.RWMutex cachedDerived map[SubKey]tink.AEAD } @@ -206,7 +208,10 @@ func deriveKeyset(root keyset.Handle, salt []byte) (*keyset.Handle, error) { } func (k *KMSEncryptor) getDerivedPrimitive(subKey SubKey) (tink.AEAD, error) { - if primitive, ok := k.cachedDerived[subKey]; ok { + k.cachedDerivedMu.RLock() + primitive, ok := k.cachedDerived[subKey] + k.cachedDerivedMu.RUnlock() + if ok { return primitive, nil } @@ -215,12 +220,15 @@ func (k *KMSEncryptor) getDerivedPrimitive(subKey SubKey) (tink.AEAD, error) { return nil, fmt.Errorf("failed to derive keyset: %w", err) } - primitive, err := aead.New(derived) + primitive, err = aead.New(derived) if err != nil { return nil, fmt.Errorf("failed to create primitive: %w", err) } + k.cachedDerivedMu.Lock() k.cachedDerived[subKey] = primitive + k.cachedDerivedMu.Unlock() + return primitive, nil }