From a270a5eccc7118be072197780bb5d64e28932fd3 Mon Sep 17 00:00:00 2001 From: Ben Ash Date: Fri, 7 Jun 2024 21:48:33 +0000 Subject: [PATCH 1/7] Core: periodically prune orphan Vault Clients Previously, the CachingClientFactory would keep all cached Vault Clients alive even if no longer had any referring objects. This change adds a new periodic function that will prune any Vault client that has no referring objects. - Don't prune new clients. --- api/v1beta1/vaultpkisecret_types.go | 3 + api/v1beta1/vaultstaticsecret_types.go | 3 + api/v1beta1/zz_generated.deepcopy.go | 2 + ...secrets.hashicorp.com_vaultpkisecrets.yaml | 15 ++ ...rets.hashicorp.com_vaultstaticsecrets.yaml | 15 ++ ...secrets.hashicorp.com_vaultpkisecrets.yaml | 15 ++ ...rets.hashicorp.com_vaultstaticsecrets.yaml | 15 ++ .../vaultdynamicsecret_controller_test.go | 2 + controllers/vaultpkisecret_controller.go | 4 + controllers/vaultstaticsecret_controller.go | 4 + docs/api/api-reference.md | 2 + internal/vault/cache.go | 30 ++- internal/vault/client.go | 29 ++ internal/vault/client_factory.go | 156 ++++++++++- internal/vault/client_factory_test.go | 254 ++++++++++++++++++ 15 files changed, 532 insertions(+), 17 deletions(-) diff --git a/api/v1beta1/vaultpkisecret_types.go b/api/v1beta1/vaultpkisecret_types.go index 9bf9c92e8..e6fb956a9 100644 --- a/api/v1beta1/vaultpkisecret_types.go +++ b/api/v1beta1/vaultpkisecret_types.go @@ -137,6 +137,9 @@ type VaultPKISecretStatus struct { SecretMAC string `json:"secretMAC,omitempty"` Valid bool `json:"valid"` Error string `json:"error"` + // VaultClientMeta contains the status of the Vault client and is used during + // resource reconciliation. + VaultClientMeta VaultClientMeta `json:"vaultClientMeta,omitempty"` } //+kubebuilder:object:root=true diff --git a/api/v1beta1/vaultstaticsecret_types.go b/api/v1beta1/vaultstaticsecret_types.go index 211465fec..6d4f26b14 100644 --- a/api/v1beta1/vaultstaticsecret_types.go +++ b/api/v1beta1/vaultstaticsecret_types.go @@ -66,6 +66,9 @@ type VaultStaticSecretStatus struct { // The SecretMac is also used to detect drift in the Destination Secret's Data. // If drift is detected the data will be synced to the Destination. SecretMAC string `json:"secretMAC,omitempty"` + // VaultClientMeta contains the status of the Vault client and is used during + // resource reconciliation. + VaultClientMeta VaultClientMeta `json:"vaultClientMeta,omitempty"` } //+kubebuilder:object:root=true diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go index d05c83cc9..f7ce438f6 100644 --- a/api/v1beta1/zz_generated.deepcopy.go +++ b/api/v1beta1/zz_generated.deepcopy.go @@ -1373,6 +1373,7 @@ func (in *VaultPKISecretSpec) DeepCopy() *VaultPKISecretSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *VaultPKISecretStatus) DeepCopyInto(out *VaultPKISecretStatus) { *out = *in + out.VaultClientMeta = in.VaultClientMeta } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VaultPKISecretStatus. @@ -1503,6 +1504,7 @@ func (in *VaultStaticSecretSpec) DeepCopy() *VaultStaticSecretSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *VaultStaticSecretStatus) DeepCopyInto(out *VaultStaticSecretStatus) { *out = *in + out.VaultClientMeta = in.VaultClientMeta } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VaultStaticSecretStatus. diff --git a/chart/crds/secrets.hashicorp.com_vaultpkisecrets.yaml b/chart/crds/secrets.hashicorp.com_vaultpkisecrets.yaml index a7ed16f95..5f5757347 100644 --- a/chart/crds/secrets.hashicorp.com_vaultpkisecrets.yaml +++ b/chart/crds/secrets.hashicorp.com_vaultpkisecrets.yaml @@ -374,6 +374,21 @@ spec: type: string valid: type: boolean + vaultClientMeta: + description: |- + VaultClientMeta contains the status of the Vault client and is used during + resource reconciliation. + properties: + cacheKey: + description: CacheKey is the unique key used to identify the client + cache. + type: string + id: + description: |- + ID is the Vault ID of the authenticated client. The ID should never contain + any sensitive information. + type: string + type: object required: - error - lastGeneration diff --git a/chart/crds/secrets.hashicorp.com_vaultstaticsecrets.yaml b/chart/crds/secrets.hashicorp.com_vaultstaticsecrets.yaml index 986e0423b..c0781e0b2 100644 --- a/chart/crds/secrets.hashicorp.com_vaultstaticsecrets.yaml +++ b/chart/crds/secrets.hashicorp.com_vaultstaticsecrets.yaml @@ -295,6 +295,21 @@ spec: The SecretMac is also used to detect drift in the Destination Secret's Data. If drift is detected the data will be synced to the Destination. type: string + vaultClientMeta: + description: |- + VaultClientMeta contains the status of the Vault client and is used during + resource reconciliation. + properties: + cacheKey: + description: CacheKey is the unique key used to identify the client + cache. + type: string + id: + description: |- + ID is the Vault ID of the authenticated client. The ID should never contain + any sensitive information. + type: string + type: object required: - lastGeneration type: object diff --git a/config/crd/bases/secrets.hashicorp.com_vaultpkisecrets.yaml b/config/crd/bases/secrets.hashicorp.com_vaultpkisecrets.yaml index a7ed16f95..5f5757347 100644 --- a/config/crd/bases/secrets.hashicorp.com_vaultpkisecrets.yaml +++ b/config/crd/bases/secrets.hashicorp.com_vaultpkisecrets.yaml @@ -374,6 +374,21 @@ spec: type: string valid: type: boolean + vaultClientMeta: + description: |- + VaultClientMeta contains the status of the Vault client and is used during + resource reconciliation. + properties: + cacheKey: + description: CacheKey is the unique key used to identify the client + cache. + type: string + id: + description: |- + ID is the Vault ID of the authenticated client. The ID should never contain + any sensitive information. + type: string + type: object required: - error - lastGeneration diff --git a/config/crd/bases/secrets.hashicorp.com_vaultstaticsecrets.yaml b/config/crd/bases/secrets.hashicorp.com_vaultstaticsecrets.yaml index 986e0423b..c0781e0b2 100644 --- a/config/crd/bases/secrets.hashicorp.com_vaultstaticsecrets.yaml +++ b/config/crd/bases/secrets.hashicorp.com_vaultstaticsecrets.yaml @@ -295,6 +295,21 @@ spec: The SecretMac is also used to detect drift in the Destination Secret's Data. If drift is detected the data will be synced to the Destination. type: string + vaultClientMeta: + description: |- + VaultClientMeta contains the status of the Vault client and is used during + resource reconciliation. + properties: + cacheKey: + description: CacheKey is the unique key used to identify the client + cache. + type: string + id: + description: |- + ID is the Vault ID of the authenticated client. The ID should never contain + any sensitive information. + type: string + type: object required: - lastGeneration type: object diff --git a/controllers/vaultdynamicsecret_controller_test.go b/controllers/vaultdynamicsecret_controller_test.go index e0ca9fc3b..efe6f3849 100644 --- a/controllers/vaultdynamicsecret_controller_test.go +++ b/controllers/vaultdynamicsecret_controller_test.go @@ -992,6 +992,8 @@ func TestVaultDynamicSecretReconciler_computePostSyncHorizon(t *testing.T) { } } +var _ vault.Client = (*stubVaultClient)(nil) + type stubVaultClient struct { vault.Client cacheKey vault.ClientCacheKey diff --git a/controllers/vaultpkisecret_controller.go b/controllers/vaultpkisecret_controller.go index de0b90a72..943c4fbc9 100644 --- a/controllers/vaultpkisecret_controller.go +++ b/controllers/vaultpkisecret_controller.go @@ -175,6 +175,10 @@ func (r *VaultPKISecretReconciler) Reconcile(ctx context.Context, req ctrl.Reque }, nil } + clientCacheKey, _ := c.GetCacheKey() + o.Status.VaultClientMeta.CacheKey = clientCacheKey.String() + o.Status.VaultClientMeta.ID = c.ID() + resp, err := c.Write(ctx, vault.NewWriteRequest(path, o.GetIssuerAPIData())) if err != nil { if vault.IsForbiddenError(err) { diff --git a/controllers/vaultstaticsecret_controller.go b/controllers/vaultstaticsecret_controller.go index e20ca4707..ec250a84f 100644 --- a/controllers/vaultstaticsecret_controller.go +++ b/controllers/vaultstaticsecret_controller.go @@ -79,6 +79,10 @@ func (r *VaultStaticSecretReconciler) Reconcile(ctx context.Context, req ctrl.Re return ctrl.Result{RequeueAfter: computeHorizonWithJitter(requeueDurationOnError)}, nil } + clientCacheKey, _ := c.GetCacheKey() + o.Status.VaultClientMeta.CacheKey = clientCacheKey.String() + o.Status.VaultClientMeta.ID = c.ID() + var requeueAfter time.Duration if o.Spec.RefreshAfter != "" { d, err := parseDurationString(o.Spec.RefreshAfter, ".spec.refreshAfter", 0) diff --git a/docs/api/api-reference.md b/docs/api/api-reference.md index f8f4e2a14..1df5ac68e 100644 --- a/docs/api/api-reference.md +++ b/docs/api/api-reference.md @@ -804,6 +804,8 @@ sync the secret. This status is used during resource reconciliation. _Appears in:_ - [VaultDynamicSecretStatus](#vaultdynamicsecretstatus) +- [VaultPKISecretStatus](#vaultpkisecretstatus) +- [VaultStaticSecretStatus](#vaultstaticsecretstatus) | Field | Description | Default | Validation | | --- | --- | --- | --- | diff --git a/internal/vault/cache.go b/internal/vault/cache.go index 37c8d658a..edc92aa65 100644 --- a/internal/vault/cache.go +++ b/internal/vault/cache.go @@ -20,6 +20,8 @@ type ClientCache interface { Get(ClientCacheKey) (Client, bool) Add(Client) (bool, error) Remove(ClientCacheKey) bool + Keys() []ClientCacheKey + Values() []Client Len() int Prune(filterFunc ClientCachePruneFilterFunc) []Client Contains(key ClientCacheKey) bool @@ -40,11 +42,19 @@ type clientCache struct { missCloneCounter prometheus.Counter } +func (c *clientCache) Keys() []ClientCacheKey { + return c.cache.Keys() +} + +func (c *clientCache) Values() []Client { + return c.cache.Values() +} + // Purge all Clients from the cache. Useful when shutting down a // CachingClientFactory. func (c *clientCache) Purge() []ClientCacheKey { var purged []ClientCacheKey - for _, key := range c.cache.Keys() { + for _, key := range c.Keys() { client, ok := c.Get(key) if !ok { continue @@ -72,19 +82,27 @@ func (c *clientCache) Len() int { func (c *clientCache) Get(key ClientCacheKey) (Client, bool) { if key.IsClone() { if client, ok := c.cloneCache.Get(key); ok { - c.hitCloneCounter.Inc() + if c.hitCloneCounter != nil { + c.hitCloneCounter.Inc() + } return client, ok } else { - c.missCloneCounter.Inc() + if c.missCloneCounter != nil { + c.missCloneCounter.Inc() + } } return nil, false } if client, ok := c.cache.Get(key); ok { - c.hitCounter.Inc() + if c.hitCounter != nil { + c.hitCounter.Inc() + } return client, ok } else { - c.missCounter.Inc() + if c.missCounter != nil { + c.missCounter.Inc() + } return nil, false } } @@ -136,7 +154,7 @@ func (c *clientCache) Remove(key ClientCacheKey) bool { func (c *clientCache) Prune(filterFunc ClientCachePruneFilterFunc) []Client { var pruned []Client - for _, k := range c.cache.Keys() { + for _, k := range c.Keys() { if client, ok := c.cache.Peek(k); ok { if filterFunc(client) { if c.remove(k, client) { diff --git a/internal/vault/client.go b/internal/vault/client.go index cba045786..adb0ce5eb 100644 --- a/internal/vault/client.go +++ b/internal/vault/client.go @@ -26,6 +26,26 @@ import ( "github.com/hashicorp/vault-secrets-operator/internal/metrics" ) +type ClientStat struct { + // createTime is the time the client was created. + createTime time.Time + mu sync.RWMutex +} + +// Age returns the duration since the client was created. +func (m *ClientStat) Age() time.Duration { + m.mu.RLock() + defer m.mu.RUnlock() + return time.Since(m.createTime) +} + +// Reset the client's creation time to the current time. +func (m *ClientStat) Reset() { + m.mu.Lock() + defer m.mu.Unlock() + m.createTime = time.Now() +} + type ClientOptions struct { SkipRenewal bool WatcherDoneCh chan<- *ClientCallbackHandlerRequest @@ -170,6 +190,7 @@ type Client interface { SetNamespace(string) Tainted() bool Untaint() bool + Stat() *ClientStat } var _ Client = (*defaultClient)(nil) @@ -193,6 +214,11 @@ type defaultClient struct { once sync.Once mu sync.RWMutex id string + clientStat *ClientStat +} + +func (c *defaultClient) Stat() *ClientStat { + return c.clientStat } // Untaint the client, marking it as untainted. This should be done after the @@ -772,6 +798,9 @@ func (c *defaultClient) init(ctx context.Context, client ctrlclient.Client, c.connObj = connObj c.watcherDoneCh = opts.WatcherDoneCh + c.clientStat = &ClientStat{} + c.clientStat.Reset() + return nil } diff --git a/internal/vault/client_factory.go b/internal/vault/client_factory.go index ffcaa58e4..c92a7a5a9 100644 --- a/internal/vault/client_factory.go +++ b/internal/vault/client_factory.go @@ -40,6 +40,8 @@ const ( ClientCallbackOnCacheRemoval ) +var defaultPruneOrphanAge = 5 * time.Minute + func (o ClientCallbackOn) String() string { switch o { case ClientCallbackOnLifetimeWatcherDone: @@ -139,7 +141,8 @@ type cachingClientFactory struct { // encClientLock is a lock for the encryption client. It is used to ensure that // only one encryption client is created. This is necessary because the // encryption client is not stored in the cache. - encClientLock sync.RWMutex + encClientLock sync.RWMutex + orphanPrunerCancel context.CancelFunc } // Start method for cachingClientFactory starts the lifetime watcher handler. @@ -148,6 +151,7 @@ func (m *cachingClientFactory) Start(ctx context.Context) { defer m.mu.Unlock() m.onceDoWatcher.Do(func() { m.startClientCallbackHandler(ctx) + m.startOrphanClientPruner(ctx) }) } @@ -245,10 +249,14 @@ func (m *cachingClientFactory) pruneStorage(ctx context.Context, client ctrlclie // onClientEvict should be called whenever an eviction from the ClientCache occurs. // It should always call Client.Close() to prevent leaking Go routines. func (m *cachingClientFactory) onClientEvict(ctx context.Context, client ctrlclient.Client, cacheKey ClientCacheKey, c Client) { - logger := m.logger.WithValues("cacheKey", cacheKey) + logger := m.logger.WithName("onClientEvict").WithValues("cacheKey", cacheKey) logger.Info("Handling client cache eviction") c.Close(m.revokeOnEvict) + if m.clientCacheKeyEncrypt == cacheKey { + m.clientCacheKeyEncrypt = "" + } + if m.storageEnabled() && m.pruneStorageOnEvict { if count, err := m.pruneStorage(ctx, client, cacheKey); err != nil { logger.Error(err, "Failed to remove Client from storage") @@ -689,15 +697,15 @@ func (m *cachingClientFactory) incrementRequestCounter(operation string, err err } func (m *cachingClientFactory) startClientCallbackHandler(ctx context.Context) { + logger := log.FromContext(ctx).WithName("clientCallbackHandler") if m.callbackHandlerCancel != nil { - m.logger.Info("Already started") + logger.Info("Already started") return } callbackCtx, cancel := context.WithCancel(ctx) m.callbackHandlerCancel = cancel - logger := log.FromContext(ctx).WithName("clientCallbackHandler") logger.Info("Starting client callback handler") go func() { @@ -793,18 +801,142 @@ func (m *cachingClientFactory) callClientCallbacks(ctx context.Context, c Client } } +// startOrphanClientPruner starts a go routine that will periodically prune +// orphaned clients from the cache. An orphaned client is a client that is not +// associated with any of secretsv1beta1.VaultStaticSecret, +// secretsv1beta1.VaultPKISecret, secretsv1beta1.VaultDynamicSecret. +func (m *cachingClientFactory) startOrphanClientPruner(ctx context.Context) { + logger := log.FromContext(ctx).WithName("orphanClientPruner") + if m.orphanPrunerCancel != nil { + logger.Info("Already started") + } + + ctx_, cancel := context.WithCancel(ctx) + m.orphanPrunerCancel = cancel + // TODO: make period a command line option + ticker := time.NewTicker(30 * time.Second) + go func() { + defer func() { + m.orphanPrunerCancel = nil + }() + for { + select { + case <-ctx_.Done(): + logger.Info("Done") + return + case <-ticker.C: + if count, err := m.pruneOrphanClients(ctx); err != nil { + logger.Error(err, "Prune orphan Vault Clients") + } else { + logger.Info("Prune orphan Vault Clients", "count", count) + } + } + } + }() +} + +// pruneOrphanClients will remove all clients from the cache that are not +// associated with any of the following custom resources: +// secretsv1beta1.VaultStaticSecret, secretsv1beta1.VaultPKISecret, +// secretsv1beta1.VaultDynamicSecret. +func (m *cachingClientFactory) pruneOrphanClients(ctx context.Context) (int, error) { + m.mu.Lock() + defer m.mu.Unlock() + + logger := m.logger.WithName("pruneOrphanClients") + + currentClientCacheKeys := map[ClientCacheKey]empty{} + addCurrentClientCacheKeys := func(meta secretsv1beta1.VaultClientMeta) { + if meta.CacheKey != "" { + key := ClientCacheKey(meta.CacheKey) + currentClientCacheKeys[key] = empty{} + } + } + + var vssList secretsv1beta1.VaultStaticSecretList + err := m.ctrlClient.List(ctx, &vssList) + if err != nil { + return 0, err + } + + for _, o := range vssList.Items { + addCurrentClientCacheKeys(o.Status.VaultClientMeta) + } + + var vpsList secretsv1beta1.VaultPKISecretList + err = m.ctrlClient.List(ctx, &vpsList) + if err != nil { + return 0, err + } + for _, o := range vpsList.Items { + addCurrentClientCacheKeys(o.Status.VaultClientMeta) + } + + var vdsList secretsv1beta1.VaultDynamicSecretList + err = m.ctrlClient.List(ctx, &vdsList) + if err != nil { + return 0, err + } + for _, o := range vdsList.Items { + addCurrentClientCacheKeys(o.Status.VaultClientMeta) + } + + var toPrune []ClientCacheKey + for _, c := range m.cache.Values() { + key, err := c.GetCacheKey() + if err != nil { + continue + } + + if _, ok := currentClientCacheKeys[key]; !ok { + if key == m.clientCacheKeyEncrypt { + continue + } + stat := c.Stat() + if stat == nil { + continue + } + // prune clients that have not been created in the last 5 minutes, this gives + // time for any referring resource to update their + // .status.vaultClientMeta.cacheKey + if stat.Age() >= defaultPruneOrphanAge { + toPrune = append(toPrune, key) + } + } + } + + // TODO: ensure that this does not block forever... + var count int + wg := sync.WaitGroup{} + wg.Add(len(toPrune)) + for _, key := range toPrune { + count++ + go func() { + defer wg.Done() + m.cache.Remove(key) + m.removeClientLock(key) + }() + } + wg.Wait() + + logger.V(consts.LogLevelDebug).Info( + "Pruned orphaned clients", "count", count, "pruned", toPrune) + return count, nil +} + // NewCachingClientFactory returns a CachingClientFactory with ClientCache initialized. // The ClientCache's onEvictCallback is registered with the factory's onClientEvict(), // to ensure any evictions are handled by the factory (this is very important). func NewCachingClientFactory(ctx context.Context, client ctrlclient.Client, cacheStorage ClientCacheStorage, config *CachingClientFactoryConfig) (CachingClientFactory, error) { factory := &cachingClientFactory{ - storage: cacheStorage, - recorder: config.Recorder, - persist: config.Persist, - ctrlClient: client, - callbackHandlerCh: make(chan *ClientCallbackHandlerRequest), - encryptionRequired: config.StorageConfig.EnforceEncryption, - clientLocks: make(map[ClientCacheKey]*sync.RWMutex, config.ClientCacheSize), + storage: cacheStorage, + recorder: config.Recorder, + persist: config.Persist, + ctrlClient: client, + pruneStorageOnEvict: true, + callbackHandlerCh: make(chan *ClientCallbackHandlerRequest), + encryptionRequired: config.StorageConfig.EnforceEncryption, + clientLocks: make(map[ClientCacheKey]*sync.RWMutex, config.ClientCacheSize), logger: zap.New().WithName("clientCacheFactory").WithValues( "persist", config.Persist, "enforceEncryption", config.StorageConfig.EnforceEncryption, @@ -932,3 +1064,5 @@ type nullEventRecorder struct { } func (n *nullEventRecorder) Event(_ runtime.Object, _, _, _ string) {} + +type empty struct{} diff --git a/internal/vault/client_factory_test.go b/internal/vault/client_factory_test.go index cf6240dbc..f4b331ba2 100644 --- a/internal/vault/client_factory_test.go +++ b/internal/vault/client_factory_test.go @@ -5,12 +5,24 @@ package vault import ( "context" + "errors" + "fmt" "sync" "testing" "time" + lru "github.com/hashicorp/golang-lru/v2" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + secretsv1beta1 "github.com/hashicorp/vault-secrets-operator/api/v1beta1" + "github.com/hashicorp/vault-secrets-operator/internal/credentials/provider" ) func Test_cachingClientFactory_RegisterClientCallbackHandler(t *testing.T) { @@ -300,3 +312,245 @@ func Test_cachingClientFactory_callClientCallbacks(t *testing.T) { }) } } + +func Test_cachingClientFactory_pruneOrphanClients(t *testing.T) { + t.Parallel() + + ctx := context.Background() + + type keyTest struct { + key ClientCacheKey + creationTimeOffset time.Duration + } + + newCache := func(size int, keyTests ...keyTest) *clientCache { + lruCache, err := lru.NewWithEvict[ClientCacheKey, Client](size, nil) + require.NoError(t, err) + c := &clientCache{ + cache: lruCache, + } + for _, k := range keyTests { + c.cache.Add(k.key, &stubClient{ + cacheKey: k.key, + clientStat: &ClientStat{ + createTime: time.Now().Add(k.creationTimeOffset), + }, + }) + } + return c + } + + clientBuilder := newClientBuilder() + schemeLessBuilder := fake.NewClientBuilder() + tests := []struct { + name string + cache ClientCache + c ctrlclient.Client + want int + createFunc func(t *testing.T, c ctrlclient.Client) error + wantClientCacheKeys []ClientCacheKey + wantErr assert.ErrorAssertionFunc + }{ + { + name: "empty-cache", + c: clientBuilder.Build(), + cache: newCache(1), + wantErr: assert.NoError, + }, + { + name: "no-referring-objects-purge", + c: clientBuilder.Build(), + cache: newCache(1, + keyTest{ + key: "kubernetes-123456", + creationTimeOffset: -defaultPruneOrphanAge, + }, + ), + want: 1, + wantErr: assert.NoError, + }, + { + name: "prune-some", + c: clientBuilder.Build(), + createFunc: func(t *testing.T, c ctrlclient.Client) error { + t.Helper() + var errs error + for _, o := range []ctrlclient.Object{ + &secretsv1beta1.VaultStaticSecret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "vss-1", + Namespace: "default", + }, + Status: secretsv1beta1.VaultStaticSecretStatus{ + VaultClientMeta: secretsv1beta1.VaultClientMeta{ + CacheKey: "kubernetes-123456", + }, + }, + }, + } { + errs = errors.Join(errs, c.Create(ctx, o)) + } + return errs + }, + cache: newCache(2, + keyTest{ + key: "kubernetes-123455", + creationTimeOffset: -defaultPruneOrphanAge, + }, + keyTest{ + key: "kubernetes-123456", + creationTimeOffset: -defaultPruneOrphanAge, + }, + ), + wantClientCacheKeys: []ClientCacheKey{ + ClientCacheKey("kubernetes-123456"), + }, + want: 1, + wantErr: assert.NoError, + }, + { + name: "none", + c: clientBuilder.Build(), + cache: newCache(1, + keyTest{ + key: "kubernetes-123456", + creationTimeOffset: -defaultPruneOrphanAge, + }, + ), + createFunc: func(t *testing.T, c ctrlclient.Client) error { + t.Helper() + var errs error + for _, o := range []ctrlclient.Object{ + &secretsv1beta1.VaultStaticSecret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "vss-1", + Namespace: "default", + }, + Status: secretsv1beta1.VaultStaticSecretStatus{ + VaultClientMeta: secretsv1beta1.VaultClientMeta{ + CacheKey: "kubernetes-123456", + }, + }, + }, + &secretsv1beta1.VaultPKISecret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "vps-1", + Namespace: "default", + }, + Status: secretsv1beta1.VaultPKISecretStatus{ + VaultClientMeta: secretsv1beta1.VaultClientMeta{ + CacheKey: "kubernetes-123456", + }, + }, + }, + &secretsv1beta1.VaultDynamicSecret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "vds-1", + Namespace: "default", + }, + Status: secretsv1beta1.VaultDynamicSecretStatus{ + VaultClientMeta: secretsv1beta1.VaultClientMeta{ + CacheKey: "kubernetes-123456", + }, + }, + }, + } { + errs = errors.Join(errs, c.Create(ctx, o)) + } + return errs + }, + wantErr: assert.NoError, + wantClientCacheKeys: []ClientCacheKey{ + ClientCacheKey("kubernetes-123456"), + }, + want: 0, + }, + { + name: "no-prune-recent", + c: clientBuilder.Build(), + cache: newCache(2, + keyTest{ + key: "kubernetes-123455", + creationTimeOffset: -(defaultPruneOrphanAge - time.Second*1), + }, + keyTest{ + key: "kubernetes-123456", + creationTimeOffset: -(defaultPruneOrphanAge - time.Second*1), + }, + ), + wantClientCacheKeys: []ClientCacheKey{ + ClientCacheKey("kubernetes-123455"), + ClientCacheKey("kubernetes-123456"), + }, + want: 0, + wantErr: assert.NoError, + }, + { + name: "vss-scheme-not-set", + c: schemeLessBuilder.Build(), + cache: newCache(1, + keyTest{ + key: "kubernetes-123456", + creationTimeOffset: -defaultPruneOrphanAge, + }, + ), + want: 0, + wantClientCacheKeys: []ClientCacheKey{ + ClientCacheKey("kubernetes-123456"), + }, + wantErr: func(t assert.TestingT, err error, msgAndArgs ...interface{}) bool { + return assert.ErrorContains(t, err, "no kind is registered for the type v1beta1.VaultStaticSecret") + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + m := &cachingClientFactory{ + cache: tt.cache, + ctrlClient: tt.c, + } + + if tt.createFunc != nil { + require.NoError(t, tt.createFunc(t, tt.c)) + } + + got, err := m.pruneOrphanClients(ctx) + if !tt.wantErr(t, err, fmt.Sprintf("pruneOrphanClients(%v)", ctx)) { + return + } + assert.Equalf(t, tt.want, got, "pruneOrphanClients(%v)", ctx) + assert.ElementsMatchf(t, tt.wantClientCacheKeys, m.cache.Keys(), "pruneOrphanClients(%v)", ctx) + }) + } +} + +// newClientBuilder returns a new fake.ClientBuilder with the necessary schemes. +// copied from helpers +func newClientBuilder() *fake.ClientBuilder { + scheme := runtime.NewScheme() + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + utilruntime.Must(secretsv1beta1.AddToScheme(scheme)) + return fake.NewClientBuilder().WithScheme(scheme) +} + +var _ Client = (*stubClient)(nil) + +type stubClient struct { + Client + cacheKey ClientCacheKey + credentialProvider provider.CredentialProviderBase + isClone bool + clientStat *ClientStat +} + +func (c *stubClient) GetCacheKey() (ClientCacheKey, error) { + return c.cacheKey, nil +} + +func (c *stubClient) IsClone() bool { + return c.isClone +} + +func (c *stubClient) Stat() *ClientStat { + return c.clientStat +} From c3f3e3548353976802fa0020b42c2d7f7a73ea63 Mon Sep 17 00:00:00 2001 From: Ben Ash Date: Mon, 10 Jun 2024 16:49:04 +0000 Subject: [PATCH 2/7] Adjust log level for onClientEvict() --- internal/vault/client_factory.go | 84 ++++++++++++++++++-------------- 1 file changed, 47 insertions(+), 37 deletions(-) diff --git a/internal/vault/client_factory.go b/internal/vault/client_factory.go index c92a7a5a9..bf1b4520d 100644 --- a/internal/vault/client_factory.go +++ b/internal/vault/client_factory.go @@ -40,7 +40,9 @@ const ( ClientCallbackOnCacheRemoval ) -var defaultPruneOrphanAge = 5 * time.Minute +// defaultPruneOrphanAge is the default age at which orphaned clients are +// eligible for pruning. +var defaultPruneOrphanAge = 2 * time.Minute func (o ClientCallbackOn) String() string { switch o { @@ -249,8 +251,8 @@ func (m *cachingClientFactory) pruneStorage(ctx context.Context, client ctrlclie // onClientEvict should be called whenever an eviction from the ClientCache occurs. // It should always call Client.Close() to prevent leaking Go routines. func (m *cachingClientFactory) onClientEvict(ctx context.Context, client ctrlclient.Client, cacheKey ClientCacheKey, c Client) { - logger := m.logger.WithName("onClientEvict").WithValues("cacheKey", cacheKey) - logger.Info("Handling client cache eviction") + logger := log.FromContext(ctx).WithName("onClientEvict").WithValues("cacheKey", cacheKey) + logger.V(consts.LogLevelDebug).Info("Handling client cache eviction") c.Close(m.revokeOnEvict) if m.clientCacheKeyEncrypt == cacheKey { @@ -261,7 +263,7 @@ func (m *cachingClientFactory) onClientEvict(ctx context.Context, client ctrlcli if count, err := m.pruneStorage(ctx, client, cacheKey); err != nil { logger.Error(err, "Failed to remove Client from storage") } else { - logger.Info("Pruned storage", "count", count) + logger.V(consts.LogLevelDebug).Info("Pruned storage", "count", count) } } @@ -845,42 +847,11 @@ func (m *cachingClientFactory) pruneOrphanClients(ctx context.Context) (int, err logger := m.logger.WithName("pruneOrphanClients") - currentClientCacheKeys := map[ClientCacheKey]empty{} - addCurrentClientCacheKeys := func(meta secretsv1beta1.VaultClientMeta) { - if meta.CacheKey != "" { - key := ClientCacheKey(meta.CacheKey) - currentClientCacheKeys[key] = empty{} - } - } - - var vssList secretsv1beta1.VaultStaticSecretList - err := m.ctrlClient.List(ctx, &vssList) + currentClientCacheKeys, err := GetGlobalVaultCacheKeys(ctx, m.ctrlClient) if err != nil { return 0, err } - for _, o := range vssList.Items { - addCurrentClientCacheKeys(o.Status.VaultClientMeta) - } - - var vpsList secretsv1beta1.VaultPKISecretList - err = m.ctrlClient.List(ctx, &vpsList) - if err != nil { - return 0, err - } - for _, o := range vpsList.Items { - addCurrentClientCacheKeys(o.Status.VaultClientMeta) - } - - var vdsList secretsv1beta1.VaultDynamicSecretList - err = m.ctrlClient.List(ctx, &vdsList) - if err != nil { - return 0, err - } - for _, o := range vdsList.Items { - addCurrentClientCacheKeys(o.Status.VaultClientMeta) - } - var toPrune []ClientCacheKey for _, c := range m.cache.Values() { key, err := c.GetCacheKey() @@ -1065,4 +1036,43 @@ type nullEventRecorder struct { func (n *nullEventRecorder) Event(_ runtime.Object, _, _, _ string) {} -type empty struct{} +// GetGlobalVaultCacheKeys returns the current set of vault.ClientCacheKey(s) that are in +// use. +func GetGlobalVaultCacheKeys(ctx context.Context, client ctrlclient.Client) (map[ClientCacheKey]int, error) { + currentClientCacheKeys := map[ClientCacheKey]int{} + addCurrentClientCacheKeys := func(meta secretsv1beta1.VaultClientMeta) { + if meta.CacheKey != "" { + key := ClientCacheKey(meta.CacheKey) + currentClientCacheKeys[key] = currentClientCacheKeys[key] + 1 + } + } + + var vssList secretsv1beta1.VaultStaticSecretList + err := client.List(ctx, &vssList) + if err != nil { + return nil, err + } + + for _, o := range vssList.Items { + addCurrentClientCacheKeys(o.Status.VaultClientMeta) + } + var vpsList secretsv1beta1.VaultPKISecretList + err = client.List(ctx, &vpsList) + if err != nil { + return nil, err + } + for _, o := range vpsList.Items { + addCurrentClientCacheKeys(o.Status.VaultClientMeta) + } + + var vdsList secretsv1beta1.VaultDynamicSecretList + err = client.List(ctx, &vdsList) + if err != nil { + return nil, err + } + for _, o := range vdsList.Items { + addCurrentClientCacheKeys(o.Status.VaultClientMeta) + } + + return currentClientCacheKeys, nil +} From 1341d4e6c6b1a9850da324d797c6e06775a9df2c Mon Sep 17 00:00:00 2001 From: Ben Ash Date: Mon, 10 Jun 2024 16:34:23 -0400 Subject: [PATCH 3/7] Use object references to prune clients on object deletion. --- controllers/vaultdynamicsecret_controller.go | 1 + controllers/vaultpkisecret_controller.go | 1 + controllers/vaultstaticsecret_controller.go | 1 + internal/common/common.go | 17 ++- internal/vault/client.go | 55 ++++++++-- internal/vault/client_factory.go | 107 +++++++++++++++++-- internal/vault/client_factory_test.go | 6 +- 7 files changed, 169 insertions(+), 19 deletions(-) diff --git a/controllers/vaultdynamicsecret_controller.go b/controllers/vaultdynamicsecret_controller.go index 68af34481..b1598746d 100644 --- a/controllers/vaultdynamicsecret_controller.go +++ b/controllers/vaultdynamicsecret_controller.go @@ -651,6 +651,7 @@ func (r *VaultDynamicSecretReconciler) handleDeletion(ctx context.Context, o *se r.SyncRegistry.Delete(objKey) r.BackOffRegistry.Delete(objKey) r.referenceCache.Remove(SecretTransformation, objKey) + r.ClientFactory.UnregisterObjectRef(o.GetUID()) if controllerutil.ContainsFinalizer(o, vaultDynamicSecretFinalizer) { logger.Info("Removing finalizer") if controllerutil.RemoveFinalizer(o, vaultDynamicSecretFinalizer) { diff --git a/controllers/vaultpkisecret_controller.go b/controllers/vaultpkisecret_controller.go index 943c4fbc9..77d355a44 100644 --- a/controllers/vaultpkisecret_controller.go +++ b/controllers/vaultpkisecret_controller.go @@ -335,6 +335,7 @@ func (r *VaultPKISecretReconciler) handleDeletion(ctx context.Context, o *secret logger := log.FromContext(ctx).WithName("handleDeletion").WithValues( "finalizer", vaultPKIFinalizer, "isSet", finalizerSet) logger.V(consts.LogLevelTrace).Info("In deletion") + r.ClientFactory.UnregisterObjectRef(o.GetUID()) if finalizerSet { logger.V(consts.LogLevelDebug).Info("Delete finalizer") if controllerutil.RemoveFinalizer(o, vaultPKIFinalizer) { diff --git a/controllers/vaultstaticsecret_controller.go b/controllers/vaultstaticsecret_controller.go index ec250a84f..a2acd2e4f 100644 --- a/controllers/vaultstaticsecret_controller.go +++ b/controllers/vaultstaticsecret_controller.go @@ -209,6 +209,7 @@ func (r *VaultStaticSecretReconciler) handleDeletion(ctx context.Context, o clie objKey := client.ObjectKeyFromObject(o) r.referenceCache.Remove(SecretTransformation, objKey) r.BackOffRegistry.Delete(objKey) + r.ClientFactory.UnregisterObjectRef(o.GetUID()) if controllerutil.ContainsFinalizer(o, vaultStaticSecretFinalizer) { logger.Info("Removing finalizer") if controllerutil.RemoveFinalizer(o, vaultStaticSecretFinalizer) { diff --git a/internal/common/common.go b/internal/common/common.go index 483b5ba2e..794842933 100644 --- a/internal/common/common.go +++ b/internal/common/common.go @@ -681,7 +681,8 @@ type SyncableSecretMetaData struct { Namespace string // Destination of the syncable-secret object. Maps to obj.Spec.Destination. Destination *secretsv1beta1.Destination - AuthRef string + // AuthRef of the syncable-secret object. Maps to obj.Spec.VaultAuthRef. + AuthRef string } // NewSyncableSecretMetaData returns SyncableSecretMetaData if obj is a supported type. @@ -721,3 +722,17 @@ func NewSyncableSecretMetaData(obj ctrlclient.Object) (*SyncableSecretMetaData, return meta, nil } + +// GetVaultClientMeta returns the VaultClientMeta for the supported object types. +func GetVaultClientMeta(obj client.Object) (*secretsv1beta1.VaultClientMeta, error) { + switch t := obj.(type) { + case *secretsv1beta1.VaultDynamicSecret: + return t.Status.VaultClientMeta.DeepCopy(), nil + case *secretsv1beta1.VaultStaticSecret: + return t.Status.VaultClientMeta.DeepCopy(), nil + case *secretsv1beta1.VaultPKISecret: + return t.Status.VaultClientMeta.DeepCopy(), nil + default: + return nil, fmt.Errorf("unsupported type %T", t) + } +} diff --git a/internal/vault/client.go b/internal/vault/client.go index adb0ce5eb..9e3f7b701 100644 --- a/internal/vault/client.go +++ b/internal/vault/client.go @@ -26,24 +26,63 @@ import ( "github.com/hashicorp/vault-secrets-operator/internal/metrics" ) -type ClientStat struct { +type ClientStat interface { + Age() time.Duration + Reset() + RefCount() int + IncRefCount() + DecRefCount() +} + +var _ ClientStat = (*clientStat)(nil) + +type clientStat struct { // createTime is the time the client was created. createTime time.Time - mu sync.RWMutex + // refCount is the number of references to the client. + refCount int + mu sync.RWMutex } // Age returns the duration since the client was created. -func (m *ClientStat) Age() time.Duration { +func (m *clientStat) Age() time.Duration { m.mu.RLock() defer m.mu.RUnlock() return time.Since(m.createTime) } // Reset the client's creation time to the current time. -func (m *ClientStat) Reset() { +func (m *clientStat) Reset() { m.mu.Lock() defer m.mu.Unlock() m.createTime = time.Now() + m.refCount = 0 +} + +// IncRefCount increments the client's reference count. This is useful for +// tracking the number of references to a client. +func (m *clientStat) IncRefCount() { + m.mu.Lock() + defer m.mu.Unlock() + m.refCount++ +} + +// DecRefCount decrements the client's reference count. This is useful for +// tracking the number of references to a client. +func (m *clientStat) DecRefCount() { + m.mu.Lock() + defer m.mu.Unlock() + if m.refCount > 0 { + m.refCount-- + } +} + +// RefCount returns the client's reference count. This is useful for tracking the +// number of references to a client. +func (m *clientStat) RefCount() int { + m.mu.RLock() + defer m.mu.RUnlock() + return m.refCount } type ClientOptions struct { @@ -190,7 +229,7 @@ type Client interface { SetNamespace(string) Tainted() bool Untaint() bool - Stat() *ClientStat + Stat() *clientStat } var _ Client = (*defaultClient)(nil) @@ -214,10 +253,10 @@ type defaultClient struct { once sync.Once mu sync.RWMutex id string - clientStat *ClientStat + clientStat *clientStat } -func (c *defaultClient) Stat() *ClientStat { +func (c *defaultClient) Stat() *clientStat { return c.clientStat } @@ -798,7 +837,7 @@ func (c *defaultClient) init(ctx context.Context, client ctrlclient.Client, c.connObj = connObj c.watcherDoneCh = opts.WatcherDoneCh - c.clientStat = &ClientStat{} + c.clientStat = &clientStat{} c.clientStat.Reset() return nil diff --git a/internal/vault/client_factory.go b/internal/vault/client_factory.go index bf1b4520d..7f77f57b2 100644 --- a/internal/vault/client_factory.go +++ b/internal/vault/client_factory.go @@ -42,7 +42,7 @@ const ( // defaultPruneOrphanAge is the default age at which orphaned clients are // eligible for pruning. -var defaultPruneOrphanAge = 2 * time.Minute +var defaultPruneOrphanAge = 1 * time.Minute func (o ClientCallbackOn) String() string { switch o { @@ -90,6 +90,7 @@ func (e *ClientFactoryDisabledError) Error() string { type ClientFactory interface { Get(context.Context, ctrlclient.Client, ctrlclient.Object) (Client, error) RegisterClientCallbackHandler(ClientCallbackHandler) + UnregisterObjectRef(types.UID) } // clientCacheObjectFilterFunc provides a way to selectively prune CachingClientFactory's Client cache. @@ -143,8 +144,40 @@ type cachingClientFactory struct { // encClientLock is a lock for the encryption client. It is used to ensure that // only one encryption client is created. This is necessary because the // encryption client is not stored in the cache. - encClientLock sync.RWMutex + encClientLock sync.RWMutex + // orphanPrunerCancel is a function that is used to cancel the orphan client pruner. orphanPrunerCancel context.CancelFunc + // orphanPrunerClientCh is a channel that is used to handle Clients that no longer + // have any object references. + orphanPrunerClientCh chan Client + // cacheKeyReferences is a map of cache keys to ClientCacheKey. It is used to track + cacheKeyReferences map[types.UID]ClientCacheKey +} + +// UnregisterObjectRef removes the reference to the object with the specified +// UID. This is used to remove the reference to the object when it is deleted. +// All controllers should call this method if they are using the caching client +// factory. +func (m *cachingClientFactory) UnregisterObjectRef(uid types.UID) { + m.mu.Lock() + defer m.mu.Unlock() + logger := log.FromContext(context.Background()).WithName("UnregisterObjectRef").WithValues( + "uid", uid, + ) + logger.V(consts.LogLevelDebug).Info("Unregistering client reference") + key, ok := m.cacheKeyReferences[uid] + if !ok { + return + } + if c, ok := m.cache.Get(key); ok { + c.Stat().DecRefCount() + logger.V(consts.LogLevelDebug).Info("Writing to orphanPrunerClientCh", + "id", c.ID(), "cacheKey", key, c.Stat().RefCount(), + ) + m.orphanPrunerClientCh <- c + } + logger.V(consts.LogLevelDebug).Info("Removing cache ref", "cacheKey", key) + delete(m.cacheKeyReferences, uid) } // Start method for cachingClientFactory starts the lifetime watcher handler. @@ -154,6 +187,7 @@ func (m *cachingClientFactory) Start(ctx context.Context) { m.onceDoWatcher.Do(func() { m.startClientCallbackHandler(ctx) m.startOrphanClientPruner(ctx) + m.cacheKeyReferences = make(map[types.UID]ClientCacheKey) }) } @@ -251,7 +285,7 @@ func (m *cachingClientFactory) pruneStorage(ctx context.Context, client ctrlclie // onClientEvict should be called whenever an eviction from the ClientCache occurs. // It should always call Client.Close() to prevent leaking Go routines. func (m *cachingClientFactory) onClientEvict(ctx context.Context, client ctrlclient.Client, cacheKey ClientCacheKey, c Client) { - logger := log.FromContext(ctx).WithName("onClientEvict").WithValues("cacheKey", cacheKey) + logger := m.logger.WithName("onClientEvict").WithValues("cacheKey", cacheKey) logger.V(consts.LogLevelDebug).Info("Handling client cache eviction") c.Close(m.revokeOnEvict) @@ -259,6 +293,12 @@ func (m *cachingClientFactory) onClientEvict(ctx context.Context, client ctrlcli m.clientCacheKeyEncrypt = "" } + for k, v := range m.cacheKeyReferences { + if v == cacheKey { + delete(m.cacheKeyReferences, k) + } + } + if m.storageEnabled() && m.pruneStorageOnEvict { if count, err := m.pruneStorage(ctx, client, cacheKey); err != nil { logger.Error(err, "Failed to remove Client from storage") @@ -381,6 +421,40 @@ func (m *cachingClientFactory) Get(ctx context.Context, client ctrlclient.Client } else { mt.Set(0) } + + if errs != nil { + lastCacheKey, ok := m.cacheKeyReferences[obj.GetUID()] + if ok { + c, ok := m.cache.Get(lastCacheKey) + if ok { + c.Stat().DecRefCount() + } + delete(m.cacheKeyReferences, obj.GetUID()) + } + } else { + var increment bool + lastCacheKey, ok := m.cacheKeyReferences[obj.GetUID()] + if ok { + if lastCacheKey != cacheKey { + c, ok := m.cache.Get(lastCacheKey) + if ok { + c.Stat().DecRefCount() + } + m.orphanPrunerClientCh <- c + increment = true + } + } else { + increment = true + } + + if increment { + c, ok := m.cache.Get(cacheKey) + if ok { + c.Stat().IncRefCount() + } + } + m.cacheKeyReferences[obj.GetUID()] = cacheKey + } }() cacheKey, err = ComputeClientCacheKeyFromObj(ctx, client, obj) @@ -813,12 +887,17 @@ func (m *cachingClientFactory) startOrphanClientPruner(ctx context.Context) { logger.Info("Already started") } + if m.orphanPrunerClientCh == nil { + m.orphanPrunerClientCh = make(chan Client) + } + ctx_, cancel := context.WithCancel(ctx) m.orphanPrunerCancel = cancel // TODO: make period a command line option - ticker := time.NewTicker(30 * time.Second) + ticker := time.NewTicker(30 * time.Minute) go func() { defer func() { + close(m.orphanPrunerClientCh) m.orphanPrunerCancel = nil }() for { @@ -826,11 +905,26 @@ func (m *cachingClientFactory) startOrphanClientPruner(ctx context.Context) { case <-ctx_.Done(): logger.Info("Done") return + + case c, stillOpen := <-m.orphanPrunerClientCh: + if !stillOpen { + logger.Info("Client callback handler channel closed") + return + } + if c.Stat().RefCount() == 0 { + cacheKey, err := c.GetCacheKey() + if err != nil { + logger.Error(err, "Prune orphan Vault Clients", "trigger", "wakeup") + } else { + logger.Info("Prune orphan Vault Clients", "refCount", c.Stat().RefCount(), "trigger", "wakeup") + m.cache.Remove(cacheKey) + } + } case <-ticker.C: if count, err := m.pruneOrphanClients(ctx); err != nil { - logger.Error(err, "Prune orphan Vault Clients") + logger.Error(err, "Prune orphan Vault Clients", "trigger", "tick") } else { - logger.Info("Prune orphan Vault Clients", "count", count) + logger.Info("Prune orphan Vault Clients", "count", count, "trigger", "tick") } } } @@ -885,7 +979,6 @@ func (m *cachingClientFactory) pruneOrphanClients(ctx context.Context) (int, err go func() { defer wg.Done() m.cache.Remove(key) - m.removeClientLock(key) }() } wg.Wait() diff --git a/internal/vault/client_factory_test.go b/internal/vault/client_factory_test.go index f4b331ba2..8b943e4fa 100644 --- a/internal/vault/client_factory_test.go +++ b/internal/vault/client_factory_test.go @@ -332,7 +332,7 @@ func Test_cachingClientFactory_pruneOrphanClients(t *testing.T) { for _, k := range keyTests { c.cache.Add(k.key, &stubClient{ cacheKey: k.key, - clientStat: &ClientStat{ + clientStat: &clientStat{ createTime: time.Now().Add(k.creationTimeOffset), }, }) @@ -540,7 +540,7 @@ type stubClient struct { cacheKey ClientCacheKey credentialProvider provider.CredentialProviderBase isClone bool - clientStat *ClientStat + clientStat *clientStat } func (c *stubClient) GetCacheKey() (ClientCacheKey, error) { @@ -551,6 +551,6 @@ func (c *stubClient) IsClone() bool { return c.isClone } -func (c *stubClient) Stat() *ClientStat { +func (c *stubClient) Stat() *clientStat { return c.clientStat } From 48da7add64f5cf8c7b86befdf7867b0d5d9f5023 Mon Sep 17 00:00:00 2001 From: Ben Ash Date: Tue, 11 Jun 2024 19:04:27 +0000 Subject: [PATCH 4/7] Rework client stats handling --- api/v1beta1/common.go | 3 + api/v1beta1/zz_generated.deepcopy.go | 13 +- ...ets.hashicorp.com_vaultdynamicsecrets.yaml | 4 + ...secrets.hashicorp.com_vaultpkisecrets.yaml | 4 + ...rets.hashicorp.com_vaultstaticsecrets.yaml | 4 + ...ets.hashicorp.com_vaultdynamicsecrets.yaml | 4 + ...secrets.hashicorp.com_vaultpkisecrets.yaml | 4 + ...rets.hashicorp.com_vaultstaticsecrets.yaml | 4 + controllers/vaultdynamicsecret_controller.go | 62 ++++---- controllers/vaultpkisecret_controller.go | 46 ++---- controllers/vaultstaticsecret_controller.go | 38 +++-- docs/api/api-reference.md | 1 + internal/vault/client.go | 18 ++- internal/vault/client_factory.go | 145 +++++++++++------- internal/vault/client_factory_test.go | 2 +- test/integration/modules/vso-helm/main.tf | 8 + .../vaultdynamicsecret_integration_test.go | 2 +- 17 files changed, 213 insertions(+), 149 deletions(-) diff --git a/api/v1beta1/common.go b/api/v1beta1/common.go index 8059ab962..f09bedd66 100644 --- a/api/v1beta1/common.go +++ b/api/v1beta1/common.go @@ -5,6 +5,7 @@ package v1beta1 import ( v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) // Destination provides the configuration that will be applied to the @@ -123,4 +124,6 @@ type VaultClientMeta struct { // ID is the Vault ID of the authenticated client. The ID should never contain // any sensitive information. ID string `json:"id,omitempty"` + // CreatedAt is the time the client was created. + CreatedAt metav1.Time `json:"createdAt,omitempty"` } diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go index f7ce438f6..45bed4af0 100644 --- a/api/v1beta1/zz_generated.deepcopy.go +++ b/api/v1beta1/zz_generated.deepcopy.go @@ -1052,6 +1052,7 @@ func (in *VaultAuthStatus) DeepCopy() *VaultAuthStatus { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *VaultClientMeta) DeepCopyInto(out *VaultClientMeta) { *out = *in + in.CreatedAt.DeepCopyInto(&out.CreatedAt) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VaultClientMeta. @@ -1166,7 +1167,7 @@ func (in *VaultDynamicSecret) DeepCopyInto(out *VaultDynamicSecret) { out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) in.Spec.DeepCopyInto(&out.Spec) - out.Status = in.Status + in.Status.DeepCopyInto(&out.Status) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VaultDynamicSecret. @@ -1252,7 +1253,7 @@ func (in *VaultDynamicSecretStatus) DeepCopyInto(out *VaultDynamicSecretStatus) *out = *in out.SecretLease = in.SecretLease out.StaticCredsMetaData = in.StaticCredsMetaData - out.VaultClientMeta = in.VaultClientMeta + in.VaultClientMeta.DeepCopyInto(&out.VaultClientMeta) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VaultDynamicSecretStatus. @@ -1271,7 +1272,7 @@ func (in *VaultPKISecret) DeepCopyInto(out *VaultPKISecret) { out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) in.Spec.DeepCopyInto(&out.Spec) - out.Status = in.Status + in.Status.DeepCopyInto(&out.Status) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VaultPKISecret. @@ -1373,7 +1374,7 @@ func (in *VaultPKISecretSpec) DeepCopy() *VaultPKISecretSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *VaultPKISecretStatus) DeepCopyInto(out *VaultPKISecretStatus) { *out = *in - out.VaultClientMeta = in.VaultClientMeta + in.VaultClientMeta.DeepCopyInto(&out.VaultClientMeta) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VaultPKISecretStatus. @@ -1422,7 +1423,7 @@ func (in *VaultStaticSecret) DeepCopyInto(out *VaultStaticSecret) { out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) in.Spec.DeepCopyInto(&out.Spec) - out.Status = in.Status + in.Status.DeepCopyInto(&out.Status) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VaultStaticSecret. @@ -1504,7 +1505,7 @@ func (in *VaultStaticSecretSpec) DeepCopy() *VaultStaticSecretSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *VaultStaticSecretStatus) DeepCopyInto(out *VaultStaticSecretStatus) { *out = *in - out.VaultClientMeta = in.VaultClientMeta + in.VaultClientMeta.DeepCopyInto(&out.VaultClientMeta) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VaultStaticSecretStatus. diff --git a/chart/crds/secrets.hashicorp.com_vaultdynamicsecrets.yaml b/chart/crds/secrets.hashicorp.com_vaultdynamicsecrets.yaml index 365e07fc5..54091b4d3 100644 --- a/chart/crds/secrets.hashicorp.com_vaultdynamicsecrets.yaml +++ b/chart/crds/secrets.hashicorp.com_vaultdynamicsecrets.yaml @@ -393,6 +393,10 @@ spec: description: CacheKey is the unique key used to identify the client cache. type: string + createdAt: + description: CreatedAt is the time the client was created. + format: date-time + type: string id: description: |- ID is the Vault ID of the authenticated client. The ID should never contain diff --git a/chart/crds/secrets.hashicorp.com_vaultpkisecrets.yaml b/chart/crds/secrets.hashicorp.com_vaultpkisecrets.yaml index 5f5757347..261964d4d 100644 --- a/chart/crds/secrets.hashicorp.com_vaultpkisecrets.yaml +++ b/chart/crds/secrets.hashicorp.com_vaultpkisecrets.yaml @@ -383,6 +383,10 @@ spec: description: CacheKey is the unique key used to identify the client cache. type: string + createdAt: + description: CreatedAt is the time the client was created. + format: date-time + type: string id: description: |- ID is the Vault ID of the authenticated client. The ID should never contain diff --git a/chart/crds/secrets.hashicorp.com_vaultstaticsecrets.yaml b/chart/crds/secrets.hashicorp.com_vaultstaticsecrets.yaml index c0781e0b2..668ebd834 100644 --- a/chart/crds/secrets.hashicorp.com_vaultstaticsecrets.yaml +++ b/chart/crds/secrets.hashicorp.com_vaultstaticsecrets.yaml @@ -304,6 +304,10 @@ spec: description: CacheKey is the unique key used to identify the client cache. type: string + createdAt: + description: CreatedAt is the time the client was created. + format: date-time + type: string id: description: |- ID is the Vault ID of the authenticated client. The ID should never contain diff --git a/config/crd/bases/secrets.hashicorp.com_vaultdynamicsecrets.yaml b/config/crd/bases/secrets.hashicorp.com_vaultdynamicsecrets.yaml index 365e07fc5..54091b4d3 100644 --- a/config/crd/bases/secrets.hashicorp.com_vaultdynamicsecrets.yaml +++ b/config/crd/bases/secrets.hashicorp.com_vaultdynamicsecrets.yaml @@ -393,6 +393,10 @@ spec: description: CacheKey is the unique key used to identify the client cache. type: string + createdAt: + description: CreatedAt is the time the client was created. + format: date-time + type: string id: description: |- ID is the Vault ID of the authenticated client. The ID should never contain diff --git a/config/crd/bases/secrets.hashicorp.com_vaultpkisecrets.yaml b/config/crd/bases/secrets.hashicorp.com_vaultpkisecrets.yaml index 5f5757347..261964d4d 100644 --- a/config/crd/bases/secrets.hashicorp.com_vaultpkisecrets.yaml +++ b/config/crd/bases/secrets.hashicorp.com_vaultpkisecrets.yaml @@ -383,6 +383,10 @@ spec: description: CacheKey is the unique key used to identify the client cache. type: string + createdAt: + description: CreatedAt is the time the client was created. + format: date-time + type: string id: description: |- ID is the Vault ID of the authenticated client. The ID should never contain diff --git a/config/crd/bases/secrets.hashicorp.com_vaultstaticsecrets.yaml b/config/crd/bases/secrets.hashicorp.com_vaultstaticsecrets.yaml index c0781e0b2..668ebd834 100644 --- a/config/crd/bases/secrets.hashicorp.com_vaultstaticsecrets.yaml +++ b/config/crd/bases/secrets.hashicorp.com_vaultstaticsecrets.yaml @@ -304,6 +304,10 @@ spec: description: CacheKey is the unique key used to identify the client cache. type: string + createdAt: + description: CreatedAt is the time the client was created. + format: date-time + type: string id: description: |- ID is the Vault ID of the authenticated client. The ID should never contain diff --git a/controllers/vaultdynamicsecret_controller.go b/controllers/vaultdynamicsecret_controller.go index b1598746d..0c88f2f3c 100644 --- a/controllers/vaultdynamicsecret_controller.go +++ b/controllers/vaultdynamicsecret_controller.go @@ -132,21 +132,25 @@ func (r *VaultDynamicSecretReconciler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{RequeueAfter: requeueDurationOnError}, nil } - vClient, err := r.ClientFactory.Get(ctx, r.Client, o) + defer r.updateStatus(ctx, o) + + c, err := r.ClientFactory.Get(ctx, r.Client, o) if err != nil { + o.Status.VaultClientMeta = secretsv1beta1.VaultClientMeta{} r.Recorder.Eventf(o, corev1.EventTypeWarning, consts.ReasonVaultClientConfigError, "Failed to get Vault client: %s", err) return ctrl.Result{RequeueAfter: computeHorizonWithJitter(requeueDurationOnError)}, nil } // we can ignore the error here, since it was handled above in the Get() call. - clientCacheKey, _ := vClient.GetCacheKey() + clientCacheKey, _ := c.GetCacheKey() lastClientCacheKey := o.Status.VaultClientMeta.CacheKey lastClientID := o.Status.VaultClientMeta.ID // update the VaultClientMeta in the resource's status. o.Status.VaultClientMeta.CacheKey = clientCacheKey.String() - o.Status.VaultClientMeta.ID = vClient.ID() + o.Status.VaultClientMeta.ID = c.ID() + o.Status.VaultClientMeta.CreatedAt = metav1.NewTime(c.Stat().CreatedAt()) var syncReason string // doSync indicates that the controller should perform the secret sync, @@ -192,9 +196,6 @@ func (r *VaultDynamicSecretReconciler) Reconcile(ctx context.Context, req ctrl.R r.Recorder.Eventf(o, corev1.EventTypeNormal, consts.ReasonSecretLeaseRenewal, "Not in renewal window after transitioning to a new leader/pod, lease_id=%s, horizon=%s", leaseID, horizon) - if err := r.updateStatus(ctx, o); err != nil { - return ctrl.Result{}, err - } return ctrl.Result{RequeueAfter: horizon}, nil } } else if inWindow { @@ -203,16 +204,13 @@ func (r *VaultDynamicSecretReconciler) Reconcile(ctx context.Context, req ctrl.R r.Recorder.Eventf(o, corev1.EventTypeNormal, consts.ReasonSecretLeaseRenewal, "In rotation period after transitioning to a new leader/pod, lease_id=%s, horizon=%s", leaseID, horizon) - if err := r.updateStatus(ctx, o); err != nil { - return ctrl.Result{}, err - } return ctrl.Result{RequeueAfter: horizon}, nil } } if !doSync && r.isRenewableLease(&o.Status.SecretLease, o, true) && !o.Spec.AllowStaticCreds && leaseID != "" { // Renew the lease and return from Reconcile if the lease is successfully renewed. - if secretLease, err := r.renewLease(ctx, vClient, o); err == nil { + if secretLease, err := r.renewLease(ctx, c, o); err == nil { if !r.isRenewableLease(secretLease, o, false) { return ctrl.Result{}, nil } @@ -227,9 +225,6 @@ func (r *VaultDynamicSecretReconciler) Reconcile(ctx context.Context, req ctrl.R o.Status.StaticCredsMetaData = secretsv1beta1.VaultStaticCredsMetaData{} o.Status.SecretLease = *secretLease o.Status.LastRenewalTime = nowFunc().Unix() - if err := r.updateStatus(ctx, o); err != nil { - return ctrl.Result{}, err - } leaseDuration := time.Duration(secretLease.LeaseDuration) * time.Second if leaseDuration < 1 { @@ -252,7 +247,7 @@ func (r *VaultDynamicSecretReconciler) Reconcile(ctx context.Context, req ctrl.R "Could not renew lease, lease_id=%s, err=%s", leaseID, err) } else if vault.IsForbiddenError(err) { logger.V(consts.LogLevelWarning).Info("Tainting client", "err", err) - vClient.Taint() + c.Taint() } syncReason = consts.ReasonSecretLeaseRenewalError } @@ -271,12 +266,12 @@ func (r *VaultDynamicSecretReconciler) Reconcile(ctx context.Context, req ctrl.R } // sync the secret - secretLease, staticCredsUpdated, err := r.syncSecret(ctx, vClient, o, transOption) + secretLease, staticCredsUpdated, err := r.syncSecret(ctx, c, o, transOption) if err != nil { r.SyncRegistry.Add(req.NamespacedName) if vault.IsForbiddenError(err) { logger.V(consts.LogLevelWarning).Info("Tainting client", "err", err) - vClient.Taint() + c.Taint() } entry, _ := r.BackOffRegistry.Get(req.NamespacedName) horizon := entry.NextBackOff() @@ -292,10 +287,6 @@ func (r *VaultDynamicSecretReconciler) Reconcile(ctx context.Context, req ctrl.R doRolloutRestart := (doSync && o.Status.LastGeneration > 1) || staticCredsUpdated o.Status.SecretLease = *secretLease o.Status.LastRenewalTime = nowFunc().Unix() - if err := r.updateStatus(ctx, o); err != nil { - return ctrl.Result{}, err - } - horizon := r.computePostSyncHorizon(ctx, o) r.Recorder.Eventf(o, corev1.EventTypeNormal, reason, "Secret synced, lease_id=%q, horizon=%s, sync_reason=%q", @@ -641,17 +632,19 @@ func (r *VaultDynamicSecretReconciler) SetupWithManager(mgr ctrl.Manager, opts c // * revoking any associated outstanding leases // * removing our finalizer func (r *VaultDynamicSecretReconciler) handleDeletion(ctx context.Context, o *secretsv1beta1.VaultDynamicSecret) error { - logger := log.FromContext(ctx) - // We are ignoring errors inside `revokeLease`, otherwise we may fail to remove the finalizer. + logger := log.FromContext(ctx).WithName("handleDeletion") + // We are ignoring errors inside `handleLeaseRevocation`, otherwise we may fail to remove the finalizer. // Worst case at this point we will leave a dangling lease instead of a secret which // cannot be deleted. Events are emitted in these cases. - r.revokeLease(ctx, o, "") + r.handleLeaseRevocation(ctx, o) objKey := client.ObjectKeyFromObject(o) r.SyncRegistry.Delete(objKey) r.BackOffRegistry.Delete(objKey) r.referenceCache.Remove(SecretTransformation, objKey) - r.ClientFactory.UnregisterObjectRef(o.GetUID()) + if err := r.ClientFactory.UnregisterObjectRef(ctx, o); err != nil { + logger.Error(err, "Warning: failed to unregister object reference") + } if controllerutil.ContainsFinalizer(o, vaultDynamicSecretFinalizer) { logger.Info("Removing finalizer") if controllerutil.RemoveFinalizer(o, vaultDynamicSecretFinalizer) { @@ -665,18 +658,25 @@ func (r *VaultDynamicSecretReconciler) handleDeletion(ctx context.Context, o *se return nil } -// revokeLease revokes the VDS secret's lease. +// handleLeaseRevocation revokes the VDS secret's lease. // NOTE: Enabling revocation requires the VaultAuthMethod referenced by `o.Spec.VaultAuthRef` to have a policy // that includes `path "sys/leases/revoke" { capabilities = ["update"] }`, otherwise this will fail with permission // errors. -func (r *VaultDynamicSecretReconciler) revokeLease(ctx context.Context, o *secretsv1beta1.VaultDynamicSecret, id string) { - logger := log.FromContext(ctx) +func (r *VaultDynamicSecretReconciler) handleLeaseRevocation(ctx context.Context, o *secretsv1beta1.VaultDynamicSecret) { + logger := log.FromContext(ctx).WithName("handleLeaseRevocation") // Allow us to override the SecretLease in the event that we want to revoke an old lease. - leaseID := id + if !o.Spec.Revoke { + logger.V(consts.LogLevelDebug).Info("Lease revocation not enabled") + return + } + + leaseID := o.Status.SecretLease.ID if leaseID == "" { - leaseID = o.Status.SecretLease.ID + logger.Info("No lease to revoke") + return } - logger.Info("Revoking lease for credential ", "id", leaseID) + + logger.V(consts.LogLevelDebug).Info("Revoking lease for credential ", "id", leaseID) c, err := r.ClientFactory.Get(ctx, r.Client, o) if err != nil { logger.Error(err, "Failed to get client when revoking lease for ", "id", leaseID) @@ -691,7 +691,7 @@ func (r *VaultDynamicSecretReconciler) revokeLease(ctx context.Context, o *secre } else { msg := "Lease revoked" r.Recorder.Eventf(o, corev1.EventTypeNormal, consts.ReasonSecretLeaseRevoke, msg+": %s", leaseID) - logger.Info("Lease revoked ", "id", leaseID) + logger.V(consts.LogLevelDebug).Info("Lease revoked ", "id", leaseID) } } diff --git a/controllers/vaultpkisecret_controller.go b/controllers/vaultpkisecret_controller.go index 77d355a44..50460cb45 100644 --- a/controllers/vaultpkisecret_controller.go +++ b/controllers/vaultpkisecret_controller.go @@ -14,6 +14,7 @@ import ( "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" @@ -95,10 +96,6 @@ func (r *VaultPKISecretReconciler) Reconcile(ctx context.Context, req ctrl.Reque logger.Info(msg) o.Status.Error = consts.ReasonK8sClientError r.recordEvent(o, o.Status.Error, msg) - if err := r.updateStatus(ctx, o); err != nil { - return ctrl.Result{}, err - } - return ctrl.Result{ RequeueAfter: horizon, }, nil @@ -163,11 +160,14 @@ func (r *VaultPKISecretReconciler) Reconcile(ctx context.Context, req ctrl.Reque } } + defer r.updateStatus(ctx, o) + // assume that status is always invalid o.Status.Valid = false logger.Info("Must sync", "reason", syncReason) c, err := r.ClientFactory.Get(ctx, r.Client, o) if err != nil { + o.Status.VaultClientMeta = secretsv1beta1.VaultClientMeta{} o.Status.Error = consts.ReasonK8sClientError logger.Error(err, "Get Vault client") return ctrl.Result{ @@ -178,6 +178,7 @@ func (r *VaultPKISecretReconciler) Reconcile(ctx context.Context, req ctrl.Reque clientCacheKey, _ := c.GetCacheKey() o.Status.VaultClientMeta.CacheKey = clientCacheKey.String() o.Status.VaultClientMeta.ID = c.ID() + o.Status.VaultClientMeta.CreatedAt = metav1.NewTime(c.Stat().CreatedAt()) resp, err := c.Write(ctx, vault.NewWriteRequest(path, o.GetIssuerAPIData())) if err != nil { @@ -188,10 +189,6 @@ func (r *VaultPKISecretReconciler) Reconcile(ctx context.Context, req ctrl.Reque msg := "Failed to issue certificate from Vault" logger.Error(err, msg) r.recordEvent(o, o.Status.Error, msg+": %s", err) - if err := r.updateStatus(ctx, o); err != nil { - return ctrl.Result{}, err - } - r.SyncRegistry.Add(req.NamespacedName) entry, _ := r.BackOffRegistry.Get(req.NamespacedName) return ctrl.Result{ @@ -207,9 +204,6 @@ func (r *VaultPKISecretReconciler) Reconcile(ctx context.Context, req ctrl.Reque msg := "Failed to unmarshal PKI response" logger.Error(err, msg) r.recordEvent(o, o.Status.Error, msg+": %s", err) - if err := r.updateStatus(ctx, o); err != nil { - return ctrl.Result{}, err - } return ctrl.Result{ RequeueAfter: computeHorizonWithJitter(requeueDurationOnError), }, nil @@ -220,9 +214,6 @@ func (r *VaultPKISecretReconciler) Reconcile(ctx context.Context, req ctrl.Reque msg := "Invalid Vault secret data, serial_number cannot be empty" logger.Error(nil, msg) r.recordEvent(o, o.Status.Error, msg) - if err := r.updateStatus(ctx, o); err != nil { - return ctrl.Result{}, err - } return ctrl.Result{ RequeueAfter: computeHorizonWithJitter(requeueDurationOnError), }, nil @@ -234,9 +225,6 @@ func (r *VaultPKISecretReconciler) Reconcile(ctx context.Context, req ctrl.Reque msg := "Failed to marshal Vault secret data" logger.Error(err, msg) r.recordEvent(o, o.Status.Error, msg+": %s", err) - if err := r.updateStatus(ctx, o); err != nil { - return ctrl.Result{}, err - } return ctrl.Result{ RequeueAfter: computeHorizonWithJitter(requeueDurationOnError), }, nil @@ -265,9 +253,6 @@ func (r *VaultPKISecretReconciler) Reconcile(ctx context.Context, req ctrl.Reque if err != nil { logger.Error(err, "HMAC data") o.Status.Error = consts.ReasonHMACDataError - if err := r.updateStatus(ctx, o); err != nil { - return ctrl.Result{}, err - } return ctrl.Result{ RequeueAfter: computeHorizonWithJitter(requeueDurationOnError), }, nil @@ -278,9 +263,6 @@ func (r *VaultPKISecretReconciler) Reconcile(ctx context.Context, req ctrl.Reque if err := helpers.SyncSecret(ctx, r.Client, o, data); err != nil { logger.Error(err, "Sync secret") o.Status.Error = consts.ReasonSecretSyncError - if err := r.updateStatus(ctx, o); err != nil { - return ctrl.Result{}, err - } return ctrl.Result{ RequeueAfter: computeHorizonWithJitter(requeueDurationOnError), }, nil @@ -310,11 +292,6 @@ func (r *VaultPKISecretReconciler) Reconcile(ctx context.Context, req ctrl.Reque o.Status.SerialNumber = certResp.SerialNumber o.Status.Expiration = certResp.Expiration o.Status.LastRotation = time.Now().Unix() - if err := r.updateStatus(ctx, o); err != nil { - logger.Error(err, "Failed to update the status") - return ctrl.Result{}, err - } - r.SyncRegistry.Delete(req.NamespacedName) horizon, _ := computePKIRenewalWindow(ctx, o, .05) @@ -326,16 +303,19 @@ func (r *VaultPKISecretReconciler) Reconcile(ctx context.Context, req ctrl.Reque } func (r *VaultPKISecretReconciler) handleDeletion(ctx context.Context, o *secretsv1beta1.VaultPKISecret) error { + finalizerSet := controllerutil.ContainsFinalizer(o, vaultPKIFinalizer) + logger := log.FromContext(ctx).WithName("handleDeletion").WithValues( + "finalizer", vaultPKIFinalizer, "isSet", finalizerSet) + objKey := client.ObjectKeyFromObject(o) + r.referenceCache.Remove(SecretTransformation, objKey) r.SyncRegistry.Delete(objKey) r.BackOffRegistry.Delete(objKey) + if err := r.ClientFactory.UnregisterObjectRef(ctx, o); err != nil { + logger.Error(err, "Warning: failed to unregister object reference") + } - r.referenceCache.Remove(SecretTransformation, objKey) - finalizerSet := controllerutil.ContainsFinalizer(o, vaultPKIFinalizer) - logger := log.FromContext(ctx).WithName("handleDeletion").WithValues( - "finalizer", vaultPKIFinalizer, "isSet", finalizerSet) logger.V(consts.LogLevelTrace).Info("In deletion") - r.ClientFactory.UnregisterObjectRef(o.GetUID()) if finalizerSet { logger.V(consts.LogLevelDebug).Info("Delete finalizer") if controllerutil.RemoveFinalizer(o, vaultPKIFinalizer) { diff --git a/controllers/vaultstaticsecret_controller.go b/controllers/vaultstaticsecret_controller.go index a2acd2e4f..fad11a174 100644 --- a/controllers/vaultstaticsecret_controller.go +++ b/controllers/vaultstaticsecret_controller.go @@ -11,6 +11,7 @@ import ( corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" @@ -72,17 +73,6 @@ func (r *VaultStaticSecretReconciler) Reconcile(ctx context.Context, req ctrl.Re return ctrl.Result{}, r.handleDeletion(ctx, o) } - c, err := r.ClientFactory.Get(ctx, r.Client, o) - if err != nil { - r.Recorder.Eventf(o, corev1.EventTypeWarning, consts.ReasonVaultClientConfigError, - "Failed to get Vault auth login: %s", err) - return ctrl.Result{RequeueAfter: computeHorizonWithJitter(requeueDurationOnError)}, nil - } - - clientCacheKey, _ := c.GetCacheKey() - o.Status.VaultClientMeta.CacheKey = clientCacheKey.String() - o.Status.VaultClientMeta.ID = c.ID() - var requeueAfter time.Duration if o.Spec.RefreshAfter != "" { d, err := parseDurationString(o.Spec.RefreshAfter, ".spec.refreshAfter", 0) @@ -112,6 +102,21 @@ func (r *VaultStaticSecretReconciler) Reconcile(ctx context.Context, req ctrl.Re return ctrl.Result{RequeueAfter: computeHorizonWithJitter(requeueDurationOnError)}, nil } + defer r.updateStatus(ctx, o) + + c, err := r.ClientFactory.Get(ctx, r.Client, o) + if err != nil { + o.Status.VaultClientMeta = secretsv1beta1.VaultClientMeta{} + r.Recorder.Eventf(o, corev1.EventTypeWarning, consts.ReasonVaultClientConfigError, + "Failed to get Vault auth login: %s", err) + return ctrl.Result{}, err + } + + clientCacheKey, _ := c.GetCacheKey() + o.Status.VaultClientMeta.CacheKey = clientCacheKey.String() + o.Status.VaultClientMeta.ID = c.ID() + o.Status.VaultClientMeta.CreatedAt = metav1.NewTime(c.Stat().CreatedAt()) + resp, err := c.Read(ctx, kvReq) if err != nil { if vault.IsForbiddenError(err) { @@ -182,10 +187,6 @@ func (r *VaultStaticSecretReconciler) Reconcile(ctx context.Context, req ctrl.Re logger.V(consts.LogLevelDebug).Info("Secret sync not required") } - if err := r.updateStatus(ctx, o); err != nil { - return ctrl.Result{}, err - } - return ctrl.Result{ RequeueAfter: requeueAfter, }, nil @@ -196,6 +197,7 @@ func (r *VaultStaticSecretReconciler) updateStatus(ctx context.Context, o *secre logger.V(consts.LogLevelDebug).Info("Updating status") o.Status.LastGeneration = o.GetGeneration() if err := r.Status().Update(ctx, o); err != nil { + logger.Error(err, "Failed to update status") r.Recorder.Eventf(o, corev1.EventTypeWarning, consts.ReasonStatusUpdateError, "Failed to update the resource's status, err=%s", err) } @@ -205,11 +207,13 @@ func (r *VaultStaticSecretReconciler) updateStatus(ctx context.Context, o *secre } func (r *VaultStaticSecretReconciler) handleDeletion(ctx context.Context, o client.Object) error { - logger := log.FromContext(ctx) + logger := log.FromContext(ctx).WithName("handleDeletion") objKey := client.ObjectKeyFromObject(o) r.referenceCache.Remove(SecretTransformation, objKey) r.BackOffRegistry.Delete(objKey) - r.ClientFactory.UnregisterObjectRef(o.GetUID()) + if err := r.ClientFactory.UnregisterObjectRef(ctx, o); err != nil { + logger.Error(err, "Warning: failed to unregister object reference") + } if controllerutil.ContainsFinalizer(o, vaultStaticSecretFinalizer) { logger.Info("Removing finalizer") if controllerutil.RemoveFinalizer(o, vaultStaticSecretFinalizer) { diff --git a/docs/api/api-reference.md b/docs/api/api-reference.md index 1df5ac68e..bee8c1f0d 100644 --- a/docs/api/api-reference.md +++ b/docs/api/api-reference.md @@ -811,6 +811,7 @@ _Appears in:_ | --- | --- | --- | --- | | `cacheKey` _string_ | CacheKey is the unique key used to identify the client cache. | | | | `id` _string_ | ID is the Vault ID of the authenticated client. The ID should never contain
any sensitive information. | | | +| `createdAt` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#time-v1-meta)_ | CreatedAt is the time the client was created. | | | #### VaultConnection diff --git a/internal/vault/client.go b/internal/vault/client.go index 9e3f7b701..fcd59b4db 100644 --- a/internal/vault/client.go +++ b/internal/vault/client.go @@ -28,6 +28,7 @@ import ( type ClientStat interface { Age() time.Duration + CreatedAt() time.Time Reset() RefCount() int IncRefCount() @@ -37,25 +38,32 @@ type ClientStat interface { var _ ClientStat = (*clientStat)(nil) type clientStat struct { - // createTime is the time the client was created. - createTime time.Time + // createdAt is the time the client was created. + createdAt time.Time // refCount is the number of references to the client. refCount int mu sync.RWMutex } +// CreatedAt returns the time the client was created. +func (m *clientStat) CreatedAt() time.Time { + m.mu.RLock() + defer m.mu.RUnlock() + return m.createdAt +} + // Age returns the duration since the client was created. func (m *clientStat) Age() time.Duration { m.mu.RLock() defer m.mu.RUnlock() - return time.Since(m.createTime) + return time.Since(m.createdAt) } // Reset the client's creation time to the current time. func (m *clientStat) Reset() { m.mu.Lock() defer m.mu.Unlock() - m.createTime = time.Now() + m.createdAt = time.Now() m.refCount = 0 } @@ -584,7 +592,7 @@ func (c *defaultClient) startLifetimeWatcher(ctx context.Context) error { return case renewal := <-watcher.RenewCh(): - logger.V(consts.LogLevelDebug).Info("Successfully renewed the client") + logger.V(consts.LogLevelTrace).Info("Successfully renewed the client") c.authSecret = renewal.Secret c.lastRenewal = renewal.RenewedAt.Unix() diff --git a/internal/vault/client_factory.go b/internal/vault/client_factory.go index 7f77f57b2..5655ffa99 100644 --- a/internal/vault/client_factory.go +++ b/internal/vault/client_factory.go @@ -90,7 +90,7 @@ func (e *ClientFactoryDisabledError) Error() string { type ClientFactory interface { Get(context.Context, ctrlclient.Client, ctrlclient.Object) (Client, error) RegisterClientCallbackHandler(ClientCallbackHandler) - UnregisterObjectRef(types.UID) + UnregisterObjectRef(context.Context, ctrlclient.Object) error } // clientCacheObjectFilterFunc provides a way to selectively prune CachingClientFactory's Client cache. @@ -128,14 +128,15 @@ type cachingClientFactory struct { requestCounterVec *prometheus.CounterVec requestErrorCounterVec *prometheus.CounterVec taintedClientGauge *prometheus.GaugeVec - revokeOnEvict bool - pruneStorageOnEvict bool - ctrlClient ctrlclient.Client - clientCallbacks []ClientCallbackHandler - callbackHandlerCh chan *ClientCallbackHandlerRequest - mu sync.RWMutex - onceDoWatcher sync.Once - callbackHandlerCancel context.CancelFunc + // objRefClientGauge *prometheus.GaugeVec + revokeOnEvict bool + pruneStorageOnEvict bool + ctrlClient ctrlclient.Client + clientCallbacks []ClientCallbackHandler + callbackHandlerCh chan *ClientCallbackHandlerRequest + mu sync.RWMutex + onceDoWatcher sync.Once + callbackHandlerCancel context.CancelFunc // clientLocksLock is a lock for the clientLocks map. clientLocksLock sync.RWMutex // clientLocks is a map of cache keys to locks that allow for concurrent access @@ -150,34 +151,36 @@ type cachingClientFactory struct { // orphanPrunerClientCh is a channel that is used to handle Clients that no longer // have any object references. orphanPrunerClientCh chan Client - // cacheKeyReferences is a map of cache keys to ClientCacheKey. It is used to track - cacheKeyReferences map[types.UID]ClientCacheKey } // UnregisterObjectRef removes the reference to the object with the specified // UID. This is used to remove the reference to the object when it is deleted. // All controllers should call this method if they are using the caching client // factory. -func (m *cachingClientFactory) UnregisterObjectRef(uid types.UID) { +func (m *cachingClientFactory) UnregisterObjectRef(ctx context.Context, o ctrlclient.Object) error { m.mu.Lock() defer m.mu.Unlock() logger := log.FromContext(context.Background()).WithName("UnregisterObjectRef").WithValues( - "uid", uid, + "uid", o.GetUID(), ) - logger.V(consts.LogLevelDebug).Info("Unregistering client reference") - key, ok := m.cacheKeyReferences[uid] - if !ok { - return + + vaultClientMeta, err := GetVaultClientMeta(ctx, o) + if err != nil { + return err } - if c, ok := m.cache.Get(key); ok { + + cacheKey := ClientCacheKey(vaultClientMeta.CacheKey) + logger.V(consts.LogLevelDebug).Info("Unregistering client reference", + "vaultClientMeta", vaultClientMeta) + if c, ok := m.cache.Get(cacheKey); ok && c.Stat() != nil { c.Stat().DecRefCount() logger.V(consts.LogLevelDebug).Info("Writing to orphanPrunerClientCh", - "id", c.ID(), "cacheKey", key, c.Stat().RefCount(), + "id", c.ID(), "cacheKey", cacheKey, "refCount", c.Stat().RefCount(), ) m.orphanPrunerClientCh <- c } - logger.V(consts.LogLevelDebug).Info("Removing cache ref", "cacheKey", key) - delete(m.cacheKeyReferences, uid) + + return nil } // Start method for cachingClientFactory starts the lifetime watcher handler. @@ -187,7 +190,6 @@ func (m *cachingClientFactory) Start(ctx context.Context) { m.onceDoWatcher.Do(func() { m.startClientCallbackHandler(ctx) m.startOrphanClientPruner(ctx) - m.cacheKeyReferences = make(map[types.UID]ClientCacheKey) }) } @@ -293,12 +295,6 @@ func (m *cachingClientFactory) onClientEvict(ctx context.Context, client ctrlcli m.clientCacheKeyEncrypt = "" } - for k, v := range m.cacheKeyReferences { - if v == cacheKey { - delete(m.cacheKeyReferences, k) - } - } - if m.storageEnabled() && m.pruneStorageOnEvict { if count, err := m.pruneStorage(ctx, client, cacheKey); err != nil { logger.Error(err, "Failed to remove Client from storage") @@ -407,6 +403,7 @@ func (m *cachingClientFactory) Get(ctx context.Context, client ctrlclient.Client var cacheKey ClientCacheKey var errs error var tainted bool + var lastVaultClientMeta *secretsv1beta1.VaultClientMeta defer func() { m.incrementRequestCounter(metrics.OperationGet, errs) clientFactoryOperationTimes.WithLabelValues(subsystemClientFactory, metrics.OperationGet).Observe( @@ -422,38 +419,52 @@ func (m *cachingClientFactory) Get(ctx context.Context, client ctrlclient.Client mt.Set(0) } - if errs != nil { - lastCacheKey, ok := m.cacheKeyReferences[obj.GetUID()] - if ok { - c, ok := m.cache.Get(lastCacheKey) - if ok { - c.Stat().DecRefCount() + var incrementReason string + var decrementReason string + var lastCacheKey ClientCacheKey + if lastVaultClientMeta != nil { + lastCacheKey = ClientCacheKey(lastVaultClientMeta.CacheKey) + logger.Info("Update client stats", "lastVaultClientMeta", lastVaultClientMeta) + switch { + case errs != nil: + decrementReason = "errorOnGet" + case lastCacheKey == "": + incrementReason = "newReference" + case lastCacheKey != cacheKey: + decrementReason = "cacheKeyChange" + incrementReason = "cacheKeyChange" + default: + if c, ok := m.cache.Get(cacheKey); ok && c.Stat().CreatedAt().Unix() != lastVaultClientMeta.CreatedAt.Unix() { + incrementReason = "createdAtChange" } - delete(m.cacheKeyReferences, obj.GetUID()) } - } else { - var increment bool - lastCacheKey, ok := m.cacheKeyReferences[obj.GetUID()] - if ok { - if lastCacheKey != cacheKey { - c, ok := m.cache.Get(lastCacheKey) - if ok { - c.Stat().DecRefCount() - } - m.orphanPrunerClientCh <- c - increment = true - } - } else { - increment = true + } + + if decrementReason != "" && lastCacheKey != "" { + lck, _ := m.clientLock(lastCacheKey) + lck.Lock() + defer lck.Unlock() + if c, ok := m.cache.Get(lastCacheKey); ok { + c.Stat().DecRefCount() + logger.Info("Decrement ref count on err", + "lastVaultClientMeta", lastVaultClientMeta, + "refCount", c.Stat().RefCount(), + "reason", decrementReason, + ) + // send the client to the cache pruner. + m.orphanPrunerClientCh <- c } + } - if increment { - c, ok := m.cache.Get(cacheKey) - if ok { - c.Stat().IncRefCount() - } + if incrementReason != "" { + if c, ok := m.cache.Get(cacheKey); ok { + c.Stat().IncRefCount() + logger.Info("Increment ref count", + "lastVaultClientMeta", lastVaultClientMeta, "refCount", c.Stat().RefCount(), + "createdAt", c.Stat().CreatedAt(), + "reason", incrementReason, + ) } - m.cacheKeyReferences[obj.GetUID()] = cacheKey } }() @@ -473,6 +484,12 @@ func (m *cachingClientFactory) Get(ctx context.Context, client ctrlclient.Client lock.Lock() defer lock.Unlock() + lastVaultClientMeta, err = GetVaultClientMeta(ctx, obj) + if err != nil { + errs = errors.Join(errs, err) + return nil, errs + } + logger = logger.WithValues("cacheKey", cacheKey) logger.V(consts.LogLevelDebug).Info("Got lock", "numLocks", len(m.clientLocks), @@ -911,6 +928,11 @@ func (m *cachingClientFactory) startOrphanClientPruner(ctx context.Context) { logger.Info("Client callback handler channel closed") return } + + if c.Stat() == nil { + continue + } + if c.Stat().RefCount() == 0 { cacheKey, err := c.GetCacheKey() if err != nil { @@ -1169,3 +1191,16 @@ func GetGlobalVaultCacheKeys(ctx context.Context, client ctrlclient.Client) (map return currentClientCacheKeys, nil } + +func GetVaultClientMeta(ctx context.Context, o ctrlclient.Object) (*secretsv1beta1.VaultClientMeta, error) { + switch t := o.(type) { + case *secretsv1beta1.VaultStaticSecret: + return &t.Status.VaultClientMeta, nil + case *secretsv1beta1.VaultPKISecret: + return &t.Status.VaultClientMeta, nil + case *secretsv1beta1.VaultDynamicSecret: + return &t.Status.VaultClientMeta, nil + default: + return nil, fmt.Errorf("vault client meta not found for type %T", t) + } +} diff --git a/internal/vault/client_factory_test.go b/internal/vault/client_factory_test.go index 8b943e4fa..d71259091 100644 --- a/internal/vault/client_factory_test.go +++ b/internal/vault/client_factory_test.go @@ -333,7 +333,7 @@ func Test_cachingClientFactory_pruneOrphanClients(t *testing.T) { c.cache.Add(k.key, &stubClient{ cacheKey: k.key, clientStat: &clientStat{ - createTime: time.Now().Add(k.creationTimeOffset), + createdAt: time.Now().Add(k.creationTimeOffset), }, }) } diff --git a/test/integration/modules/vso-helm/main.tf b/test/integration/modules/vso-helm/main.tf index cb3fe063d..5caefcda1 100644 --- a/test/integration/modules/vso-helm/main.tf +++ b/test/integration/modules/vso-helm/main.tf @@ -57,6 +57,14 @@ resource "helm_release" "vault-secrets-operator" { name = "controller.manager.image.tag" value = var.operator_image_tag } + set { + name = "controller.manager.resources.limits.cpu" + value = "1000m" + } + set { + name = "controller.manager.resources.limits.memory" + value = "512Mi" + } set { name = "controller.manager.clientCache.persistenceModel" value = var.client_cache_config.persistence_model diff --git a/test/integration/vaultdynamicsecret_integration_test.go b/test/integration/vaultdynamicsecret_integration_test.go index ff6e41214..f6e5c6ccc 100644 --- a/test/integration/vaultdynamicsecret_integration_test.go +++ b/test/integration/vaultdynamicsecret_integration_test.go @@ -756,7 +756,7 @@ func TestVaultDynamicSecret_vaultClientCallback(t *testing.T) { }, { name: "create-only-vault-auth-update", - create: 25, + create: 1, triggerFunc: func(t *testing.T, reconciledObjs []*secretsv1beta1.VaultDynamicSecret) { t.Helper() for _, obj := range reconciledObjs { From fe690758b3829920314849ddcbad9256c23aa946 Mon Sep 17 00:00:00 2001 From: Ben Ash Date: Tue, 11 Jun 2024 15:30:07 -0400 Subject: [PATCH 5/7] Rename createdAt to creationTimestamp --- api/v1beta1/common.go | 4 ++-- api/v1beta1/zz_generated.deepcopy.go | 2 +- ...ecrets.hashicorp.com_vaultdynamicsecrets.yaml | 4 ++-- .../secrets.hashicorp.com_vaultpkisecrets.yaml | 4 ++-- ...secrets.hashicorp.com_vaultstaticsecrets.yaml | 4 ++-- ...ecrets.hashicorp.com_vaultdynamicsecrets.yaml | 4 ++-- .../secrets.hashicorp.com_vaultpkisecrets.yaml | 4 ++-- ...secrets.hashicorp.com_vaultstaticsecrets.yaml | 4 ++-- controllers/vaultdynamicsecret_controller.go | 2 +- controllers/vaultpkisecret_controller.go | 2 +- controllers/vaultstaticsecret_controller.go | 2 +- docs/api/api-reference.md | 2 +- internal/vault/client.go | 16 ++++++++-------- internal/vault/client_factory.go | 4 ++-- internal/vault/client_factory_test.go | 2 +- 15 files changed, 30 insertions(+), 30 deletions(-) diff --git a/api/v1beta1/common.go b/api/v1beta1/common.go index f09bedd66..4628f689c 100644 --- a/api/v1beta1/common.go +++ b/api/v1beta1/common.go @@ -124,6 +124,6 @@ type VaultClientMeta struct { // ID is the Vault ID of the authenticated client. The ID should never contain // any sensitive information. ID string `json:"id,omitempty"` - // CreatedAt is the time the client was created. - CreatedAt metav1.Time `json:"createdAt,omitempty"` + // CreationTimestamp is the time the client was created. + CreationTimestamp metav1.Time `json:"creationTimestamp,omitempty"` } diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go index 45bed4af0..663cbf292 100644 --- a/api/v1beta1/zz_generated.deepcopy.go +++ b/api/v1beta1/zz_generated.deepcopy.go @@ -1052,7 +1052,7 @@ func (in *VaultAuthStatus) DeepCopy() *VaultAuthStatus { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *VaultClientMeta) DeepCopyInto(out *VaultClientMeta) { *out = *in - in.CreatedAt.DeepCopyInto(&out.CreatedAt) + in.CreationTimestamp.DeepCopyInto(&out.CreationTimestamp) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VaultClientMeta. diff --git a/chart/crds/secrets.hashicorp.com_vaultdynamicsecrets.yaml b/chart/crds/secrets.hashicorp.com_vaultdynamicsecrets.yaml index 54091b4d3..44775144d 100644 --- a/chart/crds/secrets.hashicorp.com_vaultdynamicsecrets.yaml +++ b/chart/crds/secrets.hashicorp.com_vaultdynamicsecrets.yaml @@ -393,8 +393,8 @@ spec: description: CacheKey is the unique key used to identify the client cache. type: string - createdAt: - description: CreatedAt is the time the client was created. + creationTimestamp: + description: CreationTimestamp is the time the client was created. format: date-time type: string id: diff --git a/chart/crds/secrets.hashicorp.com_vaultpkisecrets.yaml b/chart/crds/secrets.hashicorp.com_vaultpkisecrets.yaml index 261964d4d..502b2e9ec 100644 --- a/chart/crds/secrets.hashicorp.com_vaultpkisecrets.yaml +++ b/chart/crds/secrets.hashicorp.com_vaultpkisecrets.yaml @@ -383,8 +383,8 @@ spec: description: CacheKey is the unique key used to identify the client cache. type: string - createdAt: - description: CreatedAt is the time the client was created. + creationTimestamp: + description: CreationTimestamp is the time the client was created. format: date-time type: string id: diff --git a/chart/crds/secrets.hashicorp.com_vaultstaticsecrets.yaml b/chart/crds/secrets.hashicorp.com_vaultstaticsecrets.yaml index 668ebd834..1afacb4b2 100644 --- a/chart/crds/secrets.hashicorp.com_vaultstaticsecrets.yaml +++ b/chart/crds/secrets.hashicorp.com_vaultstaticsecrets.yaml @@ -304,8 +304,8 @@ spec: description: CacheKey is the unique key used to identify the client cache. type: string - createdAt: - description: CreatedAt is the time the client was created. + creationTimestamp: + description: CreationTimestamp is the time the client was created. format: date-time type: string id: diff --git a/config/crd/bases/secrets.hashicorp.com_vaultdynamicsecrets.yaml b/config/crd/bases/secrets.hashicorp.com_vaultdynamicsecrets.yaml index 54091b4d3..44775144d 100644 --- a/config/crd/bases/secrets.hashicorp.com_vaultdynamicsecrets.yaml +++ b/config/crd/bases/secrets.hashicorp.com_vaultdynamicsecrets.yaml @@ -393,8 +393,8 @@ spec: description: CacheKey is the unique key used to identify the client cache. type: string - createdAt: - description: CreatedAt is the time the client was created. + creationTimestamp: + description: CreationTimestamp is the time the client was created. format: date-time type: string id: diff --git a/config/crd/bases/secrets.hashicorp.com_vaultpkisecrets.yaml b/config/crd/bases/secrets.hashicorp.com_vaultpkisecrets.yaml index 261964d4d..502b2e9ec 100644 --- a/config/crd/bases/secrets.hashicorp.com_vaultpkisecrets.yaml +++ b/config/crd/bases/secrets.hashicorp.com_vaultpkisecrets.yaml @@ -383,8 +383,8 @@ spec: description: CacheKey is the unique key used to identify the client cache. type: string - createdAt: - description: CreatedAt is the time the client was created. + creationTimestamp: + description: CreationTimestamp is the time the client was created. format: date-time type: string id: diff --git a/config/crd/bases/secrets.hashicorp.com_vaultstaticsecrets.yaml b/config/crd/bases/secrets.hashicorp.com_vaultstaticsecrets.yaml index 668ebd834..1afacb4b2 100644 --- a/config/crd/bases/secrets.hashicorp.com_vaultstaticsecrets.yaml +++ b/config/crd/bases/secrets.hashicorp.com_vaultstaticsecrets.yaml @@ -304,8 +304,8 @@ spec: description: CacheKey is the unique key used to identify the client cache. type: string - createdAt: - description: CreatedAt is the time the client was created. + creationTimestamp: + description: CreationTimestamp is the time the client was created. format: date-time type: string id: diff --git a/controllers/vaultdynamicsecret_controller.go b/controllers/vaultdynamicsecret_controller.go index 0c88f2f3c..bc60327b5 100644 --- a/controllers/vaultdynamicsecret_controller.go +++ b/controllers/vaultdynamicsecret_controller.go @@ -150,7 +150,7 @@ func (r *VaultDynamicSecretReconciler) Reconcile(ctx context.Context, req ctrl.R // update the VaultClientMeta in the resource's status. o.Status.VaultClientMeta.CacheKey = clientCacheKey.String() o.Status.VaultClientMeta.ID = c.ID() - o.Status.VaultClientMeta.CreatedAt = metav1.NewTime(c.Stat().CreatedAt()) + o.Status.VaultClientMeta.CreationTimestamp = metav1.NewTime(c.Stat().CreationTimestamp()) var syncReason string // doSync indicates that the controller should perform the secret sync, diff --git a/controllers/vaultpkisecret_controller.go b/controllers/vaultpkisecret_controller.go index 50460cb45..0abad949f 100644 --- a/controllers/vaultpkisecret_controller.go +++ b/controllers/vaultpkisecret_controller.go @@ -178,7 +178,7 @@ func (r *VaultPKISecretReconciler) Reconcile(ctx context.Context, req ctrl.Reque clientCacheKey, _ := c.GetCacheKey() o.Status.VaultClientMeta.CacheKey = clientCacheKey.String() o.Status.VaultClientMeta.ID = c.ID() - o.Status.VaultClientMeta.CreatedAt = metav1.NewTime(c.Stat().CreatedAt()) + o.Status.VaultClientMeta.CreationTimestamp = metav1.NewTime(c.Stat().CreationTimestamp()) resp, err := c.Write(ctx, vault.NewWriteRequest(path, o.GetIssuerAPIData())) if err != nil { diff --git a/controllers/vaultstaticsecret_controller.go b/controllers/vaultstaticsecret_controller.go index fad11a174..3caa2e223 100644 --- a/controllers/vaultstaticsecret_controller.go +++ b/controllers/vaultstaticsecret_controller.go @@ -115,7 +115,7 @@ func (r *VaultStaticSecretReconciler) Reconcile(ctx context.Context, req ctrl.Re clientCacheKey, _ := c.GetCacheKey() o.Status.VaultClientMeta.CacheKey = clientCacheKey.String() o.Status.VaultClientMeta.ID = c.ID() - o.Status.VaultClientMeta.CreatedAt = metav1.NewTime(c.Stat().CreatedAt()) + o.Status.VaultClientMeta.CreationTimestamp = metav1.NewTime(c.Stat().CreationTimestamp()) resp, err := c.Read(ctx, kvReq) if err != nil { diff --git a/docs/api/api-reference.md b/docs/api/api-reference.md index bee8c1f0d..566ffd335 100644 --- a/docs/api/api-reference.md +++ b/docs/api/api-reference.md @@ -811,7 +811,7 @@ _Appears in:_ | --- | --- | --- | --- | | `cacheKey` _string_ | CacheKey is the unique key used to identify the client cache. | | | | `id` _string_ | ID is the Vault ID of the authenticated client. The ID should never contain
any sensitive information. | | | -| `createdAt` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#time-v1-meta)_ | CreatedAt is the time the client was created. | | | +| `creationTimestamp` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#time-v1-meta)_ | CreationTimestamp is the time the client was created. | | | #### VaultConnection diff --git a/internal/vault/client.go b/internal/vault/client.go index fcd59b4db..6884216b1 100644 --- a/internal/vault/client.go +++ b/internal/vault/client.go @@ -28,7 +28,7 @@ import ( type ClientStat interface { Age() time.Duration - CreatedAt() time.Time + CreationTimestamp() time.Time Reset() RefCount() int IncRefCount() @@ -38,32 +38,32 @@ type ClientStat interface { var _ ClientStat = (*clientStat)(nil) type clientStat struct { - // createdAt is the time the client was created. - createdAt time.Time + // creationTimestamp is the time the client was created. + creationTimestamp time.Time // refCount is the number of references to the client. refCount int mu sync.RWMutex } -// CreatedAt returns the time the client was created. -func (m *clientStat) CreatedAt() time.Time { +// CreationTimestamp returns the time the client was created. +func (m *clientStat) CreationTimestamp() time.Time { m.mu.RLock() defer m.mu.RUnlock() - return m.createdAt + return m.creationTimestamp } // Age returns the duration since the client was created. func (m *clientStat) Age() time.Duration { m.mu.RLock() defer m.mu.RUnlock() - return time.Since(m.createdAt) + return time.Since(m.creationTimestamp) } // Reset the client's creation time to the current time. func (m *clientStat) Reset() { m.mu.Lock() defer m.mu.Unlock() - m.createdAt = time.Now() + m.creationTimestamp = time.Now() m.refCount = 0 } diff --git a/internal/vault/client_factory.go b/internal/vault/client_factory.go index 5655ffa99..ae06a7ccf 100644 --- a/internal/vault/client_factory.go +++ b/internal/vault/client_factory.go @@ -434,7 +434,7 @@ func (m *cachingClientFactory) Get(ctx context.Context, client ctrlclient.Client decrementReason = "cacheKeyChange" incrementReason = "cacheKeyChange" default: - if c, ok := m.cache.Get(cacheKey); ok && c.Stat().CreatedAt().Unix() != lastVaultClientMeta.CreatedAt.Unix() { + if c, ok := m.cache.Get(cacheKey); ok && c.Stat().CreationTimestamp().Unix() != lastVaultClientMeta.CreationTimestamp.Unix() { incrementReason = "createdAtChange" } } @@ -461,7 +461,7 @@ func (m *cachingClientFactory) Get(ctx context.Context, client ctrlclient.Client c.Stat().IncRefCount() logger.Info("Increment ref count", "lastVaultClientMeta", lastVaultClientMeta, "refCount", c.Stat().RefCount(), - "createdAt", c.Stat().CreatedAt(), + "creationTimestamp", c.Stat().CreationTimestamp(), "reason", incrementReason, ) } diff --git a/internal/vault/client_factory_test.go b/internal/vault/client_factory_test.go index d71259091..40af8b6cd 100644 --- a/internal/vault/client_factory_test.go +++ b/internal/vault/client_factory_test.go @@ -333,7 +333,7 @@ func Test_cachingClientFactory_pruneOrphanClients(t *testing.T) { c.cache.Add(k.key, &stubClient{ cacheKey: k.key, clientStat: &clientStat{ - createdAt: time.Now().Add(k.creationTimeOffset), + creationTimestamp: time.Now().Add(k.creationTimeOffset), }, }) } From 21e5cb2070dda57c08d7723fd531c598923ca150 Mon Sep 17 00:00:00 2001 From: Ben Ash Date: Tue, 11 Jun 2024 22:22:08 +0000 Subject: [PATCH 6/7] Revert some bits --- internal/vault/client_factory.go | 17 ++++++++--------- test/integration/modules/vso-helm/main.tf | 8 -------- .../vaultdynamicsecret_integration_test.go | 2 +- 3 files changed, 9 insertions(+), 18 deletions(-) diff --git a/internal/vault/client_factory.go b/internal/vault/client_factory.go index ae06a7ccf..366dc7f5c 100644 --- a/internal/vault/client_factory.go +++ b/internal/vault/client_factory.go @@ -128,15 +128,14 @@ type cachingClientFactory struct { requestCounterVec *prometheus.CounterVec requestErrorCounterVec *prometheus.CounterVec taintedClientGauge *prometheus.GaugeVec - // objRefClientGauge *prometheus.GaugeVec - revokeOnEvict bool - pruneStorageOnEvict bool - ctrlClient ctrlclient.Client - clientCallbacks []ClientCallbackHandler - callbackHandlerCh chan *ClientCallbackHandlerRequest - mu sync.RWMutex - onceDoWatcher sync.Once - callbackHandlerCancel context.CancelFunc + revokeOnEvict bool + pruneStorageOnEvict bool + ctrlClient ctrlclient.Client + clientCallbacks []ClientCallbackHandler + callbackHandlerCh chan *ClientCallbackHandlerRequest + mu sync.RWMutex + onceDoWatcher sync.Once + callbackHandlerCancel context.CancelFunc // clientLocksLock is a lock for the clientLocks map. clientLocksLock sync.RWMutex // clientLocks is a map of cache keys to locks that allow for concurrent access diff --git a/test/integration/modules/vso-helm/main.tf b/test/integration/modules/vso-helm/main.tf index 5caefcda1..cb3fe063d 100644 --- a/test/integration/modules/vso-helm/main.tf +++ b/test/integration/modules/vso-helm/main.tf @@ -57,14 +57,6 @@ resource "helm_release" "vault-secrets-operator" { name = "controller.manager.image.tag" value = var.operator_image_tag } - set { - name = "controller.manager.resources.limits.cpu" - value = "1000m" - } - set { - name = "controller.manager.resources.limits.memory" - value = "512Mi" - } set { name = "controller.manager.clientCache.persistenceModel" value = var.client_cache_config.persistence_model diff --git a/test/integration/vaultdynamicsecret_integration_test.go b/test/integration/vaultdynamicsecret_integration_test.go index f6e5c6ccc..ff6e41214 100644 --- a/test/integration/vaultdynamicsecret_integration_test.go +++ b/test/integration/vaultdynamicsecret_integration_test.go @@ -756,7 +756,7 @@ func TestVaultDynamicSecret_vaultClientCallback(t *testing.T) { }, { name: "create-only-vault-auth-update", - create: 1, + create: 25, triggerFunc: func(t *testing.T, reconciledObjs []*secretsv1beta1.VaultDynamicSecret) { t.Helper() for _, obj := range reconciledObjs { From 3377c257dc343f73716125f7c27461902abefff6 Mon Sep 17 00:00:00 2001 From: Ben Ash Date: Wed, 12 Jun 2024 11:45:33 -0400 Subject: [PATCH 7/7] Extra refactoring remove periodic pruner controllers: log status update errors --- controllers/vaultdynamicsecret_controller.go | 8 +- controllers/vaultpkisecret_controller.go | 6 +- controllers/vaultstaticsecret_controller.go | 6 +- internal/vault/client.go | 15 +- internal/vault/client_factory.go | 242 ++++++------------- internal/vault/client_factory_test.go | 216 ----------------- 6 files changed, 106 insertions(+), 387 deletions(-) diff --git a/controllers/vaultdynamicsecret_controller.go b/controllers/vaultdynamicsecret_controller.go index bc60327b5..13906fd07 100644 --- a/controllers/vaultdynamicsecret_controller.go +++ b/controllers/vaultdynamicsecret_controller.go @@ -113,7 +113,7 @@ func (r *VaultDynamicSecretReconciler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{}, nil } logger.Error(err, "error getting resource from k8s", "obj", o) - return ctrl.Result{}, err + return ctrl.Result{RequeueAfter: requeueDurationOnError}, nil } if o.GetDeletionTimestamp() != nil { @@ -132,7 +132,11 @@ func (r *VaultDynamicSecretReconciler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{RequeueAfter: requeueDurationOnError}, nil } - defer r.updateStatus(ctx, o) + defer func() { + if err := r.updateStatus(ctx, o); err != nil { + logger.Error(err, "Failed to update resource status") + } + }() c, err := r.ClientFactory.Get(ctx, r.Client, o) if err != nil { diff --git a/controllers/vaultpkisecret_controller.go b/controllers/vaultpkisecret_controller.go index 0abad949f..7b69136e4 100644 --- a/controllers/vaultpkisecret_controller.go +++ b/controllers/vaultpkisecret_controller.go @@ -160,7 +160,11 @@ func (r *VaultPKISecretReconciler) Reconcile(ctx context.Context, req ctrl.Reque } } - defer r.updateStatus(ctx, o) + defer func() { + if err := r.updateStatus(ctx, o); err != nil { + logger.Error(err, "Failed to update resource status") + } + }() // assume that status is always invalid o.Status.Valid = false diff --git a/controllers/vaultstaticsecret_controller.go b/controllers/vaultstaticsecret_controller.go index 3caa2e223..42e8ffd08 100644 --- a/controllers/vaultstaticsecret_controller.go +++ b/controllers/vaultstaticsecret_controller.go @@ -102,7 +102,11 @@ func (r *VaultStaticSecretReconciler) Reconcile(ctx context.Context, req ctrl.Re return ctrl.Result{RequeueAfter: computeHorizonWithJitter(requeueDurationOnError)}, nil } - defer r.updateStatus(ctx, o) + defer func() { + if err := r.updateStatus(ctx, o); err != nil { + logger.Error(err, "Failed to update resource status") + } + }() c, err := r.ClientFactory.Get(ctx, r.Client, o) if err != nil { diff --git a/internal/vault/client.go b/internal/vault/client.go index 6884216b1..08a20e432 100644 --- a/internal/vault/client.go +++ b/internal/vault/client.go @@ -31,8 +31,8 @@ type ClientStat interface { CreationTimestamp() time.Time Reset() RefCount() int - IncRefCount() - DecRefCount() + IncRefCount() int + DecRefCount() int } var _ ClientStat = (*clientStat)(nil) @@ -69,20 +69,27 @@ func (m *clientStat) Reset() { // IncRefCount increments the client's reference count. This is useful for // tracking the number of references to a client. -func (m *clientStat) IncRefCount() { +// Returns the previous reference count. +func (m *clientStat) IncRefCount() int { m.mu.Lock() defer m.mu.Unlock() + last := m.refCount m.refCount++ + + return last } // DecRefCount decrements the client's reference count. This is useful for // tracking the number of references to a client. -func (m *clientStat) DecRefCount() { +// Returns the previous reference count. +func (m *clientStat) DecRefCount() int { m.mu.Lock() defer m.mu.Unlock() + last := m.refCount if m.refCount > 0 { m.refCount-- } + return last } // RefCount returns the client's reference count. This is useful for tracking the diff --git a/internal/vault/client_factory.go b/internal/vault/client_factory.go index 366dc7f5c..5be1e43e2 100644 --- a/internal/vault/client_factory.go +++ b/internal/vault/client_factory.go @@ -40,10 +40,6 @@ const ( ClientCallbackOnCacheRemoval ) -// defaultPruneOrphanAge is the default age at which orphaned clients are -// eligible for pruning. -var defaultPruneOrphanAge = 1 * time.Minute - func (o ClientCallbackOn) String() string { switch o { case ClientCallbackOnLifetimeWatcherDone: @@ -159,11 +155,11 @@ type cachingClientFactory struct { func (m *cachingClientFactory) UnregisterObjectRef(ctx context.Context, o ctrlclient.Object) error { m.mu.Lock() defer m.mu.Unlock() - logger := log.FromContext(context.Background()).WithName("UnregisterObjectRef").WithValues( + logger := log.FromContext(ctx).WithName("UnregisterObjectRef").WithValues( "uid", o.GetUID(), ) - vaultClientMeta, err := GetVaultClientMeta(ctx, o) + vaultClientMeta, err := getVaultClientMeta(o) if err != nil { return err } @@ -172,9 +168,9 @@ func (m *cachingClientFactory) UnregisterObjectRef(ctx context.Context, o ctrlcl logger.V(consts.LogLevelDebug).Info("Unregistering client reference", "vaultClientMeta", vaultClientMeta) if c, ok := m.cache.Get(cacheKey); ok && c.Stat() != nil { - c.Stat().DecRefCount() + lastRefCount := c.Stat().DecRefCount() logger.V(consts.LogLevelDebug).Info("Writing to orphanPrunerClientCh", - "id", c.ID(), "cacheKey", cacheKey, "refCount", c.Stat().RefCount(), + "id", c.ID(), "cacheKey", cacheKey, "lastRefCount", lastRefCount, "refCount", c.Stat().RefCount(), ) m.orphanPrunerClientCh <- c } @@ -402,7 +398,7 @@ func (m *cachingClientFactory) Get(ctx context.Context, client ctrlclient.Client var cacheKey ClientCacheKey var errs error var tainted bool - var lastVaultClientMeta *secretsv1beta1.VaultClientMeta + var clientMeta *secretsv1beta1.VaultClientMeta defer func() { m.incrementRequestCounter(metrics.OperationGet, errs) clientFactoryOperationTimes.WithLabelValues(subsystemClientFactory, metrics.OperationGet).Observe( @@ -418,52 +414,8 @@ func (m *cachingClientFactory) Get(ctx context.Context, client ctrlclient.Client mt.Set(0) } - var incrementReason string - var decrementReason string - var lastCacheKey ClientCacheKey - if lastVaultClientMeta != nil { - lastCacheKey = ClientCacheKey(lastVaultClientMeta.CacheKey) - logger.Info("Update client stats", "lastVaultClientMeta", lastVaultClientMeta) - switch { - case errs != nil: - decrementReason = "errorOnGet" - case lastCacheKey == "": - incrementReason = "newReference" - case lastCacheKey != cacheKey: - decrementReason = "cacheKeyChange" - incrementReason = "cacheKeyChange" - default: - if c, ok := m.cache.Get(cacheKey); ok && c.Stat().CreationTimestamp().Unix() != lastVaultClientMeta.CreationTimestamp.Unix() { - incrementReason = "createdAtChange" - } - } - } - - if decrementReason != "" && lastCacheKey != "" { - lck, _ := m.clientLock(lastCacheKey) - lck.Lock() - defer lck.Unlock() - if c, ok := m.cache.Get(lastCacheKey); ok { - c.Stat().DecRefCount() - logger.Info("Decrement ref count on err", - "lastVaultClientMeta", lastVaultClientMeta, - "refCount", c.Stat().RefCount(), - "reason", decrementReason, - ) - // send the client to the cache pruner. - m.orphanPrunerClientCh <- c - } - } - - if incrementReason != "" { - if c, ok := m.cache.Get(cacheKey); ok { - c.Stat().IncRefCount() - logger.Info("Increment ref count", - "lastVaultClientMeta", lastVaultClientMeta, "refCount", c.Stat().RefCount(), - "creationTimestamp", c.Stat().CreationTimestamp(), - "reason", incrementReason, - ) - } + if clientMeta != nil { + m.updateClientStatsAfterGet(ctx, cacheKey, clientMeta, errs) } }() @@ -483,7 +435,7 @@ func (m *cachingClientFactory) Get(ctx context.Context, client ctrlclient.Client lock.Lock() defer lock.Unlock() - lastVaultClientMeta, err = GetVaultClientMeta(ctx, obj) + clientMeta, err = getVaultClientMeta(obj) if err != nil { errs = errors.Join(errs, err) return nil, errs @@ -591,6 +543,71 @@ func (m *cachingClientFactory) Get(ctx context.Context, client ctrlclient.Client return c, errs } +func (m *cachingClientFactory) updateClientStatsAfterGet(ctx context.Context, cacheKey ClientCacheKey, + clientMeta *secretsv1beta1.VaultClientMeta, errs error, +) { + logger := log.FromContext(ctx).WithName("updateClientStatsAfterGet").WithValues( + "cacheKey", cacheKey, + "clientMeta", clientMeta, + ) + if clientMeta == nil { + logger.V(consts.LogLevelTrace).Info("Skipping status update, client meta is nil") + return + } + + lastCacheKey := ClientCacheKey(clientMeta.CacheKey) + var incrementReason string + var decrementReason string + logger.Info("Update client stats", "clientMeta", clientMeta) + switch { + // previous get errors + case errs != nil: + decrementReason = "errorOnGet" + case lastCacheKey == "" && cacheKey != "": + incrementReason = "newReference" + case lastCacheKey != cacheKey: + decrementReason = "cacheKeyChange" + incrementReason = "cacheKeyChange" + default: + if !clientMeta.CreationTimestamp.IsZero() { + c, ok := m.cache.Get(cacheKey) + if ok && c.Stat().CreationTimestamp().Unix() != clientMeta.CreationTimestamp.Unix() { + incrementReason = "creationTimestampChange" + } + } + } + if decrementReason == "" && incrementReason == "" { + logger.V(consts.LogLevelTrace).Info("Skipping ref count update, not required") + return + } + + if decrementReason != "" && lastCacheKey != "" { + if c, ok := m.cache.Get(lastCacheKey); ok { + lastRefCount := c.Stat().DecRefCount() + logger.V(consts.LogLevelDebug).Info("Decrement ref count on err", + "lastRefCount", lastRefCount, + "refCount", c.Stat().RefCount(), + "creationTimestamp", c.Stat().CreationTimestamp(), + "reason", decrementReason, + ) + // send the client to the cache pruner. + m.orphanPrunerClientCh <- c + } + } + + if incrementReason != "" { + if c, ok := m.cache.Get(cacheKey); ok { + lastRefCount := c.Stat().IncRefCount() + logger.V(consts.LogLevelDebug).Info("Increment ref count", + "lastRefCount", lastRefCount, + "refCount", c.Stat().RefCount(), + "creationTimestamp", c.Stat().CreationTimestamp(), + "reason", incrementReason, + ) + } + } +} + func (m *cachingClientFactory) storeClient(ctx context.Context, client ctrlclient.Client, c Client) error { var errs error defer func() { @@ -910,7 +927,6 @@ func (m *cachingClientFactory) startOrphanClientPruner(ctx context.Context) { ctx_, cancel := context.WithCancel(ctx) m.orphanPrunerCancel = cancel // TODO: make period a command line option - ticker := time.NewTicker(30 * time.Minute) go func() { defer func() { close(m.orphanPrunerClientCh) @@ -932,7 +948,7 @@ func (m *cachingClientFactory) startOrphanClientPruner(ctx context.Context) { continue } - if c.Stat().RefCount() == 0 { + if c.Stat().RefCount() <= 0 { cacheKey, err := c.GetCacheKey() if err != nil { logger.Error(err, "Prune orphan Vault Clients", "trigger", "wakeup") @@ -941,74 +957,11 @@ func (m *cachingClientFactory) startOrphanClientPruner(ctx context.Context) { m.cache.Remove(cacheKey) } } - case <-ticker.C: - if count, err := m.pruneOrphanClients(ctx); err != nil { - logger.Error(err, "Prune orphan Vault Clients", "trigger", "tick") - } else { - logger.Info("Prune orphan Vault Clients", "count", count, "trigger", "tick") - } } } }() } -// pruneOrphanClients will remove all clients from the cache that are not -// associated with any of the following custom resources: -// secretsv1beta1.VaultStaticSecret, secretsv1beta1.VaultPKISecret, -// secretsv1beta1.VaultDynamicSecret. -func (m *cachingClientFactory) pruneOrphanClients(ctx context.Context) (int, error) { - m.mu.Lock() - defer m.mu.Unlock() - - logger := m.logger.WithName("pruneOrphanClients") - - currentClientCacheKeys, err := GetGlobalVaultCacheKeys(ctx, m.ctrlClient) - if err != nil { - return 0, err - } - - var toPrune []ClientCacheKey - for _, c := range m.cache.Values() { - key, err := c.GetCacheKey() - if err != nil { - continue - } - - if _, ok := currentClientCacheKeys[key]; !ok { - if key == m.clientCacheKeyEncrypt { - continue - } - stat := c.Stat() - if stat == nil { - continue - } - // prune clients that have not been created in the last 5 minutes, this gives - // time for any referring resource to update their - // .status.vaultClientMeta.cacheKey - if stat.Age() >= defaultPruneOrphanAge { - toPrune = append(toPrune, key) - } - } - } - - // TODO: ensure that this does not block forever... - var count int - wg := sync.WaitGroup{} - wg.Add(len(toPrune)) - for _, key := range toPrune { - count++ - go func() { - defer wg.Done() - m.cache.Remove(key) - }() - } - wg.Wait() - - logger.V(consts.LogLevelDebug).Info( - "Pruned orphaned clients", "count", count, "pruned", toPrune) - return count, nil -} - // NewCachingClientFactory returns a CachingClientFactory with ClientCache initialized. // The ClientCache's onEvictCallback is registered with the factory's onClientEvict(), // to ensure any evictions are handled by the factory (this is very important). @@ -1150,48 +1103,11 @@ type nullEventRecorder struct { func (n *nullEventRecorder) Event(_ runtime.Object, _, _, _ string) {} -// GetGlobalVaultCacheKeys returns the current set of vault.ClientCacheKey(s) that are in -// use. -func GetGlobalVaultCacheKeys(ctx context.Context, client ctrlclient.Client) (map[ClientCacheKey]int, error) { - currentClientCacheKeys := map[ClientCacheKey]int{} - addCurrentClientCacheKeys := func(meta secretsv1beta1.VaultClientMeta) { - if meta.CacheKey != "" { - key := ClientCacheKey(meta.CacheKey) - currentClientCacheKeys[key] = currentClientCacheKeys[key] + 1 - } - } - - var vssList secretsv1beta1.VaultStaticSecretList - err := client.List(ctx, &vssList) - if err != nil { - return nil, err - } - - for _, o := range vssList.Items { - addCurrentClientCacheKeys(o.Status.VaultClientMeta) - } - var vpsList secretsv1beta1.VaultPKISecretList - err = client.List(ctx, &vpsList) - if err != nil { - return nil, err - } - for _, o := range vpsList.Items { - addCurrentClientCacheKeys(o.Status.VaultClientMeta) - } - - var vdsList secretsv1beta1.VaultDynamicSecretList - err = client.List(ctx, &vdsList) - if err != nil { - return nil, err - } - for _, o := range vdsList.Items { - addCurrentClientCacheKeys(o.Status.VaultClientMeta) - } - - return currentClientCacheKeys, nil -} - -func GetVaultClientMeta(ctx context.Context, o ctrlclient.Object) (*secretsv1beta1.VaultClientMeta, error) { +// getVaultClientMeta returns the VaultClientMeta for the provided Object. It +// supports these types: VaultStaticSecret, VaultPKISecret, VaultDynamicSecret. +// +// If o is not one of the supported types, an error is returned. +func getVaultClientMeta(o ctrlclient.Object) (*secretsv1beta1.VaultClientMeta, error) { switch t := o.(type) { case *secretsv1beta1.VaultStaticSecret: return &t.Status.VaultClientMeta, nil diff --git a/internal/vault/client_factory_test.go b/internal/vault/client_factory_test.go index 40af8b6cd..41d211018 100644 --- a/internal/vault/client_factory_test.go +++ b/internal/vault/client_factory_test.go @@ -5,20 +5,15 @@ package vault import ( "context" - "errors" - "fmt" "sync" "testing" "time" - lru "github.com/hashicorp/golang-lru/v2" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" - ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" secretsv1beta1 "github.com/hashicorp/vault-secrets-operator/api/v1beta1" @@ -313,217 +308,6 @@ func Test_cachingClientFactory_callClientCallbacks(t *testing.T) { } } -func Test_cachingClientFactory_pruneOrphanClients(t *testing.T) { - t.Parallel() - - ctx := context.Background() - - type keyTest struct { - key ClientCacheKey - creationTimeOffset time.Duration - } - - newCache := func(size int, keyTests ...keyTest) *clientCache { - lruCache, err := lru.NewWithEvict[ClientCacheKey, Client](size, nil) - require.NoError(t, err) - c := &clientCache{ - cache: lruCache, - } - for _, k := range keyTests { - c.cache.Add(k.key, &stubClient{ - cacheKey: k.key, - clientStat: &clientStat{ - creationTimestamp: time.Now().Add(k.creationTimeOffset), - }, - }) - } - return c - } - - clientBuilder := newClientBuilder() - schemeLessBuilder := fake.NewClientBuilder() - tests := []struct { - name string - cache ClientCache - c ctrlclient.Client - want int - createFunc func(t *testing.T, c ctrlclient.Client) error - wantClientCacheKeys []ClientCacheKey - wantErr assert.ErrorAssertionFunc - }{ - { - name: "empty-cache", - c: clientBuilder.Build(), - cache: newCache(1), - wantErr: assert.NoError, - }, - { - name: "no-referring-objects-purge", - c: clientBuilder.Build(), - cache: newCache(1, - keyTest{ - key: "kubernetes-123456", - creationTimeOffset: -defaultPruneOrphanAge, - }, - ), - want: 1, - wantErr: assert.NoError, - }, - { - name: "prune-some", - c: clientBuilder.Build(), - createFunc: func(t *testing.T, c ctrlclient.Client) error { - t.Helper() - var errs error - for _, o := range []ctrlclient.Object{ - &secretsv1beta1.VaultStaticSecret{ - ObjectMeta: metav1.ObjectMeta{ - Name: "vss-1", - Namespace: "default", - }, - Status: secretsv1beta1.VaultStaticSecretStatus{ - VaultClientMeta: secretsv1beta1.VaultClientMeta{ - CacheKey: "kubernetes-123456", - }, - }, - }, - } { - errs = errors.Join(errs, c.Create(ctx, o)) - } - return errs - }, - cache: newCache(2, - keyTest{ - key: "kubernetes-123455", - creationTimeOffset: -defaultPruneOrphanAge, - }, - keyTest{ - key: "kubernetes-123456", - creationTimeOffset: -defaultPruneOrphanAge, - }, - ), - wantClientCacheKeys: []ClientCacheKey{ - ClientCacheKey("kubernetes-123456"), - }, - want: 1, - wantErr: assert.NoError, - }, - { - name: "none", - c: clientBuilder.Build(), - cache: newCache(1, - keyTest{ - key: "kubernetes-123456", - creationTimeOffset: -defaultPruneOrphanAge, - }, - ), - createFunc: func(t *testing.T, c ctrlclient.Client) error { - t.Helper() - var errs error - for _, o := range []ctrlclient.Object{ - &secretsv1beta1.VaultStaticSecret{ - ObjectMeta: metav1.ObjectMeta{ - Name: "vss-1", - Namespace: "default", - }, - Status: secretsv1beta1.VaultStaticSecretStatus{ - VaultClientMeta: secretsv1beta1.VaultClientMeta{ - CacheKey: "kubernetes-123456", - }, - }, - }, - &secretsv1beta1.VaultPKISecret{ - ObjectMeta: metav1.ObjectMeta{ - Name: "vps-1", - Namespace: "default", - }, - Status: secretsv1beta1.VaultPKISecretStatus{ - VaultClientMeta: secretsv1beta1.VaultClientMeta{ - CacheKey: "kubernetes-123456", - }, - }, - }, - &secretsv1beta1.VaultDynamicSecret{ - ObjectMeta: metav1.ObjectMeta{ - Name: "vds-1", - Namespace: "default", - }, - Status: secretsv1beta1.VaultDynamicSecretStatus{ - VaultClientMeta: secretsv1beta1.VaultClientMeta{ - CacheKey: "kubernetes-123456", - }, - }, - }, - } { - errs = errors.Join(errs, c.Create(ctx, o)) - } - return errs - }, - wantErr: assert.NoError, - wantClientCacheKeys: []ClientCacheKey{ - ClientCacheKey("kubernetes-123456"), - }, - want: 0, - }, - { - name: "no-prune-recent", - c: clientBuilder.Build(), - cache: newCache(2, - keyTest{ - key: "kubernetes-123455", - creationTimeOffset: -(defaultPruneOrphanAge - time.Second*1), - }, - keyTest{ - key: "kubernetes-123456", - creationTimeOffset: -(defaultPruneOrphanAge - time.Second*1), - }, - ), - wantClientCacheKeys: []ClientCacheKey{ - ClientCacheKey("kubernetes-123455"), - ClientCacheKey("kubernetes-123456"), - }, - want: 0, - wantErr: assert.NoError, - }, - { - name: "vss-scheme-not-set", - c: schemeLessBuilder.Build(), - cache: newCache(1, - keyTest{ - key: "kubernetes-123456", - creationTimeOffset: -defaultPruneOrphanAge, - }, - ), - want: 0, - wantClientCacheKeys: []ClientCacheKey{ - ClientCacheKey("kubernetes-123456"), - }, - wantErr: func(t assert.TestingT, err error, msgAndArgs ...interface{}) bool { - return assert.ErrorContains(t, err, "no kind is registered for the type v1beta1.VaultStaticSecret") - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - m := &cachingClientFactory{ - cache: tt.cache, - ctrlClient: tt.c, - } - - if tt.createFunc != nil { - require.NoError(t, tt.createFunc(t, tt.c)) - } - - got, err := m.pruneOrphanClients(ctx) - if !tt.wantErr(t, err, fmt.Sprintf("pruneOrphanClients(%v)", ctx)) { - return - } - assert.Equalf(t, tt.want, got, "pruneOrphanClients(%v)", ctx) - assert.ElementsMatchf(t, tt.wantClientCacheKeys, m.cache.Keys(), "pruneOrphanClients(%v)", ctx) - }) - } -} - // newClientBuilder returns a new fake.ClientBuilder with the necessary schemes. // copied from helpers func newClientBuilder() *fake.ClientBuilder {