chore: fix tests and update expectation helpers
tallaxes committed Dec 3, 2024
1 parent 0a00360 commit ec6baf7
Showing 7 changed files with 156 additions and 38 deletions.
6 changes: 4 additions & 2 deletions pkg/cloudprovider/suite_test.go
@@ -83,7 +83,7 @@ var _ = BeforeSuite(func() {
fakeClock = clock.NewFakeClock(time.Now())
recorder = events.NewRecorder(&record.FakeRecorder{})
cloudProvider = New(azureEnv.InstanceTypesProvider, azureEnv.InstanceProvider, recorder, env.Client, azureEnv.ImageProvider)
cluster = state.NewCluster(fakeClock, env.Client)
cluster = state.NewCluster(fakeClock, env.Client, cloudProvider)
prov = provisioning.NewProvisioner(env.Client, recorder, cloudProvider, cluster, fakeClock)
})

@@ -195,7 +195,9 @@ var _ = Describe("CloudProvider", func() {
},
Spec: karpv1.NodeClaimSpec{
NodeClassRef: &karpv1.NodeClassReference{
Name: nodeClass.Name,
Group: object.GVK(nodeClass).Group,
Kind: object.GVK(nodeClass).Kind,
Name: nodeClass.Name,
},
},
})
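The recurring change across these test files is that karpv1.NodeClassReference is now fully qualified: Group and Kind are derived from the node class object via object.GVK (github.com/awslabs/operatorpkg/object) rather than relying on Name alone. A minimal sketch of the pattern, not taken from this commit (the helper name and the client.Object parameter are illustrative; assumes the karpv1, object, and controller-runtime client imports):

// nodeClassRefFor builds the fully-qualified reference now expected by the v1 APIs.
// object.GVK resolves the GroupVersionKind registered for the object's Go type, so
// Group and Kind track the API definition instead of being hard-coded strings.
func nodeClassRefFor(nodeClass client.Object) *karpv1.NodeClassReference {
	return &karpv1.NodeClassReference{
		Group: object.GVK(nodeClass).Group,
		Kind:  object.GVK(nodeClass).Kind,
		Name:  nodeClass.GetName(),
	}
}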
2 changes: 1 addition & 1 deletion pkg/controllers/nodeclaim/garbagecollection/suite_test.go
@@ -82,7 +82,7 @@ var _ = BeforeSuite(func() {
cloudProvider = cloudprovider.New(azureEnv.InstanceTypesProvider, azureEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), env.Client, azureEnv.ImageProvider)
garbageCollectionController = garbagecollection.NewController(env.Client, cloudProvider)
fakeClock = &clock.FakeClock{}
cluster = state.NewCluster(fakeClock, env.Client)
cluster = state.NewCluster(fakeClock, env.Client, cloudProvider)
prov = provisioning.NewProvisioner(env.Client, events.NewRecorder(&record.FakeRecorder{}), cloudProvider, cluster, fakeClock)
})

6 changes: 4 additions & 2 deletions pkg/providers/instance/suite_test.go
@@ -79,7 +79,7 @@ func TestAzure(t *testing.T) {
cloudProvider = cloudprovider.New(azureEnv.InstanceTypesProvider, azureEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), env.Client, azureEnv.ImageProvider)
cloudProviderNonZonal = cloudprovider.New(azureEnvNonZonal.InstanceTypesProvider, azureEnvNonZonal.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), env.Client, azureEnvNonZonal.ImageProvider)
fakeClock = &clock.FakeClock{}
cluster = state.NewCluster(fakeClock, env.Client)
cluster = state.NewCluster(fakeClock, env.Client, cloudProvider)
coreProvisioner = provisioning.NewProvisioner(env.Client, events.NewRecorder(&record.FakeRecorder{}), cloudProvider, cluster, fakeClock)
RunSpecs(t, "Provider/Azure")
}
@@ -119,7 +119,9 @@ var _ = Describe("InstanceProvider", func() {
},
Spec: karpv1.NodeClaimSpec{
NodeClassRef: &karpv1.NodeClassReference{
Name: nodeClass.Name,
Group: object.GVK(nodeClass).Group,
Kind: object.GVK(nodeClass).Kind,
Name: nodeClass.Name,
},
},
})
31 changes: 23 additions & 8 deletions pkg/providers/instancetype/suite_test.go
@@ -25,6 +25,7 @@ import (
"strings"
"testing"

"github.com/awslabs/operatorpkg/object"
"github.com/awslabs/operatorpkg/status"
"github.com/blang/semver/v4"
. "github.com/onsi/ginkgo/v2"
@@ -93,8 +94,8 @@ func TestAzure(t *testing.T) {
cloudProvider = cloudprovider.New(azureEnv.InstanceTypesProvider, azureEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), env.Client, azureEnv.ImageProvider)
cloudProviderNonZonal = cloudprovider.New(azureEnvNonZonal.InstanceTypesProvider, azureEnvNonZonal.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), env.Client, azureEnvNonZonal.ImageProvider)

cluster = state.NewCluster(fakeClock, env.Client)
clusterNonZonal = state.NewCluster(fakeClock, env.Client)
cluster = state.NewCluster(fakeClock, env.Client, cloudProvider)
clusterNonZonal = state.NewCluster(fakeClock, env.Client, cloudProviderNonZonal)
coreProvisioner = provisioning.NewProvisioner(env.Client, events.NewRecorder(&record.FakeRecorder{}), cloudProvider, cluster, fakeClock)
coreProvisionerNonZonal = provisioning.NewProvisioner(env.Client, events.NewRecorder(&record.FakeRecorder{}), cloudProviderNonZonal, clusterNonZonal, fakeClock)

@@ -121,7 +122,9 @@ var _ = Describe("InstanceType Provider", func() {
Template: karpv1.NodeClaimTemplate{
Spec: karpv1.NodeClaimTemplateSpec{
NodeClassRef: &karpv1.NodeClassReference{
Name: nodeClass.Name,
Group: object.GVK(nodeClass).Group,
Kind: object.GVK(nodeClass).Kind,
Name: nodeClass.Name,
},
},
},
@@ -313,7 +316,9 @@ var _ = Describe("InstanceType Provider", func() {
},
Spec: karpv1.NodeClaimSpec{
NodeClassRef: &karpv1.NodeClassReference{
Name: nodeClass.Name,
Name: nodeClass.Name,
Group: object.GVK(nodeClass).Group,
Kind: object.GVK(nodeClass).Kind,
},
},
})
@@ -382,7 +387,9 @@ var _ = Describe("InstanceType Provider", func() {
Values: []string{"Standard_D64s_v3"},
}})
np.Spec.Template.Spec.NodeClassRef = &karpv1.NodeClassReference{
Name: nodeClass.Name,
Group: object.GVK(nodeClass).Group,
Kind: object.GVK(nodeClass).Kind,
Name: nodeClass.Name,
}

ExpectApplied(ctx, env.Client, np, nodeClass)
@@ -413,7 +420,9 @@ var _ = Describe("InstanceType Provider", func() {
Values: []string{"Standard_D64s_v3"},
}})
np.Spec.Template.Spec.NodeClassRef = &karpv1.NodeClassReference{
Name: nodeClass.Name,
Group: object.GVK(nodeClass).Group,
Kind: object.GVK(nodeClass).Kind,
Name: nodeClass.Name,
}

ExpectApplied(ctx, env.Client, np, nodeClass)
@@ -440,7 +449,9 @@ var _ = Describe("InstanceType Provider", func() {
Values: []string{"Standard_D2s_v3"},
}})
np.Spec.Template.Spec.NodeClassRef = &karpv1.NodeClassReference{
Name: nodeClass.Name,
Group: object.GVK(nodeClass).Group,
Kind: object.GVK(nodeClass).Kind,
Name: nodeClass.Name,
}

ExpectApplied(ctx, env.Client, np, nodeClass)
@@ -892,7 +903,11 @@ var _ = Describe("InstanceType Provider", func() {
Operator: v1.NodeSelectorOpIn,
Values: []string{instanceType},
}})
nodePool.Spec.Template.Spec.NodeClassRef = &karpv1.NodeClassReference{Name: nodeClass.Name}
nodePool.Spec.Template.Spec.NodeClassRef = &karpv1.NodeClassReference{
Group: object.GVK(nodeClass).Group,
Kind: object.GVK(nodeClass).Kind,
Name: nodeClass.Name,
}
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
pod := coretest.UnschedulablePod(coretest.PodOptions{})
ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, coreProvisioner, pod)
2 changes: 0 additions & 2 deletions pkg/test/environment.go
@@ -23,7 +23,6 @@ import (
corev1 "k8s.io/api/core/v1"

karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
karpv1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1"

azurecache "github.com/Azure/karpenter-provider-azure/pkg/cache"
"github.com/Azure/karpenter-provider-azure/pkg/fake"
@@ -40,7 +39,6 @@
)

func init() {
karpv1beta1.NormalizedLabels = lo.Assign(karpv1beta1.NormalizedLabels, map[string]string{"topology.disk.csi.azure.com/zone": corev1.LabelTopologyZone})
karpv1.NormalizedLabels = lo.Assign(karpv1.NormalizedLabels, map[string]string{"topology.disk.csi.azure.com/zone": corev1.LabelTopologyZone})
}
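The dropped registration removes the v1beta1 normalization now that only the v1 APIs are exercised; the surviving line still maps the Azure disk CSI driver's zone topology label onto the standard Kubernetes zone label. A rough, illustrative sketch of what that mapping accomplishes (not code from the repository; the zone value is made up):

// Label keys present in karpv1.NormalizedLabels are rewritten to their canonical
// form when Karpenter interprets label requirements, roughly:
in := map[string]string{"topology.disk.csi.azure.com/zone": "eastus-1"}
out := map[string]string{}
for k, v := range in {
	if canonical, ok := karpv1.NormalizedLabels[k]; ok {
		k = canonical // corev1.LabelTopologyZone, i.e. "topology.kubernetes.io/zone"
	}
	out[k] = v
}
// out == map[string]string{"topology.kubernetes.io/zone": "eastus-1"}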

130 changes: 111 additions & 19 deletions test/pkg/environment/common/expectations.go
@@ -91,6 +91,36 @@ func (env *Environment) ExpectUpdated(objects ...client.Object) {
}
}

// ExpectStatusUpdated will update the status of objects in the cluster to match the inputs.
// WARNING: This ignores the resource version check, which can result in
// overwriting changes made by other controllers in the cluster.
// This is useful in ensuring that we can clean up resources by patching
// out finalizers.
// Grab the object before making the updates to reduce the chance of this race.
func (env *Environment) ExpectStatusUpdated(objects ...client.Object) {
GinkgoHelper()
for _, o := range objects {
Eventually(func(g Gomega) {
current := o.DeepCopyObject().(client.Object)
g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(current), current)).To(Succeed())
if current.GetResourceVersion() != o.GetResourceVersion() {
log.FromContext(env).Info(fmt.Sprintf("detected an update to an object (%s) with an outdated resource version, did you get the latest version of the object before patching?", lo.Must(apiutil.GVKForObject(o, env.Client.Scheme()))))
}
o.SetResourceVersion(current.GetResourceVersion())
g.Expect(env.Client.Status().Update(env.Context, o)).To(Succeed())
}).WithTimeout(time.Second * 10).Should(Succeed())
}
}

func ReplaceNodeConditions(node *corev1.Node, conds ...corev1.NodeCondition) *corev1.Node {
keys := sets.New[string](lo.Map(conds, func(c corev1.NodeCondition, _ int) string { return string(c.Type) })...)
node.Status.Conditions = lo.Reject(node.Status.Conditions, func(c corev1.NodeCondition, _ int) bool {
return keys.Has(string(c.Type))
})
node.Status.Conditions = append(node.Status.Conditions, conds...)
return node
}
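A hypothetical usage of the two helpers above, written as if from inside this package (nothing below appears in the commit; nodeName and the condition values are illustrative, and the corev1/metav1/types imports are assumed): flip a node's Ready condition, then push it through the status subresource.

// Fetch a fresh copy first to reduce the resource-version race noted above.
node := &corev1.Node{}
Expect(env.Client.Get(env.Context, types.NamespacedName{Name: nodeName}, node)).To(Succeed())

node = ReplaceNodeConditions(node, corev1.NodeCondition{
	Type:               corev1.NodeReady,
	Status:             corev1.ConditionFalse,
	Reason:             "KubeletNotReady", // illustrative
	LastTransitionTime: metav1.Now(),
})
env.ExpectStatusUpdated(node)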

// ExpectCreatedOrUpdated can update objects in the cluster to match the inputs.
// WARNING: ExpectUpdated ignores the resource version check, which can result in
// overwriting changes made by other controllers in the cluster.
@@ -272,6 +302,17 @@ func (env *Environment) EventuallyExpectTerminatingWithTimeout(timeout time.Dura
}).WithTimeout(timeout).Should(Succeed())
}

func (env *Environment) EventuallyExpectNoLeakedKubeNodeLease() {
GinkgoHelper()
// expect no kube node lease to be leaked
leases := &coordinationv1.LeaseList{}
Expect(env.Client.List(env.Context, leases, client.InNamespace("kube-node-lease"))).To(Succeed())
leakedLeases := lo.Filter(leases.Items, func(l coordinationv1.Lease, _ int) bool {
return l.OwnerReferences == nil
})
Expect(leakedLeases).To(HaveLen(0))
}

func (env *Environment) EventuallyExpectHealthyWithTimeout(timeout time.Duration, pods ...*corev1.Pod) {
GinkgoHelper()
Eventually(func(g Gomega) {
@@ -296,6 +337,17 @@ func (env *Environment) ConsistentlyExpectTerminatingPods(duration time.Duration
}, duration.String()).Should(Succeed())
}

func (env *Environment) ConsistentlyExpectActivePods(duration time.Duration, pods ...*corev1.Pod) {
GinkgoHelper()
By(fmt.Sprintf("expecting %d pods to be live for %s", len(pods), duration))
Consistently(func(g Gomega) {
for _, pod := range pods {
g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(pod), pod)).To(Succeed())
g.Expect(pod.DeletionTimestamp.IsZero()).To(BeTrue())
}
}, duration.String()).Should(Succeed())
}

func (env *Environment) ConsistentlyExpectHealthyPods(duration time.Duration, pods ...*corev1.Pod) {
GinkgoHelper()
By(fmt.Sprintf("expecting %d pods to be ready for %s", len(pods), duration))
@@ -462,16 +514,13 @@ func (env *Environment) eventuallyExpectScaleDown() {

func (env *Environment) EventuallyExpectNotFound(objects ...client.Object) {
GinkgoHelper()
env.EventuallyExpectNotFoundAssertion(objects...).Should(Succeed())
}

func (env *Environment) EventuallyExpectNotFoundAssertion(objects ...client.Object) AsyncAssertion {
return Eventually(func(g Gomega) {
Eventually(func(g Gomega) {
for _, object := range objects {
err := env.Client.Get(env, client.ObjectKeyFromObject(object), object)
g.Expect(errors.IsNotFound(err)).To(BeTrue())
}
})
}).Should(Succeed())
}

func (env *Environment) ExpectCreatedNodeCount(comparator string, count int) []*corev1.Node {
@@ -524,34 +573,77 @@ func (env *Environment) ConsistentlyExpectNodeCount(comparator string, count int
return lo.ToSlicePtr(nodeList.Items)
}

func (env *Environment) ConsistentlyExpectNoDisruptions(nodeCount int, duration time.Duration) (taintedNodes []*corev1.Node) {
// ConsistentlyExpectNoDisruptions asserts that, for the given duration, no nodes are tainted for disruption
// and the number of nodes and nodeclaims stays at nodeCount.
func (env *Environment) ConsistentlyExpectNoDisruptions(nodeCount int, duration time.Duration) {
GinkgoHelper()
return env.ConsistentlyExpectDisruptionsWithNodeCount(0, nodeCount, duration)
Consistently(func(g Gomega) {
nodeClaimList := &karpv1.NodeClaimList{}
g.Expect(env.Client.List(env, nodeClaimList, client.HasLabels{test.DiscoveryLabel})).To(Succeed())
g.Expect(nodeClaimList.Items).To(HaveLen(nodeCount))
nodeList := &corev1.NodeList{}
g.Expect(env.Client.List(env, nodeList, client.HasLabels{test.DiscoveryLabel})).To(Succeed())
g.Expect(nodeList.Items).To(HaveLen(nodeCount))
nodeList.Items = lo.Filter(nodeList.Items, func(n corev1.Node, _ int) bool {
_, ok := lo.Find(n.Spec.Taints, func(t corev1.Taint) bool {
return t.MatchTaint(&karpv1.DisruptedNoScheduleTaint)
})
return ok
})
g.Expect(nodeList.Items).To(HaveLen(0))
}, duration).Should(Succeed())
}
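An illustrative call (the values are not from this commit): hold a three-node cluster steady for five minutes.

// Fails if any node gains the karpv1.DisruptedNoScheduleTaint, or if the
// node/nodeclaim count drifts from 3, at any point during the window.
env.ConsistentlyExpectNoDisruptions(3, 5*time.Minute)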

// ConsistentlyExpectDisruptionsWithNodeCount will continually ensure that there are exactly disruptingNodes with totalNodes (including replacements and existing nodes)
func (env *Environment) ConsistentlyExpectDisruptionsWithNodeCount(disruptingNodes, totalNodes int, duration time.Duration) (taintedNodes []*corev1.Node) {
// ConsistentlyExpectDisruptionsUntilNoneLeft continually enforces a maximum on the number of concurrently disrupting, non-terminating nodes.
// It actually uses an Eventually() under the hood so that we exit early once we reach 0 tainted nodes.
// We use StopTrying() so that we can bail out of the Eventually() as soon as an assertion on the total concurrency of disruptions is breached.
// For example: if we have 5 nodes, with a budget of 2 nodes, we ensure that `disruptingNodes <= maxNodesDisrupting=2`
// We use nodesAtStart+maxNodesDisrupting to assert that we're not creating too many instances in replacement.
func (env *Environment) ConsistentlyExpectDisruptionsUntilNoneLeft(nodesAtStart, maxNodesDisrupting int, timeout time.Duration) {
GinkgoHelper()
nodes := []corev1.Node{}
Consistently(func(g Gomega) {
// Ensure we don't change our NodeClaims
// We use an Eventually() so that we can exit early once the number of tainted/disrupted nodes matches our target.
Eventually(func(g Gomega) {
// Grab Nodes and NodeClaims
nodeClaimList := &karpv1.NodeClaimList{}
g.Expect(env.Client.List(env, nodeClaimList, client.HasLabels{test.DiscoveryLabel})).To(Succeed())
g.Expect(nodeClaimList.Items).To(HaveLen(totalNodes))

nodeList := &corev1.NodeList{}
g.Expect(env.Client.List(env, nodeClaimList, client.HasLabels{test.DiscoveryLabel})).To(Succeed())
g.Expect(env.Client.List(env, nodeList, client.HasLabels{test.DiscoveryLabel})).To(Succeed())
g.Expect(nodeList.Items).To(HaveLen(totalNodes))

// Don't include NodeClaims with the `Terminating` status condition, as they're not included in budgets
removedProviderIDs := sets.Set[string]{}
nodeClaimList.Items = lo.Filter(nodeClaimList.Items, func(nc karpv1.NodeClaim, _ int) bool {
if !nc.StatusConditions().IsTrue(karpv1.ConditionTypeInstanceTerminating) {
return true
}
removedProviderIDs.Insert(nc.Status.ProviderID)
return false
})
if len(nodeClaimList.Items) > nodesAtStart+maxNodesDisrupting {
StopTrying(fmt.Sprintf("Too many nodeclaims created. Expected no more than %d, got %d", nodesAtStart+maxNodesDisrupting, len(nodeClaimList.Items))).Now()
}

// Don't include Nodes whose NodeClaims have been ignored
nodeList.Items = lo.Filter(nodeList.Items, func(n corev1.Node, _ int) bool {
return !removedProviderIDs.Has(n.Spec.ProviderID)
})
if len(nodeList.Items) > nodesAtStart+maxNodesDisrupting {
StopTrying(fmt.Sprintf("Too many nodes created. Expected no more than %d, got %d", nodesAtStart+maxNodesDisrupting, len(nodeList.Items))).Now()
}

// Filter further by the number of tainted nodes to get the number of nodes that are disrupting
nodes = lo.Filter(nodeList.Items, func(n corev1.Node, _ int) bool {
_, ok := lo.Find(n.Spec.Taints, func(t corev1.Taint) bool {
return karpv1.IsDisruptingTaint(t)
return t.MatchTaint(&karpv1.DisruptedNoScheduleTaint)
})
return ok
})
g.Expect(nodes).To(HaveLen(disruptingNodes))
}, duration).Should(Succeed())
return lo.ToSlicePtr(nodes)
if len(nodes) > maxNodesDisrupting {
StopTrying(fmt.Sprintf("Too many disruptions detected. Expected no more than %d, got %d", maxNodesDisrupting, len(nodeList.Items))).Now()
}

g.Expect(nodes).To(HaveLen(0))
}).WithTimeout(timeout).WithPolling(5 * time.Second).Should(Succeed())
}
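And a hypothetical call site for the new helper (parameter values are illustrative): starting from 5 nodes with a disruption budget of 2, wait for the disruption wave to settle.

// Caps the fleet at nodesAtStart+maxNodesDisrupting (7) nodes/nodeclaims,
// allows at most 2 nodes to carry the disruption taint at once, and returns
// once no tainted, non-terminating nodes remain (or the 15m timeout elapses).
env.ConsistentlyExpectDisruptionsUntilNoneLeft(5, 2, 15*time.Minute)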

func (env *Environment) EventuallyExpectTaintedNodeCount(comparator string, count int) []*corev1.Node {
17 changes: 13 additions & 4 deletions test/suites/nodeclaim/nodeclaim_test.go
@@ -18,6 +18,7 @@ package nodeclaim_test

import (
"github.com/Azure/karpenter-provider-azure/pkg/apis/v1alpha2"
"github.com/awslabs/operatorpkg/object"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
v1 "k8s.io/api/core/v1"
@@ -46,7 +47,9 @@ var _ = Describe("StandaloneNodeClaim", func() {
}},
},
NodeClassRef: &karpv1.NodeClassReference{
Name: nodeClass.Name,
Group: object.GVK(nodeClass).Group,
Kind: object.GVK(nodeClass).Kind,
Name: nodeClass.Name,
},
},
})
@@ -66,7 +69,9 @@ var _ = Describe("StandaloneNodeClaim", func() {
},
},
NodeClassRef: &karpv1.NodeClassReference{
Name: nodeClass.Name,
Group: object.GVK(nodeClass).Group,
Kind: object.GVK(nodeClass).Kind,
Name: nodeClass.Name,
},
},
})
@@ -100,7 +105,9 @@ var _ = Describe("StandaloneNodeClaim", func() {
},
},
NodeClassRef: &karpv1.NodeClassReference{
Name: nodeClass.Name,
Group: object.GVK(nodeClass).Group,
Kind: object.GVK(nodeClass).Kind,
Name: nodeClass.Name,
},
},
})
@@ -139,7 +146,9 @@ var _ = Describe("StandaloneNodeClaim", func() {
}},
},
NodeClassRef: &karpv1.NodeClassReference{
Name: nodeClass.Name,
Group: object.GVK(nodeClass).Group,
Kind: object.GVK(nodeClass).Kind,
Name: nodeClass.Name,
},
},
})
