[DO NOT REVIEW] Automated Testing #611

Open · wants to merge 6 commits into main
17 changes: 13 additions & 4 deletions Makefile-az.mk
@@ -1,5 +1,5 @@
AZURE_LOCATION ?= westus2
COMMON_NAME ?= karpenter
AZURE_LOCATION ?= eastus
COMMON_NAME ?= karpe
ifeq ($(CODESPACES),true)
AZURE_RESOURCE_GROUP ?= $(CODESPACE_NAME)
AZURE_ACR_NAME ?= $(subst -,,$(CODESPACE_NAME))
@@ -19,7 +19,6 @@ CUSTOM_VNET_NAME ?= $(AZURE_CLUSTER_NAME)-vnet
CUSTOM_SUBNET_NAME ?= nodesubnet

az-all: az-login az-create-workload-msi az-mkaks-cilium az-create-federated-cred az-perm az-perm-acr az-configure-values az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload

az-all-cniv1: az-login az-create-workload-msi az-mkaks-cniv1 az-create-federated-cred az-perm az-perm-acr az-configure-values az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload

az-all-cni-overlay: az-login az-create-workload-msi az-mkaks-overlay az-create-federated-cred az-perm az-perm-acr az-configure-values az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload
@@ -46,7 +45,12 @@ az-mkacr: az-mkrg ## Create test ACR
az-acrimport: ## Imports an image to an acr registry
az acr import --name $(AZURE_ACR_NAME) --source "mcr.microsoft.com/oss/kubernetes/pause:3.6" --image "pause:3.6"

az-rmpolicy: ## Disable the azure-policy addon on the test cluster
az aks disable-addons --addons azure-policy --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP)

az-cleanenv: az-rmnodeclaims-fin ## Deletes a few common karpenter testing resources (pods, nodepools, nodeclaims, aksnodeclasses)
kubectl delete deployments -n default --all
kubectl delete pods -n default --all
kubectl delete nodeclaims --all
kubectl delete nodepools --all
@@ -252,7 +256,7 @@ az-taintnodes:

az-e2etests: ## Run e2etests
kubectl taint nodes CriticalAddonsOnly=true:NoSchedule --all --overwrite
TEST_SUITE=Utilization make e2etests
TEST_SUITE=Networking make e2etests
kubectl taint nodes CriticalAddonsOnly=true:NoSchedule- --all

az-perftest1: ## Test scaling out/in (1 VM)
@@ -370,3 +374,8 @@ az-swagger-generate-clients-raw:
az-swagger-generate-clients: az-swagger-generate-clients-raw
hack/boilerplate.sh
make tidy

az-deploy-goldpinger: ## Deploy Goldpinger for testing networking
kubectl apply -f https://gist.githubusercontent.com/paulgmiller/084bd4605f1661a329e5ab891a826ae0/raw/94a32d259e137bb300ac8af3ef71caa471463f23/goldpinger-daemon.yaml
kubectl apply -f https://gist.githubusercontent.com/paulgmiller/7bca68cd08cccb4e9bc72b0a08485edf/raw/d6a103fb79a65083f6555e4d822554ed64f510f8/goldpinger-deploy.yaml
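
# Sketch, not an existing target: one way to spot-check the Goldpinger deployment from a
# dev machine is a temporary port-forward against the "goldpinger" service (port 8080,
# default namespace, as created by the networking suite) followed by a curl of /check_all.
az-check-goldpinger: ## Sketch: dump Goldpinger /check_all via a temporary port-forward
kubectl port-forward svc/goldpinger 8080:8080 -n default & \
PF_PID=$$!; sleep 3; curl -s http://localhost:8080/check_all; kill $$PF_PID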

278 changes: 278 additions & 0 deletions test/suites/networking/suite_test.go
@@ -0,0 +1,278 @@
package networking_test

import (
"context"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"testing"
"time"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/utils/ptr"

"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/Azure/karpenter-provider-azure/pkg/apis/v1alpha2"
"github.com/Azure/karpenter-provider-azure/test/pkg/environment/azure"
karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
)

var env *azure.Environment
var nodeClass *v1alpha2.AKSNodeClass
var nodePool *karpv1.NodePool
var ns string

func TestNetworking(t *testing.T) {
RegisterFailHandler(Fail)
BeforeSuite(func() {
env = azure.NewEnvironment(t)
ns = "default"
})
AfterSuite(func() {
By("Cleaning up Goldpinger resources")
// TODO: Move into env.Cleanup()
env.ExpectDeleted(
&corev1.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Name: "goldpinger-serviceaccount", Namespace: ns}},
&rbacv1.ClusterRole{ObjectMeta: metav1.ObjectMeta{Name: "goldpinger-clusterrole"}},
&rbacv1.ClusterRoleBinding{ObjectMeta: metav1.ObjectMeta{Name: "goldpinger-clusterrolebinding"}},
&appsv1.DaemonSet{ObjectMeta: metav1.ObjectMeta{Name: "goldpinger-daemon", Namespace: ns}},
&corev1.Service{ObjectMeta: metav1.ObjectMeta{Name: "goldpinger", Namespace: ns}},
&appsv1.Deployment{ObjectMeta: metav1.ObjectMeta{Name: "goldpinger-deploy", Namespace: ns}},
)
})

RunSpecs(t, "Networking")
}

var _ = BeforeEach(func() { env.BeforeEach() })
var _ = AfterEach(func() { env.Cleanup() })
var _ = AfterEach(func() { env.AfterEach() })

var _ = Describe("Networking", func() {
Describe("GoldPinger", func() {
It("should ensure goldpinger resources are all deployed", func() {
nodeClass = env.DefaultAKSNodeClass()
nodePool = env.DefaultNodePool(nodeClass)
env.ExpectCreated(nodeClass, nodePool)

By("should configure all k8s resources needed")
serviceAccount := createServiceAccount(ns)
clusterRole := createClusterRole()
clusterRoleBinding := createClusterRoleBinding(ns)
daemonSet := createDaemonSet(ns)
service := createService(ns)
deployment := createDeployment(ns)

env.ExpectCreated(serviceAccount, clusterRole, clusterRoleBinding, daemonSet, service, deployment)
By("should scale up the goldpinger-deploy pods for pod to pod connectivity testing and to scale up karp nodes")

env.ExpectCreatedNodeCount("==", 10)
env.EventuallyExpectHealthyPodCountWithTimeout(time.Minute*15, labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels), 10)
By("should ensure gold pinger service has clusterIP assigned")
Eventually(func() string {
svc := &corev1.Service{}
err := env.Client.Get(context.TODO(), client.ObjectKey{Name: "goldpinger", Namespace: ns}, svc)
if err != nil {
return ""
}
return svc.Spec.ClusterIP
}, 2*time.Minute, 10*time.Second).ShouldNot(BeEmpty(), "Goldpinger service ClusterIP not assigned")
By("Fetching node connectivity status from Goldpinger")
resp, err := http.Get("http://goldpinger.default.svc.cluster.local:8080/check_all")
Expect(err).NotTo(HaveOccurred(), "Failed to reach Goldpinger service")
defer resp.Body.Close()

body, err := io.ReadAll(resp.Body)
Expect(err).NotTo(HaveOccurred(), "Failed to read Goldpinger response body")

var checkAllResponse CheckAllResponse
err = json.Unmarshal(body, &checkAllResponse)
Expect(err).NotTo(HaveOccurred(), "Failed to parse Goldpinger response JSON")

for node, status := range checkAllResponse.Nodes {
// This checks that all other nodes in the cluster can reach this node
Expect(status.Status).To(Equal("ok"), fmt.Sprintf("Node %s is not reachable", node))
}
// TODO: Check pod stats to see if pod-to-pod communication works; see the expectAllPodsReachable sketch in the test helpers below
time.Sleep(time.Hour * 1)
})
})

})

// --------------------- Test Helpers ------------------------ //
type NodeStatus struct {
Status string `json:"status"`
Latency int `json:"latency"`
}

type CheckAllResponse struct {
Nodes map[string]NodeStatus `json:"nodes"` // For node-to-node connectivity
Pods map[string]map[string]NodeStatus `json:"pods"` // For pod-to-pod reachability
PacketLoss map[string]float64 `json:"packet_loss"` // For packet loss (if it occurred)
}
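
// Sketch for the pod-to-pod TODO in the spec above: the same /check_all payload could be
// walked via the Pods field to assert pod-level reachability. Not wired into the spec yet,
// and it assumes the field shape declared in CheckAllResponse rather than a verified
// Goldpinger schema; treat it as a starting point.
func expectAllPodsReachable(resp CheckAllResponse) {
	for srcPod, peers := range resp.Pods {
		for dstPod, status := range peers {
			Expect(status.Status).To(Equal("ok"), fmt.Sprintf("pod %s cannot reach pod %s", srcPod, dstPod))
		}
	}
}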

func createServiceAccount(namespace string) *corev1.ServiceAccount {
return &corev1.ServiceAccount{
ObjectMeta: metav1.ObjectMeta{
Name: "goldpinger-serviceaccount",
Namespace: namespace,
},
}
}

func createClusterRole() *rbacv1.ClusterRole {
return &rbacv1.ClusterRole{
ObjectMeta: metav1.ObjectMeta{
Name: "goldpinger-clusterrole",
},
Rules: []rbacv1.PolicyRule{
{
APIGroups: []string{""},
Resources: []string{"pods", "nodes", "daemonsets"},
Verbs: []string{"list", "get", "watch"},
},
},
}
}

func createClusterRoleBinding(namespace string) *rbacv1.ClusterRoleBinding {
return &rbacv1.ClusterRoleBinding{
ObjectMeta: metav1.ObjectMeta{
Name: "goldpinger-clusterrolebinding",
},
Subjects: []rbacv1.Subject{
{
Kind: "ServiceAccount",
Name: "goldpinger-serviceaccount",
Namespace: namespace,
},
},
RoleRef: rbacv1.RoleRef{
APIGroup: "rbac.authorization.k8s.io",
Kind: "ClusterRole",
Name: "goldpinger-clusterrole",
},
}
}

func createDaemonSet(namespace string) *appsv1.DaemonSet {
return &appsv1.DaemonSet{
ObjectMeta: metav1.ObjectMeta{
Name: "goldpinger-daemon",
Namespace: namespace,
Labels: map[string]string{"app": "goldpinger"},
},
Spec: appsv1.DaemonSetSpec{
Selector: &metav1.LabelSelector{
MatchLabels: map[string]string{"app": "goldpinger"},
},
UpdateStrategy: appsv1.DaemonSetUpdateStrategy{
Type: appsv1.RollingUpdateDaemonSetStrategyType,
},
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{"app": "goldpinger"},
Annotations: map[string]string{
"prometheus.io/scrape": "true",
"prometheus.io/port": "8080",
},
},
Spec: corev1.PodSpec{
ServiceAccountName: "goldpinger-serviceaccount",
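// Host networking plus USE_HOST_IP below has each Goldpinger instance ping its peers on
// node IPs, so the DaemonSet exercises node-to-node reachability (the Nodes check in the
// spec) rather than the pod overlay network.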
HostNetwork: true,
Containers: []corev1.Container{
{
Name: "goldpinger",
Image: "docker.io/bloomberg/goldpinger:v3.0.0",
Env: []corev1.EnvVar{
{Name: "USE_HOST_IP", Value: "true"},
{Name: "HOST", Value: "0.0.0.0"},
{Name: "PORT", Value: "8080"},
},
Ports: []corev1.ContainerPort{
{ContainerPort: 8080, Name: "http"},
},
},
},
},
},
},
}
}

func createService(namespace string) *corev1.Service {
return &corev1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: "goldpinger",
Namespace: namespace,
Labels: map[string]string{"app": "goldpinger"},
},
Spec: corev1.ServiceSpec{
Type: corev1.ServiceTypeNodePort,
Ports: []corev1.ServicePort{
{
Port: 8080,
TargetPort: intstr.FromInt(8080),
Name: "http",
},
},
Selector: map[string]string{"app": "goldpinger"},
},
}
}

func createDeployment(namespace string) *appsv1.Deployment {
return &appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Name: "goldpinger-deploy",
Namespace: namespace,
Labels: map[string]string{"app": "goldpinger"},
},
Spec: appsv1.DeploymentSpec{
Replicas: ptr.To[int32](10),
Selector: &metav1.LabelSelector{
MatchLabels: map[string]string{"app": "goldpinger"},
},
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{"app": "goldpinger"},
},
Spec: corev1.PodSpec{
// We want to validate node to node communication so we need to spread the deployment between many karpenter nodes
TopologySpreadConstraints: []corev1.TopologySpreadConstraint{
{
MaxSkew: 1,
TopologyKey: "kubernetes.io/hostname",
WhenUnsatisfiable: corev1.DoNotSchedule,
LabelSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{"app": "goldpinger"},
},
},
},
// TODO: Contribute ServiceAccountName and Containers to the karpenter-core test.PodOptions
ServiceAccountName: "goldpinger-serviceaccount",
Containers: []corev1.Container{
{
Name: "goldpinger",
Image: "docker.io/bloomberg/goldpinger:v3.0.0",
Ports: []corev1.ContainerPort{
{ContainerPort: 8080, Name: "http"},
},
},
},
},
},
},
}
}