From 6fb5c0c3cc0dd8f4c6e43754c79ff3aa55195415 Mon Sep 17 00:00:00 2001 From: Nick Stogner Date: Wed, 16 Oct 2024 14:56:16 -0400 Subject: [PATCH 1/6] Add support for v6e --- tpu-provisioner/internal/cloud/gke.go | 5 ++++- tpu-provisioner/internal/cloud/gke_test.go | 10 ++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/tpu-provisioner/internal/cloud/gke.go b/tpu-provisioner/internal/cloud/gke.go index cba11e6b7..3d4ef5da3 100644 --- a/tpu-provisioner/internal/cloud/gke.go +++ b/tpu-provisioner/internal/cloud/gke.go @@ -42,6 +42,7 @@ const ( V4PodSliceAccelerator = "tpu-v4-podslice" V5ePodSliceAccelerator = "tpu-v5-lite-podslice" V5pPodSliceAccelerator = "tpu-v5p-slice" + V6eSliceAccelerator = "tpu-v6e-slice" // Resource type labels GoogleTPUResource = "google.com/tpu" @@ -438,7 +439,7 @@ func tpuTopologyToNodeCount(accelerator, topo string) (int, error) { switch accelerator { case V4PodSliceAccelerator, V5pPodSliceAccelerator: expectedDims = 3 - case V5ePodSliceAccelerator: + case V5ePodSliceAccelerator, V6eSliceAccelerator: expectedDims = 2 default: return 0, fmt.Errorf("invalid accelerator: %v", accelerator) @@ -475,6 +476,8 @@ func tpuMachineType(accel string, tpuRequest int) (string, error) { return fmt.Sprintf("ct5lp-hightpu-%vt", tpuRequest), nil case V5pPodSliceAccelerator: // v5p return fmt.Sprintf("ct5p-hightpu-%vt", tpuRequest), nil + case V6eSliceAccelerator: // v6e + return fmt.Sprintf("ct6e-standard-%vt", tpuRequest), nil } return "", fmt.Errorf("invalid accelerator: %v", accel) diff --git a/tpu-provisioner/internal/cloud/gke_test.go b/tpu-provisioner/internal/cloud/gke_test.go index 0b5adc5e0..fa68da354 100644 --- a/tpu-provisioner/internal/cloud/gke_test.go +++ b/tpu-provisioner/internal/cloud/gke_test.go @@ -67,6 +67,16 @@ func Test_tpuTopologyToNodeCount(t *testing.T) { topo: "not-a-topo", err: true, }, + { + accel: "tpu-v6e-slice", + topo: "16x16", + count: 64, + }, + { + accel: "tpu-v6e-slice", + topo: "1x1x1", + err: true, + 
}, } for _, c := range cases { From bafb427768d063a8fbde0eccbe0d921c961d64f5 Mon Sep 17 00:00:00 2001 From: Nick Stogner Date: Thu, 17 Oct 2024 17:03:51 -0400 Subject: [PATCH 2/6] Bump Go version --- tpu-provisioner/go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpu-provisioner/go.mod b/tpu-provisioner/go.mod index e2f7f6bfe..3ba455b08 100644 --- a/tpu-provisioner/go.mod +++ b/tpu-provisioner/go.mod @@ -1,6 +1,6 @@ module github.com/GoogleCloudPlatform/ai-on-gke/tpu-provisioner -go 1.22.0 +go 1.23.0 require ( cloud.google.com/go/compute/metadata v0.3.0 From d79f9ae075fe9169aed0cd61afa147e25ca02449 Mon Sep 17 00:00:00 2001 From: Nick Stogner Date: Wed, 4 Dec 2024 17:05:02 -0500 Subject: [PATCH 3/6] Add configurable GCP SA and additional networks --- tpu-provisioner/internal/cloud/common.go | 5 ++++ tpu-provisioner/internal/cloud/gke.go | 33 +++++++++++++++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/tpu-provisioner/internal/cloud/common.go b/tpu-provisioner/internal/cloud/common.go index 468b3c5d9..8ac790216 100644 --- a/tpu-provisioner/internal/cloud/common.go +++ b/tpu-provisioner/internal/cloud/common.go @@ -26,6 +26,11 @@ const ( // AnnotationCopyLabels is a comma-separated list of labels to copy from the Pod to the node pool config (Nodes). AnnotationCopyLabels = "tpu-provisioner.cloud.google.com/copy-labels" + // AnnotationAdditionalNodeNetworks is a comma-separated list of additional networks and subnets to attach to the node pool. + // Format: "network:subnet, network:subnet, ..." + AnnotationAdditionalNodeNetworks = "tpu-provisioner.cloud.google.com/additional-node-networks" + // AnnotationNodeServiceAccount is the GCP service account to use for the node pool. 
+ AnnotationNodeServiceAccount = "tpu-provisioner.cloud.google.com/node-service-account" EventNodePoolCreationStarted = "NodePoolCreationStarted" EventNodePoolCreationSucceeded = "NodePoolCreationSucceeded" diff --git a/tpu-provisioner/internal/cloud/gke.go b/tpu-provisioner/internal/cloud/gke.go index 3d4ef5da3..7ed83a719 100644 --- a/tpu-provisioner/internal/cloud/gke.go +++ b/tpu-provisioner/internal/cloud/gke.go @@ -356,10 +356,40 @@ func (g *GKE) nodePoolForPod(name string, p *corev1.Pod) (*containerv1beta1.Node } } + var networkConfig *containerv1beta1.NodeNetworkConfig + var additionalNodeNetworks []*containerv1beta1.AdditionalNodeNetworkConfig + // additional-node-networks: "vpc1:subnet1, vpc2:subnet2" + for _, pair := range strings.Split(getAnnotation(p, AnnotationAdditionalNodeNetworks), ",") { + pair = strings.TrimSpace(pair) + if pair == "" { + continue + } + + netAndSubnet := strings.SplitN(pair, ":", 2) + if len(netAndSubnet) != 2 { + return nil, fmt.Errorf("invalid additional network annotation: %v", pair) + } + + additionalNodeNetworks = append(additionalNodeNetworks, &containerv1beta1.AdditionalNodeNetworkConfig{ + Network: strings.TrimSpace(netAndSubnet[0]), + Subnetwork: strings.TrimSpace(netAndSubnet[1]), + }) + } + if len(additionalNodeNetworks) > 0 { + networkConfig = &containerv1beta1.NodeNetworkConfig{ + AdditionalNodeNetworkConfigs: additionalNodeNetworks, + } + } + + nodeServiceAccount := g.ClusterContext.NodeServiceAccount + if sa, ok := p.Annotations[AnnotationNodeServiceAccount]; ok { + nodeServiceAccount = sa + } + return &containerv1beta1.NodePool{ Name: name, Config: &containerv1beta1.NodeConfig{ - ServiceAccount: g.ClusterContext.NodeServiceAccount, + ServiceAccount: nodeServiceAccount, ShieldedInstanceConfig: &containerv1beta1.ShieldedInstanceConfig{ EnableIntegrityMonitoring: true, EnableSecureBoot: g.ClusterContext.NodeSecureBoot, @@ -388,6 +418,7 @@ func (g *GKE) nodePoolForPod(name string, p *corev1.Pod) (*containerv1beta1.Node 
MaxSurge: 1, }, MaxPodsConstraint: &containerv1beta1.MaxPodsConstraint{MaxPodsPerNode: maxPodsPerNode}, + NetworkConfig: networkConfig, }, nil } From 662cc2be80440f110a7beaad91ec89c39367784f Mon Sep 17 00:00:00 2001 From: Nick Stogner Date: Thu, 5 Dec 2024 13:28:50 -0500 Subject: [PATCH 4/6] Add cross-project reservations and tests --- tpu-provisioner/internal/cloud/gke.go | 9 ++- tpu-provisioner/internal/cloud/gke_test.go | 72 ++++++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) diff --git a/tpu-provisioner/internal/cloud/gke.go b/tpu-provisioner/internal/cloud/gke.go index 7ed83a719..e135c4111 100644 --- a/tpu-provisioner/internal/cloud/gke.go +++ b/tpu-provisioner/internal/cloud/gke.go @@ -324,11 +324,18 @@ func (g *GKE) nodePoolForPod(name string, p *corev1.Pod) (*containerv1beta1.Node if !g.ClusterContext.ForceOnDemand { if resName, ok := p.Spec.NodeSelector["cloud.google.com/reservation-name"]; ok { + var resVal string + resProj, ok := p.Spec.NodeSelector["cloud.google.com/reservation-project"] + if ok { + resVal = fmt.Sprintf("projects/%s/reservations/%s", resProj, resName) + } else { + resVal = resName + } reservation = &containerv1beta1.ReservationAffinity{ ConsumeReservationType: "SPECIFIC_RESERVATION", Key: "compute.googleapis.com/reservation-name", Values: []string{ - resName, + resVal, }, } } diff --git a/tpu-provisioner/internal/cloud/gke_test.go b/tpu-provisioner/internal/cloud/gke_test.go index fa68da354..ac28f1c48 100644 --- a/tpu-provisioner/internal/cloud/gke_test.go +++ b/tpu-provisioner/internal/cloud/gke_test.go @@ -351,6 +351,39 @@ func TestNodePoolForPod(t *testing.T) { UpgradeSettings: &container.UpgradeSettings{MaxSurge: 1}, }, }, + { + desc: "pod with cross-project reservation selector", + selector: map[string]string{ + "cloud.google.com/reservation-name": "tpu-rsv", + "cloud.google.com/reservation-project": "tpu-rsv-project", + }, + want: &containerv1beta1.NodePool{ + Config: &container.NodeConfig{ + Labels: 
map[string]string{ + "google.com/nodepool-manager": "tpu-provisioner", + "google.com/tpu-provisioner-jobset-name": "jobset-test", + "google.com/tpu-provisioner-jobset-namespace": "default", + "google.com/tpu-provisioner-parent-kind": "job", + "google.com/tpu-provisioner-parent-name": "jobset-test-job-1-0", + "google.com/tpu-provisioner-parent-namespace": "default", + }, + MachineType: "ct5p-hightpu-4t", + ReservationAffinity: &container.ReservationAffinity{ + ConsumeReservationType: "SPECIFIC_RESERVATION", + Key: "compute.googleapis.com/reservation-name", + Values: []string{"projects/tpu-rsv-project/reservations/tpu-rsv"}, + }, + ShieldedInstanceConfig: &container.ShieldedInstanceConfig{EnableIntegrityMonitoring: true}, + }, + InitialNodeCount: 512, + Locations: []string{""}, + Management: &container.NodeManagement{AutoRepair: true, AutoUpgrade: false}, + MaxPodsConstraint: &container.MaxPodsConstraint{MaxPodsPerNode: 15}, + Name: "test-pool", + PlacementPolicy: &container.PlacementPolicy{TpuTopology: "8x16x16", Type: "COMPACT"}, + UpgradeSettings: &container.UpgradeSettings{MaxSurge: 1}, + }, + }, { desc: "pod with reservation selector but on demand is forced", selector: map[string]string{"cloud.google.com/reservation-name": "tpu-rsv"}, @@ -525,6 +558,45 @@ func TestNodePoolForPod(t *testing.T) { UpgradeSettings: &container.UpgradeSettings{MaxSurge: 1}, }, }, + { + desc: "pod requesting additional node networks", + additionalAnnotations: map[string]string{ + "tpu-provisioner.cloud.google.com/additional-node-networks": "network-1:subnet-1, network-2:subnet-2", + }, + want: &containerv1beta1.NodePool{ + Config: &container.NodeConfig{ + Labels: map[string]string{ + "google.com/nodepool-manager": "tpu-provisioner", + "google.com/tpu-provisioner-jobset-name": "jobset-test", + "google.com/tpu-provisioner-jobset-namespace": "default", + "google.com/tpu-provisioner-parent-kind": "job", + "google.com/tpu-provisioner-parent-name": "jobset-test-job-1-0", + 
"google.com/tpu-provisioner-parent-namespace": "default", + }, + MachineType: "ct5p-hightpu-4t", + ShieldedInstanceConfig: &container.ShieldedInstanceConfig{EnableIntegrityMonitoring: true}, + }, + InitialNodeCount: 512, + Locations: []string{""}, + Management: &container.NodeManagement{AutoRepair: true, AutoUpgrade: false}, + MaxPodsConstraint: &container.MaxPodsConstraint{MaxPodsPerNode: 15}, + Name: "test-pool", + PlacementPolicy: &container.PlacementPolicy{TpuTopology: "8x16x16", Type: "COMPACT"}, + UpgradeSettings: &container.UpgradeSettings{MaxSurge: 1}, + NetworkConfig: &container.NodeNetworkConfig{ + AdditionalNodeNetworkConfigs: []*container.AdditionalNodeNetworkConfig{ + { + Network: "network-1", + Subnetwork: "subnet-1", + }, + { + Network: "network-2", + Subnetwork: "subnet-2", + }, + }, + }, + }, + }, } for _, tc := range tests { t.Run(tc.desc, func(t *testing.T) { From 061c02e8733b49c04e433018d89284654166c0c6 Mon Sep 17 00:00:00 2001 From: Nick Stogner Date: Thu, 5 Dec 2024 14:03:58 -0500 Subject: [PATCH 5/6] Bump go version in Dockerfile --- tpu-provisioner/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpu-provisioner/Dockerfile b/tpu-provisioner/Dockerfile index f545d512c..f3b817b21 100644 --- a/tpu-provisioner/Dockerfile +++ b/tpu-provisioner/Dockerfile @@ -1,5 +1,5 @@ # Build the manager binary -FROM golang:1.22 as builder +FROM golang:1.23 as builder ARG TARGETOS ARG TARGETARCH From c9652d8c497bd191f0f1c7c26b5c83fe488985bb Mon Sep 17 00:00:00 2001 From: Nick Stogner Date: Fri, 6 Dec 2024 16:36:16 -0500 Subject: [PATCH 6/6] Allow additional network to be defined via env var --- tpu-provisioner/cmd/main.go | 30 ++++++------- tpu-provisioner/internal/cloud/gke.go | 6 ++- tpu-provisioner/internal/cloud/gke_context.go | 15 +++---- tpu-provisioner/internal/cloud/gke_test.go | 42 +++++++++++++++++++ 4 files changed, 71 insertions(+), 22 deletions(-) diff --git a/tpu-provisioner/cmd/main.go b/tpu-provisioner/cmd/main.go 
index 82df341ee..0767db090 100644 --- a/tpu-provisioner/cmd/main.go +++ b/tpu-provisioner/cmd/main.go @@ -79,10 +79,11 @@ func main() { GCPCluster string `envconfig:"GCP_CLUSTER"` GCPNodeServiceAccount string `envconfig:"GCP_NODE_SERVICE_ACCOUNT"` - GCPNodeTags []string `envconfig:"GCP_NODE_TAGS"` - GCPPodToNodeLabels []string `envconfig:"GCP_POD_TO_NODE_LABELS"` - GCPNodeSecondaryDisk string `envconfig:"GCP_NODE_SECONDARY_DISK" default:""` - GCPNodeSecureBoot bool `envconfig:"GCP_NODE_SECURE_BOOT" default:"true"` + GCPNodeTags []string `envconfig:"GCP_NODE_TAGS"` + GCPPodToNodeLabels []string `envconfig:"GCP_POD_TO_NODE_LABELS"` + GCPNodeSecondaryDisk string `envconfig:"GCP_NODE_SECONDARY_DISK" default:""` + GCPNodeSecureBoot bool `envconfig:"GCP_NODE_SECURE_BOOT" default:"true"` + GCPNodeAdditionalNetworks string `envconfig:"GCP_NODE_ADDITIONAL_NETWORKS" default:""` // GCPForceOnDemand forces the controller to create nodes on demand, even if // the Pod requests a reservation or spot. @@ -201,16 +202,17 @@ func main() { provider = &cloud.GKE{ Service: containers, ClusterContext: cloud.GKEContext{ - ProjectID: cfg.GCPProjectID, - ClusterLocation: cfg.GCPClusterLocation, - Cluster: cfg.GCPCluster, - NodeZone: cfg.GCPZone, - NodeServiceAccount: cfg.GCPNodeServiceAccount, - NodeSecondaryDisk: cfg.GCPNodeSecondaryDisk, - NodeTags: cfg.GCPNodeTags, - PodToNodeLabels: cfg.GCPPodToNodeLabels, - NodeSecureBoot: cfg.GCPNodeSecureBoot, - ForceOnDemand: cfg.GCPForceOnDemand, + ProjectID: cfg.GCPProjectID, + ClusterLocation: cfg.GCPClusterLocation, + Cluster: cfg.GCPCluster, + NodeZone: cfg.GCPZone, + NodeServiceAccount: cfg.GCPNodeServiceAccount, + NodeAdditionalNetworks: cfg.GCPNodeAdditionalNetworks, + NodeSecondaryDisk: cfg.GCPNodeSecondaryDisk, + NodeTags: cfg.GCPNodeTags, + PodToNodeLabels: cfg.GCPPodToNodeLabels, + NodeSecureBoot: cfg.GCPNodeSecureBoot, + ForceOnDemand: cfg.GCPForceOnDemand, }, Recorder: mgr.GetEventRecorderFor("tpu-provisioner"), } diff --git 
a/tpu-provisioner/internal/cloud/gke.go b/tpu-provisioner/internal/cloud/gke.go index e135c4111..7ddf9eeec 100644 --- a/tpu-provisioner/internal/cloud/gke.go +++ b/tpu-provisioner/internal/cloud/gke.go @@ -366,7 +366,11 @@ func (g *GKE) nodePoolForPod(name string, p *corev1.Pod) (*containerv1beta1.Node var networkConfig *containerv1beta1.NodeNetworkConfig var additionalNodeNetworks []*containerv1beta1.AdditionalNodeNetworkConfig // additional-node-networks: "vpc1:subnet1, vpc2:subnet2" - for _, pair := range strings.Split(getAnnotation(p, AnnotationAdditionalNodeNetworks), ",") { + additionalNodeNetworksCSV := g.ClusterContext.NodeAdditionalNetworks + if getAnnotation(p, AnnotationAdditionalNodeNetworks) != "" { + additionalNodeNetworksCSV = getAnnotation(p, AnnotationAdditionalNodeNetworks) + } + for _, pair := range strings.Split(additionalNodeNetworksCSV, ",") { pair = strings.TrimSpace(pair) if pair == "" { continue diff --git a/tpu-provisioner/internal/cloud/gke_context.go b/tpu-provisioner/internal/cloud/gke_context.go index 85c675895..21c3d57ed 100644 --- a/tpu-provisioner/internal/cloud/gke_context.go +++ b/tpu-provisioner/internal/cloud/gke_context.go @@ -3,13 +3,14 @@ package cloud import "fmt" type GKEContext struct { - ProjectID string - ClusterLocation string - Cluster string - NodeZone string - NodeServiceAccount string - NodeSecondaryDisk string - NodeTags []string + ProjectID string + ClusterLocation string + Cluster string + NodeZone string + NodeServiceAccount string + NodeAdditionalNetworks string + NodeSecondaryDisk string + NodeTags []string // PodToNodeLabels is a list of key=value pairs that will be copied from the Pod // to the Node. 
PodToNodeLabels []string diff --git a/tpu-provisioner/internal/cloud/gke_test.go b/tpu-provisioner/internal/cloud/gke_test.go index ac28f1c48..37e5dabe9 100644 --- a/tpu-provisioner/internal/cloud/gke_test.go +++ b/tpu-provisioner/internal/cloud/gke_test.go @@ -558,8 +558,50 @@ func TestNodePoolForPod(t *testing.T) { UpgradeSettings: &container.UpgradeSettings{MaxSurge: 1}, }, }, + { + desc: "additional node networks configured in cluster context", + gkeContext: GKEContext{ + NodeAdditionalNetworks: "network-1:subnet-1, network-2:subnet-2", + }, + want: &containerv1beta1.NodePool{ + Config: &container.NodeConfig{ + Labels: map[string]string{ + "google.com/nodepool-manager": "tpu-provisioner", + "google.com/tpu-provisioner-jobset-name": "jobset-test", + "google.com/tpu-provisioner-jobset-namespace": "default", + "google.com/tpu-provisioner-parent-kind": "job", + "google.com/tpu-provisioner-parent-name": "jobset-test-job-1-0", + "google.com/tpu-provisioner-parent-namespace": "default", + }, + MachineType: "ct5p-hightpu-4t", + ShieldedInstanceConfig: &container.ShieldedInstanceConfig{EnableIntegrityMonitoring: true}, + }, + InitialNodeCount: 512, + Locations: []string{""}, + Management: &container.NodeManagement{AutoRepair: true, AutoUpgrade: false}, + MaxPodsConstraint: &container.MaxPodsConstraint{MaxPodsPerNode: 15}, + Name: "test-pool", + PlacementPolicy: &container.PlacementPolicy{TpuTopology: "8x16x16", Type: "COMPACT"}, + UpgradeSettings: &container.UpgradeSettings{MaxSurge: 1}, + NetworkConfig: &container.NodeNetworkConfig{ + AdditionalNodeNetworkConfigs: []*container.AdditionalNodeNetworkConfig{ + { + Network: "network-1", + Subnetwork: "subnet-1", + }, + { + Network: "network-2", + Subnetwork: "subnet-2", + }, + }, + }, + }, + }, { desc: "pod requesting additional node networks", + gkeContext: GKEContext{ + NodeAdditionalNetworks: "should-be-overriden-1:should-be-overriden-2", + }, additionalAnnotations: map[string]string{ 
"tpu-provisioner.cloud.google.com/additional-node-networks": "network-1:subnet-1, network-2:subnet-2", },