diff --git a/.github/actions/login-azure/action.yml b/.github/actions/login-azure/action.yml index 28111b28..afe15058 100644 --- a/.github/actions/login-azure/action.yml +++ b/.github/actions/login-azure/action.yml @@ -53,6 +53,7 @@ runs: - name: prefetch tokens shell: bash run: | + az account get-access-token --output none az account get-access-token --scope https://storage.azure.com/.default --output none az account get-access-token --scope https://vault.azure.net/.default --output none az account get-access-token --resource https://ossrdbms-aad.database.windows.net --output none diff --git a/cli/internal/install/cloudinstall/cloudconfig.go b/cli/internal/install/cloudinstall/cloudconfig.go index 24507678..ff448a95 100644 --- a/cli/internal/install/cloudinstall/cloudconfig.go +++ b/cli/internal/install/cloudinstall/cloudconfig.go @@ -8,6 +8,8 @@ import ( "io" "strings" "text/template" + + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4" ) //go:embed config.tpl @@ -83,11 +85,13 @@ func (c *ComputeConfig) GetApiHostCluster() *ClusterConfig { } type ClusterConfig struct { - Name string `json:"name"` - ApiHost bool `json:"apiHost"` - Location string `json:"location"` - KubernetesVersion string `json:"kubernetesVersion,omitempty"` - UserNodePools []*NodePoolConfig `json:"userNodePools"` + Name string `json:"name"` + ApiHost bool `json:"apiHost"` + Location string `json:"location"` + Sku armcontainerservice.ManagedClusterSKUTier `json:"sku"` + KubernetesVersion string `json:"kubernetesVersion,omitempty"` + SystemNodePool *NodePoolConfig `json:"systemNodePool"` + UserNodePools []*NodePoolConfig `json:"userNodePools"` } type NodePoolConfig struct { diff --git a/cli/internal/install/cloudinstall/compute.go b/cli/internal/install/cloudinstall/compute.go index df0e649c..fc775da6 100644 --- a/cli/internal/install/cloudinstall/compute.go +++ b/cli/internal/install/cloudinstall/compute.go @@ -59,6 +59,24 @@ func (inst *Installer) 
createCluster(ctx context.Context, clusterConfig *Cluster } tags[TagKey] = &inst.Config.EnvironmentName + if clusterAlreadyExists { + if *existingCluster.Properties.KubernetesVersion != clusterConfig.KubernetesVersion { + existingCluster.Properties.KubernetesVersion = &clusterConfig.KubernetesVersion + log.Ctx(ctx).Info().Msgf("Updating Kubernetes version to %s", clusterConfig.KubernetesVersion) + resp, err := clustersClient.BeginCreateOrUpdate(ctx, inst.Config.Cloud.ResourceGroup, clusterConfig.Name, existingCluster.ManagedCluster, nil) + if err != nil { + return nil, fmt.Errorf("failed to update cluster: %w", err) + } + if _, err := resp.PollUntilDone(ctx, nil); err != nil { + return nil, fmt.Errorf("failed to update cluster: %w", err) + } + existingCluster, err = clustersClient.Get(ctx, inst.Config.Cloud.ResourceGroup, clusterConfig.Name, nil) + if err != nil { + return nil, fmt.Errorf("failed to get cluster: %w", err) + } + } + } + cluster := armcontainerservice.ManagedCluster{ Tags: tags, Location: Ptr(clusterConfig.Location), @@ -82,6 +100,10 @@ func (inst *Installer) createCluster(ctx context.Context, clusterConfig *Cluster }, }, }, + SKU: &armcontainerservice.ManagedClusterSKU{ + Name: Ptr(armcontainerservice.ManagedClusterSKUNameBase), + Tier: Ptr(armcontainerservice.ManagedClusterSKUTier(clusterConfig.Sku)), + }, } if workspace := inst.Config.Cloud.LogAnalyticsWorkspace; workspace != nil { @@ -104,34 +126,35 @@ func (inst *Installer) createCluster(ctx context.Context, clusterConfig *Cluster "logAnalyticsWorkspaceResourceID": resp.ID, }, } - } cluster.Properties.AgentPoolProfiles = []*armcontainerservice.ManagedClusterAgentPoolProfile{ { - Name: Ptr("system"), - Mode: Ptr(armcontainerservice.AgentPoolModeSystem), - VMSize: Ptr("Standard_DS2_v2"), - EnableAutoScaling: Ptr(true), - Count: Ptr(int32(1)), - MinCount: Ptr(int32(1)), - MaxCount: Ptr(int32(3)), - OSType: Ptr(armcontainerservice.OSTypeLinux), - OSSKU: Ptr(armcontainerservice.OSSKUAzureLinux), + 
Name: &clusterConfig.SystemNodePool.Name, + Mode: Ptr(armcontainerservice.AgentPoolModeSystem), + OrchestratorVersion: &clusterConfig.KubernetesVersion, + VMSize: &clusterConfig.SystemNodePool.VMSize, + EnableAutoScaling: Ptr(true), + Count: &clusterConfig.SystemNodePool.MinCount, + MinCount: &clusterConfig.SystemNodePool.MinCount, + MaxCount: &clusterConfig.SystemNodePool.MaxCount, + OSType: Ptr(armcontainerservice.OSTypeLinux), + OSSKU: Ptr(armcontainerservice.OSSKUAzureLinux), }, } for _, np := range clusterConfig.UserNodePools { profile := armcontainerservice.ManagedClusterAgentPoolProfile{ - Name: &np.Name, - Mode: Ptr(armcontainerservice.AgentPoolModeUser), - VMSize: &np.VMSize, - EnableAutoScaling: Ptr(true), - Count: &np.MinCount, - MinCount: &np.MinCount, - MaxCount: &np.MaxCount, - OSType: Ptr(armcontainerservice.OSTypeLinux), - OSSKU: Ptr(armcontainerservice.OSSKUAzureLinux), + Name: &np.Name, + Mode: Ptr(armcontainerservice.AgentPoolModeUser), + OrchestratorVersion: &clusterConfig.KubernetesVersion, + VMSize: &np.VMSize, + EnableAutoScaling: Ptr(true), + Count: &np.MinCount, + MinCount: &np.MinCount, + MaxCount: &np.MaxCount, + OSType: Ptr(armcontainerservice.OSTypeLinux), + OSSKU: Ptr(armcontainerservice.OSSKUAzureLinux), NodeLabels: map[string]*string{ "tyger": Ptr("run"), }, @@ -158,39 +181,13 @@ func (inst *Installer) createCluster(ctx context.Context, clusterConfig *Cluster if clusterAlreadyExists { // Check for node pools that need to be added or removed, which - // need to be handled separately other cluster property updates. + // need to be handled separately from other cluster property updates. 
agentPoolsClient, err := armcontainerservice.NewAgentPoolsClient(inst.Config.Cloud.SubscriptionID, inst.Credential, nil) if err != nil { return nil, fmt.Errorf("failed to create agent pools client: %w", err) } - agentPoolDeletePollers := make([]*runtime.Poller[armcontainerservice.AgentPoolsClientDeleteResponse], 0) - - for _, existingNodePool := range existingCluster.ManagedCluster.Properties.AgentPoolProfiles { - found := false - for _, newPool := range cluster.Properties.AgentPoolProfiles { - if *newPool.Name == *existingNodePool.Name { - found = true - break - } - } - if !found { - log.Info().Msgf("Deleting node pool '%s' from cluster '%s'", *existingNodePool.Name, clusterConfig.Name) - p, err := agentPoolsClient.BeginDelete(ctx, inst.Config.Cloud.ResourceGroup, clusterConfig.Name, *existingNodePool.Name, nil) - if err != nil { - return nil, fmt.Errorf("failed to delete node pool: %w", err) - } - agentPoolDeletePollers = append(agentPoolDeletePollers, p) - } - } - - for _, deletePoller := range agentPoolDeletePollers { - if _, err := deletePoller.PollUntilDone(ctx, nil); err != nil { - return nil, fmt.Errorf("failed to delete node pool: %w", err) - } - } - agentPoolCreatePollers := make([]*runtime.Poller[armcontainerservice.AgentPoolsClientCreateOrUpdateResponse], 0) for _, newNodePool := range cluster.Properties.AgentPoolProfiles { @@ -198,6 +195,14 @@ func (inst *Installer) createCluster(ctx context.Context, clusterConfig *Cluster for _, existingNodePool := range existingCluster.ManagedCluster.Properties.AgentPoolProfiles { if *newNodePool.Name == *existingNodePool.Name { found = true + if *newNodePool.VMSize != *existingNodePool.VMSize { + return nil, fmt.Errorf("create a new node pool instead of changing the VM size of node pool '%s'", *newNodePool.Name) + } + + if *newNodePool.Mode != *existingNodePool.Mode { + return nil, fmt.Errorf("cannot change existing node pool '%s' from user to system (or vice-versa)", *newNodePool.Name) + } + break } } @@ -222,12 
+227,38 @@ func (inst *Installer) createCluster(ctx context.Context, clusterConfig *Cluster } agentPoolCreatePollers = append(agentPoolCreatePollers, p) } + } - for _, p := range agentPoolCreatePollers { - if _, err := p.PollUntilDone(ctx, nil); err != nil { - return nil, fmt.Errorf("failed to create node pool: %w", err) + for _, p := range agentPoolCreatePollers { + if _, err := p.PollUntilDone(ctx, nil); err != nil { + return nil, fmt.Errorf("failed to create node pool: %w", err) + } + } + + agentPoolDeletePollers := make([]*runtime.Poller[armcontainerservice.AgentPoolsClientDeleteResponse], 0) + + for _, existingNodePool := range existingCluster.ManagedCluster.Properties.AgentPoolProfiles { + found := false + for _, newPool := range cluster.Properties.AgentPoolProfiles { + if *newPool.Name == *existingNodePool.Name { + found = true + break } } + if !found { + log.Info().Msgf("Deleting node pool '%s' from cluster '%s'", *existingNodePool.Name, clusterConfig.Name) + p, err := agentPoolsClient.BeginDelete(ctx, inst.Config.Cloud.ResourceGroup, clusterConfig.Name, *existingNodePool.Name, nil) + if err != nil { + return nil, fmt.Errorf("failed to delete node pool: %w", err) + } + agentPoolDeletePollers = append(agentPoolDeletePollers, p) + } + } + + for _, deletePoller := range agentPoolDeletePollers { + if _, err := deletePoller.PollUntilDone(ctx, nil); err != nil { + return nil, fmt.Errorf("failed to delete node pool: %w", err) + } } if len(agentPoolDeletePollers) > 0 || len(agentPoolCreatePollers) > 0 { @@ -342,6 +373,10 @@ func clusterNeedsUpdating(cluster, existingCluster armcontainerservice.ManagedCl } } + if *cluster.SKU.Tier != *existingCluster.SKU.Tier { + return true, false + } + if len(cluster.Properties.AgentPoolProfiles) != len(existingCluster.Properties.AgentPoolProfiles) { return true, false } @@ -366,6 +401,9 @@ func clusterNeedsUpdating(cluster, existingCluster armcontainerservice.ManagedCl onlyScaleDown = false } } + if *np.OrchestratorVersion != 
*existingNp.OrchestratorVersion { + return true, false + } break } } diff --git a/cli/internal/install/cloudinstall/config.tpl b/cli/internal/install/cloudinstall/config.tpl index 5a6f1132..a257ef87 100644 --- a/cli/internal/install/cloudinstall/config.tpl +++ b/cli/internal/install/cloudinstall/config.tpl @@ -16,9 +16,15 @@ cloud: clusters: - name: {{ .EnvironmentName }} apiHost: true - kubernetesVersion: {{ .KubernetesVersion }} + kubernetesVersion: "{{ .KubernetesVersion }}" # location: Defaults to defaultLocation + systemNodePool: + name: system + vmSize: Standard_DS2_v2 + minCount: 1 + maxCount: 3 + userNodePools: - name: cpunp vmSize: Standard_DS12_v2 diff --git a/cli/internal/install/cloudinstall/helm.go b/cli/internal/install/cloudinstall/helm.go index 9df8c07d..97ebed34 100644 --- a/cli/internal/install/cloudinstall/helm.go +++ b/cli/internal/install/cloudinstall/helm.go @@ -54,6 +54,11 @@ func (inst *Installer) installTraefik(ctx context.Context, restConfigPromise *in ChartRef: "traefik/traefik", Version: "24.0.0", Values: map[string]any{ + "image": map[string]any{ + "registry": "mcr.microsoft.com", + "repository": "oss/traefik/traefik", + "tag": "v2.10.7", + }, "logs": map[string]any{ "general": map[string]any{ "format": "json", diff --git a/cli/internal/install/cloudinstall/validation.go b/cli/internal/install/cloudinstall/validation.go index 799b105a..84156b05 100644 --- a/cli/internal/install/cloudinstall/validation.go +++ b/cli/internal/install/cloudinstall/validation.go @@ -8,7 +8,10 @@ import ( "net" "net/url" "regexp" + "slices" + "strings" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/postgresql/armpostgresqlflexibleservers/v4" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage" "github.com/google/uuid" @@ -96,31 +99,30 @@ func quickValidateComputeConfig(success *bool, cloudConfig *CloudConfig) { 
cluster.KubernetesVersion = DefaultKubernetesVersion } + if cluster.Sku == "" { + cluster.Sku = armcontainerservice.ManagedClusterSKUTierStandard + } else { + possibleValues := armcontainerservice.PossibleManagedClusterSKUTierValues() + if !slices.Contains(possibleValues, cluster.Sku) { + formattedPossibleValues := make([]string, len(possibleValues)) + for i, v := range possibleValues { + formattedPossibleValues[i] = fmt.Sprintf("`%s`", v) + } + validationError(success, "The `sku` field of the cluster `%s` must be one of [%s]", cluster.Name, strings.Join(formattedPossibleValues, ", ")) + } + } + + if cluster.SystemNodePool == nil { + validationError(success, "The `systemNodePool` field is required on a cluster `%s`", cluster.Name) + } else { + quickValidateNodePoolConfig(success, cluster.SystemNodePool, 1) + } + if len(cluster.UserNodePools) == 0 { validationError(success, "At least one user node pool must be specified") } for _, np := range cluster.UserNodePools { - if np.Name == "" { - validationError(success, "The `name` field is required on a node pool") - } else if !ResourceNameRegex.MatchString(np.Name) { - validationError(success, "The node pool `name` field must match the pattern "+ResourceNameRegex.String()) - } - - if np.VMSize == "" { - validationError(success, "The `vmSize` field is required on a node pool") - } - - if np.MinCount < 0 { - validationError(success, "The `minCount` field must be greater than or equal to zero") - } - - if np.MaxCount < 0 { - validationError(success, "The `maxCount` field must be greater than or equal to zero") - } - - if np.MinCount > np.MaxCount { - validationError(success, "The `minCount` field must be less than or equal to the `maxCount` field") - } + quickValidateNodePoolConfig(success, np, 0) } if cluster.ApiHost { @@ -164,6 +166,30 @@ func quickValidateComputeConfig(success *bool, cloudConfig *CloudConfig) { } } +func quickValidateNodePoolConfig(success *bool, np *NodePoolConfig, minNodeCount int) { + if np.Name == "" 
{ + validationError(success, "The `name` field is required on a node pool") + } else if !ResourceNameRegex.MatchString(np.Name) { + validationError(success, "The node pool `name` field must match the pattern "+ResourceNameRegex.String()) + } + + if np.VMSize == "" { + validationError(success, "The `vmSize` field is required on a node pool") + } + + if np.MinCount < int32(minNodeCount) { + validationError(success, "The `minCount` field must be greater than or equal to %d", minNodeCount) + } + + if np.MaxCount < int32(minNodeCount) { + validationError(success, "The `maxCount` field must be greater than or equal to %d", minNodeCount) + } + + if np.MinCount > np.MaxCount { + validationError(success, "The `minCount` field must be less than or equal to the `maxCount` field") + } +} + func quickValidateStorageConfig(success *bool, cloudConfig *CloudConfig) { storageConfig := cloudConfig.Storage if storageConfig == nil { diff --git a/deploy/config/microsoft/cloudconfig.yml b/deploy/config/microsoft/cloudconfig.yml index 4b0ac6b4..9d9cf4aa 100644 --- a/deploy/config/microsoft/cloudconfig.yml +++ b/deploy/config/microsoft/cloudconfig.yml @@ -12,7 +12,13 @@ cloud: clusters: - name: ${TYGER_ENVIRONMENT_NAME} apiHost: true - kubernetesVersion: 1.27 + sku: Standard + kubernetesVersion: "1.28" + systemNodePool: + name: system + vmSize: Standard_DS2_v2 + minCount: 1 + maxCount: 3 userNodePools: - name: cpunp vmSize: Standard_DS12_v2