From f31a1764cfa19f588f852c229297fb1e8e3bf694 Mon Sep 17 00:00:00 2001 From: hugoShaka Date: Thu, 19 Dec 2024 18:15:56 -0500 Subject: [PATCH] kube-agent-update: Use the RFD-184 webapi proxy update protocol by default when possible --- .../zz_generated.teleport-kube-agent.mdx | 30 ++++-- .../templates/updater/deployment.yaml | 9 ++ .../tests/updater_deployment_test.yaml | 63 ++++++++++++ .../chart/teleport-kube-agent/values.yaml | 25 +++-- .../cmd/teleport-kube-agent-updater/main.go | 98 +++++++++++++++++-- 5 files changed, 202 insertions(+), 23 deletions(-) diff --git a/docs/pages/includes/helm-reference/zz_generated.teleport-kube-agent.mdx b/docs/pages/includes/helm-reference/zz_generated.teleport-kube-agent.mdx index f7b7542c5311f..5bf7fcd719132 100644 --- a/docs/pages/includes/helm-reference/zz_generated.teleport-kube-agent.mdx +++ b/docs/pages/includes/helm-reference/zz_generated.teleport-kube-agent.mdx @@ -683,8 +683,16 @@ $ kubectl create secret generic my-root-ca --from-file=ca.pem=/path/to/root-ca.p `updater` controls whether the Kube Agent Updater should be deployed alongside the `teleport-kube-agent`. The updater fetches the target version, validates the -image signature, and updates the teleport deployment. The `enterprise` value should -have been set to `true`. +image signature, and updates the teleport deployment. + +The updater can fetch the update information using two protocols: +- the webapi update protocol (in this case the Teleport Proxy Service is the one driving the version rollout) +- the version server protocol (this is an HTTP server serving static files specifying the version and if the update is critical). + +The webapi protocol takes precedence over the version server one if the Teleport Proxy Service supports it. +The version server protocol failover can be disabled by unsetting `updater.versionServer`. +The webapi protocol can be disabled by setting `updater.proxyAddr` to `""`. 
+For backward compatibility reasons, the webapi protocol is not enabled if a custom `updater.versionServer` is set. All Kubernetes-specific fields such as `tolerations`, `affinity`, `nodeSelector`, ... default to the agent values. However, they can be overridden from the @@ -744,9 +752,8 @@ concatenating [`versionServer`](#updaterversionserver) and [`releaseChannel` ](#updaterreleasechannel). This field supports gotemplate. -You must set this if the updater is enabled, and you are not a Teleport Cloud user. - -You must not change the default values if you are a Teleport Cloud user. +Setting this field makes the updater fetch the version using the version server protocol. +Setting this field to a custom value disables the webapi update protocol to ensure backward compatibility. ### `updater.releaseChannel` @@ -762,8 +769,17 @@ The complete version endpoint is built by concatenating You must not change the default value if you are a Teleport Cloud user unless instructed by Teleport support. -You can change this value if the updater is enabled, you are not a Teleport -Cloud user, and manage your own version server. +This value is used when the updater is fetching the version using the version server protocol. +It is also used as a failover when fetching the version using the webapi protocol if `updater.group` is unset. + +### `updater.group` + +| Type | Default | +|------|---------| +| `string` | `""` | + +`updater.group` is the update group used when fetching the version using the webapi protocol. +When unset, the group defaults to `updater.releaseChannel`. 
### `updater.image` diff --git a/examples/chart/teleport-kube-agent/templates/updater/deployment.yaml b/examples/chart/teleport-kube-agent/templates/updater/deployment.yaml index 0487aeab4dccd..5790f16cd8dfc 100644 --- a/examples/chart/teleport-kube-agent/templates/updater/deployment.yaml +++ b/examples/chart/teleport-kube-agent/templates/updater/deployment.yaml @@ -1,5 +1,6 @@ {{- if .Values.updater.enabled -}} {{- $updater := mustMergeOverwrite (mustDeepCopy .Values) .Values.updater -}} +{{- $versionServerOverride := and $updater.versionServer (ne $updater.versionServer "https://{{ .Values.proxyAddr }}/v1/webapi/automaticupgrades/channel") }} apiVersion: apps/v1 kind: Deployment metadata: @@ -62,8 +63,16 @@ spec: - "--agent-name={{ .Release.Name }}" - "--agent-namespace={{ .Release.Namespace }}" - "--base-image={{ include "teleport-kube-agent.baseImage" . }}" + {{- if $updater.versionServer}} - "--version-server={{ tpl $updater.versionServer . }}" - "--version-channel={{ $updater.releaseChannel }}" + {{- end }} + {{- /* We don't want to enable the RFD-184 update protocol if the user has set a custom versionServer as this + would be a breaking change when the teleport proxy starts overriding the explicitly set RFD-109 version server */ -}} + {{- if and $updater.proxyAddr (not $versionServerOverride)}} + - "--proxy-address={{ $updater.proxyAddr }}" + - "--update-group={{ default $updater.releaseChannel $updater.group }}" + {{- end }} {{- if $updater.pullCredentials }} - "--pull-credentials={{ $updater.pullCredentials }}" {{- end }} diff --git a/examples/chart/teleport-kube-agent/tests/updater_deployment_test.yaml b/examples/chart/teleport-kube-agent/tests/updater_deployment_test.yaml index 111039f0ac7ce..3e27f481b5a33 100644 --- a/examples/chart/teleport-kube-agent/tests/updater_deployment_test.yaml +++ b/examples/chart/teleport-kube-agent/tests/updater_deployment_test.yaml @@ -67,6 +67,69 @@ tests: - contains: path: spec.template.spec.containers[0].args content: 
"--version-server=https://proxy.teleport.example.com:443/v1/webapi/automaticupgrades/channel" + - it: defaults the updater proxy server to the proxy address + set: + proxyAddr: proxy.teleport.example.com:443 + roles: "custom" + updater: + enabled: true + versionServer: "" + asserts: + - contains: + path: spec.template.spec.containers[0].args + content: "--proxy-address=proxy.teleport.example.com:443" + - it: doesn't enable the RFD-184 proxy protocol if the versionServer is custom + set: + proxyAddr: proxy.teleport.example.com:443 + roles: "custom" + updater: + enabled: true + versionServer: "version-server.example.com" + group: foobar + asserts: + - notContains: + path: spec.template.spec.containers[0].args + content: "--proxy-address=proxy.teleport.example.com:443" + - notContains: + path: spec.template.spec.containers[0].args + content: "--update-group=foobar" + - it: defaults the update group to the release channel when group is unset + set: + proxyAddr: proxy.teleport.example.com:443 + roles: "custom" + updater: + enabled: true + versionServer: "" + asserts: + - contains: + path: spec.template.spec.containers[0].args + content: "--update-group=stable/cloud" + - it: uses the update group when set + set: + proxyAddr: proxy.teleport.example.com:443 + roles: "custom" + updater: + enabled: true + versionServer: "" + group: "foobar" + asserts: + - contains: + path: spec.template.spec.containers[0].args + content: "--update-group=foobar" + - it: unsets the version server when empty + set: + proxyAddr: proxy.teleport.example.com:443 + roles: "custom" + updater: + enabled: true + versionServer: "" + asserts: + - notContains: + path: spec.template.spec.containers[0].args + content: "--proxy-server=" + - notContains: + path: spec.template.spec.containers[0].args + content: "--version-channel=stable/cloud" - it: sets the updater version server values: - ../.lint/updater.yaml diff --git a/examples/chart/teleport-kube-agent/values.yaml 
b/examples/chart/teleport-kube-agent/values.yaml index c51491783e11c..f29a65115b7ba 100644 --- a/examples/chart/teleport-kube-agent/values.yaml +++ b/examples/chart/teleport-kube-agent/values.yaml @@ -576,8 +576,16 @@ tls: # updater -- controls whether the Kube Agent Updater should be deployed alongside # the `teleport-kube-agent`. The updater fetches the target version, validates the -# image signature, and updates the teleport deployment. The `enterprise` value should -# have been set to `true`. +# image signature, and updates the teleport deployment. +# +# The updater can fetch the update information using two protocols: +# - the webapi update protocol (in this case the Teleport Proxy Service is the one driving the version rollout) +# - the version server protocol (this is an HTTP server serving static files specifying the version and if the update is critical). +# +# The webapi protocol takes precedence over the version server one if the Teleport Proxy Service supports it. +# The version server protocol failover can be disabled by unsetting `updater.versionServer`. +# The webapi protocol can be disabled by setting `updater.proxyAddr` to `""`. +# For backward compatibility reasons, the webapi protocol is not enabled if a custom `updater.versionServer` is set. # # All Kubernetes-specific fields such as `tolerations`, `affinity`, `nodeSelector`, # ... default to the agent values. However, they can be overridden from the @@ -626,9 +634,8 @@ updater: # ](#updaterreleasechannel). # This field supports gotemplate. # - # You must set this if the updater is enabled, and you are not a Teleport Cloud user. - # - # You must not change the default values if you are a Teleport Cloud user. + # Setting this field makes the updater fetch the version using the version server protocol. + # Setting this field to a custom value disables the webapi update protocol to ensure backward compatibility. 
versionServer: "https://{{ .Values.proxyAddr }}/v1/webapi/automaticupgrades/channel" # updater.releaseChannel(string) -- is the release channel the updater @@ -639,10 +646,14 @@ updater: # You must not change the default value if you are a Teleport Cloud user unless # instructed by Teleport support. # - # You can change this value if the updater is enabled, you are not a Teleport - # Cloud user, and manage your own version server. + # This value is used when the updater is fetching the version using the version server protocol. + # It is also used as a failover when fetching the version using the webapi protocol if `updater.group` is unset. releaseChannel: "stable/cloud" + # updater.group(string) -- is the update group used when fetching the version using the webapi protocol. + # When unset, the group defaults to `updater.releaseChannel`. + group: "" + # updater.image(string) -- sets the container image used for Teleport updater # pods run when `updater.enabled` is true. # diff --git a/integrations/kube-agent-updater/cmd/teleport-kube-agent-updater/main.go b/integrations/kube-agent-updater/cmd/teleport-kube-agent-updater/main.go index 29a0bf46b9173..8d27e32908e26 100644 --- a/integrations/kube-agent-updater/cmd/teleport-kube-agent-updater/main.go +++ b/integrations/kube-agent-updater/cmd/teleport-kube-agent-updater/main.go @@ -40,6 +40,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log/zap" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + "github.com/gravitational/teleport/api/client/webclient" kubeversionupdater "github.com/gravitational/teleport/integrations/kube-agent-updater" "github.com/gravitational/teleport/integrations/kube-agent-updater/pkg/controller" "github.com/gravitational/teleport/integrations/kube-agent-updater/pkg/img" @@ -72,6 +73,8 @@ func main() { var insecureNoResolve bool var disableLeaderElection bool var credSource string + var proxyAddress string + var updateGroup string flag.StringVar(&agentName, "agent-name", "", "The 
name of the agent that should be updated. This is mandatory.") flag.StringVar(&agentNamespace, "agent-namespace", "", "The namespace of the agent that should be updated. This is mandatory.") @@ -81,6 +84,8 @@ func main() { flag.BoolVar(&insecureNoVerify, "insecure-no-verify-image", false, "Disable image signature verification. The image tag is still resolved and image must exist.") flag.BoolVar(&insecureNoResolve, "insecure-no-resolve-image", false, "Disable image signature verification AND resolution. The updater can update to non-existing images.") flag.BoolVar(&disableLeaderElection, "disable-leader-election", false, "Disable leader election, used when running the kube-agent-updater outside of Kubernetes.") + flag.StringVar(&proxyAddress, "proxy-address", "", "The proxy address of the teleport cluster. When set, the updater will try to get update via the /find proxy endpoint.") + flag.StringVar(&updateGroup, "update-group", "", "The agent update group, as defined in the `autoupdate_config` resource. When unset or set to an unknown value, agent will update with the default group.") flag.StringVar(&versionServer, "version-server", "https://updates.releases.teleport.dev/v1/", "URL of the HTTP server advertising target version and critical maintenances. Trailing slash is optional.") flag.StringVar(&versionChannel, "version-channel", "stable/cloud", "Version channel to get updates from.") flag.StringVar(&baseImageName, "base-image", "public.ecr.aws/gravitational/teleport", "Image reference containing registry and repository.") @@ -98,6 +103,7 @@ func main() { ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts))) + // Validate configuration. 
if agentName == "" { ctrl.Log.Error(trace.BadParameter("--agent-name empty"), "agent-name must be provided") os.Exit(1) @@ -106,7 +112,16 @@ func main() { ctrl.Log.Error(trace.BadParameter("--agent-namespace empty"), "agent-namespace must be provided") os.Exit(1) } + if versionServer == "" && proxyAddress == "" { + ctrl.Log.Error( + trace.BadParameter("at least one of --proxy-address and --version-server must be provided"), + "the updater has no upstream configured, it cannot retrieve the version and check when to update", + ) + os.Exit(1) + } + // Build a new controller manager. We need to do this early as some triggers + // need a Kubernetes client and the manager is the one providing it. mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ Scheme: scheme, Metrics: metricsserver.Options{BindAddress: metricsAddr}, @@ -130,16 +145,76 @@ func main() { os.Exit(1) } - versionServerURL, err := url.Parse(strings.TrimRight(versionServer, "/") + "/" + versionChannel) - if err != nil { - ctrl.Log.Error(err, "failed to parse version server URL, exiting") - os.Exit(1) + // Craft the version getter and update triggers based on the configuration (use RFD-109 APIs, RFD-184, or both). + var criticalUpdateTriggers []maintenance.Trigger + var plannedMaintenanceTriggers []maintenance.Trigger + var versionGetters []version.Getter + + // If the proxy server is specified, we enable RFD-184 updates + // See https://github.com/gravitational/teleport/blob/master/rfd/0184-agent-auto-updates.md#updater-apis + if proxyAddress != "" { + ctrl.Log.Info("fetching versions from the proxy /find endpoint", "proxy_server_url", proxyAddress) + + proxyClt, err := webclient.NewReusableClient(&webclient.Config{ + Context: ctx, + ProxyAddr: proxyAddress, + UpdateGroup: updateGroup, + }) + if err != nil { + ctrl.Log.Error(err, "failed to create proxy client, exiting") + os.Exit(1) + } + + // We do a preflight check before starting to know if the proxy is correctly configured and reachable. 
+ ctrl.Log.Info("preflight check: ping the proxy server", "proxy_server_url", proxyAddress) + pong, err := proxyClt.Ping() + if err != nil { + ctrl.Log.Error(err, "failed to ping proxy, either the proxy address is wrong, or the network blocks connections to the proxy", + "proxy_address", proxyAddress, + ) + os.Exit(1) + } + ctrl.Log.Info("proxy server successfully pinged", + "proxy_server_url", proxyAddress, + "proxy_cluster_name", pong.ClusterName, + "proxy_version", pong.ServerVersion, + ) + + versionGetters = append(versionGetters, version.NewProxyVersionGetter("proxy update protocol", proxyClt)) + + // In RFD 184, the server is driving the update, so both regular maintenances and + // critical ones are fetched from the proxy. Using the same trigger ensures we hit the cache if both triggers + // are evaluated and don't actually make 2 calls. + proxyTrigger := maintenance.NewProxyMaintenanceTrigger("proxy update protocol", proxyClt) + criticalUpdateTriggers = append(criticalUpdateTriggers, proxyTrigger) + plannedMaintenanceTriggers = append(plannedMaintenanceTriggers, proxyTrigger) } - versionGetter := version.NewBasicHTTPVersionGetter(versionServerURL) + + // If the version server is specified, we enable RFD-109 updates + // See https://github.com/gravitational/teleport/blob/master/rfd/0109-cloud-agent-upgrades.md#kubernetes-model + if versionServer != "" { + rawUrl := strings.TrimRight(versionServer, "/") + "/" + versionChannel + versionServerURL, err := url.Parse(rawUrl) + if err != nil { + ctrl.Log.Error(err, "failed to parse version server URL, exiting", "url", rawUrl) + os.Exit(1) + } + ctrl.Log.Info("fetching versions from the version server", "version_server_url", versionServerURL.String()) + + versionGetters = append(versionGetters, version.NewBasicHTTPVersionGetter(versionServerURL)) + // critical updates are advertised by the version channel + criticalUpdateTriggers = append(criticalUpdateTriggers, maintenance.NewBasicHTTPMaintenanceTrigger("critical 
update", versionServerURL)) + // planned maintenance windows are exported by the pods + plannedMaintenanceTriggers = append(plannedMaintenanceTriggers, podmaintenance.NewWindowTrigger("maintenance window", mgr.GetClient())) + } + maintenanceTriggers := maintenance.Triggers{ - maintenance.NewBasicHTTPMaintenanceTrigger("critical update", versionServerURL), + // We check if the update is critical. + maintenance.FailoverTrigger(criticalUpdateTriggers), + // We check if the agent is unhealthy. podmaintenance.NewUnhealthyWorkloadTrigger("unhealthy pods", mgr.GetClient()), - podmaintenance.NewWindowTrigger("maintenance window", mgr.GetClient()), + // We check if we're in a maintenance window. + maintenance.FailoverTrigger(plannedMaintenanceTriggers), } var imageValidators img.Validators @@ -169,7 +244,12 @@ func main() { os.Exit(1) } - versionUpdater := controller.NewVersionUpdater(versionGetter, imageValidators, maintenanceTriggers, baseImage) + versionUpdater := controller.NewVersionUpdater( + version.FailoverGetter(versionGetters), + imageValidators, + maintenanceTriggers, + baseImage, + ) // Controller registration deploymentController := controller.DeploymentVersionUpdater{ @@ -203,7 +283,7 @@ func main() { os.Exit(1) } - ctrl.Log.Info("starting the updater", "version", kubeversionupdater.Version, "url", versionServerURL.String()) + ctrl.Log.Info("starting the updater", "version", kubeversionupdater.Version) if err := mgr.Start(ctx); err != nil { ctrl.Log.Error(err, "failed to start manager, exiting")