Skip to content

Commit

Permalink
feat: thanos querier to thanos sidecar mTLS
Browse files Browse the repository at this point in the history
  • Loading branch information
vyzigold committed Oct 7, 2024
1 parent c9424bd commit dd9426c
Show file tree
Hide file tree
Showing 9 changed files with 496 additions and 2 deletions.
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ require (
github.com/go-logr/logr v1.4.2
github.com/google/go-cmp v0.6.0
github.com/openshift/api v0.0.0-20240301093301-ce10821dc999
github.com/openshift/library-go v0.0.0-20240216151214-738f3fa4ccf8
github.com/pkg/errors v0.9.1
github.com/prometheus/common v0.59.1
github.com/rhobs/obo-prometheus-operator v0.77.1-rhobs1
Expand Down Expand Up @@ -106,6 +107,7 @@ require (
google.golang.org/protobuf v1.34.2 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
k8s.io/apiserver v0.31.1 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/kube-openapi v0.0.0-20240903163716-9e1beecbcb38 // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/openshift/api v0.0.0-20240301093301-ce10821dc999 h1:+S998xHiJApsJZjRAO8wyedU9GfqFd8mtwWly6LqHDo=
github.com/openshift/api v0.0.0-20240301093301-ce10821dc999/go.mod h1:CxgbWAlvu2iQB0UmKTtRu1YfepRg1/vJ64n2DlIEVz4=
github.com/openshift/library-go v0.0.0-20240216151214-738f3fa4ccf8 h1:dKtHGYiOwl0DKZEWBW4MFWFS6IYW02AVD1WSuUAVwEo=
github.com/openshift/library-go v0.0.0-20240216151214-738f3fa4ccf8/go.mod h1:ePlaOqUiPplRc++6aYdMe+2FmXb2xTNS9Nz5laG2YmI=
github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs=
github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc=
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ=
Expand Down Expand Up @@ -357,6 +359,8 @@ k8s.io/apiextensions-apiserver v0.31.1 h1:L+hwULvXx+nvTYX/MKM3kKMZyei+UiSXQWciX/
k8s.io/apiextensions-apiserver v0.31.1/go.mod h1:tWMPR3sgW+jsl2xm9v7lAyRF1rYEK71i9G5dRtkknoQ=
k8s.io/apimachinery v0.31.1 h1:mhcUBbj7KUjaVhyXILglcVjuS4nYXiwC+KKFBgIVy7U=
k8s.io/apimachinery v0.31.1/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo=
k8s.io/apiserver v0.31.1 h1:Sars5ejQDCRBY5f7R3QFHdqN3s61nhkpaX8/k1iEw1c=
k8s.io/apiserver v0.31.1/go.mod h1:lzDhpeToamVZJmmFlaLwdYZwd7zB+WYRYIboqA1kGxM=
k8s.io/client-go v0.31.1 h1:f0ugtWSbWpxHR7sjVpQwuvw9a3ZKLXX0u0itkFXufb0=
k8s.io/client-go v0.31.1/go.mod h1:sKI8871MJN2OyeqRlmA4W4KM9KBdBUpDLu/43eGemCg=
k8s.io/component-base v0.31.1 h1:UpOepcrX3rQ3ab5NB6g5iP0tvsgJWzxTyAo20sgYSy8=
Expand Down
147 changes: 147 additions & 0 deletions pkg/assets/certificate_generator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
package assets

import (
"crypto/rand"
"crypto/x509"
"fmt"
"math/big"
"time"

"github.com/go-logr/logr"
"github.com/openshift/library-go/pkg/crypto"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apiserver/pkg/authentication/user"
)

// Settings shared by the Thanos gRPC mTLS assets.
const (
	// certificateLifetime is crypto.DefaultCertificateLifetimeInDays expressed
	// as a time.Duration.
	certificateLifetime = 24 * time.Hour * time.Duration(crypto.DefaultCertificateLifetimeInDays)

	// GRPCSecretName is the name of the Secret that carries the CA, the
	// thanos-querier client certificate and the Prometheus server certificate
	// written by RotateGRPCSecret.
	GRPCSecretName = "thanos-grpc-secret"
)

// needsNewCert reports whether a certificate valid from notBefore until
// notAfter is due for rotation at the instant returned by now. Rotation is due
// once strictly less than one fifth of the validity window is left.
//
// Derived from
// https://github.com/openshift/library-go/blob/08c2fd1b452520da35ad210930ea9d100545589a/pkg/operator/certrotation/signer.go#L68-L86
// with the refresh time handling left out.
func needsNewCert(notBefore, notAfter time.Time, now func() time.Time) bool {
	validity := notAfter.Sub(notBefore)
	rotateAfter := notAfter.Add(-(validity / 5))
	return rotateAfter.Before(now())
}

// RotateGRPCSecret makes sure that s carries a usable CA plus the
// thanos-querier client certificate and the "prometheus-grpc" server
// certificate signed by that CA.
//
// Adapted from
// https://github.com/openshift/cluster-monitoring-operator/blob/765d0b0369b176a5997d787b6710783437172879/pkg/manifests/tls.go#L113
//
// A rotation is performed when "ca.crt" or "ca.key" is missing from s.Data,
// when the stored CA cannot be parsed, or when needsNewCert reports that the
// CA is close to expiry. If a valid CA exists it is re-issued with the same
// key, a new validity window and an incremented serial number; otherwise a new
// self-signed CA is generated. Both leaf certificates are re-issued on every
// rotation.
//
// The returned bool reports whether a rotation was required; it is true on
// every error path except the nil-secret case. s.Data is only modified when
// all certificates were generated successfully, so on error the secret is
// left exactly as it was passed in.
func RotateGRPCSecret(s *v1.Secret, logger logr.Logger) (bool, error) {
	if s == nil {
		return false, fmt.Errorf("cannot rotate gRPC certificates: secret is nil")
	}

	// Reading from a nil map is fine, so s.Data does not need to be
	// initialised before this point.
	curCABytes, crtPresent := s.Data["ca.crt"]
	curCAKeyBytes, keyPresent := s.Data["ca.key"]
	rotate := !crtPresent || !keyPresent

	var curCA *crypto.CA
	if crtPresent && keyPresent {
		var err error
		curCA, err = crypto.GetCAFromBytes(curCABytes, curCAKeyBytes)
		switch {
		case err != nil:
			logger.Info(fmt.Sprintf("generating a new CA due to error reading CA: %v", err))
			// Never sign with a CA that failed to load.
			curCA = nil
			rotate = true
		case needsNewCert(curCA.Config.Certs[0].NotBefore, curCA.Config.Certs[0].NotAfter, time.Now):
			logger.Info("generating new CA, because less than 1/5 of the validity period of the current one remains")
			rotate = true
		}
	}

	if !rotate {
		return rotate, nil
	}

	var newCA *crypto.CA
	if curCA == nil {
		newCAConfig, err := crypto.MakeSelfSignedCAConfig(
			fmt.Sprintf("%s@%d", "openshift-cluster-monitoring", time.Now().Unix()),
			crypto.DefaultCertificateLifetimeInDays,
		)
		if err != nil {
			return rotate, fmt.Errorf("error generating self signed CA: %w", err)
		}

		newCA = &crypto.CA{
			SerialGenerator: &crypto.RandomSerialGenerator{},
			Config:          newCAConfig,
		}
	} else {
		// Work on a copy so that the certificate held by curCA (and its
		// serial number) is not modified behind the caller's back.
		template := *curCA.Config.Certs[0]
		now := time.Now()
		// Back-date slightly so the certificate is valid immediately.
		template.NotBefore = now.Add(-1 * time.Second)
		template.NotAfter = now.Add(certificateLifetime)
		template.SerialNumber = new(big.Int).Add(curCA.Config.Certs[0].SerialNumber, big.NewInt(1))

		// Self-signed: the template is its own parent and the existing key
		// is reused.
		newCACert, err := createCertificate(&template, &template, template.PublicKey, curCA.Config.Key)
		if err != nil {
			return rotate, fmt.Errorf("error rotating CA: %w", err)
		}

		newCA = &crypto.CA{
			SerialGenerator: &crypto.RandomSerialGenerator{},
			Config: &crypto.TLSCertificateConfig{
				Certs: []*x509.Certificate{newCACert},
				Key:   curCA.Config.Key,
			},
		}
	}

	newCABytes, newCAKeyBytes, err := newCA.Config.GetPEMBytes()
	if err != nil {
		return rotate, fmt.Errorf("error getting PEM bytes from CA: %w", err)
	}

	// Collect everything first and commit to s.Data only once nothing can
	// fail any more.
	staged := map[string][]byte{
		"ca.crt": newCABytes,
		"ca.key": newCAKeyBytes,
	}

	clientCfg, err := newCA.MakeClientCertificateForDuration(
		&user.DefaultInfo{
			Name: "thanos-querier",
		},
		certificateLifetime,
	)
	if err != nil {
		return rotate, fmt.Errorf("error making client certificate: %w", err)
	}
	clientCrt, clientKey, err := clientCfg.GetPEMBytes()
	if err != nil {
		return rotate, fmt.Errorf("error getting PEM bytes for thanos querier client certificate: %w", err)
	}
	staged["thanos-querier-client.crt"] = clientCrt
	staged["thanos-querier-client.key"] = clientKey

	serverCfg, err := newCA.MakeServerCert(
		sets.NewString("prometheus-grpc"),
		crypto.DefaultCertificateLifetimeInDays,
	)
	if err != nil {
		return rotate, fmt.Errorf("error making server certificate: %w", err)
	}
	serverCrt, serverKey, err := serverCfg.GetPEMBytes()
	if err != nil {
		return rotate, fmt.Errorf("error getting PEM bytes for prometheus-k8s server certificate: %w", err)
	}
	staged["prometheus-server.crt"] = serverCrt
	staged["prometheus-server.key"] = serverKey

	// Writing to a nil map panics, so make sure one exists.
	if s.Data == nil {
		s.Data = make(map[string][]byte, len(staged))
	}
	for name, pem := range staged {
		s.Data[name] = pem
	}

	return rotate, nil
}

// createCertificate issues a certificate described by template, signed by
// parent using priv and carrying pub as its public key, and hands it back as
// a parsed x509.Certificate rather than raw DER bytes.
func createCertificate(template, parent *x509.Certificate, pub, priv interface{}) (*x509.Certificate, error) {
	der, err := x509.CreateCertificate(rand.Reader, template, parent, pub, priv)
	if err != nil {
		return nil, fmt.Errorf("error creating certificate: %w", err)
	}

	// CreateCertificate yields exactly one DER-encoded certificate.
	cert, err := x509.ParseCertificate(der)
	if err != nil {
		return nil, fmt.Errorf("error parsing certificate: %w", err)
	}
	return cert, nil
}
35 changes: 34 additions & 1 deletion pkg/controllers/monitoring/monitoring-stack/components.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package monitoringstack

import (
"fmt"
"reflect"

monv1 "github.com/rhobs/obo-prometheus-operator/pkg/apis/monitoring/v1"
Expand All @@ -12,6 +13,7 @@ import (
"k8s.io/utils/ptr"

stack "github.com/rhobs/observability-operator/pkg/apis/monitoring/v1alpha1"
"github.com/rhobs/observability-operator/pkg/assets"
"github.com/rhobs/observability-operator/pkg/reconciler"
)

Expand All @@ -28,6 +30,7 @@ func stackComponentReconcilers(
thanos ThanosConfiguration,
prometheus PrometheusConfiguration,
alertmanager AlertmanagerConfiguration,
tlsHashes map[string]string,
) []reconciler.Reconciler {
prometheusName := ms.Name + "-prometheus"
alertmanagerName := ms.Name + "-alertmanager"
Expand All @@ -44,7 +47,7 @@ func stackComponentReconcilers(
reconciler.NewUpdater(newPrometheus(ms, prometheusName,
additionalScrapeConfigsSecretName,
instanceSelectorKey, instanceSelectorValue,
thanos, prometheus), ms),
thanos, prometheus, tlsHashes), ms),
reconciler.NewUpdater(newPrometheusService(ms, instanceSelectorKey, instanceSelectorValue), ms),
reconciler.NewUpdater(newThanosSidecarService(ms, instanceSelectorKey, instanceSelectorValue), ms),
reconciler.NewOptionalUpdater(newPrometheusPDB(ms, instanceSelectorKey, instanceSelectorValue), ms,
Expand Down Expand Up @@ -115,6 +118,7 @@ func newPrometheus(
instanceSelectorValue string,
thanosCfg ThanosConfiguration,
prometheusCfg PrometheusConfiguration,
tlsHashes map[string]string,
) *monv1.Prometheus {
prometheusSelector := ms.Spec.ResourceSelector

Expand Down Expand Up @@ -193,12 +197,33 @@ func newPrometheus(
}
return []monv1.EnableFeature{}
}(),
Volumes: []corev1.Volume{
{
Name: "thanos-tls-assets",
VolumeSource: corev1.VolumeSource{
Secret: &corev1.SecretVolumeSource{
SecretName: assets.GRPCSecretName,
},
},
},
},
},
Retention: ms.Spec.Retention,
RuleSelector: prometheusSelector,
RuleNamespaceSelector: ms.Spec.NamespaceSelector,
Thanos: &monv1.ThanosSpec{
Image: ptr.To(thanosCfg.Image),
GRPCServerTLSConfig: &monv1.TLSConfig{
CAFile: "/etc/thanos/tls-assets/ca.crt",
CertFile: "/etc/thanos/tls-assets/prometheus-server.crt",
KeyFile: "/etc/thanos/tls-assets/prometheus-server.key",
},
VolumeMounts: []corev1.VolumeMount{
{
Name: "thanos-tls-assets",
MountPath: "/etc/thanos/tls-assets",
},
},
},
},
}
Expand Down Expand Up @@ -230,6 +255,14 @@ func newPrometheus(
prometheus.Spec.Secrets = append(prometheus.Spec.Secrets, tlsConfig.CertificateAuthority.Name)
}

if len(tlsHashes) > 0 {
tlsAnnotations := map[string]string{}
for name, hash := range tlsHashes {
tlsAnnotations[fmt.Sprintf("monitoring.openshift.io/%s-hash", name)] = hash
}
prometheus.Spec.CommonPrometheusFields.PodMetadata.Annotations = tlsAnnotations
}

if prometheusCfg.Image != "" {
prometheus.Spec.CommonPrometheusFields.Image = ptr.To(prometheusCfg.Image)
}
Expand Down
52 changes: 52 additions & 0 deletions pkg/controllers/monitoring/monitoring-stack/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,18 @@ import (
policyv1 "k8s.io/api/policy/v1"
rbacv1 "k8s.io/api/rbac/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/predicate"

stack "github.com/rhobs/observability-operator/pkg/apis/monitoring/v1alpha1"
"github.com/rhobs/observability-operator/pkg/assets"
"github.com/rhobs/observability-operator/pkg/controllers/monitoring/utils"
)

type resourceManager struct {
Expand Down Expand Up @@ -133,6 +137,42 @@ func RegisterWithManager(mgr ctrl.Manager, opts Options) error {
func (rm resourceManager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
logger := rm.logger.WithValues("stack", req.NamespacedName)
logger.Info("Reconciling monitoring stack")

gRPCSecret := v1.Secret{
TypeMeta: metav1.TypeMeta{
APIVersion: v1.SchemeGroupVersion.String(),
Kind: "Secret",
},
ObjectMeta: metav1.ObjectMeta{
Name: assets.GRPCSecretName,
Namespace: req.Namespace,
},
Data: map[string][]byte{},
}
err := rm.k8sClient.Get(ctx,
types.NamespacedName{
Name: assets.GRPCSecretName,
Namespace: req.Namespace,
},
&gRPCSecret)
if client.IgnoreNotFound(err) != nil {
return ctrl.Result{}, err
}

rotate, err := assets.RotateGRPCSecret(&gRPCSecret, logger)
if err != nil {
return ctrl.Result{}, err
}
if rotate {
err = rm.k8sClient.Update(ctx, &gRPCSecret)
if errors.IsNotFound(err) {
err = rm.k8sClient.Create(ctx, &gRPCSecret)
}
if err != nil {
return ctrl.Result{}, err
}
}

ms, err := rm.getStack(ctx, req)
if err != nil {
// retry since some error has occurred
Expand All @@ -149,12 +189,24 @@ func (rm resourceManager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
return ctrl.Result{}, nil
}

// querier <---> sidecar mTLS hashes
mTLSSecretKeys := []string{"prometheus-server.key", "prometheus-server.crt", "ca.crt"}
tlsHashes := map[string]string{}
for _, key := range mTLSSecretKeys {
hash, err := utils.HashOfTLSSecret(assets.GRPCSecretName, key, ms.Namespace, rm.k8sClient)
if err != nil {
return ctrl.Result{}, err
}
tlsHashes[fmt.Sprintf("%s-%s", assets.GRPCSecretName, key)] = hash
}

reconcilers := stackComponentReconcilers(ms,
rm.instanceSelectorKey,
rm.instanceSelectorValue,
rm.thanos,
rm.prometheus,
rm.alertmanager,
tlsHashes,
)
for _, reconciler := range reconcilers {
err := reconciler.Reconcile(ctx, rm.k8sClient, rm.scheme)
Expand Down
Loading

0 comments on commit dd9426c

Please sign in to comment.