Skip to content

Commit

Permalink
feat: Thanos Querier to Thanos sidecar mTLS
Browse files Browse the repository at this point in the history
  • Loading branch information
vyzigold committed Jun 11, 2024
1 parent db0b62f commit 107cdc2
Show file tree
Hide file tree
Showing 6 changed files with 237 additions and 0 deletions.
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ require (
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
github.com/oklog/ulid v1.3.1 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/openshift/library-go v0.0.0-20240216151214-738f3fa4ccf8 // indirect
github.com/opentracing/opentracing-go v1.2.0 // indirect
github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
Expand Down Expand Up @@ -119,6 +120,7 @@ require (
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/apiserver v0.29.3 // indirect
k8s.io/klog/v2 v2.110.1 // indirect
k8s.io/kube-openapi v0.0.0-20231129212854-f0671cc7e66a // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,8 @@ github.com/opencontainers/image-spec v1.0.2 h1:9yCKha/T5XdGtO0q9Q9a6T5NUCsTn/DrB
github.com/opencontainers/image-spec v1.0.2/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0=
github.com/openshift/api v0.0.0-20240301093301-ce10821dc999 h1:+S998xHiJApsJZjRAO8wyedU9GfqFd8mtwWly6LqHDo=
github.com/openshift/api v0.0.0-20240301093301-ce10821dc999/go.mod h1:CxgbWAlvu2iQB0UmKTtRu1YfepRg1/vJ64n2DlIEVz4=
github.com/openshift/library-go v0.0.0-20240216151214-738f3fa4ccf8 h1:dKtHGYiOwl0DKZEWBW4MFWFS6IYW02AVD1WSuUAVwEo=
github.com/openshift/library-go v0.0.0-20240216151214-738f3fa4ccf8/go.mod h1:ePlaOqUiPplRc++6aYdMe+2FmXb2xTNS9Nz5laG2YmI=
github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs=
github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc=
github.com/ovh/go-ovh v1.4.3 h1:Gs3V823zwTFpzgGLZNI6ILS4rmxZgJwJCz54Er9LwD0=
Expand Down Expand Up @@ -864,6 +866,8 @@ k8s.io/apiextensions-apiserver v0.29.3 h1:9HF+EtZaVpFjStakF4yVufnXGPRppWFEQ87qnO
k8s.io/apiextensions-apiserver v0.29.3/go.mod h1:po0XiY5scnpJfFizNGo6puNU6Fq6D70UJY2Cb2KwAVc=
k8s.io/apimachinery v0.29.3 h1:2tbx+5L7RNvqJjn7RIuIKu9XTsIZ9Z5wX2G22XAa5EU=
k8s.io/apimachinery v0.29.3/go.mod h1:hx/S4V2PNW4OMg3WizRrHutyB5la0iCUbZym+W0EQIU=
k8s.io/apiserver v0.29.3 h1:xR7ELlJ/BZSr2n4CnD3lfA4gzFivh0wwfNfz9L0WZcE=
k8s.io/apiserver v0.29.3/go.mod h1:hrvXlwfRulbMbBgmWRQlFru2b/JySDpmzvQwwk4GUOs=
k8s.io/client-go v0.29.3 h1:R/zaZbEAxqComZ9FHeQwOh3Y1ZUs7FaHKZdQtIc2WZg=
k8s.io/client-go v0.29.3/go.mod h1:tkDisCvgPfiRpxGnOORfkljmS+UrW+WtXAy2fTvXJB0=
k8s.io/component-base v0.29.3 h1:Oq9/nddUxlnrCuuR2K/jp6aflVvc0uDvxMzAWxnGzAo=
Expand Down
148 changes: 148 additions & 0 deletions pkg/assets/certificate_generator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
package assets

import (
"crypto/rand"
"crypto/x509"
"fmt"
"math/big"
"time"

"github.com/go-logr/logr"

"github.com/openshift/library-go/pkg/crypto"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apiserver/pkg/authentication/user"
)

// certificateLifetime is library-go's default certificate lifetime, which is
// expressed in days, converted to a time.Duration.
const certificateLifetime = time.Duration(crypto.DefaultCertificateLifetimeInDays) * 24 * time.Hour
// GRPCSecretName is the name of the Secret that carries the TLS material
// (CA, client and server certificates) managed by RotateGRPCSecret.
const GRPCSecretName = "thanos-grpc-secret"

// needsNewCert reports whether a certificate valid from notBefore until
// notAfter is due for rotation at the instant returned by now. Rotation is due
// once the current time is strictly past the point at which only one fifth of
// the total validity period remains.
//
// Adapted from
// https://github.com/openshift/library-go/blob/08c2fd1b452520da35ad210930ea9d100545589a/pkg/operator/certrotation/signer.go#L68-L86
// with the refresh time handling left out.
func needsNewCert(notBefore, notAfter time.Time, now func() time.Time) bool {
	validity := notAfter.Sub(notBefore)
	rotateAt := notAfter.Add(-(validity / 5))
	return rotateAt.Before(now())
}

// RotateGRPCSecret makes sure that s carries a usable CA together with the
// client and server certificates derived from it, regenerating all of them
// when needed. It only modifies s in memory; persisting the Secret is left to
// the caller.
//
// The CA is read from the "ca.crt" and "ca.key" entries of s.Data. New material
// is generated when either entry is missing, when the CA cannot be parsed, or
// when needsNewCert reports that the CA is due for rotation:
//
//   - if no CA could be parsed, a new self-signed CA is created;
//   - otherwise the existing CA is re-issued with the same key, a new validity
//     window and an incremented serial number.
//
// Whenever the CA is (re)generated, a client certificate for "thanos-querier"
// and a server certificate for the host name "prometheus-grpc" are issued from
// it and stored under "thanos-querier-client.{crt,key}" and
// "prometheus-server.{crt,key}".
//
// The returned bool reports whether rotation was required; when it is false, s
// is left untouched. On error the bool is still true if rotation had been
// started, and s may have been partially updated.
//
// Adapted from
// https://github.com/openshift/cluster-monitoring-operator/blob/765d0b0369b176a5997d787b6710783437172879/pkg/manifests/tls.go#L113
func RotateGRPCSecret(s *v1.Secret, logger logr.Logger) (bool, error) {
	if s == nil {
		return false, fmt.Errorf("error rotating gRPC certificates: secret is nil")
	}

	// Reading from a nil map is safe, so s.Data does not need to be initialised yet.
	curCABytes, crtPresent := s.Data["ca.crt"]
	curCAKeyBytes, keyPresent := s.Data["ca.key"]
	rotate := !crtPresent || !keyPresent

	var curCA *crypto.CA
	if !rotate {
		var err error
		curCA, err = crypto.GetCAFromBytes(curCABytes, curCAKeyBytes)
		switch {
		case err != nil:
			logger.Info(fmt.Sprintf("generating a new CA due to error reading CA: %v", err))
			// Make sure an unreadable CA is never used as a rotation template.
			curCA = nil
			rotate = true
		case needsNewCert(curCA.Config.Certs[0].NotBefore, curCA.Config.Certs[0].NotAfter, time.Now):
			logger.Info("generating new CA, because the current one has less than 1/5 of its validity period remaining")
			rotate = true
		}
	}

	if !rotate {
		return false, nil
	}

	var newCA *crypto.CA
	if curCA == nil {
		newCAConfig, err := crypto.MakeSelfSignedCAConfig(
			fmt.Sprintf("%s@%d", "openshift-cluster-monitoring", time.Now().Unix()),
			crypto.DefaultCertificateLifetimeInDays,
		)
		if err != nil {
			return rotate, fmt.Errorf("error generating self signed CA: %w", err)
		}

		newCA = &crypto.CA{
			SerialGenerator: &crypto.RandomSerialGenerator{},
			Config:          newCAConfig,
		}
	} else {
		// Work on a copy so that the certificate held by curCA is not mutated.
		template := *curCA.Config.Certs[0]
		now := time.Now()
		// Back-date slightly so the certificate is valid immediately.
		template.NotBefore = now.Add(-1 * time.Second)
		template.NotAfter = now.Add(certificateLifetime)
		template.SerialNumber = new(big.Int).Add(template.SerialNumber, big.NewInt(1))

		// Self-signed: the template doubles as the parent and the existing key is kept.
		newCACert, err := createCertificate(&template, &template, template.PublicKey, curCA.Config.Key)
		if err != nil {
			return rotate, fmt.Errorf("error rotating CA: %w", err)
		}

		newCA = &crypto.CA{
			SerialGenerator: &crypto.RandomSerialGenerator{},
			Config: &crypto.TLSCertificateConfig{
				Certs: []*x509.Certificate{newCACert},
				Key:   curCA.Config.Key,
			},
		}
	}

	newCABytes, newCAKeyBytes, err := newCA.Config.GetPEMBytes()
	if err != nil {
		return rotate, fmt.Errorf("error getting PEM bytes from CA: %w", err)
	}

	// Writing to a nil map panics; a Secret without any data has a nil Data map.
	if s.Data == nil {
		s.Data = map[string][]byte{}
	}

	s.Data["ca.crt"] = newCABytes
	s.Data["ca.key"] = newCAKeyBytes

	{
		cfg, err := newCA.MakeClientCertificateForDuration(
			&user.DefaultInfo{
				Name: "thanos-querier",
			},
			certificateLifetime,
		)
		if err != nil {
			return rotate, fmt.Errorf("error making client certificate: %w", err)
		}

		crt, key, err := cfg.GetPEMBytes()
		if err != nil {
			return rotate, fmt.Errorf("error getting PEM bytes for thanos querier client certificate: %w", err)
		}
		s.Data["thanos-querier-client.crt"] = crt
		s.Data["thanos-querier-client.key"] = key
	}

	{
		cfg, err := newCA.MakeServerCert(
			sets.NewString("prometheus-grpc"),
			crypto.DefaultCertificateLifetimeInDays,
		)
		if err != nil {
			return rotate, fmt.Errorf("error making server certificate: %w", err)
		}

		crt, key, err := cfg.GetPEMBytes()
		if err != nil {
			return rotate, fmt.Errorf("error getting PEM bytes for prometheus-k8s server certificate: %w", err)
		}
		s.Data["prometheus-server.crt"] = crt
		s.Data["prometheus-server.key"] = key
	}

	return rotate, nil
}

// createCertificate signs a new certificate described by template with the
// private key priv, using parent as the issuer and pub as the certificate's
// public key, and returns it in parsed x509.Certificate form.
//
// It returns an error, rather than panicking, when template or parent is nil,
// and wraps any error reported while creating or parsing the certificate.
func createCertificate(template, parent *x509.Certificate, pub, priv interface{}) (*x509.Certificate, error) {
	// x509.CreateCertificate dereferences both certificates without checking them.
	if template == nil || parent == nil {
		return nil, fmt.Errorf("error creating certificate: template and parent must not be nil")
	}

	der, err := x509.CreateCertificate(rand.Reader, template, parent, pub, priv)
	if err != nil {
		return nil, fmt.Errorf("error creating certificate: %w", err)
	}

	// The DER output holds exactly one certificate, so parse it directly
	// instead of indexing into a slice of results.
	cert, err := x509.ParseCertificate(der)
	if err != nil {
		return nil, fmt.Errorf("error parsing certificate: %w", err)
	}
	return cert, nil
}
22 changes: 22 additions & 0 deletions pkg/controllers/monitoring/monitoring-stack/components.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (

stack "github.com/rhobs/observability-operator/pkg/apis/monitoring/v1alpha1"
"github.com/rhobs/observability-operator/pkg/reconciler"
"github.com/rhobs/observability-operator/pkg/assets"
)

const AdditionalScrapeConfigsSelfScrapeKey = "self-scrape-config"
Expand Down Expand Up @@ -189,12 +190,33 @@ func newPrometheus(
}
return []string{}
}(),
Volumes: []corev1.Volume{
{
Name: "thanos-tls-assets",
VolumeSource: corev1.VolumeSource{
Secret: &corev1.SecretVolumeSource{
SecretName: assets.GRPCSecretName,
},
},
},
},
},
Retention: ms.Spec.Retention,
RuleSelector: prometheusSelector,
RuleNamespaceSelector: ms.Spec.NamespaceSelector,
Thanos: &monv1.ThanosSpec{
Image: ptr.To(thanosCfg.Image),
GRPCServerTLSConfig: &monv1.TLSConfig{
CAFile: "/etc/thanos/tls-assets/ca.crt",
CertFile: "/etc/thanos/tls-assets/prometheus-server.crt",
KeyFile: "/etc/thanos/tls-assets/prometheus-server.key",
},
VolumeMounts: []corev1.VolumeMount{
{
Name: "thanos-tls-assets",
MountPath: "/etc/thanos/tls-assets",
},
},
},
},
}
Expand Down
39 changes: 39 additions & 0 deletions pkg/controllers/monitoring/monitoring-stack/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,17 @@ import (
policyv1 "k8s.io/api/policy/v1"
rbacv1 "k8s.io/api/rbac/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/predicate"

stack "github.com/rhobs/observability-operator/pkg/apis/monitoring/v1alpha1"
"github.com/rhobs/observability-operator/pkg/assets"
)

type resourceManager struct {
Expand Down Expand Up @@ -133,6 +136,42 @@ func RegisterWithManager(mgr ctrl.Manager, opts Options) error {
func (rm resourceManager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
logger := rm.logger.WithValues("stack", req.NamespacedName)
logger.Info("Reconciling monitoring stack")

gRPCSecret := v1.Secret{
TypeMeta: metav1.TypeMeta{
APIVersion: v1.SchemeGroupVersion.String(),
Kind: "Secret",
},
ObjectMeta: metav1.ObjectMeta{
Name: assets.GRPCSecretName,
Namespace: req.Namespace,
},
Data: map[string][]byte{},
}
err := rm.k8sClient.Get(ctx,
types.NamespacedName{
Name: assets.GRPCSecretName,
Namespace: req.Namespace,
},
&gRPCSecret)
if client.IgnoreNotFound(err) != nil {
return ctrl.Result{}, err
}

rotate, err := assets.RotateGRPCSecret(&gRPCSecret, logger)
if err != nil {
return ctrl.Result{}, err
}
if rotate {
err = rm.k8sClient.Update(ctx, &gRPCSecret)
if errors.IsNotFound(err) {
err = rm.k8sClient.Create(ctx, &gRPCSecret)
}
if err != nil {
return ctrl.Result{}, err
}
}

ms, err := rm.getStack(ctx, req)
if err != nil {
// retry since some error has occurred
Expand Down
22 changes: 22 additions & 0 deletions pkg/controllers/monitoring/thanos-querier/components.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"k8s.io/utils/ptr"

msoapi "github.com/rhobs/observability-operator/pkg/apis/monitoring/v1alpha1"
"github.com/rhobs/observability-operator/pkg/assets"
"github.com/rhobs/observability-operator/pkg/reconciler"
)

Expand All @@ -29,6 +30,11 @@ func newThanosQuerierDeployment(name string, spec *msoapi.ThanosQuerier, sidecar
"--log.format=logfmt",
"--query.replica-label=prometheus_replica",
"--query.auto-downsampling",
"--grpc-client-tls-secure",
"--grpc-client-server-name=prometheus-grpc",
"--grpc-client-tls-ca=/etc/thanos/tls-sidecar-assets/ca.crt",
"--grpc-client-tls-key=/etc/thanos/tls-sidecar-assets/thanos-querier-client.key",
"--grpc-client-tls-cert=/etc/thanos/tls-sidecar-assets/thanos-querier-client.crt",
}
for _, endpoint := range sidecarUrls {
args = append(args, fmt.Sprintf("--endpoint=%s", endpoint))
Expand Down Expand Up @@ -86,6 +92,12 @@ func newThanosQuerierDeployment(name string, spec *msoapi.ThanosQuerier, sidecar
Type: corev1.SeccompProfileTypeRuntimeDefault,
},
},
VolumeMounts: []corev1.VolumeMount{
{
Name: "thanos-sidecar-tls-assets",
MountPath: "/etc/thanos/tls-sidecar-assets",
},
},
},
},
NodeSelector: map[string]string{
Expand All @@ -97,6 +109,16 @@ func newThanosQuerierDeployment(name string, spec *msoapi.ThanosQuerier, sidecar
Type: corev1.SeccompProfileTypeRuntimeDefault,
},
},
Volumes: []corev1.Volume{
{
Name: "thanos-sidecar-tls-assets",
VolumeSource: corev1.VolumeSource{
Secret: &corev1.SecretVolumeSource{
SecretName: assets.GRPCSecretName,
},
},
},
},
},
},
ProgressDeadlineSeconds: ptr.To(int32(300)),
Expand Down

0 comments on commit 107cdc2

Please sign in to comment.