Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce validating admission webhook (experimental) #1133

Merged
merged 19 commits into from
Dec 6, 2019
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ BUILD_DATE_PATH := github.com/kudobuilder/kudo/pkg/version.buildDate
DATE_FMT := "%Y-%m-%dT%H:%M:%SZ"
BUILD_DATE := $(shell date -u -d "@$SOURCE_DATE_EPOCH" "+${DATE_FMT}" 2>/dev/null || date -u -r "${SOURCE_DATE_EPOCH}" "+${DATE_FMT}" 2>/dev/null || date -u "+${DATE_FMT}")
LDFLAGS := -X ${GIT_VERSION_PATH}=${GIT_VERSION} -X ${GIT_COMMIT_PATH}=${GIT_COMMIT} -X ${BUILD_DATE_PATH}=${BUILD_DATE}
ENABLE_WEBHOOKS ?= false
zen-dog marked this conversation as resolved.
Show resolved Hide resolved

export GO111MODULE=on

Expand Down Expand Up @@ -67,7 +68,9 @@ manager-clean:
.PHONY: run
# Run against the configured Kubernetes cluster in ~/.kube/config
run:
go run -ldflags "${LDFLAGS}" ./cmd/manager/main.go
# for local development, webhooks are disabled by default
# if you enable them, you have to take care of providing the TLS certs locally
ENABLE_WEBHOOKS=${ENABLE_WEBHOOKS} go run -ldflags "${LDFLAGS}" ./cmd/manager/main.go

.PHONY: deploy
# Install KUDO into a cluster via kubectl kudo init
Expand Down
80 changes: 79 additions & 1 deletion cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,21 @@ package main

import (
"fmt"
"net/http"
"net/url"
"os"
"strings"

"sigs.k8s.io/controller-runtime/pkg/client/apiutil"

"k8s.io/apimachinery/pkg/runtime/schema"

"sigs.k8s.io/controller-runtime/pkg/manager"

"github.com/go-logr/logr"
"github.com/kudobuilder/kudo/pkg/apis/kudo/v1beta1"
"github.com/kudobuilder/kudo/pkg/util/kudo"
"k8s.io/apimachinery/pkg/runtime"
kensipe marked this conversation as resolved.
Show resolved Hide resolved

"github.com/kudobuilder/kudo/pkg/apis"
"github.com/kudobuilder/kudo/pkg/controller/instance"
Expand All @@ -34,7 +48,7 @@ import (
)

func main() {
logf.SetLogger(zap.Logger(false))
logf.SetLogger(zap.New(zap.UseDevMode(false)))
log := logf.Log.WithName("entrypoint")

// Get version of KUDO
Expand All @@ -44,6 +58,7 @@ func main() {
log.Info("setting up manager")
mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
MapperProvider: util.NewDynamicRESTMapper,
CertDir: "/tmp/cert",
})
if err != nil {
log.Error(err, "unable to start manager")
Expand Down Expand Up @@ -93,10 +108,73 @@ func main() {
os.Exit(1)
}

if os.Getenv("ENABLE_WEBHOOKS") == "true" {
err = registerValidatingWebhook(&v1beta1.Instance{}, mgr, log)
if err != nil {
log.Error(err, "unable to create webhook")
os.Exit(1)
}
}

// Start the Cmd
log.Info("Starting the Cmd.")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious why you changed the CRD installation messages to past tense and moved them after the event... and left all the manager messages as logs prior to the event?
It also seems like something that should have been in a separate PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm which messages? I don't see any change on this line

if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
log.Error(err, "unable to run the manager")
os.Exit(1)
}
}

// this is a fork of code in controller-runtime so that we can pass in our own Validator interface
// see kudo.Validator docs for more details
//
// ideally in the future we should switch to just simply doing
// err = ctrl.NewWebhookManagedBy(mgr).
// For(&v1beta1.Instance{}).
// Complete()
//
// that internally calls this method but using their own internal Validator type
func registerValidatingWebhook(obj runtime.Object, mgr manager.Manager, log logr.Logger) error {
gvk, err := apiutil.GVKForObject(obj, mgr.GetScheme())
if err != nil {
return err
}
validator, isValidator := obj.(kudo.Validator)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: these variables are called ok by convention

Suggested change
validator, isValidator := obj.(kudo.Validator)
validator, ok := obj.(kudo.Validator)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this method is adopted from controller-runtime, but since we're the ones calling it, you could pass kudo.Validator directly and avoid the casting?

Copy link
Contributor Author

@alenkacz alenkacz Dec 4, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah I need runtime.object here to be able to get GVK

Copy link
Contributor

@zen-dog zen-dog Dec 4, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's ok then ;)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right right, it's ok now... 😝

if !isValidator {
log.Info("skip registering a validating webhook, kudo.Validator interface is not implemented", "GVK", gvk)
alenkacz marked this conversation as resolved.
Show resolved Hide resolved
return nil
}
vwh := kudo.WebhookFor(validator)
if vwh != nil {
path := generateValidatePath(gvk)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure we need the generate component. I also didn't know path for what until diving into the code. It's the API path. Does pathFor(gvk) make sense? or apiPath(gvk)? or resourcePath(gvk)?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All of that is adopted from the controller-runtime. It might not be ideal but I'd rather keep it this way: easier to bump once cr changes things.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

all these methods actually map to the exact same methods in controller-runtime. https://github.com/kubernetes-sigs/controller-runtime/blob/dc8357113a904bf02721efcde5d92937be39031c/pkg/builder/webhook.go#L163 as the names are not totally out of this world bad, I would probably stick with them, it can make it easier updating or migrating back to their implementation if they extend it in the future


// Checking if the path is already registered.
// If so, just skip it.
if !isAlreadyHandled(path, mgr) {
log.Info("Registering a validating webhook for %v on path %s", gvk, path)
mgr.GetWebhookServer().Register(path, vwh)
}
}
return nil
}

// isAlreadyHandled reports whether the manager's webhook server already has a
// handler whose registered pattern is exactly path. A nil WebhookMux is treated
// as "nothing registered".
func isAlreadyHandled(path string, mgr manager.Manager) bool {
	mux := mgr.GetWebhookServer().WebhookMux
	if mux == nil {
		return false
	}

	// Ask the mux which handler it would pick for a request to this path and
	// which pattern that handler was registered under.
	probe := &http.Request{URL: &url.URL{Path: path}}
	handler, pattern := mux.Handler(probe)
	return handler != nil && pattern == path
}

// generateValidatePath builds the path on which the validating webhook for gvk
// is served: "/validate-<group>-<version>-<kind>", with the dots in the group
// replaced by dashes and the kind lower-cased.
//
// Right now this is in sync with how controller-runtime generates these paths;
// if the strategy changes, the init code and the webhook setup must be updated.
func generateValidatePath(gvk schema.GroupVersionKind) string {
	group := strings.Replace(gvk.Group, ".", "-", -1)
	kind := strings.ToLower(gvk.Kind)
	return strings.Join([]string{"/validate", group, gvk.Version, kind}, "-")
}
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ require (
github.com/dustinkirkland/golang-petname v0.0.0-20170921220637-d3c2ba80e75e
github.com/gogo/protobuf v1.3.1 // indirect
github.com/golangci/golangci-lint v1.21.0
github.com/google/martian v2.1.0+incompatible
github.com/google/shlex v0.0.0-20181106134648-c34317bd91bf
github.com/gophercloud/gophercloud v0.2.0 // indirect
github.com/gorilla/context v1.1.1 // indirect
Expand Down
1 change: 1 addition & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw
github.com/google/gofuzz v0.0.0-20161122191042-44d81051d367/go.mod h1:HP5RmnzzSNb993RKQDq4+1A4ia9nllfqcQFTQJedwGI=
github.com/google/gofuzz v1.0.0 h1:A8PeW59pxE9IoFRqBp37U+mSNaQoZ46F1f0f863XSXw=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/martian v2.1.0+incompatible h1:/CP5g8u/VJHijgedC/Legn3BAbAaWPgecwXBIDzw5no=
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No reason to introduce another logger, I guess?

github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
Expand Down
34 changes: 34 additions & 0 deletions pkg/apis/kudo/v1beta1/instance_validator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package v1beta1

import (
"fmt"

"github.com/kudobuilder/kudo/pkg/util/kudo"
"k8s.io/apimachinery/pkg/runtime"
alenkacz marked this conversation as resolved.
Show resolved Hide resolved
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
)

var _ kudo.Validator = &Instance{}
kensipe marked this conversation as resolved.
Show resolved Hide resolved

// ValidateCreate is the validation hook for CREATE operations on an Instance.
// It is part of the kudo.Validator implementation that this file asserts for *Instance.
// We do not enforce any rules upon creation right now, so it always returns nil.
func (i *Instance) ValidateCreate(req admission.Request) error {
return nil
}

// ValidateUpdate is the hook called when an UPDATE operation triggers our admission webhook.
// It is part of the kudo.Validator implementation of *Instance.
func (i *Instance) ValidateUpdate(old runtime.Object, req admission.Request) error {
if i.Status.AggregatedStatus.Status.IsRunning() && req.RequestSubResource != "status" {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this line would be my biggest pain point:

  1. IsRunning is defined as ExecutionInProgress || ExecutionPending || ErrorStatus. Why wouldn't a user be able to interrupt Pending or _Error_ed plans? Maybe the Error is due to some misconfiguration and will be fixed by a parameter update?
  2. Even when plan execution is InProgress: let's say a Deployment can't get healthy because a Pod needs too many resources (not available in the cluster). As a user, I would like to update .Params.Memory to fix the problem.

I guess we need to talk about interruptible plans, e.g. deploy can be interrupted while backup/restore cannot? And even that is dependent on the specific operator.

P.S. I'd be ok with merging this PR without this line and figure out the exact rules in a followup PR

Copy link
Contributor Author

@alenkacz alenkacz Dec 4, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the question is - how do you interrupt? I am not sure that interrupting by just overriding something in spec is the way to go. I would rather interrupt by explicitly triggering manual plan execution with some force parameter. Then all that will be handled via some extension on KEP-20 and all this will be ok...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(I had an extensive comment here which I unfortunately lost. I'll try to recreate it.)

In my experience one of the main sources of confusion for operators of SDK services are the implicit and opaque (sometimes undocumented) dependencies and conflicts between plans.

One thing that we'll need to think about (probably not in this PR) is how to deal with:

  1. stuck plans (what is the behavior of starting a "deploy" plan when the previous "deploy" execution is stuck? for example, if a step can't complete because a container is crash-looping for some reason)
  2. conflicting plans (what is the behavior of starting a "backup" plan when the "deploy" plan is either in progress or stuck on some step that could be seen as a dependency for any of the "backup" plan steps?)

For example, this SDK test shows a conflict scenario between the "deploy" and "recovery" plans in an SDK service.

The plan execution section of the SDK developer guide has some more (but not all) details about plan behavior.

Feedback from folks like @kaiwalyajoshi and @takirala might also be invaluable when thinking about improving plan behavior in the future and preventing past mistakes.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regarding interrupting plan executions, I think it does make sense. The SDK has
a concept of pausing plans. I think it'd be also useful to think about rolling
back plan executions.

From the SDK developer guide:

Normally, steps progress through statuses in the following order:

PENDING → PREPARED → STARTING → COMPLETE

The status of a phase or a plan is determined by examination of the step elements. A step may enter an ERROR state when its construction is malformed or whenever the service author determines it to be appropriate. The WAITING state occurs when the operator of the service indicates that an element should be paused. An operator might want to pause a deployment for a multitude of reasons, including unexpected failures during an update.

// when updating anything else than status, there shouldn't be a running plan
return fmt.Errorf("cannot update Instance %s/%s right now, there's plan %s in progress", i.Namespace, i.Name, i.Status.AggregatedStatus.ActivePlanName)
}
return nil
}

// ValidateDelete is the hook called when a DELETE operation triggers our admission webhook.
// It is part of the kudo.Validator implementation that this file asserts for *Instance.
// We don't enforce any validation on DELETE right now, so it always returns nil.
func (i *Instance) ValidateDelete(req admission.Request) error {
return nil
}
22 changes: 11 additions & 11 deletions pkg/apis/kudo/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 14 additions & 1 deletion pkg/kudoctl/cmd/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ type initCmd struct {
crdOnly bool
home kudohome.Home
client *kube.Client
webhooks string
}

func newInitCmd(fs afero.Fs, out io.Writer) *cobra.Command {
Expand Down Expand Up @@ -98,6 +99,7 @@ func newInitCmd(fs afero.Fs, out io.Writer) *cobra.Command {
f.BoolVar(&i.crdOnly, "crd-only", false, "Add only KUDO CRDs to your cluster")
f.BoolVarP(&i.wait, "wait", "w", false, "Block until KUDO manager is running and ready to receive requests")
f.Int64Var(&i.timeout, "wait-timeout", 300, "Wait timeout to be used")
f.StringVar(&i.webhooks, "webhook", "", "List of webhooks exposed, when empty, no webhook server will be started (the only webhook right now is InstanceValidation)")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
f.StringVar(&i.webhooks, "webhook", "", "List of webhooks exposed, when empty, no webhook server will be started (the only webhook right now is InstanceValidation)")
f.StringVar(&i.webhooks, "webhook", "", "List of webhooks to install separated by commas")

"list of webhooks exposed" isn't phrased corrected. "to be exposed" is closer. It is odd to add the "(...)" to a cli output or to specify limitations that will need to be edited and hopefully not forgotten.

The info I expect a user to be concerned with... is... is it the name of the class? does it include the package name? where can I find the mapping of strings -> webhooks.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm I really wanted to help the user know what to do here, because if you don't know what the supported options are you have to go to the docs or code (?) and that's not a very nice experience. I think the CLI should help you do this right. I rephrased it a bit.

Yes, we will need to update it with every new webhook, but this is the best documentation we have and the best documentation we can get to the user, so I think we should be fine with it and just really take care of these docs and keep them up to date. We certainly won't add a new webhook every week...


return cmd
}
Expand All @@ -118,13 +120,16 @@ func (initCmd *initCmd) validate(flags *flag.FlagSet) error {
if flags.Changed("wait-timeout") && !initCmd.wait {
return errors.New("wait-timeout is only useful when using the flag '--wait'")
}
if initCmd.webhooks != "" && initCmd.webhooks != "InstanceValidation" {
return errors.New("webhooks can be only empty or contain a single string 'InstanceValidation'. No other webhooks supported right now.")
}

return nil
}

// run initializes local config and installs KUDO manager to Kubernetes cluster.
func (initCmd *initCmd) run() error {
opts := cmdInit.NewOptions(initCmd.version, initCmd.ns)
opts := cmdInit.NewOptions(initCmd.version, initCmd.ns, []string{initCmd.webhooks})
// if image provided switch to it.
if initCmd.image != "" {
opts.Image = initCmd.image
Expand All @@ -149,6 +154,14 @@ func (initCmd *initCmd) run() error {
}
mans = append(mans, prereq...)

if len(opts.Webhooks) != 0 { // right now there's only 0 or 1 webhook, so this is good enough
prereq, err := cmdInit.WebhookManifests(opts.Namespace)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WebhookManifests likely needs to be passed an options struct with a slice... or another parameter that is a slice of options.

if err != nil {
return err
}
mans = append(mans, prereq...)
}

deploy, err := cmdInit.ManagerManifests(opts)
if err != nil {
return err
Expand Down
28 changes: 23 additions & 5 deletions pkg/kudoctl/cmd/init/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,12 @@ type Options struct {
TerminationGracePeriodSeconds int64
// Image defines the image to be used
Image string
// Enable validation
Webhooks []string
}

// NewOptions provides an option struct with defaults
func NewOptions(v string, ns string) Options {
func NewOptions(v string, ns string, webhooks []string) Options {

if v == "" {
v = version.Get().GitVersion
Expand All @@ -57,28 +59,36 @@ func NewOptions(v string, ns string) Options {
Namespace: ns,
TerminationGracePeriodSeconds: defaultGracePeriod,
Image: fmt.Sprintf("kudobuilder/controller:v%v", v),
Webhooks: webhooks,
}
}

// Install uses the Kubernetes client to install KUDO.
//
// The steps run in order and the first error aborts the installation: the CRDs,
// then (unless crdOnly is set) the prerequisites such as service accounts, the
// webhook when at least one webhook was requested in opts.Webhooks, and finally
// the KUDO manager.
func Install(client *kube.Client, opts Options, crdOnly bool) error {
	clog.Printf("✅ installing crds")
	if err := installCrds(client.ExtClient); err != nil {
		return err
	}
	clog.Printf("✅ installed crds")
	if crdOnly {
		return nil
	}

	clog.Printf("✅ preparing service accounts and other requirements for controller to run")
	if err := installPrereqs(client.KubeClient, opts); err != nil {
		return err
	}
	clog.Printf("✅ installed service accounts and other requirements for controller to run")

	// Options carries the requested webhooks as a list of names; there is no
	// separate boolean switch, so the list itself decides whether to install.
	if webhooksRequested(opts.Webhooks) {
		if err := installWebhook(client.KubeClient, client.DynamicClient, opts.Namespace); err != nil {
			return err
		}
		clog.Printf("✅ installed webhook")
	}

	clog.Printf("✅ installing kudo controller")
	if err := installManager(client.KubeClient, opts); err != nil {
		return err
	}
	clog.Printf("✅ installed kudo controller")
	return nil
}

// webhooksRequested reports whether names contains at least one non-empty
// webhook name. Empty entries are ignored because the CLI wraps its (possibly
// empty) --webhook flag value into a one-element slice.
func webhooksRequested(names []string) bool {
	for _, name := range names {
		if name != "" {
			return true
		}
	}
	return false
}

Expand Down Expand Up @@ -184,13 +194,14 @@ func generateDeployment(opts Options) *appsv1.StatefulSet {
Env: []v1.EnvVar{
{Name: "POD_NAMESPACE", ValueFrom: &v1.EnvVarSource{FieldRef: &v1.ObjectFieldSelector{FieldPath: "metadata.namespace"}}},
{Name: "SECRET_NAME", Value: "kudo-webhook-server-secret"},
{Name: "ENABLE_WEBHOOKS", Value: enableWebhooks(opts)},
},
Image: image,
ImagePullPolicy: "Always",
Name: "manager",
Ports: []v1.ContainerPort{
// name matters for service
{ContainerPort: 9876, Name: "webhook-server", Protocol: "TCP"},
{ContainerPort: 443, Name: "webhook-server", Protocol: "TCP"},
},
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
Expand All @@ -214,6 +225,13 @@ func generateDeployment(opts Options) *appsv1.StatefulSet {
return d
}

func enableWebhooks(options Options) string {
alenkacz marked this conversation as resolved.
Show resolved Hide resolved
if options.EnableValidation {
return "true"
}
return "false"
}

func managerLabels() labels.Set {
labels := generateLabels(map[string]string{"control-plane": "controller-manager", "controller-tools.k8s.io": "1.0"})
return labels
Expand Down
Loading