Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Surface cpu and mem requests forbidden errors (and other ones too) in KSVC creation #14453

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions pkg/apis/serving/v1/revision_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package v1
import (
"time"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
net "knative.dev/networking/pkg/apis/networking"
"knative.dev/pkg/kmeta"
Expand Down Expand Up @@ -143,3 +144,9 @@ func (rs *RevisionStatus) IsActivationRequired() bool {
c := revisionCondSet.Manage(rs).GetCondition(RevisionConditionActive)
return c != nil && c.Status != corev1.ConditionTrue
}

// IsReplicaSetFailure returns true if the deployment replicaset failed to create
func (rs *RevisionStatus) IsReplicaSetFailure(deploymentStatus *appsv1.DeploymentStatus) bool {
ds := serving.TransformDeploymentStatus(deploymentStatus)
return ds != nil && ds.GetCondition(serving.DeploymentConditionReplicaSetReady).IsFalse()
}
43 changes: 43 additions & 0 deletions pkg/apis/serving/v1/revision_helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"testing"
"time"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/equality"

Expand Down Expand Up @@ -267,3 +268,45 @@ func TestSetRoutingState(t *testing.T) {
t.Error("Expected default value for unparsable annotationm but got:", got)
}
}

func TestIsReplicaSetFailure(t *testing.T) {
revisionStatus := RevisionStatus{}
cases := []struct {
name string
status appsv1.DeploymentStatus
IsReplicaSetFailure bool
}{{
name: "empty deployment status should not be a failure",
status: appsv1.DeploymentStatus{},
}, {
name: "Ready deployment status should not be a failure",
status: appsv1.DeploymentStatus{
Conditions: []appsv1.DeploymentCondition{{
Type: appsv1.DeploymentAvailable, Status: corev1.ConditionTrue,
}},
},
}, {
name: "ReplicasetFailure true should be a failure",
status: appsv1.DeploymentStatus{
Conditions: []appsv1.DeploymentCondition{{
Type: appsv1.DeploymentReplicaFailure, Status: corev1.ConditionTrue,
}},
},
IsReplicaSetFailure: true,
}, {
name: "ReplicasetFailure false should not be a failure",
status: appsv1.DeploymentStatus{
Conditions: []appsv1.DeploymentCondition{{
Type: appsv1.DeploymentReplicaFailure, Status: corev1.ConditionFalse,
}},
},
}}

for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
if got, want := revisionStatus.IsReplicaSetFailure(&tc.status), tc.IsReplicaSetFailure; got != want {
t.Errorf("IsReplicaSetFailure = %v, want: %v", got, want)
}
})
}
}
2 changes: 1 addition & 1 deletion pkg/apis/serving/v1/revision_lifecycle.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ const (
ReasonProgressDeadlineExceeded = "ProgressDeadlineExceeded"
)

// RevisionConditionActive is not part of the RevisionConditionSet because we can have Inactive Ready Revisions (scale to zero)
var revisionCondSet = apis.NewLivingConditionSet(
RevisionConditionResourcesAvailable,
RevisionConditionContainerHealthy,
Expand Down Expand Up @@ -171,7 +172,6 @@ func (rs *RevisionStatus) PropagateDeploymentStatus(original *appsv1.DeploymentS
func (rs *RevisionStatus) PropagateAutoscalerStatus(ps *autoscalingv1alpha1.PodAutoscalerStatus) {
// Reflect the PA status in our own.
cond := ps.GetCondition(autoscalingv1alpha1.PodAutoscalerConditionReady)

rs.ActualReplicas = nil
if ps.ActualScale != nil && *ps.ActualScale >= 0 {
rs.ActualReplicas = ps.ActualScale
Expand Down
6 changes: 6 additions & 0 deletions pkg/reconciler/revision/reconcile_resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ func (c *Reconciler) reconcileDeployment(ctx context.Context, rev *v1.Revision)
}
}

// If the replicaset is failing we assume its an error we have to surface
if rev.Status.IsReplicaSetFailure(&deployment.Status) {
gabo1208 marked this conversation as resolved.
Show resolved Hide resolved
rev.Status.PropagateDeploymentStatus(&deployment.Status)
return nil
}
Comment on lines +83 to +86
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My initial thought was to have this in the else clause. But seeing this here I wonder if there is a case where creating the Deployment will succeed but response includes a status condition with a ReplicaFailure?

Otherwise this section might be easier to reason about if we push if !rev.Status.IsActivationRequired() down into
PropagateDeploymentStatus and we can do the replica failure check there

Copy link
Member Author

@gabo1208 gabo1208 Oct 17, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that I revisit the code, yes the reason to have it there is for newly created deployments with replicaset errors.

I couldn't move it inside propagate status inside since they are both different cases. The revision required surface an error but it does not care to be overwritten later, and you can have a replicaset error without a replicaset error, with the replicaset error we want to return immediately after so we need to check in the reconciler.


// If a container keeps crashing (no active pods in the deployment although we want some)
if *deployment.Spec.Replicas > 0 && deployment.Status.AvailableReplicas == 0 {
pods, err := c.kubeclient.CoreV1().Pods(ns).List(ctx, metav1.ListOptions{LabelSelector: metav1.FormatLabelSelector(deployment.Spec.Selector)})
Expand Down
28 changes: 28 additions & 0 deletions pkg/reconciler/revision/table_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,34 @@ func TestReconcile(t *testing.T) {
Object: pa("foo", "deploy-timeout", WithReachabilityUnreachable),
}},
Key: "foo/deploy-timeout",
}, {
Name: "revision failure because replicaset and deployment failed",
// This test defines a Revision InProgress with status Deploying but with a
// Deployment with a ReplicaSet failure, so the wanted status update is for
// the Deployment FailedCreate error to bubble up to the Revision
Objects: []runtime.Object{
Revision("foo", "deploy-replicaset-failure",
WithLogURL, MarkActivating("Deploying", ""),
WithRoutingState(v1.RoutingStateActive, fc),
withDefaultContainerStatuses(),
WithRevisionObservedGeneration(1),
MarkContainerHealthyUnknown("Deploying"),
),
pa("foo", "deploy-replicaset-failure", WithReachabilityUnreachable),
replicaFailureDeploy(deploy(t, "foo", "deploy-replicaset-failure"), "I ReplicaSet failed!"),
image("foo", "deploy-replicaset-failure"),
},
WantStatusUpdates: []clientgotesting.UpdateActionImpl{{
Object: Revision("foo", "deploy-replicaset-failure",
WithLogURL, MarkResourcesUnavailable("FailedCreate", "I ReplicaSet failed!"),
withDefaultContainerStatuses(),
WithRoutingState(v1.RoutingStateActive, fc),
MarkContainerHealthyUnknown("Deploying"),
WithRevisionObservedGeneration(1),
MarkActivating("Deploying", ""),
),
}},
Key: "foo/deploy-replicaset-failure",
}, {
Name: "surface replica failure",
// Test the propagation of FailedCreate from Deployment.
Expand Down
6 changes: 6 additions & 0 deletions pkg/testing/v1/revision.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,12 @@ func MarkDeploying(reason string) RevisionOption {
}
}

func MarkContainerHealthyUnknown(reason string) RevisionOption {
return func(r *v1.Revision) {
r.Status.MarkContainerHealthyUnknown(reason, "")
}
}

// MarkProgressDeadlineExceeded calls the method of the same name on the Revision
// with the message we expect the Revision Reconciler to pass.
func MarkProgressDeadlineExceeded(message string) RevisionOption {
Expand Down
29 changes: 15 additions & 14 deletions test/e2e/resource_quota_error_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,11 @@ func TestResourceQuotaError(t *testing.T) {

clients := test.Setup(t, test.Options{Namespace: "rq-test"})
const (
errorReason = "RevisionFailed"
errorMsgScale = "Initial scale was never achieved"
errorMsgQuota = "forbidden: exceeded quota"
revisionReason = "ProgressDeadlineExceeded"
errorReason = "RevisionFailed"
progressDeadlineReason = "ProgressDeadlineExceeded"
waitReason = "ContainerCreating"
errorMsgQuota = "forbidden: exceeded quota"
revisionReason = "RevisionFailed"
)
names := test.ResourceNames{
Service: test.ObjectNameForTest(t),
Expand Down Expand Up @@ -80,16 +81,12 @@ func TestResourceQuotaError(t *testing.T) {
err = v1test.WaitForServiceState(clients.ServingClient, names.Service, func(r *v1.Service) (bool, error) {
cond = r.Status.GetCondition(v1.ServiceConditionConfigurationsReady)
if cond != nil && !cond.IsUnknown() {
// Can fail with either a progress deadline exceeded error or an exceeded resource quota error
if strings.Contains(cond.Message, errorMsgScale) && cond.IsFalse() {
return true, nil
}
if strings.Contains(cond.Message, errorMsgQuota) && cond.IsFalse() {
if cond.Reason == errorReason && cond.IsFalse() {
return true, nil
}
t.Logf("Reason: %s ; Message: %s ; Status: %s", cond.Reason, cond.Message, cond.Status)
return true, fmt.Errorf("the service %s was not marked with expected error condition (Reason=%q, Message=%q, Status=%q), but with (Reason=%q, Message=%q, Status=%q)",
names.Config, errorReason, errorMsgScale, "False", cond.Reason, cond.Message, cond.Status)
names.Config, errorReason, "", "False", cond.Reason, cond.Message, cond.Status)
}
return false, nil
}, "ContainerUnscheduleable")
Expand All @@ -113,15 +110,19 @@ func TestResourceQuotaError(t *testing.T) {
err = v1test.CheckRevisionState(clients.ServingClient, revisionName, func(r *v1.Revision) (bool, error) {
cond := r.Status.GetCondition(v1.RevisionConditionReady)
if cond != nil {
// Can fail with either a progress deadline exceeded error or an exceeded resource quota error
if cond.Reason == revisionReason && strings.Contains(cond.Message, errorMsgScale) {
if strings.Contains(cond.Message, errorMsgQuota) && cond.IsFalse() {
return true, nil
}
if strings.Contains(cond.Message, errorMsgQuota) && cond.IsFalse() {
// Can fail with either a progress deadline exceeded error
if cond.Reason == progressDeadlineReason {
return true, nil
}
// wait for the container creation
if cond.Reason == waitReason {
return false, nil
}
return true, fmt.Errorf("the revision %s was not marked with expected error condition (Reason=%q, Message=%q), but with (Reason=%q, Message=%q)",
revisionName, revisionReason, errorMsgScale, cond.Reason, cond.Message)
revisionName, revisionReason, errorMsgQuota, cond.Reason, cond.Message)
}
return false, nil
})
Expand Down
Loading