Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: skip pod when getTopologyHints in ORM #620

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions pkg/agent/orm/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,15 @@ func (m *ManagerImpl) GetTopologyHints(pod *v1.Pod, container *v1.Container) map
return nil
}

skipPod, err := isSkippedPod(pod, m.qosConfig)
if err != nil {
klog.Errorf("[ORM] check skip pod fail for pod: %v, err: %v", pod.Name, err)
return nil
}
if skipPod {
return nil
}

podUID := string(pod.UID)
contName := container.Name
containerType, containerIndex, err := GetContainerTypeAndIndex(pod, container)
Expand Down Expand Up @@ -415,13 +424,12 @@ func (m *ManagerImpl) processDeletePod(podUID string) error {
func (m *ManagerImpl) addContainer(pod *v1.Pod, container *v1.Container) error {
klog.V(5).Infof("[ORM] addContainer, pod: %v, container: %v", pod.Name, container.Name)

systemCores, err := isPodKatalystQoSLevelSystemCores(m.qosConfig, pod)
skipPod, err := isSkippedPod(pod, m.qosConfig)
if err != nil {
klog.Errorf("[ORM] check pod %s qos level fail: %v", pod.Name, err)
return err
}

if native.CheckDaemonPod(pod) && !systemCores {
if skipPod {
klog.Infof("[ORM] skip pod: %s/%s, container: %s resource allocation",
pod.Namespace, pod.Name, container.Name)
return nil
Expand Down Expand Up @@ -563,14 +571,15 @@ func (m *ManagerImpl) reconcile() {
if pod == nil {
continue
}
systemCores, err := isPodKatalystQoSLevelSystemCores(m.qosConfig, pod)
skipPod, err := isSkippedPod(pod, m.qosConfig)
if err != nil {
klog.Errorf("[ORM] check pod %s qos level fail: %v", pod.Name, err)
continue
}

if native.CheckDaemonPod(pod) && !systemCores {
if skipPod {
continue
}

for _, container := range pod.Spec.Containers {

needsReAllocate := false
Expand Down Expand Up @@ -732,13 +741,17 @@ func isSkippedContainer(pod *v1.Pod, container *v1.Container) bool {
return containerType == pluginapi.ContainerType_INIT
}

func isPodKatalystQoSLevelSystemCores(qosConfig *generic.QoSConfiguration, pod *v1.Pod) (bool, error) {
qosLevel, err := qosConfig.GetQoSLevelForPod(pod)
// isSkippedPod reports whether the given pod should be skipped by the ORM
// manager. It returns true only when native.CheckDaemonPod(pod) holds and
// qosConfig.CheckSystemQoSForPod(pod) reports false; every other pod yields
// false. If the QoS check itself fails, the error is logged and returned
// together with false.
func isSkippedPod(pod *v1.Pod, qosConfig *generic.QoSConfiguration) (bool, error) {
systemCores, err := qosConfig.CheckSystemQoSForPod(pod)
if err != nil {
klog.Errorf("[ORM] check pod %s qos level fail: %v", pod.Name, err)
return false, err
}

// NOTE(review): the next line references qosLevel, which is not declared in
// this function — it looks like a leftover line from the replaced
// isPodKatalystQoSLevelSystemCores body; confirm it is not part of the final code.
return qosLevel == pluginapi.KatalystQoSLevelSystemCores, nil
// Daemon pods are skipped unless they are system-QoS pods.
if native.CheckDaemonPod(pod) && !systemCores {
return true, nil
}
return false, nil
}

func ParseListOfTopologyHints(hintsList *pluginapi.ListOfTopologyHints) []topology.TopologyHint {
Expand Down
99 changes: 99 additions & 0 deletions pkg/agent/orm/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import (
pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"

"github.com/kubewharf/katalyst-api/pkg/consts"
katalyst_base "github.com/kubewharf/katalyst-core/cmd/base"
"github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/options"
"github.com/kubewharf/katalyst-core/pkg/agent/orm/endpoint"
Expand Down Expand Up @@ -278,6 +279,104 @@ func TestIsSkippedContainer(t *testing.T) {
}
}

// TestIsSkippedPod verifies isSkippedPod for pods with different owner kinds
// and QoS-level annotations:
//   - a DaemonSet pod annotated shared_cores is expected to be skipped,
//   - a DaemonSet pod annotated system_cores is expected not to be skipped,
//   - a Deployment pod is expected not to be skipped,
//   - a pod carrying an unrecognized QoS annotation value is expected to
//     produce an error.
//
// Cases that do not expect an error also assert that no error is returned,
// so an unexpected failure cannot hide behind a matching zero-value result.
func TestIsSkippedPod(t *testing.T) {
	t.Parallel()

	// newPod builds a minimal pod with the given QoS annotation value and a
	// single owner reference of the given kind.
	newPod := func(qosLevel, ownerKind string) *v1.Pod {
		return &v1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				Name: "testpod",
				Annotations: map[string]string{
					consts.PodAnnotationQoSLevelKey: qosLevel,
				},
				OwnerReferences: []metav1.OwnerReference{
					{
						Kind: ownerKind,
					},
				},
			},
		}
	}

	testCases := []struct {
		Name      string
		Pod       *v1.Pod
		Expected  bool
		ExpectErr bool
	}{
		{
			Name:      "daemon and shared",
			Pod:       newPod(consts.PodAnnotationQoSLevelSharedCores, "DaemonSet"),
			ExpectErr: false,
			Expected:  true,
		},
		{
			Name:      "daemon and system",
			Pod:       newPod(consts.PodAnnotationQoSLevelSystemCores, "DaemonSet"),
			ExpectErr: false,
			Expected:  false,
		},
		{
			Name:      "deployment",
			Pod:       newPod(consts.PodAnnotationQoSLevelSharedCores, "Deployment"),
			ExpectErr: false,
			Expected:  false,
		},
		{
			Name:      "fail",
			Pod:       newPod("unknow value", "DaemonSet"),
			ExpectErr: true,
			Expected:  false,
		},
	}

	for _, tc := range testCases {
		tc := tc // capture per-iteration copy for the parallel subtest (pre-Go 1.22 semantics)
		t.Run(tc.Name, func(t *testing.T) {
			t.Parallel()

			qosCfg := generic.NewQoSConfiguration()
			skip, err := isSkippedPod(tc.Pod, qosCfg)
			if tc.ExpectErr {
				// The boolean result is meaningless when an error is returned.
				assert.Error(t, err)
				return
			}

			assert.NoError(t, err)
			assert.Equal(t, tc.Expected, skip)
		})
	}
}

func TestGetMappedResourceName(t *testing.T) {
t.Parallel()

Expand Down
5 changes: 2 additions & 3 deletions pkg/agent/orm/resourceprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ import (
maputil "k8s.io/kubernetes/pkg/util/maps"

"github.com/kubewharf/katalyst-core/pkg/metrics"
"github.com/kubewharf/katalyst-core/pkg/util/native"
)

func (m *ManagerImpl) GetTopologyAwareResources(pod *v1.Pod, container *v1.Container) []*podresourcesapi.TopologyAwareResource {
Expand Down Expand Up @@ -131,12 +130,12 @@ func (m *ManagerImpl) getTopologyAwareResources(pod *v1.Pod, container *v1.Conta
err := fmt.Errorf("GetTopologyAwareResources got nil pod: %v or container: %v", pod, container)
return nil, err
}
systemCores, err := isPodKatalystQoSLevelSystemCores(m.qosConfig, pod)
skipPod, err := isSkippedPod(pod, m.qosConfig)
if err != nil {
err = fmt.Errorf("[ORM] check pod %s qos level fail: %v", pod.Name, err)
return nil, err
}
if native.CheckDaemonPod(pod) && !systemCores {
if skipPod {
klog.V(5).Infof("[ORM] skip pod: %s, container: %v", pod.Name, container.Name)
return nil, nil
}
Expand Down
Loading