From 5117199fa5ec6673925146a7d33f8170832a9808 Mon Sep 17 00:00:00 2001 From: Robin Lu Date: Fri, 1 Mar 2024 11:19:34 +0800 Subject: [PATCH] feat: adding support to qos level memory.low protection This patch provied the feature of memory.low protection. There are a couple of benefits about memory.low protection: 1, it provides a gradient of protection. As a cgroup's usage grows past the protected amount, the protected amount remains protected, but reclaim pressure for the excess amount gradually increases. 2, it's work-conserving - if the protected cgroup doesn't use the memory, it's available for others to use. Signed-off-by: Robin Lu --- .../app/options/qrm/memory_plugin.go | 16 + .../memory/dynamicpolicy/policy.go | 12 + .../memory/dynamicpolicy/policy_test.go | 1 + .../memory/handlers/memprotection/const.go | 31 ++ .../memprotection/memprotection_linux.go | 200 ++++++++++ .../memprotection/memprotection_linux_test.go | 365 ++++++++++++++++++ .../memprotection_unsupported.go | 31 ++ pkg/config/agent/qrm/memory_plugin.go | 7 + pkg/util/cgroup/manager/cgroup_test.go | 41 ++ 9 files changed, 704 insertions(+) create mode 100644 pkg/agent/qrm-plugins/memory/handlers/memprotection/const.go create mode 100644 pkg/agent/qrm-plugins/memory/handlers/memprotection/memprotection_linux.go create mode 100644 pkg/agent/qrm-plugins/memory/handlers/memprotection/memprotection_linux_test.go create mode 100644 pkg/agent/qrm-plugins/memory/handlers/memprotection/memprotection_unsupported.go diff --git a/cmd/katalyst-agent/app/options/qrm/memory_plugin.go b/cmd/katalyst-agent/app/options/qrm/memory_plugin.go index 64f16160da..8c8b323e50 100644 --- a/cmd/katalyst-agent/app/options/qrm/memory_plugin.go +++ b/cmd/katalyst-agent/app/options/qrm/memory_plugin.go @@ -33,6 +33,7 @@ type MemoryOptions struct { OOMPriorityPinnedMapAbsPath string SockMemOptions + MemProtectionOptions } type SockMemOptions struct { @@ -43,6 +44,11 @@ type SockMemOptions struct { SetCgroupTCPMemRatio int } +type MemProtectionOptions struct { + EnableSettingMemProtection bool + MemProtectionQoSLevelConfigFile string +} + func NewMemoryOptions() *MemoryOptions { return &MemoryOptions{ PolicyName: "dynamic", @@ -56,6 +62,10 @@ func NewMemoryOptions() *MemoryOptions { SetGlobalTCPMemRatio: 20, // default: 20% * {host total memory} SetCgroupTCPMemRatio: 100, // default: 100% * {cgroup memory} }, + MemProtectionOptions: MemProtectionOptions{ + EnableSettingMemProtection: false, + MemProtectionQoSLevelConfigFile: "", + }, } } @@ -84,6 +94,10 @@ func (o *MemoryOptions) AddFlags(fss *cliflag.NamedFlagSets) { o.SetGlobalTCPMemRatio, "limit global max tcp memory usage") fs.IntVar(&o.SetCgroupTCPMemRatio, "qrm-memory-cgroup-tcpmem-ratio", o.SetCgroupTCPMemRatio, "limit cgroup max tcp memory usage") + fs.BoolVar(&o.EnableSettingMemProtection, "enable-setting-mem-protection", + o.EnableSettingMemProtection, "if set true, we will do memory protection in qos level") + fs.StringVar(&o.MemProtectionQoSLevelConfigFile, "mem-protection-qos-config-file", + o.MemProtectionQoSLevelConfigFile, "the absolute path of mem.low qos level config file") } func (o *MemoryOptions) ApplyTo(conf *qrmconfig.MemoryQRMPluginConfig) error { @@ -98,5 +112,7 @@ func (o *MemoryOptions) ApplyTo(conf *qrmconfig.MemoryQRMPluginConfig) error { conf.EnableSettingSockMem = o.EnableSettingSockMem conf.SetGlobalTCPMemRatio = o.SetGlobalTCPMemRatio conf.SetCgroupTCPMemRatio = o.SetCgroupTCPMemRatio + conf.EnableSettingMemProtection = o.EnableSettingMemProtection + conf.MemProtectionQoSLevelConfigFile = o.MemProtectionQoSLevelConfigFile return nil } diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go index a40b1d5001..e50b7542c2 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go @@ -40,6 +40,7 @@ import ( "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/memoryadvisor" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/oom" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/state" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/handlers/memprotection" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/handlers/sockmem" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util" "github.com/kubewharf/katalyst-core/pkg/agent/utilcomponent/periodicalhandler" @@ -144,6 +145,7 @@ type DynamicPolicy struct { enableSettingMemoryMigrate bool enableSettingSockMem bool + enableSettingMemProtection bool enableMemoryAdvisor bool memoryAdvisorSocketAbsPath string memoryPluginSocketAbsPath string @@ -207,6 +209,7 @@ func NewDynamicPolicy(agentCtx *agent.GenericContext, conf *config.Configuration defaultAsyncLimitedWorkers: asyncworker.NewAsyncLimitedWorkers(memoryPluginAsyncWorkersName, defaultAsyncWorkLimit, wrappedEmitter), enableSettingMemoryMigrate: conf.EnableSettingMemoryMigrate, enableSettingSockMem: conf.EnableSettingSockMem, + enableSettingMemProtection: conf.EnableSettingMemProtection, enableMemoryAdvisor: conf.EnableMemoryAdvisor, memoryAdvisorSocketAbsPath: conf.MemoryAdvisorSocketAbsPath, memoryPluginSocketAbsPath: conf.MemoryPluginSocketAbsPath, @@ -363,6 +366,15 @@ func (p *DynamicPolicy) Start() (err error) { } } + if p.enableSettingMemProtection { + general.Infof("setMemProtection enabled") + err := periodicalhandler.RegisterPeriodicalHandler(qrm.QRMMemoryPluginPeriodicalHandlerGroupName, + memprotection.EnableSetMemProtectionPeriodicalHandlerName, memprotection.MemProtectionTaskFunc, 90*time.Second) + if err != nil { + general.Infof("setMemProtection failed, err=%v", err) + } + } + go wait.Until(func() { periodicalhandler.ReadyToStartHandlersByGroup(qrm.QRMMemoryPluginPeriodicalHandlerGroupName) }, 5*time.Second, p.stopCh) diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_test.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_test.go index 7a25ba1bfc..b2e2bed6a4 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_test.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_test.go @@ -893,6 +893,7 @@ func TestAllocate(t *testing.T) { } dynamicPolicy.enableMemoryAdvisor = true + dynamicPolicy.enableSettingMemProtection = true dynamicPolicy.advisorClient = advisorsvc.NewStubAdvisorServiceClient() resp, err := dynamicPolicy.Allocate(context.Background(), tc.req) diff --git a/pkg/agent/qrm-plugins/memory/handlers/memprotection/const.go b/pkg/agent/qrm-plugins/memory/handlers/memprotection/const.go new file mode 100644 index 0000000000..63136b4a93 --- /dev/null +++ b/pkg/agent/qrm-plugins/memory/handlers/memprotection/const.go @@ -0,0 +1,31 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package memprotection + +const EnableSetMemProtectionPeriodicalHandlerName = "SetCGMemProtection" + +const ( + // Constants for cgroup memory statistics + cgroupMemory32M = 33554432 + cgroupMemoryUnlimited = 9223372036854771712 + + controlKnobKeyMemProtection = "mem_protection" +) + +const ( + metricNameMemLow = "async_handler_cgroup_memlow" +) diff --git a/pkg/agent/qrm-plugins/memory/handlers/memprotection/memprotection_linux.go b/pkg/agent/qrm-plugins/memory/handlers/memprotection/memprotection_linux.go new file mode 100644 index 0000000000..76fadbb555 --- /dev/null +++ b/pkg/agent/qrm-plugins/memory/handlers/memprotection/memprotection_linux.go @@ -0,0 +1,200 @@ +//go:build linux +// +build linux + +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package memprotection + +import ( + "context" + "fmt" + "math" + "strconv" + + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate" + coreconfig "github.com/kubewharf/katalyst-core/pkg/config" + dynamicconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic" + "github.com/kubewharf/katalyst-core/pkg/metaserver" + "github.com/kubewharf/katalyst-core/pkg/metrics" + "github.com/kubewharf/katalyst-core/pkg/util/cgroup/common" + cgroupcm "github.com/kubewharf/katalyst-core/pkg/util/cgroup/common" + cgroupmgr "github.com/kubewharf/katalyst-core/pkg/util/cgroup/manager" + "github.com/kubewharf/katalyst-core/pkg/util/general" + "github.com/kubewharf/katalyst-core/pkg/util/native" +) + +func calculatedBestProtection(memUsage, memFileInactive, userProtection uint64) uint64 { + if memFileInactive > memUsage { + return 0 + } + minProtection := memUsage - memFileInactive + cgroupMemory32M + low := math.Max(float64(userProtection), float64(minProtection)) + return uint64(low) +} + +func getUserSpecifiedMemoryProtectionInBytes(memLimit, memUsage, ratio uint64) uint64 { + if ratio > 100 || ratio <= 0 { + general.Infof("Bad ratio %v", ratio) + return 0 + } + + maxLimit := memLimit + if memLimit >= cgroupMemoryUnlimited { + maxLimit = memUsage + cgroupMemory32M + } + low := uint64(float64(maxLimit) / 100.0 * float64(ratio)) + low = general.AlignToPageSize(low) + return low +} + +func calculateMemProtection(absCgPath string, ratio uint64) (uint64, error) { + /* + * I hope to protect cgroup from System-Thrashing(insufficient hot file memory) + * during kswapd reclaiming through cgv2 memory.low. + */ + // Step1, get cgroup memory.limit, memory.usage, inactive-file-memory. + memStat, err := cgroupmgr.GetMemoryWithAbsolutePath(absCgPath) + if err != nil { + general.Warningf("GetMemoryWithAbsolutePath failed with err: %v", err) + return 0, err + } + memDetailedStat, err := cgroupmgr.GetDetailedMemoryWithAbsolutePath(absCgPath) + if err != nil { + general.Warningf("GetDetailedMemoryWithAbsolutePath failed with err: %v", err) + return 0, err + } + + // Step2, Reserve a certain ratio of file memory for high-QoS cgroups. + userProtection := getUserSpecifiedMemoryProtectionInBytes(memStat.Limit, memStat.Usage, ratio) + if userProtection == 0 { + general.Warningf("getUserSpecifiedMemoryProtectionBytes return 0") + return 0, fmt.Errorf("getUserSpecifiedMemoryProtectionBytes return 0") + } + + // Step3, I don't want to hurt existing hot file-memory. + // If the reserve file memory is not sufficient for current hot file-memory, + // then the final memory.low will be based on current hot file-memory. + low := calculatedBestProtection(memStat.Usage, memDetailedStat.FileInactive, userProtection) + + general.Infof("calculateMemProtection: target=%v, usr=%v, ratio=%v, cg=%v, limit=%v, usage=%v, file-inactive=%v", low, userProtection, ratio, absCgPath, memStat.Limit, memStat.Usage, memDetailedStat.FileInactive) + return low, nil +} + +func applyMemProtectionQoSLevelConfig(conf *coreconfig.Configuration, + emitter metrics.MetricEmitter, metaServer *metaserver.MetaServer, +) { + if conf.MemProtectionQoSLevelConfigFile == "" { + general.Infof("no MemProtectionQoSLevelConfigFile found") + return + } + + var extraControlKnobConfigs commonstate.ExtraControlKnobConfigs + if err := general.LoadJsonConfig(conf.MemProtectionQoSLevelConfigFile, &extraControlKnobConfigs); err != nil { + general.Errorf("MemProtectionQoSLevelConfigFile load failed:%v", err) + return + } + ctx := context.Background() + podList, err := metaServer.GetPodList(ctx, native.PodIsActive) + if err != nil { + general.Infof("get pod list failed: %v", err) + return + } + + for _, pod := range podList { + if pod == nil { + general.Warningf("get nil pod from metaServer") + continue + } + if conf.QoSConfiguration == nil { + continue + } + qosConfig := conf.QoSConfiguration + qosLevel, err := qosConfig.GetQoSLevelForPod(pod) + if err != nil { + general.Warningf("GetQoSLevelForPod failed:%v", err) + continue + } + qosLevelDefaultValue, ok := extraControlKnobConfigs[controlKnobKeyMemProtection].QoSLevelToDefaultValue[qosLevel] + if !ok { + continue + } + + ratio, err := strconv.ParseFloat(qosLevelDefaultValue, 64) + if err != nil { + general.Infof("Atoi failed with err: %v", err) + continue + } + + absCgPath, err := common.GetPodAbsCgroupPath(common.CgroupSubsysMemory, string(pod.UID)) + if err != nil { + continue + } + low, err := calculateMemProtection(absCgPath, uint64(ratio*100)) + if err != nil { + general.Errorf("calculateMemProtection for relativeCgPath: %s failed with error: %v", + absCgPath, err) + continue + } + + // OK. Set the value for memory.low. + var data *cgroupcm.MemoryData + data = &cgroupcm.MemoryData{SoftLimitInBytes: int64(low)} + if err := cgroupmgr.ApplyMemoryWithRelativePath(absCgPath, data); err != nil { + general.Warningf("ApplyMemoryWithRelativePath failed, cgpath=%v, err=%v", absCgPath, err) + continue + } + _ = emitter.StoreInt64(metricNameMemLow, int64(low), metrics.MetricTypeNameRaw, + metrics.ConvertMapToTags(map[string]string{ + "podUID": string(pod.UID), + })...) + + } +} + +func MemProtectionTaskFunc(conf *coreconfig.Configuration, + _ interface{}, _ *dynamicconfig.DynamicAgentConfiguration, + emitter metrics.MetricEmitter, metaServer *metaserver.MetaServer, +) { + general.Infof("called") + + if conf == nil { + general.Errorf("nil extraConf") + return + } else if emitter == nil { + general.Errorf("nil emitter") + return + } else if metaServer == nil { + general.Errorf("nil metaServer") + return + } + + // SettingMemProtection featuregate. + if !conf.EnableSettingMemProtection { + general.Infof("EnableSettingMemProtection disabled") + return + } + + if !cgroupcm.CheckCgroup2UnifiedMode() { + general.Infof("not in cgv2 environment, skip MemProtectionTaskFunc") + return + } + + // checking qos-level memory.low configuration. + if len(conf.MemProtectionQoSLevelConfigFile) > 0 { + applyMemProtectionQoSLevelConfig(conf, emitter, metaServer) + } +} diff --git a/pkg/agent/qrm-plugins/memory/handlers/memprotection/memprotection_linux_test.go b/pkg/agent/qrm-plugins/memory/handlers/memprotection/memprotection_linux_test.go new file mode 100644 index 0000000000..53370c5437 --- /dev/null +++ b/pkg/agent/qrm-plugins/memory/handlers/memprotection/memprotection_linux_test.go @@ -0,0 +1,365 @@ +//go:build linux +// +build linux + +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package memprotection + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/options" + "github.com/kubewharf/katalyst-core/pkg/config" + coreconfig "github.com/kubewharf/katalyst-core/pkg/config" + "github.com/kubewharf/katalyst-core/pkg/config/agent" + configagent "github.com/kubewharf/katalyst-core/pkg/config/agent" + dynamicconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic" + "github.com/kubewharf/katalyst-core/pkg/config/agent/qrm" + "github.com/kubewharf/katalyst-core/pkg/config/generic" + "github.com/kubewharf/katalyst-core/pkg/metaserver" + metaagent "github.com/kubewharf/katalyst-core/pkg/metaserver/agent" + "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric" + "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/pod" + "github.com/kubewharf/katalyst-core/pkg/metrics" + "github.com/kubewharf/katalyst-core/pkg/util/machine" +) + +func generateTestConfiguration(t *testing.T, checkpointDir, stateFileDir string) *config.Configuration { + testConfiguration, err := options.NewOptions().Config() + require.NoError(t, err) + require.NotNil(t, testConfiguration) + + testConfiguration.GenericSysAdvisorConfiguration.StateFileDirectory = stateFileDir + testConfiguration.MetaServerConfiguration.CheckpointManagerDir = checkpointDir + + return testConfiguration +} + +func makeMetaServer() (*metaserver.MetaServer, error) { + server := &metaserver.MetaServer{ + MetaAgent: &metaagent.MetaAgent{}, + } + + cpuTopology, err := machine.GenerateDummyCPUTopology(16, 1, 2) + if err != nil { + return nil, err + } + + server.KatalystMachineInfo = &machine.KatalystMachineInfo{ + CPUTopology: cpuTopology, + } + server.MetricsFetcher = metric.NewFakeMetricsFetcher(metrics.DummyMetrics{}) + return server, nil +} + +func TestMemProtection(t *testing.T) { + t.Parallel() + MemProtectionTaskFunc(&coreconfig.Configuration{ + AgentConfiguration: &agent.AgentConfiguration{ + StaticAgentConfiguration: &configagent.StaticAgentConfiguration{ + QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{ + MemoryQRMPluginConfig: &qrm.MemoryQRMPluginConfig{ + MemProtectionOptions: qrm.MemProtectionOptions{ + EnableSettingMemProtection: false, + }, + }, + }, + }, + }, + }, nil, &dynamicconfig.DynamicAgentConfiguration{}, nil, nil) + + MemProtectionTaskFunc(&coreconfig.Configuration{ + AgentConfiguration: &agent.AgentConfiguration{ + StaticAgentConfiguration: &configagent.StaticAgentConfiguration{ + QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{ + MemoryQRMPluginConfig: &qrm.MemoryQRMPluginConfig{ + MemProtectionOptions: qrm.MemProtectionOptions{ + EnableSettingMemProtection: true, + }, + }, + }, + }, + }, + }, nil, &dynamicconfig.DynamicAgentConfiguration{}, metrics.DummyMetrics{}, nil) + + MemProtectionTaskFunc(&coreconfig.Configuration{ + AgentConfiguration: &agent.AgentConfiguration{ + StaticAgentConfiguration: &configagent.StaticAgentConfiguration{ + QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{ + MemoryQRMPluginConfig: &qrm.MemoryQRMPluginConfig{ + MemProtectionOptions: qrm.MemProtectionOptions{ + EnableSettingMemProtection: true, + }, + }, + }, + }, + }, + }, metrics.DummyMetrics{}, &dynamicconfig.DynamicAgentConfiguration{}, metrics.DummyMetrics{}, nil) + + metaServer, err := makeMetaServer() + assert.NoError(t, err) + metaServer.PodFetcher = &pod.PodFetcherStub{PodList: []*v1.Pod{}} + + MemProtectionTaskFunc(&coreconfig.Configuration{ + AgentConfiguration: &agent.AgentConfiguration{ + StaticAgentConfiguration: &configagent.StaticAgentConfiguration{ + QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{ + MemoryQRMPluginConfig: &qrm.MemoryQRMPluginConfig{ + MemProtectionOptions: qrm.MemProtectionOptions{ + EnableSettingMemProtection: true, + }, + }, + }, + }, + }, + }, metrics.DummyMetrics{}, &dynamicconfig.DynamicAgentConfiguration{}, metrics.DummyMetrics{}, metaServer) + + MemProtectionTaskFunc(&coreconfig.Configuration{ + AgentConfiguration: &agent.AgentConfiguration{ + StaticAgentConfiguration: &configagent.StaticAgentConfiguration{ + QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{ + MemoryQRMPluginConfig: &qrm.MemoryQRMPluginConfig{ + MemProtectionOptions: qrm.MemProtectionOptions{ + EnableSettingMemProtection: false, + }, + }, + }, + }, + }, + }, metrics.DummyMetrics{}, &dynamicconfig.DynamicAgentConfiguration{}, metrics.DummyMetrics{}, metaServer) + + normalPod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + UID: "normalPod", + Name: "normalPod", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "c", + }, + }, + }, + } + + metaServer.PodFetcher = &pod.PodFetcherStub{PodList: []*v1.Pod{normalPod}} + + MemProtectionTaskFunc(&coreconfig.Configuration{ + AgentConfiguration: &agent.AgentConfiguration{ + StaticAgentConfiguration: &configagent.StaticAgentConfiguration{ + QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{ + MemoryQRMPluginConfig: &qrm.MemoryQRMPluginConfig{ + MemProtectionOptions: qrm.MemProtectionOptions{ + EnableSettingMemProtection: true, + }, + }, + }, + }, + }, + }, metrics.DummyMetrics{}, &dynamicconfig.DynamicAgentConfiguration{}, metrics.DummyMetrics{}, metaServer) + + MemProtectionTaskFunc(&coreconfig.Configuration{ + AgentConfiguration: &agent.AgentConfiguration{ + StaticAgentConfiguration: &configagent.StaticAgentConfiguration{ + QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{ + MemoryQRMPluginConfig: &qrm.MemoryQRMPluginConfig{ + MemProtectionOptions: qrm.MemProtectionOptions{ + EnableSettingMemProtection: true, + MemProtectionQoSLevelConfigFile: "", + }, + }, + }, + }, + }, + }, metrics.DummyMetrics{}, &dynamicconfig.DynamicAgentConfiguration{}, metrics.DummyMetrics{}, metaServer) + + MemProtectionTaskFunc(&coreconfig.Configuration{ + AgentConfiguration: &agent.AgentConfiguration{ + StaticAgentConfiguration: &configagent.StaticAgentConfiguration{ + QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{ + MemoryQRMPluginConfig: &qrm.MemoryQRMPluginConfig{ + MemProtectionOptions: qrm.MemProtectionOptions{ + EnableSettingMemProtection: true, + MemProtectionQoSLevelConfigFile: "fake", + }, + }, + }, + }, + }, + }, metrics.DummyMetrics{}, &dynamicconfig.DynamicAgentConfiguration{}, metrics.DummyMetrics{}, metaServer) + + applyMemProtectionQoSLevelConfig(&coreconfig.Configuration{ + AgentConfiguration: &agent.AgentConfiguration{ + StaticAgentConfiguration: &configagent.StaticAgentConfiguration{ + QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{ + MemoryQRMPluginConfig: &qrm.MemoryQRMPluginConfig{ + MemProtectionOptions: qrm.MemProtectionOptions{ + EnableSettingMemProtection: true, + MemProtectionQoSLevelConfigFile: "", + }, + }, + }, + }, + }, + }, metrics.DummyMetrics{}, metaServer) + + jsonContent := `{ + "mem_protection": { + "control_knob_info": { + "cgroup_subsys_name": "memory", + "cgroup_version_to_iface_name": { + "v1": "", + "v2": "memory.low" + }, + "control_knob_value": "0", + "oci_property_name": "" + }, + "pod_explicitly_annotation_key": "MemcgProtectionValue", + "qos_level_to_default_value": { + "dedicated_cores": "15", + "shared_cores": "15" + } + } + }` + + // Create a temporary file + tempFile, err := ioutil.TempFile("", "test.json") + if err != nil { + fmt.Println("Error creating temporary file:", err) + return + } + defer os.Remove(tempFile.Name()) // Defer removing the temporary file + + // Write the JSON content to the temporary file + if _, err := tempFile.WriteString(jsonContent); err != nil { + fmt.Println("Error writing to temporary file:", err) + return + } + + absPath, err := filepath.Abs(tempFile.Name()) + if err != nil { + fmt.Println("Error obtaining absolute path:", err) + return + } + + applyMemProtectionQoSLevelConfig(&coreconfig.Configuration{ + AgentConfiguration: &agent.AgentConfiguration{ + StaticAgentConfiguration: &configagent.StaticAgentConfiguration{ + QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{ + MemoryQRMPluginConfig: &qrm.MemoryQRMPluginConfig{ + MemProtectionOptions: qrm.MemProtectionOptions{ + EnableSettingMemProtection: true, + MemProtectionQoSLevelConfigFile: absPath, + }, + }, + }, + }, + }, + GenericConfiguration: &generic.GenericConfiguration{ + QoSConfiguration: nil, + }, + }, metrics.DummyMetrics{}, metaServer) + + checkpointDir, err := ioutil.TempDir("", "checkpoint-FetchModelResult") + require.NoError(t, err) + defer func() { _ = os.RemoveAll(checkpointDir) }() + + stateFileDir, err := ioutil.TempDir("", "statefile-FetchModelResult") + require.NoError(t, err) + defer func() { _ = os.RemoveAll(stateFileDir) }() + + conf := generateTestConfiguration(t, checkpointDir, stateFileDir) + + applyMemProtectionQoSLevelConfig(&coreconfig.Configuration{ + AgentConfiguration: &agent.AgentConfiguration{ + StaticAgentConfiguration: &configagent.StaticAgentConfiguration{ + QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{ + MemoryQRMPluginConfig: &qrm.MemoryQRMPluginConfig{ + MemProtectionOptions: qrm.MemProtectionOptions{ + EnableSettingMemProtection: true, + MemProtectionQoSLevelConfigFile: absPath, + }, + }, + }, + }, + }, + GenericConfiguration: &generic.GenericConfiguration{ + QoSConfiguration: conf.QoSConfiguration, + }, + }, metrics.DummyMetrics{}, metaServer) + + metaServerNil, err := makeMetaServer() + assert.NoError(t, err) + metaServerNil.PodFetcher = &pod.PodFetcherStub{PodList: []*v1.Pod{}} + + applyMemProtectionQoSLevelConfig(&coreconfig.Configuration{ + AgentConfiguration: &agent.AgentConfiguration{ + StaticAgentConfiguration: &configagent.StaticAgentConfiguration{ + QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{ + MemoryQRMPluginConfig: &qrm.MemoryQRMPluginConfig{ + MemProtectionOptions: qrm.MemProtectionOptions{ + EnableSettingMemProtection: true, + MemProtectionQoSLevelConfigFile: absPath, + }, + }, + }, + }, + }, + GenericConfiguration: &generic.GenericConfiguration{ + QoSConfiguration: nil, + }, + }, metrics.DummyMetrics{}, metaServerNil) + + calculateMemProtection("fake", 10) +} + +func TestGetUserSpecifiedMemoryProtectionInBytes(t *testing.T) { + t.Parallel() + + result := getUserSpecifiedMemoryProtectionInBytes(1073741824, 107374182, 10) + var expected uint64 = 107376640 + assert.Equal(t, expected, result, "Test getUserSpecifiedMemProtectionInBytes failed") + + result = getUserSpecifiedMemoryProtectionInBytes(1073741824, 107374182, 123) + expected = 0 + assert.Equal(t, expected, result, "Test getUserSpecifiedMemProtectionInBytes failed") + + result = getUserSpecifiedMemoryProtectionInBytes(9223372036854771712, 1073741824, 10) + expected = 0x699a000 + assert.Equal(t, expected, result, "Test getUserSpecifiedMemProtectionInBytes failed") +} + +func TestCalculatedBestProtection(t *testing.T) { + t.Parallel() + + result := calculatedBestProtection(1073741824.0, 536870912.0, 536870912.0) + var expected uint64 = 0x22000000 + assert.Equal(t, expected, result, "Test alculatedBestProtection failed") + + result = calculatedBestProtection(1073741824.0, 5368709120.0, 536870912.0) + expected = 0 + assert.Equal(t, expected, result, "Test alculatedBestProtection failed") +} diff --git a/pkg/agent/qrm-plugins/memory/handlers/memprotection/memprotection_unsupported.go b/pkg/agent/qrm-plugins/memory/handlers/memprotection/memprotection_unsupported.go new file mode 100644 index 0000000000..c76352add2 --- /dev/null +++ b/pkg/agent/qrm-plugins/memory/handlers/memprotection/memprotection_unsupported.go @@ -0,0 +1,31 @@ +//go:build !linux + +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package memprotection + +import ( + coreconfig "github.com/kubewharf/katalyst-core/pkg/config" + dynamicconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic" + "github.com/kubewharf/katalyst-core/pkg/metaserver" + "github.com/kubewharf/katalyst-core/pkg/metrics" +) + +func MemProtectionTaskFunc(conf *coreconfig.Configuration, + _ interface{}, _ *dynamicconfig.DynamicAgentConfiguration, + emitter metrics.MetricEmitter, metaServer *metaserver.MetaServer) { +} diff --git a/pkg/config/agent/qrm/memory_plugin.go b/pkg/config/agent/qrm/memory_plugin.go index 02004d7730..37747b0381 100644 --- a/pkg/config/agent/qrm/memory_plugin.go +++ b/pkg/config/agent/qrm/memory_plugin.go @@ -36,6 +36,8 @@ type MemoryQRMPluginConfig struct { // SockMemQRMPluginConfig: the configuration for sockmem limitation in cgroup and host level SockMemQRMPluginConfig + // MemProtectionOptions: the configuration for cgroup memory protection in qos level + MemProtectionOptions } type SockMemQRMPluginConfig struct { @@ -47,6 +49,11 @@ type SockMemQRMPluginConfig struct { SetCgroupTCPMemRatio int } +type MemProtectionOptions struct { + EnableSettingMemProtection bool + MemProtectionQoSLevelConfigFile string +} + func NewMemoryQRMPluginConfig() *MemoryQRMPluginConfig { return &MemoryQRMPluginConfig{} } diff --git a/pkg/util/cgroup/manager/cgroup_test.go b/pkg/util/cgroup/manager/cgroup_test.go index aff6027a62..32ab17260e 100644 --- a/pkg/util/cgroup/manager/cgroup_test.go +++ b/pkg/util/cgroup/manager/cgroup_test.go @@ -58,6 +58,7 @@ func testV2Manager(t *testing.T) { testManager(t, "v2") testSwapMax(t) + testDetailedMem(t) } func testManager(t *testing.T, version string) { @@ -168,3 +169,43 @@ func testSwapMax(t *testing.T) { assert.NoError(t, err) assert.Equal(t, fmt.Sprintf("%v", 0), string(s)) } + +func testDetailedMem(t *testing.T) { + defer monkey.UnpatchAll() + monkey.Patch(common.CheckCgroup2UnifiedMode, func() bool { return true }) + monkey.Patch(GetManager, func() Manager { return v2.NewManager() }) + monkey.Patch(cgroups.ReadFile, func(dir, file string) (string, error) { + f := filepath.Join(dir, file) + tmp, err := ioutil.ReadFile(f) + if err != nil { + return "", err + } + return string(tmp), nil + }) + monkey.Patch(cgroups.WriteFile, func(dir, file, data string) error { + f := filepath.Join(dir, file) + return ioutil.WriteFile(f, []byte(data), 0o700) + }) + + rootDir := os.TempDir() + dir := filepath.Join(rootDir, "tmp") + err := os.Mkdir(dir, 0o700) + assert.NoError(t, err) + + tmpDir, err := ioutil.TempDir(dir, "fake-cgroup") + assert.NoError(t, err) + defer os.RemoveAll(dir) + + monkey.Patch(common.GetCgroupRootPath, func(s string) string { + t.Logf("rootDir=%v", rootDir) + return rootDir + }) + + content := "file 1234\ninactive_file 2222\n" + statFile := filepath.Join(tmpDir, "memory.stat") + err = ioutil.WriteFile(statFile, []byte(content), 0o700) + assert.NoError(t, err) + + _, err = GetManager().GetDetailedMemory(tmpDir) + assert.NoError(t, err) +}