Skip to content

Commit

Permalink
Add a proto for cgroup settings
Browse files Browse the repository at this point in the history
  • Loading branch information
bduffany committed Nov 14, 2024
1 parent de4208a commit cfad81b
Showing 1 changed file with 116 additions and 0 deletions.
116 changes: 116 additions & 0 deletions proto/scheduler.proto
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,119 @@ message TaskSize {
repeated CustomResource custom_resources = 4;
}

// CgroupSettings describes the Linux cgroup2 configuration that should be
// applied to an execution.
//
// Unlimited values: for settings that support it, -1 stands for "unlimited"
// and corresponds to the string "max" in cgroup2.
//
// Presence convention: numeric settings are accompanied by a bool field whose
// name starts with "use_". The executor applies the numeric value only when
// that bool is true; when it is false the setting is not applied and the
// default value is used instead. Treating 0 as "do not set" is deliberately
// avoided because 0 is a valid value for some settings, and proto3 optional
// is not used because of the awkward Go API.
message CgroupSettings {
  // Throughput ceilings for a single disk. See disk_io_limit.
  message IOLimits {
    // Upper bound on read operations per second.
    int64 riops = 1;
    // Upper bound on write operations per second.
    int64 wiops = 2;
    // Upper bound on bytes read per second.
    int64 rbps = 3;
    // Upper bound on bytes written per second.
    int64 wbps = 4;
  }

  // Parameters for the advanced IO cost model. See io_cost_qos.
  message IOCostQOS {
    // Latency percentile used for reads.
    int64 rpct = 1;
    // Target latency for reads.
    int64 rlat = 2;
    // Latency percentile used for writes.
    int64 wpct = 3;
    // Target latency for writes.
    int64 wlat = 4;
    // Lower bound of the vrate range.
    int64 min = 5;
    // Upper bound of the vrate range.
    int64 max = 6;
  }

  // Share of CPU granted to this task, relative to the other tasks in the
  // parent cgroup. Acts as a best-effort CPU guarantee.
  //
  // Supported range: 1 to 10000.
  //
  // cgroup2 setting: "cpu.weight".
  int64 cpu_weight = 1;
  bool use_cpu_weight = 2;

  // Upper bound on CPU usage within one quota period.
  //
  // cgroup2 setting: the quota field of "cpu.max".
  int64 cpu_quota_limit_usec = 3;
  // Interval at which the CPU quota is replenished. A longer period may permit
  // higher CPU bursts, but may also cause more stalling when the quota runs
  // out very early in the period.
  //
  // cgroup2 setting: the period field of "cpu.max".
  int64 cpu_quota_period_usec = 4;
  // NOTE(review): presumably gates both cpu.max fields above (quota and
  // period) -- confirm against the executor.
  bool use_cpu_quota = 5;

  // Threshold above which memory usage is throttled and processes come under
  // heavy reclaim pressure.
  //
  // cgroup2 setting: "memory.high".
  int64 memory_throttle_limit_bytes = 6;
  bool use_memory_throttle_limit = 7;

  // Threshold above which the OOM killer kills processes in the cgroup.
  //
  // cgroup2 setting: "memory.max".
  int64 memory_limit_bytes = 8;
  bool use_memory_limit = 9;

  // Best-effort memory protection: while the cgroup and its descendants stay
  // under this threshold, their memory is not reclaimed unless nothing can be
  // reclaimed from other, unprotected cgroups.
  //
  // cgroup2 setting: "memory.low".
  int64 memory_soft_guarantee_bytes = 10;
  bool use_memory_soft_guarantee = 11;

  // Minimum amount of memory that is guaranteed and can never be reclaimed by
  // the system. The OOM killer is invoked if there is not enough memory to
  // honor this guarantee.
  //
  // cgroup2 setting: "memory.min".
  int64 memory_minimum_bytes = 12;
  bool use_memory_minimum = 13;

  // Hard cap on anonymous swap memory.
  //
  // cgroup2 setting: "memory.swap.max".
  int64 swap_limit_bytes = 14;
  bool use_swap_limit = 15;

  // IO quality-of-service mechanism: how many milliseconds a process may wait
  // before IO from other processes is given to it.
  //
  // cgroup2 setting: "io.latency".
  // NOTE(review): the kernel documents the io.latency target in microseconds,
  // so this millisecond value presumably needs converting -- confirm.
  int64 io_latency_target_ms = 16;
  bool use_io_latency_target = 17;

  // IO ceiling for the disk on which all action IO takes place.
  //
  // cgroup2 setting: "io.max". Major/minor device numbers are intentionally
  // left out of this message because they may vary between executors.
  IOLimits disk_io_limit = 18;

  // IO quality-of-service parameters for the advanced IO cost model. Should be
  // ignored when the executor has no cost model configured for the disk.
  IOCostQOS io_cost_qos = 19;
}

// Next ID: 13
message SchedulingMetadata {
// Task size used for scheduling purposes, when the scheduler is deciding
Expand Down Expand Up @@ -179,6 +292,9 @@ message SchedulingMetadata {
// priority of tasks belonging to different groups; it only affects the
// relative priority of tasks within a group.
int32 priority = 11;

// cgroup2 settings. Will be set only for Linux executions.
CgroupSettings cgroup_settings = 12;
}

message ScheduleTaskRequest {
Expand Down

0 comments on commit cfad81b

Please sign in to comment.