Add a proto for cgroup settings

buildbuddy-io · Nov 14, 2024 · cfad81b · cfad81b
1 parent de4208a
commit cfad81b
Showing 1 changed file with 116 additions and 0 deletions.
diff --git a/proto/scheduler.proto b/proto/scheduler.proto
@@ -116,6 +116,119 @@ message TaskSize {
   repeated CustomResource custom_resources = 4;
 }
 
+// CgroupSettings holds the Linux cgroup2 options to apply to an execution.
+//
+// Unlimited values: where applicable, -1 stands for "unlimited", which
+// translates to the "max" string in cgroup2 terms.
+//
+// Presence: the top-level numeric fields are governed by bool fields prefixed
+// with "use_", which tell the executor whether to apply the value. When the
+// bool is false, the setting is not applied and the default value is used
+// instead. 0 is not treated as "do not set" because 0 is a valid value for
+// some settings; proto3 optional is avoided because of the awkward Go API.
+// The fields of the nested messages have no "use_" counterparts.
+message CgroupSettings {
+  // Rate limits for disk IO; see disk_io_limit.
+  message IOLimits {
+    // Maximum number of read operations per second.
+    int64 riops = 1;
+    // Maximum number of write operations per second.
+    int64 wiops = 2;
+    // Maximum number of bytes read per second.
+    int64 rbps = 3;
+    // Maximum number of bytes written per second.
+    int64 wbps = 4;
+  }
+
+  // Parameters of the IO cost model QoS; see io_cost_qos.
+  message IOCostQOS {
+    // Percentile used for read latency.
+    int64 rpct = 1;
+    // Target latency for reads.
+    int64 rlat = 2;
+    // Percentile used for write latency.
+    int64 wpct = 3;
+    // Target latency for writes.
+    int64 wlat = 4;
+    // Lower bound of vrate.
+    int64 min = 5;
+    // Upper bound of vrate.
+    int64 max = 6;
+  }
+
+  // Share of CPU this task receives relative to the other tasks in the parent
+  // cgroup, which provides a best-effort CPU guarantee.
+  //
+  // Supported values: 1 to 10000. cgroup2 equivalent: "cpu.weight".
+  int64 cpu_weight = 1;
+  bool use_cpu_weight = 2;
+
+  // Upper bound on CPU usage within one quota period.
+  //
+  // cgroup2 equivalent: the quota field of "cpu.max".
+  int64 cpu_quota_limit_usec = 3;
+  // Interval at which the CPU quota is refreshed. A longer period may permit
+  // higher burst CPU usage, but may lead to more stalling if the quota runs
+  // out very early in the period.
+  //
+  // cgroup2 equivalent: the period field of "cpu.max".
+  int64 cpu_quota_period_usec = 4;
+  bool use_cpu_quota = 5;
+
+  // Threshold beyond which memory usage is throttled and processes come under
+  // heavy reclaim pressure.
+  //
+  // cgroup2 equivalent: "memory.high".
+  int64 memory_throttle_limit_bytes = 6;
+  bool use_memory_throttle_limit = 7;
+
+  // Threshold beyond which the OOM killer kills processes in the cgroup.
+  //
+  // cgroup2 equivalent: "memory.max".
+  int64 memory_limit_bytes = 8;
+  bool use_memory_limit = 9;
+
+  // Best-effort memory protection: while the cgroup and its descendants stay
+  // below this threshold, their memory is reclaimed only if memory cannot be
+  // reclaimed from other unprotected cgroups.
+  //
+  // cgroup2 equivalent: "memory.low".
+  int64 memory_soft_guarantee_bytes = 10;
+  bool use_memory_soft_guarantee = 11;
+
+  // Minimum amount of memory that is guaranteed and that the system can never
+  // reclaim. The OOM killer is invoked when there is not enough memory to
+  // provide this guarantee.
+  //
+  // cgroup2 equivalent: "memory.min".
+  int64 memory_minimum_bytes = 12;
+  bool use_memory_minimum = 13;
+
+  // Hard limit for anonymous swap memory.
+  //
+  // cgroup2 equivalent: "memory.swap.max".
+  int64 swap_limit_bytes = 14;
+  bool use_swap_limit = 15;
+
+  // IO quality-of-service mechanism: the number of milliseconds a process may
+  // wait before IO from other processes is given to it. cgroup2 equivalent:
+  // "io.latency". NOTE(review): the kernel documents the io.latency target in
+  // microseconds; confirm that the executor converts units.
+  int64 io_latency_target_ms = 16;
+  bool use_io_latency_target = 17;
+
+  // IO limit for the disk on which all action IO is performed.
+  //
+  // cgroup2 equivalent: "io.max". Major/minor device numbers are left out
+  // because they may differ from one executor to another.
+  IOLimits disk_io_limit = 18;
+
+  // IO quality-of-service parameters that use the advanced IO cost model.
+  // These should be ignored when the executor does not have a cost model
+  // configured for the disk.
+  IOCostQOS io_cost_qos = 19;
+}
+
 // Next ID: 9
 message SchedulingMetadata {
   // Task size used for scheduling purposes, when the scheduler is deciding
@@ -179,6 +292,9 @@ message SchedulingMetadata {
   // priority of tasks belonging to different groups; it only affects the
   // relative priority of tasks within a group.
   int32 priority = 11;
+
+  // cgroup2 settings. Will be set only for Linux executions.
+  CgroupSettings cgroup_settings = 12;
 }
 
 message ScheduleTaskRequest {