Add kipc::find_faulted_task.

This resolves a four-year-old TODO in El Jefe asking for a way to process faulted tasks without making so many kipcs. The original supervisor kipc interface was, by definition, designed before we knew what we were doing. Now that we have some miles on the system, some things are more clear: 1. The supervisor doesn't use the TaskState data to make its decisions. 2. The TaskState data is pretty expensive to serialize/deserialize, and produces code containing panic sites. 3. Panic sites in the supervisor are bad, since it is not allowed to panic. The new find_faulted_task operation can detect all N faulted tasks using N+1 kipcs, instead of one per potentially faulted task, and the request and response messages are trivial to serialize (one four-byte integer each way). This has allowed me to write (out-of-tree) "minisuper," a supervisor in 256 bytes that cannot panic. In-tree, this has the advantage of knocking 33% off Jefe's flash size and reducing statically-analyzable max stack depth by 20%.
oxidecomputer · Nov 22, 2024 · c25904c · c25904c
1 parent 134b7d7
commit c25904c
Show file tree

Hide file tree

Showing 5 changed files with 133 additions and 24 deletions.
diff --git a/doc/kipc.adoc b/doc/kipc.adoc
@@ -290,6 +290,62 @@ returned by a call to `get_task_dump_region` for the specified task.
 A copy of the memory referred to by the specified region, starting
 at `base` and running for `size` bytes.
 
+=== `find_faulted_task` (8)
+
+Scans forward from a given task index searching for a faulted task. If a faulted
+task is found, returns its index. Otherwise, returns zero, the index of the
+supervisor task, which is by definition not faulted.
+
+==== Request
+
+[source,rust]
+----
+struct FindFaultedTaskRequest {
+    starting_index: u32,
+}
+----
+
+==== Preconditions
+
+The `starting_index` must be a valid index for this system.
+
+==== Response
+
+[source,rust]
+----
+struct FindFaultedTaskResponse {
+    fault_index: u32,
+}
+----
+
+==== Notes
+
+This is intended for the common case of a supervisor scanning the task table in
+search of a fault. Compared to doing this manually with `read_task_status`,
+`find_faulted_task` has several advantages:
+
+1. Generates one kipc per failed task, plus one, rather than one kipc per
+   potentially failed task.
+
+2. Avoids serializing/transferring/deserializing the relatively complex
+   `TaskState` type, which supervisors rarely make use of in practice.
+
+3. Can be implemented in a way that doesn't potentially panic the supervisor.
+
+The "task ID or zero" return value is represented as an `Option<NonZeroUsize>`
+in the Rust API, so a typical use of this kipc looks like:
+
+[source,rust]
+----
+let mut next_task = 1; // skip supervisor
+while let Some(fault) = kipc::find_faulted_task(next_task) {
+    let fault = usize::from(fault);
+    // do things with the faulted task
+
+    next_task = fault + 1;
+}
+----
+
 == Receiving from the kernel
 
 The kernel never sends messages to tasks. It's simply not equipped to do so.

diff --git a/sys/abi/src/lib.rs b/sys/abi/src/lib.rs
@@ -490,6 +490,7 @@ pub enum Kipcnum {
     GetTaskDumpRegion = 6,
     ReadTaskDumpRegion = 7,
     SoftwareIrq = 8,
+    FindFaultedTask = 9,
 }
 
 impl core::convert::TryFrom<u16> for Kipcnum {
@@ -505,6 +506,7 @@ impl core::convert::TryFrom<u16> for Kipcnum {
             6 => Ok(Self::GetTaskDumpRegion),
             7 => Ok(Self::ReadTaskDumpRegion),
             8 => Ok(Self::SoftwareIrq),
+            9 => Ok(Self::FindFaultedTask),
             _ => Err(()),
         }
     }

diff --git a/sys/kern/src/kipc.rs b/sys/kern/src/kipc.rs
@@ -39,6 +39,7 @@ pub fn handle_kernel_message(
             read_task_dump_region(tasks, caller, args.message?, args.response?)
         }
         Ok(Kipcnum::SoftwareIrq) => software_irq(tasks, caller, args.message?),
+        Ok(Kipcnum::FindFaultedTask) => find_faulted_task(tasks, caller, args.message?, args.response?),
 
         _ => {
             // Task has sent an unknown message to the kernel. That's bad.
@@ -465,3 +466,38 @@ fn software_irq(
     tasks[caller].save_mut().set_send_response_and_length(0, 0);
     Ok(NextTask::Same)
 }
+
+fn find_faulted_task(
+    tasks: &mut [Task],
+    caller: usize,
+    message: USlice<u8>,
+    response: USlice<u8>,
+) -> Result<NextTask, UserError> {
+    if caller != 0 {
+        return Err(UserError::Unrecoverable(FaultInfo::SyscallUsage(
+            UsageError::NotSupervisor,
+        )));
+    }
+
+    let index = deserialize_message::<u32>(&tasks[caller], message)? as usize;
+
+    // Note: we explicitly permit index == tasks.len(), which causes us to wrap
+    // and end the search.
+    if index > tasks.len() {
+        return Err(UserError::Unrecoverable(FaultInfo::SyscallUsage(
+            UsageError::TaskOutOfRange,
+        )));
+    }
+
+    for i in index..tasks.len() {
+        if let TaskState::Faulted { .. } = tasks[i].state() {
+            let response_len = serialize_response(&mut tasks[caller], response, &(i as u32))?;
+            tasks[caller].save_mut().set_send_response_and_length(0, response_len);
+            return Ok(NextTask::Same);
+        }
+    }
+
+    let response_len = serialize_response(&mut tasks[caller], response, &0_u32)?;
+    tasks[caller].save_mut().set_send_response_and_length(0, response_len);
+    Ok(NextTask::Same)
+}
diff --git a/sys/userlib/src/kipc.rs b/sys/userlib/src/kipc.rs
@@ -4,6 +4,8 @@
 
 //! Operations implemented by IPC with the kernel task.
 
+use core::num::NonZeroUsize;
+
 use abi::{Kipcnum, TaskId};
 use zerocopy::AsBytes;
 
@@ -24,6 +26,20 @@ pub fn read_task_status(task: usize) -> abi::TaskState {
     ssmarshal::deserialize(&response[..len]).unwrap_lite().0
 }
 
+pub fn find_faulted_task(task: usize) -> Option<NonZeroUsize> {
+    // Coerce `task` to a known size (Rust doesn't assume that usize == u32)
+    let task = task as u32;
+    let mut response = 0_u32;
+    let (_, _) = sys_send(
+        TaskId::KERNEL,
+        Kipcnum::ReadTaskStatus as u16,
+        task.as_bytes(),
+        response.as_bytes_mut(),
+        &[],
+    );
+    NonZeroUsize::new(response as usize)
+}
+
 pub fn get_task_dump_region(
     task: usize,
     region: usize,

diff --git a/task/jefe/src/main.rs b/task/jefe/src/main.rs
@@ -307,37 +307,36 @@ impl idol_runtime::NotificationHandler for ServerImpl<'_> {
             // one task to have faulted since we last looked, but it's somewhat
             // unlikely since a fault causes us to immediately preempt. In any
             // case, let's assume we might have to handle multiple tasks.
-            //
-            // TODO: it would be fantastic to have a way of finding this out in
-            // one syscall.
-            for (i, status) in self.task_states.iter_mut().enumerate() {
+            let mut next_task = 1;
+            while let Some(fault_index) = kipc::find_faulted_task(next_task) {
+                let fault_index = usize::from(fault_index);
+                next_task = fault_index + 1;
+
+                let status = &mut self.task_states[fault_index];
+
                 // If we're aware that this task is in a fault state, don't
                 // bother making a syscall to enquire.
                 if status.holding_fault {
                     continue;
                 }
 
-                if let abi::TaskState::Faulted { .. } =
-                    kipc::read_task_status(i)
+                #[cfg(feature = "dump")]
                 {
-                    #[cfg(feature = "dump")]
-                    {
-                        // We'll ignore the result of dumping; it could fail
-                        // if we're out of space, but we don't have a way of
-                        // dealing with that right now.
-                        //
-                        // TODO: some kind of circular buffer?
-                        _ = dump::dump_task(self.dump_areas, i);
-                    }
-
-                    if status.disposition == Disposition::Restart {
-                        // Stand it back up
-                        kipc::restart_task(i, true);
-                    } else {
-                        // Mark this one off so we don't revisit it until
-                        // requested.
-                        status.holding_fault = true;
-                    }
+                    // We'll ignore the result of dumping; it could fail
+                    // if we're out of space, but we don't have a way of
+                    // dealing with that right now.
+                    //
+                    // TODO: some kind of circular buffer?
+                    _ = dump::dump_task(self.dump_areas, fault_index);
+                }
+
+                if status.disposition == Disposition::Restart {
+                    // Stand it back up
+                    kipc::restart_task(fault_index, true);
+                } else {
+                    // Mark this one off so we don't revisit it until
+                    // requested.
+                    status.holding_fault = true;
                 }
             }
         }