From a8a8a26a2e6948535b9b4134f8821f5bae897419 Mon Sep 17 00:00:00 2001
From: Jon Lange
Date: Tue, 24 Dec 2024 20:19:06 -0800
Subject: [PATCH] task: make task address ranges non-overlapping

Each task allocates task-specific memory within its own per-task
virtual address range, so each such allocation is accessible only
within the task that owns it. However, because pointers may be passed
across tasks, memory safety requires that no single address is ever
valid in more than one task at a time; otherwise, a reference created
in one task could point to different data when interpreted in another
task. Ensuring that per-task addresses are unique guarantees that any
per-task address that crosses a task boundary will cause a fault
instead of referring to the wrong data.

Signed-off-by: Jon Lange
---
 kernel/src/mm/address_space.rs | 14 +++---
 kernel/src/task/tasks.rs       | 84 ++++++++++++++++++++++++++--------
 2 files changed, 73 insertions(+), 25 deletions(-)

diff --git a/kernel/src/mm/address_space.rs b/kernel/src/mm/address_space.rs
index 506e5f580..499b46dbb 100644
--- a/kernel/src/mm/address_space.rs
+++ b/kernel/src/mm/address_space.rs
@@ -219,19 +219,19 @@ pub const SVSM_PERTASK_BASE: VirtAddr = virt_from_idx(PGTABLE_LVL3_IDX_PERTASK);
 pub const SVSM_PERTASK_END: VirtAddr = SVSM_PERTASK_BASE.const_add(SIZE_LEVEL3);
 
 /// Kernel stack for a task
-pub const SVSM_PERTASK_STACK_BASE: VirtAddr = SVSM_PERTASK_BASE;
+pub const SVSM_PERTASK_STACK_BASE_OFFSET: usize = 0;
 
 /// Kernel shadow stack for normal execution of a task
-pub const SVSM_PERTASK_SHADOW_STACK_BASE: VirtAddr =
-    SVSM_PERTASK_STACK_BASE.const_add(STACK_TOTAL_SIZE);
+pub const SVSM_PERTASK_SHADOW_STACK_BASE_OFFSET: usize =
+    SVSM_PERTASK_STACK_BASE_OFFSET + STACK_TOTAL_SIZE;
 
 /// Kernel shadow stack for exception handling
-pub const SVSM_PERTASK_EXCEPTION_SHADOW_STACK_BASE: VirtAddr =
-    SVSM_PERTASK_SHADOW_STACK_BASE.const_add(PAGE_SIZE);
+pub const SVSM_PERTASK_EXCEPTION_SHADOW_STACK_BASE_OFFSET: usize =
+    SVSM_PERTASK_SHADOW_STACK_BASE_OFFSET + PAGE_SIZE;
 
 /// SSE context save area for a task
-pub const SVSM_PERTASK_XSAVE_AREA_BASE: VirtAddr =
-    SVSM_PERTASK_EXCEPTION_SHADOW_STACK_BASE.const_add(PAGE_SIZE);
+pub const SVSM_PERTASK_XSAVE_AREA_BASE: usize =
+    SVSM_PERTASK_EXCEPTION_SHADOW_STACK_BASE_OFFSET + PAGE_SIZE;
 
 /// Page table self-map level 3 index
 pub const PGTABLE_LVL3_IDX_PTE_SELFMAP: usize = 493;
diff --git a/kernel/src/task/tasks.rs b/kernel/src/task/tasks.rs
index 8c019f7c1..5c16f71db 100644
--- a/kernel/src/task/tasks.rs
+++ b/kernel/src/task/tasks.rs
@@ -20,8 +20,7 @@ use crate::cpu::irq_state::EFLAGS_IF;
 use crate::cpu::percpu::{current_task, PerCpu};
 use crate::cpu::shadow_stack::is_cet_ss_supported;
 use crate::cpu::sse::{get_xsave_area_size, sse_restore_context};
-use crate::cpu::X86ExceptionContext;
-use crate::cpu::{irqs_enable, X86GeneralRegs};
+use crate::cpu::{irqs_enable, X86ExceptionContext, X86GeneralRegs};
 use crate::error::SvsmError;
 use crate::fs::{opendir, stdout_open, Directory, FileHandle};
 use crate::locking::{RWLock, SpinLock};
@@ -30,20 +29,55 @@ use crate::mm::vm::{
     Mapping, ShadowStackInit, VMFileMappingFlags, VMKernelShadowStack, VMKernelStack, VMR,
 };
 use crate::mm::{
-    mappings::create_anon_mapping, mappings::create_file_mapping, PageBox, VMMappingGuard,
-    SVSM_PERTASK_BASE, SVSM_PERTASK_END, SVSM_PERTASK_EXCEPTION_SHADOW_STACK_BASE,
-    SVSM_PERTASK_SHADOW_STACK_BASE, SVSM_PERTASK_STACK_BASE, USER_MEM_END, USER_MEM_START,
+    alloc::AllocError, mappings::create_anon_mapping, mappings::create_file_mapping, PageBox,
+    VMMappingGuard, SIZE_LEVEL3, SVSM_PERTASK_BASE, SVSM_PERTASK_END,
+    SVSM_PERTASK_EXCEPTION_SHADOW_STACK_BASE_OFFSET, SVSM_PERTASK_SHADOW_STACK_BASE_OFFSET,
+    SVSM_PERTASK_STACK_BASE_OFFSET, USER_MEM_END, USER_MEM_START,
 };
 use crate::platform::SVSM_PLATFORM;
 use crate::syscall::{Obj, ObjError, ObjHandle};
 use crate::types::{SVSM_USER_CS, SVSM_USER_DS};
+use crate::utils::bitmap_allocator::{BitmapAllocator, BitmapAllocator1024};
 use crate::utils::MemoryRegion;
 use intrusive_collections::{intrusive_adapter, LinkedListAtomicLink};
 
 use super::schedule::{current_task_terminated, schedule};
 
+pub static KTASK_VADDR_BITMAP: SpinLock<BitmapAllocator1024> =
+    SpinLock::new(BitmapAllocator1024::new_empty());
+
 pub const INITIAL_TASK_ID: u32 = 1;
 
+// The task virtual range guard manages the allocation of a task virtual
+// address range within the task address space. The address range is reserved
+// as long as the guard continues to exist.
+#[derive(Debug)]
+struct TaskVirtualRegionGuard {
+    index: usize,
+}
+
+impl TaskVirtualRegionGuard {
+    fn alloc() -> Result<Self, SvsmError> {
+        let index = KTASK_VADDR_BITMAP
+            .lock()
+            .alloc(1, 1)
+            .ok_or(SvsmError::Alloc(AllocError::OutOfMemory))?;
+        Ok(Self { index })
+    }
+
+    fn vaddr_region(&self) -> MemoryRegion<VirtAddr> {
+        const SPAN: usize = SIZE_LEVEL3 / BitmapAllocator1024::CAPACITY;
+        let base = SVSM_PERTASK_BASE + (self.index * SPAN);
+        MemoryRegion::<VirtAddr>::new(base, SPAN)
+    }
+}
+
+impl Drop for TaskVirtualRegionGuard {
+    fn drop(&mut self) {
+        KTASK_VADDR_BITMAP.lock().free(self.index, 1);
+    }
+}
+
 #[derive(PartialEq, Debug, Copy, Clone, Default)]
 pub enum TaskState {
     RUNNING,
@@ -137,6 +171,11 @@ pub struct Task {
     /// Page table that is loaded when the task is scheduled
     pub page_table: SpinLock<PageBox<PageTable>>,
 
+    /// Virtual address region that has been allocated for this task.
+    /// This is not referenced but must be stored so that it is dropped when
+    /// the Task is dropped.
+    _ktask_region: TaskVirtualRegionGuard,
+
     /// Task virtual memory range for use at CPL 0
     vm_kernel_range: VMR,
 
@@ -168,7 +207,10 @@ pub struct Task {
 // SAFETY: Send + Sync is required for Arc to implement Send. All members
 // of `Task` are Send + Sync except for the intrusive_collection links, which
 // are only Send. The only access to these is via the intrusive_adapter!
-// generated code which does not use them concurrently across threads.
+// generated code which does not use them concurrently across threads. The
+// kernel address cell is also not Sync, but it is only populated during
+// task creation and can safely be accessed by multiple threads once it has
+// been populated.
 unsafe impl Sync for Task {}
 
 pub type TaskPointer = Arc<Task>;
@@ -214,9 +256,16 @@ impl Task {
         cpu.populate_page_table(&mut pgtable);
 
-        let vm_kernel_range = VMR::new(SVSM_PERTASK_BASE, SVSM_PERTASK_END, PTEntryFlags::empty());
-        // SAFETY: The kernel mode task address range is fully aligned to
-        // top-level paging boundaries.
+        let ktask_region = TaskVirtualRegionGuard::alloc()?;
+        let vaddr_region = ktask_region.vaddr_region();
+        let vm_kernel_range = VMR::new(
+            vaddr_region.start(),
+            vaddr_region.end(),
+            PTEntryFlags::empty(),
+        );
+        // SAFETY: The selected kernel mode task address range is the only
+        // range that will live within the top-level entry associated with the
+        // task address space.
         unsafe {
             vm_kernel_range.initialize()?;
         }
 
@@ -237,24 +286,24 @@ impl Task {
         if is_cet_ss_supported() {
             let shadow_stack;
             (shadow_stack, shadow_stack_offset) = VMKernelShadowStack::new(
-                SVSM_PERTASK_SHADOW_STACK_BASE,
+                vaddr_region.start() + SVSM_PERTASK_SHADOW_STACK_BASE_OFFSET,
                 ShadowStackInit::Normal {
                     entry_return,
                     exit_return,
                 },
             )?;
             vm_kernel_range.insert_at(
-                SVSM_PERTASK_SHADOW_STACK_BASE,
+                vaddr_region.start() + SVSM_PERTASK_SHADOW_STACK_BASE_OFFSET,
                 Arc::new(Mapping::new(shadow_stack)),
             )?;
 
             let shadow_stack;
             (shadow_stack, exception_shadow_stack) = VMKernelShadowStack::new(
-                SVSM_PERTASK_EXCEPTION_SHADOW_STACK_BASE,
+                vaddr_region.start() + SVSM_PERTASK_EXCEPTION_SHADOW_STACK_BASE_OFFSET,
                 ShadowStackInit::Exception,
             )?;
             vm_kernel_range.insert_at(
-                SVSM_PERTASK_EXCEPTION_SHADOW_STACK_BASE,
+                vaddr_region.start() + SVSM_PERTASK_EXCEPTION_SHADOW_STACK_BASE_OFFSET,
                 Arc::new(Mapping::new(shadow_stack)),
             )?;
         }
@@ -265,15 +314,13 @@ impl Task {
         } else {
             Self::allocate_ktask_stack(cpu, args.entry, xsa_addr)?
         };
-        vm_kernel_range.insert_at(SVSM_PERTASK_STACK_BASE, stack)?;
+        let stack_start = vaddr_region.start() + SVSM_PERTASK_STACK_BASE_OFFSET;
+        vm_kernel_range.insert_at(stack_start, stack)?;
 
         vm_kernel_range.populate(&mut pgtable);
 
         // Remap at the per-task offset
-        let bounds = MemoryRegion::new(
-            SVSM_PERTASK_STACK_BASE + raw_bounds.start().into(),
-            raw_bounds.len(),
-        );
+        let bounds = MemoryRegion::new(stack_start + raw_bounds.start().into(), raw_bounds.len());
 
         Ok(Arc::new(Task {
             rsp: bounds
@@ -286,6 +333,7 @@ impl Task {
             stack_bounds: bounds,
             exception_shadow_stack,
             page_table: SpinLock::new(pgtable),
+            _ktask_region: ktask_region,
             vm_kernel_range,
             vm_user_range: args.vm_user_range,
             sched_state: RWLock::new(TaskSchedState {
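
The non-overlap guarantee comes from carving the per-task level-3 range into
BitmapAllocator1024::CAPACITY fixed-size slots and handing each task exactly
one slot for its lifetime. The standalone sketch below illustrates only that
slot arithmetic; it is not part of the patch, and the 512 GiB level-3 span,
the 1024-slot capacity, and the base address are assumptions chosen for the
example rather than values quoted from the kernel sources.

    // Illustrative sketch of the per-task slot arithmetic (assumed values).
    const SIZE_LEVEL3: usize = 512 * 1024 * 1024 * 1024; // assumed: one top-level entry
    const CAPACITY: usize = 1024; // assumed: BitmapAllocator1024::CAPACITY
    const SPAN: usize = SIZE_LEVEL3 / CAPACITY; // 512 MiB per task

    // Placeholder base address; the kernel uses SVSM_PERTASK_BASE here.
    const PERTASK_BASE: usize = 0xffff_8000_0000_0000;

    /// Start and end of the virtual region owned by the task holding `index`.
    fn task_region(index: usize) -> (usize, usize) {
        let start = PERTASK_BASE + index * SPAN;
        (start, start + SPAN)
    }

    fn main() {
        let (s0, e0) = task_region(0);
        let (s1, e1) = task_region(1);
        // Adjacent slots share no addresses, so a pointer into task 0's
        // region can never be a valid mapping in task 1's address space.
        assert!(e0 <= s1);
        println!("task 0: {s0:#x}..{e0:#x}");
        println!("task 1: {s1:#x}..{e1:#x}");
    }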