From 59643ff7bf5d5704ca9fbddb782fa3d3be231c81 Mon Sep 17 00:00:00 2001 From: JJ Roberts-White Date: Wed, 26 Jan 2022 13:12:02 +1100 Subject: [PATCH] Kernel: Fix SMP magic, signal handler stack alignment, other minor fixes --- .gitignore | 1 + Kernel/include/Arch/x86_64/CPU.h | 17 +++++++++-------- Kernel/include/List.h | 2 +- Kernel/src/Arch/x86_64/ELF.cpp | 9 +++++---- Kernel/src/Arch/x86_64/Entry.asm | 7 +++++-- Kernel/src/Arch/x86_64/HAL.cpp | 22 ++++++++++------------ Kernel/src/Arch/x86_64/PCI.cpp | 24 ++++++++++++++++++++---- Kernel/src/Arch/x86_64/SMP.cpp | 20 ++++++++++---------- Kernel/src/Arch/x86_64/Syscalls.cpp | 1 + Kernel/src/Arch/x86_64/Thread.cpp | 9 +++++---- Kernel/src/Arch/x86_64/Timer.cpp | 2 ++ 11 files changed, 69 insertions(+), 45 deletions(-) diff --git a/.gitignore b/.gitignore index 5dcfdca4..0daffe67 100755 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ .cache Build serial.log +mlibc Kernel/subprojects/* !Kernel/subprojects/*.wrap Initrd/* diff --git a/Kernel/include/Arch/x86_64/CPU.h b/Kernel/include/Arch/x86_64/CPU.h index 31a6b402..df1017d0 100755 --- a/Kernel/include/Arch/x86_64/CPU.h +++ b/Kernel/include/Arch/x86_64/CPU.h @@ -138,12 +138,13 @@ typedef struct { uint8_t xmm[16][16]; // XMM Registers } __attribute__((packed)) fx_state_t; -static inline int CheckInterrupts() { - unsigned long flags; +ALWAYS_INLINE static bool CheckInterrupts() { + volatile unsigned long flags; asm volatile("pushf;" - "pop %%rax;" - : "=a"(flags)::"cc"); - return (flags & 0x200); + "pop %0;" + "andq $0x200, %0;" + : "=rm"(flags)::"memory", "cc"); + return flags != 0; } cpuid_info_t CPUID(); @@ -155,14 +156,14 @@ ALWAYS_INLINE uintptr_t GetRBP() { return val; } -ALWAYS_INLINE uintptr_t GetCR3() { +ALWAYS_INLINE uintptr_t GetCR3() { volatile uintptr_t val; asm volatile("mov %%cr3, %0" : "=r"(val)); return val; } -static ALWAYS_INLINE void SetCPULocal(CPU* val) { +static ALWAYS_INLINE void SetCPULocal(CPU* val) { val->self = val; asm volatile("wrmsr" ::"a"((uintptr_t)val & 0xFFFFFFFF) /*Value low*/, "d"(((uintptr_t)val >> 32) & 0xFFFFFFFF) /*Value high*/, "c"(0xC0000102) /*Set Kernel GS Base*/); @@ -170,7 +171,7 @@ static ALWAYS_INLINE void SetCPULocal(CPU* val) { "d"(((uintptr_t)val >> 32) & 0xFFFFFFFF) /*Value high*/, "c"(0xC0000101) /*Set Kernel GS Base*/); } -static ALWAYS_INLINE CPU* GetCPULocal() { +static ALWAYS_INLINE CPU* GetCPULocal() { CPU* ret; int intEnable = CheckInterrupts(); asm("cli"); diff --git a/Kernel/include/List.h b/Kernel/include/List.h index ed617bb5..b177f03c 100755 --- a/Kernel/include/List.h +++ b/Kernel/include/List.h @@ -104,7 +104,7 @@ template class FastList final { } // Copying a FastList is asking for trouble - FastList(FastList& other) = delete; + FastList(const FastList& other) = delete; ALWAYS_INLINE FastList& operator=(const FastList& other) = delete; ~FastList() {} diff --git a/Kernel/src/Arch/x86_64/ELF.cpp b/Kernel/src/Arch/x86_64/ELF.cpp index 3ec27080..67c16715 100755 --- a/Kernel/src/Arch/x86_64/ELF.cpp +++ b/Kernel/src/Arch/x86_64/ELF.cpp @@ -53,13 +53,14 @@ elf_info_t LoadELFSegments(Process* proc, void* _elf, uintptr_t base) { for (int i = 0; i < elfHdr.phNum; i++) { elf64_program_header_t elfPHdr = *((elf64_program_header_t*)(elf + elfHdr.phOff + i * elfHdr.phEntrySize)); + assert(elfPHdr.fileSize <= elfPHdr.memSize); + if (elfPHdr.type == PT_LOAD && elfPHdr.memSize > 0) { - asm("cli"); - asm volatile("mov %%rax, %%cr3" ::"a"(proc->GetPageMap()->pml4Phys)); + asm volatile("cli; mov %%rax, %%cr3" ::"a"(proc->GetPageMap()->pml4Phys) : "memory"); + Log::Info("dst: %x", base + elfPHdr.vaddr); memset((void*)(base + elfPHdr.vaddr + elfPHdr.fileSize), 0, (elfPHdr.memSize - elfPHdr.fileSize)); memcpy((void*)(base + elfPHdr.vaddr), (void*)(elf + elfPHdr.offset), elfPHdr.fileSize); - asm volatile("mov %%rax, %%cr3" ::"a"(pml4Phys)); - asm("sti"); + asm volatile("mov %%rax, %%cr3; sti" ::"a"(pml4Phys) : "memory"); } else if (elfPHdr.type == PT_PHDR) { elfInfo.pHdrSegment = base + elfPHdr.vaddr; } else if (elfPHdr.type == PT_INTERP) { diff --git a/Kernel/src/Arch/x86_64/Entry.asm b/Kernel/src/Arch/x86_64/Entry.asm index b9638b0e..c5ef1f51 100755 --- a/Kernel/src/Arch/x86_64/Entry.asm +++ b/Kernel/src/Arch/x86_64/Entry.asm @@ -214,7 +214,6 @@ BITS 64 cli hlt - fb_addr: dd 0 fb_pitch: @@ -293,6 +292,9 @@ entry64: hlt entryst2: + cli + cld + lgdt [GDT64Pointer64] mov rbx, rdi ; Save RDI as it contains bootloader information @@ -303,9 +305,10 @@ entryst2: xor rax, rax rep stosb + mov rsp, stack_top + mov rdi, rbx ; Restore RDI - mov rsp, stack_top mov rbp, rsp push 0x10 diff --git a/Kernel/src/Arch/x86_64/HAL.cpp b/Kernel/src/Arch/x86_64/HAL.cpp index 40c7ed2d..96850d7f 100755 --- a/Kernel/src/Arch/x86_64/HAL.cpp +++ b/Kernel/src/Arch/x86_64/HAL.cpp @@ -40,23 +40,18 @@ void InitMultiboot2(multiboot2_info_header_t* mbInfo); void InitStivale2(stivale2_info_header_t* st2Info); void InitCore() { // ALWAYS call this first + asm volatile("cli"); Serial::Initialize(); - Log::Info("Initializing Lemon...\r\n"); - - asm("cli"); + Serial::Write("Initializing Lemon...\r\n"); + + // Initialize Paging/Virtual Memory Manager + Memory::InitializeVirtualMemory(); // Initialize IDT IDT::Initialize(); - // Initialize Paging/Virtual Memory Manager - Memory::InitializeVirtualMemory(); - // Initialize Physical Memory Allocator Memory::InitializePhysicalAllocator(&mem_info); - - Log::Info("Initializing System Timer..."); - Timer::Initialize(1600); - Log::Write("OK"); } void InitVideo() { @@ -96,6 +91,10 @@ void InitExtra() { PCI::Init(); Log::Write("OK"); + Log::Info("Initializing System Timer..."); + Timer::Initialize(1600); + Log::Write("OK"); + Log::Info("Initializing Local and I/O APIC..."); APIC::Initialize(); Log::Write("OK"); @@ -258,10 +257,9 @@ void InitMultiboot2(multiboot2_info_header_t* mbInfo) { } void InitStivale2(stivale2_info_header_t* st2Info) { - uintptr_t tagPhys = st2Info->tags; - InitCore(); + uintptr_t tagPhys = st2Info->tags; char* cmdLine = nullptr; while (tagPhys) { diff --git a/Kernel/src/Arch/x86_64/PCI.cpp b/Kernel/src/Arch/x86_64/PCI.cpp index cf6d6d8c..a6b6e206 100755 --- a/Kernel/src/Arch/x86_64/PCI.cpp +++ b/Kernel/src/Arch/x86_64/PCI.cpp @@ -9,11 +9,12 @@ #include namespace PCI { -Vector* devices; -PCIInfo* unknownDevice; -PCIMCFG* mcfgTable; +lock_t devicesLock = 0; +Vector* devices = nullptr; +PCIInfo* unknownDevice = nullptr; +PCIMCFG* mcfgTable = nullptr; PCIConfigurationAccessMode configMode = PCIConfigurationAccessMode::Legacy; -Vector* enhancedBaseAddresses; // Base addresses for enhanced (PCI Express) configuration mechanism +Vector* enhancedBaseAddresses = nullptr; // Base addresses for enhanced (PCI Express) configuration mechanism uint32_t ConfigReadDword(uint8_t bus, uint8_t slot, uint8_t func, uint8_t offset) { uint32_t address = (uint32_t)((bus << 16) | (slot << 11) | (func << 8) | (offset & 0xfc) | 0x80000000); @@ -102,6 +103,8 @@ bool CheckDevice(uint8_t bus, uint8_t device, uint8_t func) { } bool FindDevice(uint16_t deviceID, uint16_t vendorID) { + ScopedSpinLock lockDevs(devicesLock); + for (unsigned i = 0; i < devices->get_length(); i++) { if (devices->get_at(i).deviceID == deviceID && devices->get_at(i).vendorID == vendorID) { return true; @@ -112,6 +115,8 @@ bool FindDevice(uint16_t deviceID, uint16_t vendorID) { } bool FindGenericDevice(uint16_t classCode, uint16_t subclass) { + ScopedSpinLock lockDevs(devicesLock); + for (unsigned i = 0; i < devices->get_length(); i++) { if (devices->get_at(i).classCode == classCode && devices->get_at(i).subclass == subclass) { return true; @@ -122,6 +127,8 @@ bool FindGenericDevice(uint16_t classCode, uint16_t subclass) { } const PCIInfo& GetPCIDevice(uint16_t deviceID, uint16_t vendorID) { + ScopedSpinLock lockDevs(devicesLock); + for (PCIInfo& dev : *devices) { if (dev.deviceID == deviceID && dev.vendorID == vendorID) { return dev; @@ -133,6 +140,9 @@ const PCIInfo& GetPCIDevice(uint16_t deviceID, uint16_t vendorID) { } const PCIInfo& GetGenericPCIDevice(uint8_t classCode, uint8_t subclass) { + assert(devices); + ScopedSpinLock lockDevs(devicesLock); + for (PCIInfo& dev : *devices) { if (dev.classCode == classCode && dev.subclass == subclass) { return dev; @@ -144,6 +154,8 @@ const PCIInfo& GetGenericPCIDevice(uint8_t classCode, uint8_t subclass) { } void EnumeratePCIDevices(uint16_t deviceID, uint16_t vendorID, void (*func)(const PCIInfo&)) { + ScopedSpinLock lockDevs(devicesLock); + for (PCIInfo& dev : *devices) { if (dev.deviceID == deviceID && dev.vendorID == vendorID) { func(dev); @@ -152,6 +164,8 @@ void EnumeratePCIDevices(uint16_t deviceID, uint16_t vendorID, void (*func)(cons } void EnumerateGenericPCIDevices(uint8_t classCode, uint8_t subclass, void (*func)(const PCIInfo&)) { + ScopedSpinLock lockDevs(devicesLock); + for (PCIInfo& dev : *devices) { if (dev.classCode == classCode && dev.subclass == subclass) { func(dev); @@ -160,6 +174,8 @@ void EnumerateGenericPCIDevices(uint8_t classCode, uint8_t subclass, void (*func } int AddDevice(int bus, int slot, int func) { + ScopedSpinLock lockDevs(devicesLock); + PCIInfo device; device.vendorID = GetVendor(bus, slot, func); diff --git a/Kernel/src/Arch/x86_64/SMP.cpp b/Kernel/src/Arch/x86_64/SMP.cpp index bdd8a691..c05db3ed 100755 --- a/Kernel/src/Arch/x86_64/SMP.cpp +++ b/Kernel/src/Arch/x86_64/SMP.cpp @@ -24,7 +24,7 @@ static inline void wait(uint64_t ms) { extern void* _binary_SMPTrampoline_bin_start; extern void* _binary_SMPTrampoline_bin_size; -volatile uint16_t* smpMagic = (uint16_t*)SMP_MAGIC; +volatile uint16_t* smpMagic = (uint16_t*)SMP_TRAMPOLINE_DATA_START_FLAG; volatile uint16_t* smpID = (uint16_t*)SMP_TRAMPOLINE_CPU_ID; gdt_ptr_t* smpGDT = (gdt_ptr_t*)SMP_TRAMPOLINE_GDT_PTR; volatile uint64_t* smpCR3 = (uint64_t*)SMP_TRAMPOLINE_CR3; @@ -58,7 +58,6 @@ void SMPEntry(uint16_t id) { asm volatile("lidt %0" ::"m"(cpu->idtPtr)); TSS::InitializeTSS(&cpu->tss, cpu->gdt); - APIC::Local::Enable(); cpu->runQueue = new FastList(); @@ -66,7 +65,7 @@ void SMPEntry(uint16_t id) { doneInit = true; asm("sti"); - + for (;;) ; } @@ -84,29 +83,30 @@ void InitializeCPU(uint16_t id) { *smpMagic = 0; // Set magic to 0 *smpID = id; // Set ID to our CPU's ID *smpEntry2 = (uint64_t)SMPEntry; // Our second entry point - *smpStack = (uint64_t)Memory::KernelAllocate4KPages(1); // 4K stack - Memory::KernelMapVirtualMemory4K(Memory::AllocatePhysicalMemoryBlock(), *smpStack, 1); - *smpStack += PAGE_SIZE_4K; + *smpStack = (uint64_t)kmalloc(8192); + *smpStack += 8192; *smpGDT = GDT64Pointer64; asm volatile("mov %%cr3, %%rax" : "=a"(*smpCR3)); APIC::Local::SendIPI(id, ICR_DSH_DEST, ICR_MESSAGE_TYPE_INIT, 0); + wait(50); - wait(20); + APIC::Local::SendIPI(id, ICR_DSH_DEST, ICR_MESSAGE_TYPE_STARTUP, (SMP_TRAMPOLINE_ENTRY >> 12)); + wait(50); if ((*smpMagic) != 0xB33F) { // Check if the trampoline code set the flag to let us know it has started APIC::Local::SendIPI(id, ICR_DSH_DEST, ICR_MESSAGE_TYPE_STARTUP, (SMP_TRAMPOLINE_ENTRY >> 12)); - wait(80); + wait(100); } if ((*smpMagic) != 0xB33F) { - wait(100); + wait(200); } if ((*smpMagic) != 0xB33F) { - Log::Error("[SMP] Failed to start CPU #%d", id); + Log::Error("[SMP] Failed to start CPU #%d, magic: %x", id, *smpMagic); return; } diff --git a/Kernel/src/Arch/x86_64/Syscalls.cpp b/Kernel/src/Arch/x86_64/Syscalls.cpp index 49794ae0..b1127816 100755 --- a/Kernel/src/Arch/x86_64/Syscalls.cpp +++ b/Kernel/src/Arch/x86_64/Syscalls.cpp @@ -3535,6 +3535,7 @@ long SysSignalReturn(RegisterContext* r) { uint64_t* threadStack = reinterpret_cast(r->rsp); threadStack++; // Discard signal handler address + threadStack++; // Discard padding th->signalMask = *(threadStack++); // Get the old signal mask // Do not allow the thread to modify CS or SS memcpy(r, threadStack, offsetof(RegisterContext, cs)); diff --git a/Kernel/src/Arch/x86_64/Thread.cpp b/Kernel/src/Arch/x86_64/Thread.cpp index 291f22cf..cff5b90e 100644 --- a/Kernel/src/Arch/x86_64/Thread.cpp +++ b/Kernel/src/Arch/x86_64/Thread.cpp @@ -48,13 +48,13 @@ Thread::Thread(Process* _parent, pid_t _tid) ((fx_state_t*)fxState)->mxcsrMask = 0xffbf; ((fx_state_t*)fxState)->fcw = 0x33f; // Default FPU Control Word State - kernelStack = Memory::KernelAllocate4KPages(32); // Allocate Memory For Kernel Stack (128KB) - for (int i = 0; i < 32; i++) { + kernelStack = Memory::KernelAllocate4KPages(64); // Allocate Memory For Kernel Stack (256KB) + for (int i = 0; i < 64; i++) { Memory::KernelMapVirtualMemory4K(Memory::AllocatePhysicalMemoryBlock(), reinterpret_cast(kernelStack) + PAGE_SIZE_4K * i, 1); } - memset(kernelStack, 0, PAGE_SIZE_4K * 32); - kernelStack = reinterpret_cast(reinterpret_cast(kernelStack) + PAGE_SIZE_4K * 32); + memset(kernelStack, 0, PAGE_SIZE_4K * 64); + kernelStack = reinterpret_cast(reinterpret_cast(kernelStack) + PAGE_SIZE_4K * 64); } void Thread::Signal(int signal) { @@ -166,6 +166,7 @@ void Thread::HandlePendingSignal(RegisterContext* regs) { uint64_t* stack = reinterpret_cast((regs->rsp & (~0xfULL)) - sizeof(RegisterContext)); *reinterpret_cast(stack) = *regs; + *(--stack) = 0; // Pad out the stack *(--stack) = oldSignalMask; // This could probably be placed in a register but it makes our stack nice and aligned *(--stack) = reinterpret_cast(handler.userHandler); diff --git a/Kernel/src/Arch/x86_64/Timer.cpp b/Kernel/src/Arch/x86_64/Timer.cpp index 7e82019f..a40ae0d8 100755 --- a/Kernel/src/Arch/x86_64/Timer.cpp +++ b/Kernel/src/Arch/x86_64/Timer.cpp @@ -156,6 +156,8 @@ void Handler(void*, RegisterContext* r) { void Initialize(uint32_t freq) { IDT::RegisterInterruptHandler(IRQ0, Handler); + new (&sleeping) FastList(); + frequency = freq; uint32_t divisor = 1193182 / freq;