diff --git a/src/xenia/app/xenia_main.cc b/src/xenia/app/xenia_main.cc index c62662e991..158804300e 100644 --- a/src/xenia/app/xenia_main.cc +++ b/src/xenia/app/xenia_main.cc @@ -65,7 +65,7 @@ DEFINE_string(apu, "any", "Audio system. Use: [any, nop, sdl, xaudio2]", "APU"); DEFINE_string(gpu, "any", "Graphics system. Use: [any, d3d12, vulkan, null]", "GPU"); -DEFINE_string(hid, "any", "Input system. Use: [any, nop, sdl, winkey, xinput]", +DEFINE_string(hid, "xinput", "Input system. Use: [any, nop, sdl, winkey, xinput]", "HID"); DEFINE_path( diff --git a/src/xenia/apu/audio_system.cc b/src/xenia/apu/audio_system.cc index 1b0093424d..0fadc9cb44 100644 --- a/src/xenia/apu/audio_system.cc +++ b/src/xenia/apu/audio_system.cc @@ -21,8 +21,9 @@ #include "xenia/base/string_buffer.h" #include "xenia/base/threading.h" #include "xenia/cpu/thread_state.h" +#include "xenia/emulator.h" #include "xenia/kernel/kernel_state.h" - +#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h" // As with normal Microsoft, there are like twelve different ways to access // the audio APIs. Early games use XMA*() methods almost exclusively to touch // decoders. Later games use XAudio*() and direct memory writes to the XMA @@ -35,13 +36,19 @@ // and let the normal AudioSystem handling take it, to prevent duplicate // implementations. They can be found in xboxkrnl_audio_xma.cc -DEFINE_uint32( - apu_max_queued_frames, 64, - "Allows changing max buffered audio frames to reduce audio delay. Minimum is 16.", "APU"); +DEFINE_uint32(apu_max_queued_frames, 64, + "Allows changing max buffered audio frames to reduce audio " + "delay. 
Minimum is 16.", + "APU"); +#define AUDIOSYSTEM_NOWAIT_FOR_CALLBACK 1 namespace xe { namespace apu { - +struct GuestMessage { + threading::AtomicListEntry list_entry; + uint32_t client_callback_; + uint32_t client_callback_arg_; +}; AudioSystem::AudioSystem(cpu::Processor* processor) : memory_(processor->memory()), processor_(processor), @@ -60,6 +67,7 @@ AudioSystem::AudioSystem(cpu::Processor* processor) xma_decoder_ = std::make_unique(processor_); resume_event_ = xe::threading::Event::CreateAutoResetEvent(false); + signal_event_ = xe::threading::Event::CreateAutoResetEvent(false); assert_not_null(resume_event_); } @@ -74,21 +82,78 @@ X_STATUS AudioSystem::Setup(kernel::KernelState* kernel_state) { if (result) { return result; } - + kernel_state_ = kernel_state; worker_running_ = true; - worker_thread_ = kernel::object_ref( - new kernel::XHostThread(kernel_state, 128 * 1024, 0, [this]() { - WorkerThreadMain(); - return 0; - }, kernel_state->GetSystemProcess())); + + threading::Thread::CreationParameters crparams{}; + worker_thread_ = threading::Thread::Create( + crparams, std::bind(&AudioSystem::WorkerThreadMain, this)); + Emulator::Get()->RegisterGuestHardwareBlockThread(worker_thread_.get()); // As we run audio callbacks the debugger must be able to suspend us. 
- worker_thread_->set_can_debugger_suspend(true); worker_thread_->set_name("Audio Worker"); - worker_thread_->Create(); + worker_thread_->set_affinity_mask(0b11000000); return X_STATUS_SUCCESS; } - +void AudioSystem::StartGuestWorkerThread(kernel::KernelState* kernel) { + xenia_assert(!guest_thread_); + auto context = cpu::ThreadState::GetContext(); + guest_thread_ = + kernel::object_ref(new kernel::XHostThread( + kernel, 65536U, 0x10000083u, + [this]() { + std::vector messages_rev{}; + messages_rev.reserve(128); + auto context = cpu::ThreadState::GetContext(); + while (true) { + kernel::xboxkrnl::xeKeWaitForSingleObject( + context, + &context->kernel_state->GetKernelGuestGlobals(context) + ->audio_interrupt_dpc_event_.header, + 0, 0, 0, nullptr); + + auto callbacks = guest_worker_messages_.Flush(); + + if (!callbacks) { + //xenia_assert(false); + continue; + } + kernel::xboxkrnl::xeKeEnterCriticalRegion(context); + while (callbacks) { + messages_rev.push_back((GuestMessage*)callbacks); + callbacks = callbacks->next_; + context->CheckInterrupt(); + } + std::reverse(messages_rev.begin(), messages_rev.end()); + + for (auto&& order : messages_rev) { + uint64_t args[] = {order->client_callback_arg_}; + auto kpcr = kernel::GetKPCR(context); + + auto current_irql = kpcr->current_irql; + + xenia_assert(current_irql == kernel::IRQL_PASSIVE); + this->processor()->Execute(context->thread_state(), + order->client_callback_, args, + countof(args)); + delete order; + context->CheckInterrupt(); +#if AUDIOSYSTEM_NOWAIT_FOR_CALLBACK == 0 + signal_event_->Set(); +#endif + } + messages_rev.clear(); + kernel::xboxkrnl::xeKeLeaveCriticalRegion(context); + } + return true; + }, + kernel->GetSystemProcess())); + guest_thread_->Create(); + kernel::xboxkrnl::xeKeSetPriorityThread( + context, guest_thread_->guest_object(), 25); + kernel::xboxkrnl::xeKeResumeThread( + context, guest_thread_->guest_object()); +} void AudioSystem::WorkerThreadMain() { // Initialize driver and ringbuffer. 
Initialize(); @@ -120,19 +185,28 @@ void AudioSystem::WorkerThreadMain() { bool pumped = false; if (result.first == xe::threading::WaitResult::kSuccess) { auto index = result.second; - auto global_lock = global_critical_region_.Acquire(); uint32_t client_callback = clients_[index].callback; uint32_t client_callback_arg = clients_[index].wrapped_callback_arg; global_lock.unlock(); - - if (client_callback) { - SCOPE_profile_cpu_i("apu", "xe::apu::AudioSystem->client_callback"); - uint64_t args[] = {client_callback_arg}; - processor_->Execute(worker_thread_->thread_state(), client_callback, - args, xe::countof(args)); + client_callback_arg_in_ = client_callback_arg; + client_callback_in_ = client_callback; + + auto msg = new GuestMessage(); + msg->client_callback_ = client_callback_in_; + msg->client_callback_arg_ = client_callback_arg_in_; + guest_worker_messages_.Push(&msg->list_entry); + { + cpu::SendInterruptArguments interrupt_arguments{}; + interrupt_arguments.ipi_func = &kernel::KernelState::AudioInterrupt; + interrupt_arguments.ud = nullptr; + interrupt_arguments.wait_done = false; + interrupt_arguments.irql_ = kernel::IRQL_AUDIO; + processor()->GetCPUThread(4)->SendGuestIPI(interrupt_arguments); } - +#if AUDIOSYSTEM_NOWAIT_FOR_CALLBACK == 0 + threading::Wait(signal_event_.get(), false); +#endif pumped = true; } @@ -167,7 +241,8 @@ void AudioSystem::Shutdown() { worker_running_ = false; shutdown_event_->Set(); if (worker_thread_) { - worker_thread_->Wait(0, 0, 0, nullptr); + threading::Wait(worker_thread_.get(), false); + Emulator::Get()->UnregisterGuestHardwareBlockThread(worker_thread_.get()); worker_thread_.reset(); } } @@ -290,7 +365,9 @@ bool AudioSystem::Restore(ByteStream* stream) { auto status = CreateDriver(id, client_semaphore, &driver); if (XFAILED(status)) { XELOGE( - "AudioSystem::Restore - Call to CreateDriver failed with status " + "AudioSystem::Restore - " + "Call to CreateDriver " + "failed with status " "{:08X}", status); return false; diff 
--git a/src/xenia/apu/audio_system.h b/src/xenia/apu/audio_system.h index 54e2380d91..7159d2c4c4 100644 --- a/src/xenia/apu/audio_system.h +++ b/src/xenia/apu/audio_system.h @@ -51,6 +51,8 @@ class AudioSystem { void Pause(); void Resume(); + //called by kernelstate in boot. actually called prior to Setup + void StartGuestWorkerThread(kernel::KernelState* kernel); protected: explicit AudioSystem(cpu::Processor* processor); @@ -58,6 +60,8 @@ class AudioSystem { void WorkerThreadMain(); + + virtual X_STATUS CreateDriver(size_t index, xe::threading::Semaphore* semaphore, AudioDriver** out_driver) = 0; @@ -69,12 +73,13 @@ class AudioSystem { Memory* memory_ = nullptr; cpu::Processor* processor_ = nullptr; + kernel::KernelState* kernel_state_ = nullptr; std::unique_ptr xma_decoder_; uint32_t queued_frames_; std::atomic worker_running_ = {false}; - kernel::object_ref worker_thread_; - + std::unique_ptr worker_thread_; + kernel::object_ref guest_thread_; xe::global_critical_region global_critical_region_; static const size_t kMaximumClientCount = 8; struct { @@ -96,6 +101,11 @@ class AudioSystem { bool paused_ = false; threading::Fence pause_fence_; std::unique_ptr resume_event_; + std::unique_ptr signal_event_; + uint32_t client_callback_in_; + uint32_t client_callback_arg_in_; + + threading::AtomicListHeader guest_worker_messages_; }; } // namespace apu diff --git a/src/xenia/apu/xma_context.cc b/src/xenia/apu/xma_context.cc index a619e8b31e..0a40ca2e9f 100644 --- a/src/xenia/apu/xma_context.cc +++ b/src/xenia/apu/xma_context.cc @@ -92,6 +92,20 @@ int XmaContext::Setup(uint32_t id, Memory* memory, uint32_t guest_ptr) { return 0; } +bool XmaContext::is_allocated() { + return XmaDecoder::BoolsForContext(this)->is_allocated_; +} +bool XmaContext::is_enabled() { + return XmaDecoder::BoolsForContext(this)->is_enabled_; +} + +void XmaContext::set_is_allocated(bool is_allocated) { + XmaDecoder::BoolsForContext(this)->is_allocated_ = is_allocated; +} +void 
XmaContext::set_is_enabled(bool is_enabled) { + XmaDecoder::BoolsForContext(this)->is_enabled_ = is_enabled; +} + bool XmaContext::Work() { if (!is_enabled() || !is_allocated()) { return false; @@ -168,7 +182,7 @@ void XmaContext::Disable() { void XmaContext::Release() { // Lock it in case the decoder thread is working on it now. std::lock_guard lock(lock_); - assert_true(is_allocated_ == true); + assert_true(is_allocated() == true); set_is_allocated(false); auto context_ptr = memory()->TranslateVirtual(guest_ptr()); diff --git a/src/xenia/apu/xma_context.h b/src/xenia/apu/xma_context.h index baa70643ea..9daa2d0f8d 100644 --- a/src/xenia/apu/xma_context.h +++ b/src/xenia/apu/xma_context.h @@ -130,6 +130,11 @@ struct Xma2ExtraData { static_assert_size(Xma2ExtraData, 34); #pragma pack(pop) +struct XmaContextBools { + volatile bool is_allocated_ = false; + volatile bool is_enabled_ = false; +}; + class XmaContext { public: static const uint32_t kBytesPerPacket = 2048; @@ -163,11 +168,11 @@ class XmaContext { uint32_t id() { return id_; } uint32_t guest_ptr() { return guest_ptr_; } - bool is_allocated() { return is_allocated_; } - bool is_enabled() { return is_enabled_; } + bool is_allocated(); + bool is_enabled(); - void set_is_allocated(bool is_allocated) { is_allocated_ = is_allocated; } - void set_is_enabled(bool is_enabled) { is_enabled_ = is_enabled; } + void set_is_allocated(bool is_allocated); + void set_is_enabled(bool is_enabled); private: static void SwapInputBuffer(XMA_CONTEXT_DATA* data); @@ -205,8 +210,6 @@ class XmaContext { uint32_t id_ = 0; uint32_t guest_ptr_ = 0; xe_mutex lock_; - volatile bool is_allocated_ = false; - volatile bool is_enabled_ = false; // bool is_dirty_ = true; // ffmpeg structures diff --git a/src/xenia/apu/xma_decoder.cc b/src/xenia/apu/xma_decoder.cc index fd07f84bc8..2dcfddf11a 100644 --- a/src/xenia/apu/xma_decoder.cc +++ b/src/xenia/apu/xma_decoder.cc @@ -18,8 +18,9 @@ #include "xenia/base/string_buffer.h" #include 
"xenia/cpu/processor.h" #include "xenia/cpu/thread_state.h" -#include "xenia/kernel/xthread.h" +#include "xenia/emulator.h" #include "xenia/kernel/kernel_state.h" +#include "xenia/kernel/xthread.h" extern "C" { #include "third_party/FFmpeg/libavutil/log.h" } // extern "C" @@ -102,8 +103,7 @@ void av_log_callback(void* avcl, int level, const char* fmt, va_list va) { StringBuffer buff; buff.AppendVarargs(fmt, va); xe::logging::AppendLogLineFormat(LogSrc::Apu, log_level, level_char, - "ffmpeg: {}", - buff.to_string_view()); + "ffmpeg: {}", buff.to_string_view()); } X_STATUS XmaDecoder::Setup(kernel::KernelState* kernel_state) { @@ -141,17 +141,27 @@ X_STATUS XmaDecoder::Setup(kernel::KernelState* kernel_state) { worker_running_ = true; work_event_ = xe::threading::Event::CreateAutoResetEvent(false); assert_not_null(work_event_); - worker_thread_ = kernel::object_ref( - new kernel::XHostThread(kernel_state, 128 * 1024, 0, [this]() { - WorkerThreadMain(); - return 0; - }, kernel_state->GetIdleProcess()));//this one doesnt need any process actually. 
never calls any guest code + threading::Thread::CreationParameters crparams{}; + crparams.stack_size = 16 * 1024 * 1024; + worker_thread_ = threading::Thread::Create( + crparams, std::bind(&XmaDecoder::WorkerThreadMain, this)); + Emulator::Get()->RegisterGuestHardwareBlockThread(worker_thread_.get()); worker_thread_->set_name("XMA Decoder"); - worker_thread_->set_can_debugger_suspend(true); - worker_thread_->Create(); + worker_thread_->set_affinity_mask(0b11000000); return X_STATUS_SUCCESS; } +XmaContextBools* XmaDecoder::BoolsForContext(XmaContext* context) { + size_t delta_to_context_bools = + offsetof(XmaDecoder, contexts_) - offsetof(XmaDecoder, context_bools_); + + size_t delta_to_context_base = context->id() * sizeof(XmaContext); + + return reinterpret_cast( + ((reinterpret_cast(context) - delta_to_context_base) - + delta_to_context_bools) + + context->id() * sizeof(XmaContextBools)); +} void XmaDecoder::WorkerThreadMain() { uint32_t idle_loop_count = 0; @@ -159,6 +169,10 @@ void XmaDecoder::WorkerThreadMain() { // Okay, let's loop through XMA contexts to find ones we need to decode! bool did_work = false; for (uint32_t n = 0; n < kContextCount; n++) { + if (!this->context_bools_[n].is_enabled_ || + !this->context_bools_[n].is_allocated_) { + continue; + } XmaContext& context = contexts_[n]; did_work = context.Work() || did_work; @@ -195,7 +209,8 @@ void XmaDecoder::Shutdown() { if (worker_thread_) { // Wait for work thread. 
- xe::threading::Wait(worker_thread_->thread(), false); + xe::threading::Wait(worker_thread_.get(), false); + Emulator::Get()->UnregisterGuestHardwareBlockThread(worker_thread_.get()); worker_thread_.reset(); } diff --git a/src/xenia/apu/xma_decoder.h b/src/xenia/apu/xma_decoder.h index 73da02c2ec..e8049f0683 100644 --- a/src/xenia/apu/xma_decoder.h +++ b/src/xenia/apu/xma_decoder.h @@ -50,7 +50,7 @@ class XmaDecoder { bool is_paused() const { return paused_; } void Pause(); void Resume(); - + static XmaContextBools* BoolsForContext(XmaContext* context); protected: int GetContextId(uint32_t guest_ptr); @@ -71,21 +71,25 @@ class XmaDecoder { cpu::Processor* processor_ = nullptr; std::atomic worker_running_ = {false}; - kernel::object_ref worker_thread_; + std::unique_ptr worker_thread_; std::unique_ptr work_event_ = nullptr; bool paused_ = false; xe::threading::Fence pause_fence_; // Signaled when worker paused. xe::threading::Fence resume_fence_; // Signaled when resume requested. - XmaRegisterFile register_file_; - - static const uint32_t kContextCount = 320; - XmaContext contexts_[kContextCount]; BitMap context_bitmap_; uint32_t context_data_first_ptr_ = 0; uint32_t context_data_last_ptr_ = 0; + static const uint32_t kContextCount = 320; + XmaContextBools context_bools_[kContextCount]; + + XmaRegisterFile register_file_; + + + XmaContext contexts_[kContextCount]; + }; } // namespace apu diff --git a/src/xenia/base/byte_order.h b/src/xenia/base/byte_order.h index 5a076f319f..8466668612 100644 --- a/src/xenia/base/byte_order.h +++ b/src/xenia/base/byte_order.h @@ -106,6 +106,19 @@ struct endian_store { *this = *this + a; return *this; } + endian_store& operator&=(int a) { + *this = *this & a; + return *this; + } + endian_store& operator^=(int a) { + *this = *this ^ a; + return *this; + } + endian_store& operator|=(int a) { + *this = *this | a; + return *this; + } + endian_store& operator-=(int a) { *this = *this - a; return *this; @@ -129,12 +142,17 @@ struct 
endian_store { T value; }; - +#if XE_COMPARISON_BUILD +template +using be = endian_store; +template +using le = endian_store; +#else template using be = endian_store; template using le = endian_store; - +#endif } // namespace xe #endif // XENIA_BASE_BYTE_ORDER_H_ diff --git a/src/xenia/base/clock.cc b/src/xenia/base/clock.cc index 5cbb48bd0c..2f66406720 100644 --- a/src/xenia/base/clock.cc +++ b/src/xenia/base/clock.cc @@ -37,9 +37,9 @@ namespace xe { // Time scalar applied to all time operations. double guest_time_scalar_ = 1.0; // Tick frequency of guest. -uint64_t guest_tick_frequency_ = Clock::host_tick_frequency_platform(); +uint64_t guest_tick_frequency_; // Base FILETIME of the guest system from app start. -uint64_t guest_system_time_base_ = Clock::QueryHostSystemTime(); +uint64_t guest_system_time_base_ ; // Combined time and frequency ratio between host and guest. // Split in numerator (first) and denominator (second). // Computed by RecomputeGuestTickScalar. @@ -48,7 +48,7 @@ std::pair guest_tick_ratio_ = std::make_pair(1, 1); // Native guest ticks. uint64_t last_guest_tick_count_ = 0; // Last sampled host tick count. -uint64_t last_host_tick_count_ = Clock::QueryHostTickCount(); +uint64_t last_host_tick_count_; using tick_mutex_type = std::mutex; diff --git a/src/xenia/base/clock.h b/src/xenia/base/clock.h index be3e4b37c3..80c62ec921 100644 --- a/src/xenia/base/clock.h +++ b/src/xenia/base/clock.h @@ -39,8 +39,8 @@ class Clock { // Host tick count. Generally QueryHostTickCount() should be used. 
static uint64_t host_tick_count_platform(); #if XE_CLOCK_RAW_AVAILABLE - //chrispy: the way msvc was ordering the branches was causing rdtsc to be speculatively executed each time - //the branch history was lost + // chrispy: the way msvc was ordering the branches was causing rdtsc to be + // speculatively executed each time the branch history was lost XE_NOINLINE static uint64_t host_tick_count_raw(); #endif @@ -94,6 +94,37 @@ class Clock { static int64_t ScaleGuestDurationFileTime(int64_t guest_file_time); // Scales a time duration represented as a timeval, from guest time. static void ScaleGuestDurationTimeval(int32_t* tv_sec, int32_t* tv_usec); + + // QueryQuickCounter maps to the lowest latency timestamp query possible + +#if XE_ARCH_AMD64 == 1 + XE_FORCEINLINE + static uint64_t QueryQuickCounter() { return __rdtsc(); } + + struct QpcParams { + uint64_t performance_frequency; + uint64_t shared_user_va_bias; + uint64_t shared_user_va_multiplier; + uint64_t qpc_bias; + char qpc_shift; + inline bool operator==(QpcParams other) { + return performance_frequency == other.performance_frequency && + shared_user_va_bias == other.shared_user_va_bias && + shared_user_va_multiplier == other.shared_user_va_multiplier && + qpc_bias == other.qpc_bias && qpc_shift == other.qpc_shift; + } + }; + + static QpcParams GetQpcParams(); + +#else + static uint64_t QueryQuickCounter(); +#endif + // converts a timestamp in host tick frequency format to + // one that is comparable with the quick counter + static uint64_t HostTickTimestampToQuickTimestamp(uint64_t host_ticks); + + static void Initialize(); }; } // namespace xe diff --git a/src/xenia/base/clock_win.cc b/src/xenia/base/clock_win.cc index 466d4deac1..92856c2064 100644 --- a/src/xenia/base/clock_win.cc +++ b/src/xenia/base/clock_win.cc @@ -10,8 +10,13 @@ #include "xenia/base/clock.h" #include "xenia/base/platform_win.h" +XE_NTDLL_IMPORT(NtQuerySystemInformation, NtQuerySystemInformation_cls, + NtQuerySystemInformationPtr); 
+static __int64 RtlpHypervisorSharedUserVa = 0; namespace xe { +constexpr uint32_t SystemHypervisorSharedPageInformation = 0xC5; + #if XE_USE_KUSER_SHARED == 1 uint64_t Clock::host_tick_frequency_platform() { return 10000000ULL; } @@ -23,20 +28,64 @@ uint64_t Clock::QueryHostSystemTime() { } #else + +Clock::QpcParams Clock::GetQpcParams() { + QpcParams result; + result.performance_frequency = *reinterpret_cast(0x7FFE0300LL); + result.shared_user_va_bias = *(uint64_t*)(RtlpHypervisorSharedUserVa + 16); + result.shared_user_va_multiplier = + *(unsigned __int64*)(RtlpHypervisorSharedUserVa + 8); + result.qpc_bias = *reinterpret_cast(0x7FFE03B8LL); + result.qpc_shift = *reinterpret_cast(0x7FFE03C7LL); + return result; +} +// pretty much always 10000000 +static uint64_t XeQueryPerformanceFrequency() { + return *reinterpret_cast(0x7FFE0300LL); +} + +static uint64_t XeQueryPerformanceFrequencyMs() { + return *reinterpret_cast(0x7FFE0300LL) / 1000LL; +} +static uint64_t XeQueryPerformanceCounter() { + auto v1 = *(uint64_t*)(RtlpHypervisorSharedUserVa + 16); + uint64_t v2 = *(unsigned __int64*)(RtlpHypervisorSharedUserVa + 8); + + uint64_t v4 = v1 + __umulh(__rdtsc(), v2); + return (*reinterpret_cast(0x7FFE03B8LL) + v4) >> + *reinterpret_cast(0x7FFE03C7LL); +} +static uint64_t XeDestinationPerformanceCounterToRdtscStamp( + uint64_t dest_time) { + uint64_t rescaled = dest_time; + + rescaled <<= *reinterpret_cast(0x7FFE03C7LL); + + rescaled -= *reinterpret_cast(0x7FFE03B8LL); + + rescaled -= *(uint64_t*)(RtlpHypervisorSharedUserVa + 16); + + // undo __umulh(__rdtsc(), v2); + + uint64_t undo_mul = *(unsigned __int64*)(RtlpHypervisorSharedUserVa + 8); + unsigned long long rem; + uint64_t cycles = _udiv128(rescaled, 0, undo_mul, &rem); + + return cycles; +} + uint64_t Clock::host_tick_frequency_platform() { - LARGE_INTEGER frequency; - QueryPerformanceFrequency(&frequency); - return frequency.QuadPart; + return XeQueryPerformanceFrequency(); } uint64_t 
Clock::host_tick_count_platform() { - LARGE_INTEGER counter; - uint64_t time = 0; - if (QueryPerformanceCounter(&counter)) { - time = counter.QuadPart; - } - return time; + return XeQueryPerformanceCounter(); +} + +uint64_t Clock::HostTickTimestampToQuickTimestamp(uint64_t host_ticks) { + return XeDestinationPerformanceCounterToRdtscStamp(host_ticks); } + uint64_t Clock::QueryHostSystemTime() { FILETIME t; GetSystemTimeAsFileTime(&t); @@ -53,4 +102,20 @@ uint64_t Clock::QueryHostInterruptTime() { return *reinterpret_cast(KUserShared() + KUSER_SHARED_INTERRUPTTIME_OFFSET); } +extern uint64_t guest_tick_frequency_; +extern uint64_t guest_system_time_base_; + +extern uint64_t last_host_tick_count_; + +void Clock::Initialize() { + NtQuerySystemInformationPtr.invoke(SystemHypervisorSharedPageInformation, + &RtlpHypervisorSharedUserVa, 8, nullptr); + guest_tick_frequency_ = Clock::host_tick_frequency_platform(); + // Base FILETIME of the guest system from app start. + guest_system_time_base_ = Clock::QueryHostSystemTime(); + + + // Last sampled host tick count. 
+ last_host_tick_count_ = Clock::QueryHostTickCount(); +} } // namespace xe diff --git a/src/xenia/base/logging.cc b/src/xenia/base/logging.cc index eec0fb93bf..7b81bf1001 100644 --- a/src/xenia/base/logging.cc +++ b/src/xenia/base/logging.cc @@ -508,9 +508,9 @@ void logging::AppendLogLine(LogLevel log_level, const char prefix_char, void FatalError(const std::string_view str) { logging::AppendLogLine(LogLevel::Error, 'x', str); - if (!xe::has_console_attached()) { + //if (!xe::has_console_attached()) { ShowSimpleMessageBox(SimpleMessageBoxType::Error, str); - } +// } ShutdownLogging(); diff --git a/src/xenia/base/mutex.cc b/src/xenia/base/mutex.cc index b975e4bc31..5525160565 100644 --- a/src/xenia/base/mutex.cc +++ b/src/xenia/base/mutex.cc @@ -11,7 +11,7 @@ #if XE_PLATFORM_WIN32 == 1 #include "xenia/base/platform_win.h" #endif - +#include "xenia/base/threading.h" namespace xe { #if XE_PLATFORM_WIN32 == 1 && XE_ENABLE_FAST_WIN32_MUTEX == 1 // default spincount for entercriticalsection is insane on windows, 0x20007D0i64 @@ -65,6 +65,74 @@ bool xe_fast_mutex::try_lock() { return TryEnterCriticalSection(fast_crit(this)); } #endif + +xe_portable_mutex::xe_portable_mutex() { + lock_count_ = -1; + recursion_count_ = 0; + + owning_thread_ = nullptr; + wait_object_ = (void*)threading::Event::CreateAutoResetEvent(FALSE).release(); + spin_count_ = ~0u; +} +xe_portable_mutex::~xe_portable_mutex() { + auto wait_event = reinterpret_cast(wait_object_); + delete wait_event; +} + +void xe_portable_mutex::lock() { + auto thread = threading::Thread::GetCurrentThread(); + + if (owning_thread_ == thread) { + lock_count_.fetch_add(1); + recursion_count_++; + return; + } + + uint32_t spin_count = spin_count_; + while (spin_count--) { + int expected = -1; + if (lock_count_.compare_exchange_strong(expected, 0)) { + owning_thread_ = thread; + recursion_count_ = 1; + return; + } + } + if ((lock_count_.fetch_add(1)+1) != 0) { + threading::Wait(reinterpret_cast(wait_object_), false); + } + 
xenia_assert(owning_thread_ == nullptr); + owning_thread_ = thread; + recursion_count_ = 1; +} +void xe_portable_mutex::unlock() { + xenia_assert(owning_thread_ == threading::Thread::GetCurrentThread()); + xenia_assert(recursion_count_ > 0); + + if (--recursion_count_ != 0) { + xenia_assert(recursion_count_ > 0); + lock_count_.fetch_sub(1); + return; + } + owning_thread_ = nullptr; + if ((lock_count_.fetch_sub(1) - 1) != -1) { + reinterpret_cast(wait_object_)->Set(); + } +} +bool xe_portable_mutex::try_lock() { + auto thread = threading::Thread::GetCurrentThread(); + int expected = -1; + if (lock_count_.compare_exchange_strong(expected, 0)) { + owning_thread_ = thread; + recursion_count_ = 1; + return true; + } else if (owning_thread_ == thread) { + lock_count_.fetch_add(1); + ++recursion_count_; + return true; + } + return false; +} + // chrispy: moved this out of body of function to eliminate the initialization // guards static global_mutex_type global_mutex; diff --git a/src/xenia/base/mutex.h b/src/xenia/base/mutex.h index c75d2ce785..c8a101dda6 100644 --- a/src/xenia/base/mutex.h +++ b/src/xenia/base/mutex.h @@ -13,6 +13,7 @@ #include "platform.h" #include "memory.h" #define XE_ENABLE_FAST_WIN32_MUTEX 1 +#define XE_FAST_MUTEX_FOR_GLOBAL_MUTEX 1 namespace xe { #if XE_PLATFORM_WIN32 == 1 && XE_ENABLE_FAST_WIN32_MUTEX == 1 @@ -23,8 +24,19 @@ namespace xe { this emulates a recursive mutex, except with far less overhead */ +class alignas(64) xe_fast_mutex { + XE_MAYBE_UNUSED + char detail[64]; + + public: + xe_fast_mutex(); + ~xe_fast_mutex(); -class alignas(4096) xe_global_mutex { + void lock(); + void unlock(); + bool try_lock(); +}; +class alignas(64) xe_global_mutex { XE_MAYBE_UNUSED char detail[64]; @@ -36,20 +48,28 @@ class alignas(4096) xe_global_mutex { void unlock(); bool try_lock(); }; -using global_mutex_type = xe_global_mutex; -class alignas(64) xe_fast_mutex { - XE_MAYBE_UNUSED - char detail[64]; +class alignas(64) xe_portable_mutex { + std::atomic_int 
lock_count_; + uint32_t recursion_count_; + void* owning_thread_; + uint32_t spin_count_; + void* wait_object_; public: - xe_fast_mutex(); - ~xe_fast_mutex(); + xe_portable_mutex(); + ~xe_portable_mutex(); void lock(); void unlock(); bool try_lock(); }; +#if XE_FAST_MUTEX_FOR_GLOBAL_MUTEX==1 +using global_mutex_type = xe_portable_mutex; +#else +using global_mutex_type = xe_global_mutex; +#endif + // a mutex that is extremely unlikely to ever be locked // use for race conditions that have extremely remote odds of happening class xe_unlikely_mutex { diff --git a/src/xenia/base/platform.h b/src/xenia/base/platform.h index c258ad08fd..d69341afaa 100644 --- a/src/xenia/base/platform.h +++ b/src/xenia/base/platform.h @@ -10,6 +10,9 @@ #ifndef XENIA_BASE_PLATFORM_H_ #define XENIA_BASE_PLATFORM_H_ +//if true, hosttoguestvirtual/translatevirtual become no-ops, and xe::be becomes little endian +#define XE_COMPARISON_BUILD 0 + // This file contains the main platform switches used by xenia as well as any // fixups required to normalize the environment. Everything in here should be // largely portable. 
diff --git a/src/xenia/base/platform_win.h b/src/xenia/base/platform_win.h index 84d2fd1503..243c0d0770 100644 --- a/src/xenia/base/platform_win.h +++ b/src/xenia/base/platform_win.h @@ -68,7 +68,6 @@ static constexpr size_t KSUER_SHARED_SYSTEMTIME_OFFSET = 0x14; static constexpr size_t KUSER_SHARED_INTERRUPTTIME_OFFSET = 8; static unsigned char* KUserShared() { return (unsigned char*)0x7FFE0000ULL; } -#if XE_USE_KUSER_SHARED == 1 // KUSER_SHARED struct __declspec(align(4)) _KSYSTEM_TIME { unsigned int LowPart; @@ -80,5 +79,4 @@ static volatile _KSYSTEM_TIME* GetKUserSharedSystemTime() { return reinterpret_cast( KUserShared() + KSUER_SHARED_SYSTEMTIME_OFFSET); } -#endif #endif // XENIA_BASE_PLATFORM_WIN_H_ diff --git a/src/xenia/base/string_buffer.cc b/src/xenia/base/string_buffer.cc index b3a9270a81..6d137e7811 100644 --- a/src/xenia/base/string_buffer.cc +++ b/src/xenia/base/string_buffer.cc @@ -15,7 +15,7 @@ #include "xenia/base/assert.h" #include "xenia/base/literals.h" #include "xenia/base/math.h" - +#include "xenia/base/byte_order.h" namespace xe { using namespace xe::literals; @@ -37,20 +37,27 @@ void StringBuffer::Reset() { buffer_[0] = 0; } -void StringBuffer::Grow(size_t additional_length) { - if (buffer_offset_ + additional_length <= buffer_capacity_) { +void StringBuffer::Reserve(size_t reservation_size) { + if (buffer_capacity_ >= reservation_size) { return; } - size_t old_capacity = buffer_capacity_; - size_t new_capacity = - std::max(xe::round_up(buffer_offset_ + additional_length, 16_KiB), - old_capacity * 2); + size_t new_capacity = reservation_size; auto new_buffer = std::realloc(buffer_, new_capacity); assert_not_null(new_buffer); buffer_ = reinterpret_cast(new_buffer); buffer_capacity_ = new_capacity; } +void StringBuffer::Grow(size_t additional_length) { + if (buffer_offset_ + additional_length <= buffer_capacity_) { + return; + } + size_t new_capacity = + std::max(xe::round_up(buffer_offset_ + additional_length, 16_KiB), + buffer_capacity_ * 
2); + Reserve(new_capacity); +} + void StringBuffer::Append(char c) { AppendBytes(reinterpret_cast(&c), 1); } @@ -103,4 +110,74 @@ std::vector StringBuffer::to_bytes() const { return bytes; } +#if XE_ARCH_AMD64 == 1 +static __m128i ToHexUpper(__m128i value) { + __m128i w = _mm_cvtepu8_epi16(value); + + __m128i msk = + _mm_and_si128(_mm_or_si128(_mm_srli_epi16(w, 4), _mm_bslli_si128(w, 1)), + _mm_set1_epi16(0x0F0F)); + + __m128i conv = + _mm_shuffle_epi8(_mm_setr_epi8('0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'), + msk); + return conv; +} +#endif + +void StringBuffer::AppendHexUInt64(uint64_t value) { +#if XE_ARCH_AMD64 == 1 + __m128i conv = ToHexUpper(_mm_cvtsi64_si128(static_cast(xe::byte_swap(value)))); + + AppendBytes(reinterpret_cast(&conv), 16); +#else + AppendFormat("{:016X}", value); +#endif +} + +void StringBuffer::AppendHexUInt32(uint32_t value) { +#if XE_ARCH_AMD64 == 1 + __m128i conv = ToHexUpper(_mm_cvtsi32_si128(static_cast(xe::byte_swap(value)))); + + uint64_t low = _mm_cvtsi128_si64(conv); + + AppendBytes(reinterpret_cast(&low), 8); +#else + AppendFormat("{:08X}", value); +#endif +} + +void StringBuffer::AppendParenthesizedHexUInt32(uint32_t value) { +#if XE_ARCH_AMD64 == 1 + Grow(10); + + buffer_[buffer_offset_] = '('; + *reinterpret_cast(&buffer_[buffer_offset_ + 1]) = + _mm_cvtsi128_si64(ToHexUpper(_mm_cvtsi32_si128(xe::byte_swap(value)))); + buffer_[buffer_offset_ + 9] = ')'; + buffer_offset_ += 10; + buffer_[buffer_offset_] = 0; +#else + AppendFormat("({:08X})", value); +#endif +} + + +void StringBuffer::AppendParenthesizedHexUInt64(uint64_t value) { +#if XE_ARCH_AMD64 == 1 + Grow(18); + + buffer_[buffer_offset_] = '('; + __m128i conv = ToHexUpper(_mm_cvtsi64_si128(static_cast(xe::byte_swap(value)))); + _mm_storeu_si128(reinterpret_cast<__m128i*>(&buffer_[buffer_offset_ + 1]), + conv); + + buffer_[buffer_offset_ + 17] = ')'; + buffer_offset_ += 18; + buffer_[buffer_offset_] = 0; +#else + 
AppendFormat("({:016X})", value); +#endif +} } // namespace xe diff --git a/src/xenia/base/string_buffer.h b/src/xenia/base/string_buffer.h index 16900824a7..01b2e36c77 100644 --- a/src/xenia/base/string_buffer.h +++ b/src/xenia/base/string_buffer.h @@ -36,16 +36,19 @@ class StringBuffer { template void AppendFormat(const char* format, const Args&... args) { auto s = fmt::format(format, args...); - Append(s.c_str()); + Append(s); } - + void AppendHexUInt64(uint64_t value); + void AppendHexUInt32(uint32_t value); + void AppendParenthesizedHexUInt32(uint32_t value); + void AppendParenthesizedHexUInt64(uint64_t value); void AppendVarargs(const char* format, va_list args); void AppendBytes(const uint8_t* buffer, size_t length); std::string to_string(); std::string_view to_string_view() const; std::vector to_bytes() const; - + void Reserve(size_t reservation_size); private: void Grow(size_t additional_length); diff --git a/src/xenia/base/threading.h b/src/xenia/base/threading.h index a145c3830c..f40ddaf812 100644 --- a/src/xenia/base/threading.h +++ b/src/xenia/base/threading.h @@ -32,7 +32,9 @@ namespace xe { namespace threading { - +// msvc's debugger cannot show the stacks of fibers that are no longer running, +// but it can show thread stacks +#define XE_USE_FAKEFIBERS 0 using namespace xe::literals; #if XE_PLATFORM_ANDROID @@ -144,6 +146,11 @@ bool FreeTlsHandle(TlsHandle handle); uintptr_t GetTlsValue(TlsHandle handle); bool SetTlsValue(TlsHandle handle, uintptr_t value); +TlsHandle AllocateFlsHandle(); +bool FreeFlsHandle(TlsHandle handle); +uintptr_t GetFlsValue(TlsHandle handle); +bool SetFlsValue(TlsHandle handle, uintptr_t value); + // A high-resolution timer capable of firing at millisecond-precision. All // timers created in this way are executed in the same thread so callbacks must // be kept short or else all timers will be impacted. 
This is a simplified @@ -221,6 +228,9 @@ WaitResult Wait( WaitHandle* wait_handle, bool is_alertable, std::chrono::milliseconds timeout = std::chrono::milliseconds::max()); +WaitResult NanoWait(WaitHandle* wait_handle, bool is_alertable, + int64_t nanoseconds); + // Signals one object and waits on another object as a single operation. // Waits until the wait handle is in the signaled state, an alert triggers and // a user callback is queued to the thread, or the timeout interval elapses. @@ -410,10 +420,17 @@ struct ThreadPriority { static const int32_t kHighest = 2; }; +// NtWaitForAlertByThreadId +bool WaitForAlert(int64_t nanoseconds); +// NtAlertThreadByThreadId +bool AlertThreadById(uint32_t thread_id); + +using IPIFunction = uintptr_t (*)(void* ud); // Models a Win32-like thread object. // https://msdn.microsoft.com/en-us/library/windows/desktop/ms682453(v=vs.85).aspx class Thread : public WaitHandle { public: + bool is_ppc_thread_ = false; struct CreationParameters { size_t stack_size = 4_MiB; bool create_suspended = false; @@ -479,10 +496,49 @@ class Thread : public WaitHandle { // threads that had been waiting for the thread to terminate. virtual void Terminate(int exit_code) = 0; + virtual bool IPI(IPIFunction ipi_function, void* userdata, + uintptr_t* result_out = nullptr) = 0; + protected: std::string name_; }; +class Fiber : public WaitHandle { + public: + struct CreationParameters { + size_t stack_size = 4_MiB; + }; + + // Creates a thread with the given parameters and calls the start routine from + // within that thread. 
+ static std::unique_ptr Create(CreationParameters params, + std::function start_routine); + static Fiber* GetCurrentFiber(); + + static std::unique_ptr CreateFromThread(); + // use this to signal explicitly that the fiber is done + // otherwise you can deadlock in the destructor if your fiber has finished, + // but will never actually return + virtual void SetTerminated() = 0; + virtual void SwitchTo() = 0; + virtual void set_name(std::string name) = 0; + virtual ~Fiber() {} +}; + +struct alignas(16) AtomicListEntry { + AtomicListEntry* next_ = nullptr; +}; + +struct alignas(16) AtomicListHeader { + void* opaque_[2]; + + AtomicListHeader(); + AtomicListEntry* Flush(); + void Push(AtomicListEntry* entry); + AtomicListEntry* Pop(); + uint16_t depth() const { return *reinterpret_cast(this); } +}; + } // namespace threading } // namespace xe diff --git a/src/xenia/base/threading_win.cc b/src/xenia/base/threading_win.cc index e30f16e348..4433aae498 100644 --- a/src/xenia/base/threading_win.cc +++ b/src/xenia/base/threading_win.cc @@ -56,9 +56,24 @@ XE_NTDLL_IMPORT(NtReleaseSemaphore, cls_NtReleaseSemaphore, XE_NTDLL_IMPORT(NtDelayExecution, cls_NtDelayExecution, NtDelayExecutionPointer); XE_NTDLL_IMPORT(NtQueryEvent, cls_NtQueryEvent, NtQueryEventPointer); +XE_NTDLL_IMPORT(NtQueryInformationThread, cls_NtQueryInformationThread, + NtQueryInformationThreadPointer); + +XE_NTDLL_IMPORT(NtQueueApcThreadEx, cls_NtQueueApcThreadEx, + NtQueueApcThreadExPointer); +XE_NTDLL_IMPORT(NtAlertThreadByThreadId, cls_NtAlertThreadByThreadId, + NtAlertThreadByThreadIdPointer); +XE_NTDLL_IMPORT(NtWaitForAlertByThreadId, cls_NtWaitForAlertByThreadId, + NtWaitForAlertByThreadIdPointer); namespace xe { namespace threading { - +static bool IsInGuestThread() { + auto current_thread = Thread::GetCurrentThread(); + if (current_thread) { + return current_thread->is_ppc_thread_; + } + return false; +} void EnableAffinityConfiguration() { // chrispy: i don't think this is necessary, // affinity 
always seems to be the system mask? research more @@ -74,9 +89,7 @@ void EnableAffinityConfiguration() { SetProcessAffinityMask(process_handle, system_affinity_mask); } -uint32_t current_thread_system_id() { - return static_cast(GetCurrentThreadId()); -} +uint32_t current_thread_system_id() { return __readgsdword(0x48); } // https://msdn.microsoft.com/en-us/library/xcb2z8hs.aspx #pragma pack(push, 8) @@ -162,6 +175,7 @@ void NanoSleep(int64_t ns) { void SyncMemory() { MemoryBarrier(); } void Sleep(std::chrono::microseconds duration) { + xenia_assert(!IsInGuestThread()); if (duration.count() < 100) { MaybeYield(); } else { @@ -170,6 +184,7 @@ void Sleep(std::chrono::microseconds duration) { } SleepResult AlertableSleep(std::chrono::microseconds duration) { + xenia_assert(!IsInGuestThread()); if (SleepEx(static_cast(duration.count() / 1000), TRUE) == WAIT_IO_COMPLETION) { return SleepResult::kAlerted; @@ -188,7 +203,30 @@ uintptr_t GetTlsValue(TlsHandle handle) { bool SetTlsValue(TlsHandle handle, uintptr_t value) { return TlsSetValue(handle, reinterpret_cast(value)) ? true : false; } +#if XE_USE_FAKEFIBERS == 1 +TlsHandle AllocateFlsHandle() { return TlsAlloc(); } +bool FreeFlsHandle(TlsHandle handle) { return TlsFree(handle) ? true : false; } + +uintptr_t GetFlsValue(TlsHandle handle) { + return reinterpret_cast(TlsGetValue(handle)); +} + +bool SetFlsValue(TlsHandle handle, uintptr_t value) { + return TlsSetValue(handle, reinterpret_cast(value)) ? true : false; +} +#else +TlsHandle AllocateFlsHandle() { return FlsAlloc(nullptr); } +bool FreeFlsHandle(TlsHandle handle) { return FlsFree(handle) ? true : false; } + +uintptr_t GetFlsValue(TlsHandle handle) { + return reinterpret_cast(FlsGetValue(handle)); +} + +bool SetFlsValue(TlsHandle handle, uintptr_t value) { + return FlsSetValue(handle, reinterpret_cast(value)) ? 
true : false; +} +#endif template class Win32Handle : public T { public: @@ -208,6 +246,7 @@ class Win32Handle : public T { WaitResult Wait(WaitHandle* wait_handle, bool is_alertable, std::chrono::milliseconds timeout) { + xenia_assert(!IsInGuestThread()); HANDLE handle = wait_handle->native_handle(); DWORD result; DWORD timeout_dw = DWORD(timeout.count()); @@ -244,6 +283,32 @@ WaitResult Wait(WaitHandle* wait_handle, bool is_alertable, } } +WaitResult NanoWait(WaitHandle* wait_handle, bool is_alertable, + int64_t nanoseconds) { + HANDLE handle = wait_handle->native_handle(); + DWORD result; + BOOL bAlertable = is_alertable ? TRUE : FALSE; + + LARGE_INTEGER timeout_big; + timeout_big.QuadPart = -(nanoseconds / 100LL); + + result = NtWaitForSingleObjectPointer.invoke(handle, bAlertable, + &timeout_big); + + switch (result) { + case STATUS_WAIT_0: + return WaitResult::kSuccess; + case STATUS_ABANDONED_WAIT_0: + return WaitResult::kAbandoned; + case STATUS_USER_APC: + return WaitResult::kUserCallback; + case STATUS_TIMEOUT: + return WaitResult::kTimeout; + default: + return WaitResult::kFailed; + } +} + WaitResult SignalAndWait(WaitHandle* wait_handle_to_signal, WaitHandle* wait_handle_to_wait_on, bool is_alertable, std::chrono::milliseconds timeout) { @@ -508,7 +573,18 @@ std::unique_ptr Timer::CreateSynchronizationTimer() { return nullptr; } } - +// NtWaitForAlertByThreadId +bool WaitForAlert(int64_t nanoseconds) { + LARGE_INTEGER wait_time{}; + wait_time.QuadPart = -(nanoseconds / 100LL); + return NtWaitForAlertByThreadIdPointer.invoke(nullptr, + &wait_time) == + 0x101; // STATUS_ALERTED vs STATUS_TIMEOUT +} +// NtAlertThreadByThreadId +bool AlertThreadById(uint32_t thread_id) { + return NtAlertThreadByThreadIdPointer.invoke(thread_id) == 0; +} class Win32Thread : public Win32Handle { public: explicit Win32Thread(HANDLE handle) : Win32Handle(handle) {} @@ -585,13 +661,165 @@ class Win32Thread : public Win32Handle { void Terminate(int exit_code) override { 
TerminateThread(handle_, exit_code); } + bool IPI(IPIFunction function, void* userdata, + uintptr_t* result_out) override; private: void AssertCallingThread() { assert_true(GetCurrentThreadId() == GetThreadId(handle_)); } + struct IPIContext* cached_ipi_context_ = nullptr; + + void* interrupt_stack_ = nullptr; + + std::mutex ipi_mutex_; + + int GetSuspendCount() { + int result = 0; + ULONG out_length = 0; + NTSTATUS rval = NtQueryInformationThreadPointer.invoke( + handle_, 0x23 /*suspendcount*/, &result, 4, &out_length); + xenia_assert(rval == 0); + xenia_assert(out_length == 4); + return result; + } }; +struct IPIContext { + void* userdata_; + IPIFunction function_; + + uintptr_t result_; + _CONTEXT saved_context_; + _CONTEXT initial_context_; + HANDLE racy_handle_; +}; +#if 1 +void IPIForwarder(IPIContext* context) { + { + __try { + context->result_ = context->function_(context->userdata_); + } __except (EXCEPTION_EXECUTE_HANDLER) { + ; + } + } + SetEvent(context->racy_handle_); + RtlRestoreContext(&context->saved_context_, nullptr); +} + +bool Win32Thread::IPI(IPIFunction ipi_function, void* userdata, + uintptr_t* result_out) { + std::unique_lock ipi_lock{ipi_mutex_}; + constexpr uint64_t INTERRUPT_STACK_BASE = 0x4860ULL << 32; + + constexpr uint64_t INTERRUPT_STACK_SIZE = 1024 * 1024 * 16; + if (!interrupt_stack_) { + // interrupt_stack_ = VirtualAlloc() + + void* result = nullptr; + uint64_t alloc_point = INTERRUPT_STACK_BASE + INTERRUPT_STACK_SIZE; + + while (!result) { + result = memory::AllocFixed((void*)alloc_point, INTERRUPT_STACK_SIZE, + memory::AllocationType::kReserveCommit, + memory::PageAccess::kReadWrite); + alloc_point += INTERRUPT_STACK_SIZE; + } + interrupt_stack_ = result; + } + uint32_t previous_suspend_count = 0; + if (!this->Suspend(&previous_suspend_count)) { + return false; + } + if (previous_suspend_count != 0) { + bool resumed = this->Resume(nullptr); + xenia_assert(resumed); + return false; + } + + IPIContext* ctx_to_use = 
cached_ipi_context_; + + if (!ctx_to_use) { + ctx_to_use = new IPIContext(); + memset(ctx_to_use, 0, sizeof(IPIContext)); + cached_ipi_context_ = ctx_to_use; + ctx_to_use->racy_handle_ = CreateEventA(nullptr, FALSE, FALSE, nullptr); + } + ctx_to_use->initial_context_.ContextFlags = CONTEXT_FULL; + ctx_to_use->saved_context_.ContextFlags = CONTEXT_FULL; + BOOL getcontext_worked = + GetThreadContext(this->handle_, &ctx_to_use->initial_context_); + + // already on interrupt stack? + if ((ctx_to_use->initial_context_.Rsp >> 32) == 0x4860ULL) { + bool resumed = this->Resume(nullptr); + xenia_assert(resumed); + return false; + } + + ctx_to_use->initial_context_.ContextFlags = CONTEXT_FULL; + ctx_to_use->saved_context_.ContextFlags = CONTEXT_FULL; + ctx_to_use->function_ = ipi_function; + ctx_to_use->userdata_ = userdata; + ctx_to_use->initial_context_.Rip = + reinterpret_cast(reinterpret_cast(IPIForwarder)); + + ctx_to_use->initial_context_.Rcx = reinterpret_cast(ctx_to_use); + + ctx_to_use->initial_context_.Rsp = + reinterpret_cast(interrupt_stack_) + INTERRUPT_STACK_SIZE - 56; + + // racy! 
+ GetThreadContext(this->handle_, &ctx_to_use->saved_context_); + + BOOL setcontext_worked = + SetThreadContext(this->handle_, &ctx_to_use->initial_context_); + + bool resumed = this->Resume(nullptr); + WaitForSingleObject(ctx_to_use->racy_handle_, INFINITE); + if (result_out) { + *result_out = ctx_to_use->result_; + } + return true; +} + +#else +typedef union _USER_APC_OPTION { + ULONG_PTR UserApcFlags; + HANDLE MemoryReserveHandle; +} USER_APC_OPTION, *PUSER_APC_OPTION; + +void IPIForwarder(void* sysarg1, void* sysarg2, void* sysarg3) { + IPIFunction func = reinterpret_cast(sysarg1); + void* ud = sysarg2; + uintptr_t* result_out = reinterpret_cast(sysarg3); + + uintptr_t scratch = func(ud); + if (result_out) { + *result_out = scratch; + } +} + +bool Win32Thread::IPI(IPIFunction ipi_function, void* userdata, + uintptr_t* result_out) { + if (!ipi_mutex_.try_lock()) { + return false; + } + USER_APC_OPTION UserApcOption; + UserApcOption.UserApcFlags = QUEUE_USER_APC_FLAGS_SPECIAL_USER_APC; + UserApcOption.MemoryReserveHandle = nullptr; + NTSTATUS invoke_res = + NtQueueApcThreadExPointer + .invoke( + this->handle_, UserApcOption, IPIForwarder, ipi_function, + userdata, result_out); + xenia_assert(invoke_res == 0); + ipi_mutex_.unlock(); + return true; +} +#endif + thread_local std::unique_ptr current_thread_ = nullptr; struct ThreadStartData { @@ -637,5 +865,157 @@ Thread* Thread::GetCurrentThread() { void Thread::Exit(int exit_code) { ExitThread(exit_code); } +class Win32Fiber : public Fiber { + public: + std::function callback; + LPVOID this_fiber_; + HANDLE done_signal_; + static void FiberFunc(LPVOID param) { + Win32Fiber* thiz = reinterpret_cast(param); + + thiz->callback(); + SetEvent(thiz->done_signal_); + } + Win32Fiber(size_t stack_size, std::function callback_) + : callback(std::move(callback_)) { + done_signal_ = CreateEventA(nullptr, TRUE, FALSE, nullptr); + this_fiber_ = CreateFiber(stack_size, FiberFunc, this); + } + Win32Fiber() : callback({}) { 
done_signal_ = CreateEventA(nullptr, TRUE, FALSE, nullptr); this_fiber_ = ConvertThreadToFiber(this); } + virtual void* native_handle() const override { return (void*)done_signal_; } + virtual void SetTerminated() override { SetEvent(this->done_signal_); } + virtual ~Win32Fiber() { + WaitForSingleObject(done_signal_, INFINITE); + CloseHandle(done_signal_); + DeleteFiber(this_fiber_); + } + + virtual void SwitchTo() override { SwitchToFiber(this_fiber_); } + virtual void set_name(std::string name) {} +}; +class FakeWin32Fiber; +thread_local FakeWin32Fiber* g_current_fake_win32_fiber = nullptr; +class FakeWin32Fiber : public Fiber { + public: + std::function<void()> callback; + + HANDLE execute_signal_; + // HANDLE this_handle_; + std::unique_ptr<Thread> this_thrd_; + HANDLE done_signal_; + DWORD thread_id_; + uint64_t fiber_affinity_; + bool terminating_ = false; + static DWORD FiberFunc(LPVOID param) { + FakeWin32Fiber* thiz = reinterpret_cast<FakeWin32Fiber*>(param); + g_current_fake_win32_fiber = thiz; + WaitForSingleObject(thiz->execute_signal_, INFINITE); + + thiz->callback(); + SetEvent(thiz->done_signal_); + return 0; + } + + FakeWin32Fiber(size_t stack_size, std::function<void()> callback_) + : callback(std::move(callback_)) { + fiber_affinity_ = 0ULL; + done_signal_ = CreateEventA(nullptr, TRUE, FALSE, nullptr); + + execute_signal_ = CreateEventA(nullptr, FALSE, FALSE, nullptr); + threading::Thread::CreationParameters crparams{}; + crparams.stack_size = stack_size; + + this_thrd_ = + Thread::Create(crparams, std::bind(&FakeWin32Fiber::FiberFunc, this)); + } + + FakeWin32Fiber() : callback({}) { + g_current_fake_win32_fiber = this; + fiber_affinity_ = threading::Thread::GetCurrentThread()->affinity_mask(); + + done_signal_ = CreateEventA(nullptr, TRUE, FALSE, nullptr); + + execute_signal_ = CreateEventA(nullptr, FALSE, FALSE, nullptr); + thread_id_ = GetCurrentThreadId(); + } + virtual void* native_handle() const override { return (void*)done_signal_; } + virtual void SetTerminated() override { + terminating_ = true; + SetEvent(this->done_signal_); + } + 
virtual ~FakeWin32Fiber() { + WaitForSingleObject(done_signal_, INFINITE); + CloseHandle(done_signal_); + CloseHandle(execute_signal_); + } + + virtual void SwitchTo() override { + if (this->this_thrd_) { + auto this_aff = g_current_fake_win32_fiber->fiber_affinity_; + if (this->fiber_affinity_ != this_aff) { + this->fiber_affinity_ = this_aff; + this->this_thrd_->set_affinity_mask(this_aff); + } + } + if (!g_current_fake_win32_fiber->terminating_) { + SignalObjectAndWait(this->execute_signal_, + g_current_fake_win32_fiber->execute_signal_, INFINITE, + false); + } else { + + SetEvent(this->execute_signal_); + g_current_fake_win32_fiber->this_thrd_->Terminate(0); + } + } + virtual void set_name(std::string name) { + if (this_thrd_) { + this_thrd_->set_name(name); + } + } +}; +#if XE_USE_FAKEFIBERS == 1 +std::unique_ptr Fiber::Create(CreationParameters params, + std::function start_routine) { + return std::make_unique(params.stack_size, start_routine); +} + +std::unique_ptr Fiber::CreateFromThread() { + return std::make_unique(); +} + +Fiber* Fiber::GetCurrentFiber() { + return reinterpret_cast(g_current_fake_win32_fiber); +} +#else +std::unique_ptr Fiber::Create(CreationParameters params, + std::function start_routine) { + return std::make_unique(params.stack_size, start_routine); +} + +std::unique_ptr Fiber::CreateFromThread() { + return std::make_unique(); +} + +Fiber* Fiber::GetCurrentFiber() { + return reinterpret_cast(GetFiberData()); +} + +#endif + +AtomicListHeader::AtomicListHeader() { + InitializeSListHead(reinterpret_cast(this)); +} +AtomicListEntry* AtomicListHeader::Flush() { + return reinterpret_cast( + InterlockedFlushSList(reinterpret_cast(this))); +} +void AtomicListHeader::Push(AtomicListEntry* entry) { + InterlockedPushEntrySList(reinterpret_cast(this), + reinterpret_cast(entry)); +} +AtomicListEntry* AtomicListHeader::Pop() { + return reinterpret_cast( + InterlockedPopEntrySList(reinterpret_cast(this))); +} } // namespace threading } // namespace 
xe diff --git a/src/xenia/cpu/backend/backend.cc b/src/xenia/cpu/backend/backend.cc index 6773d5c99d..d54fefc763 100644 --- a/src/xenia/cpu/backend/backend.cc +++ b/src/xenia/cpu/backend/backend.cc @@ -23,10 +23,6 @@ bool Backend::Initialize(Processor* processor) { return true; } -void* Backend::AllocThreadData() { return nullptr; } - -void Backend::FreeThreadData(void* thread_data) {} - } // namespace backend } // namespace cpu } // namespace xe diff --git a/src/xenia/cpu/backend/backend.h b/src/xenia/cpu/backend/backend.h index 2e247fc559..222fa707ff 100644 --- a/src/xenia/cpu/backend/backend.h +++ b/src/xenia/cpu/backend/backend.h @@ -51,10 +51,6 @@ class Backend { CodeCache* code_cache() const { return code_cache_; } virtual bool Initialize(Processor* processor); - - virtual void* AllocThreadData(); - virtual void FreeThreadData(void* thread_data); - virtual void CommitExecutableRange(uint32_t guest_low, uint32_t guest_high) = 0; @@ -118,6 +114,25 @@ class Backend { } virtual void FreeGuestTrampoline(uint32_t trampoline_addr) {} + /* + these methods are wrapped by Processor's guest atomic functions + */ + virtual uint32_t ReservedLoad32(cpu::ppc::PPCContext* context, + uint32_t address) { + return 0; + } + virtual uint64_t ReservedLoad64(cpu::ppc::PPCContext* context, + uint32_t address) { + return 0; + } + virtual bool ReservedStore32(cpu::ppc::PPCContext* context, uint32_t address, + uint32_t value) { + return false; + } + virtual bool ReservedStore64(cpu::ppc::PPCContext* context, uint32_t address, + uint64_t value) { + return false; + } protected: Processor* processor_ = nullptr; MachineInfo machine_info_; diff --git a/src/xenia/cpu/backend/x64/x64_backend.cc b/src/xenia/cpu/backend/x64/x64_backend.cc index 53c54b1b0e..7e395fbc79 100644 --- a/src/xenia/cpu/backend/x64/x64_backend.cc +++ b/src/xenia/cpu/backend/x64/x64_backend.cc @@ -14,6 +14,7 @@ #include "third_party/capstone/include/capstone/capstone.h" #include 
"third_party/capstone/include/capstone/x86.h" +#include "xenia/base/atomic.h" #include "xenia/base/exception_handler.h" #include "xenia/base/logging.h" #include "xenia/cpu/backend/x64/x64_assembler.h" @@ -26,7 +27,7 @@ #include "xenia/cpu/processor.h" #include "xenia/cpu/stack_walker.h" #include "xenia/cpu/xex_module.h" - +#define XE_ALLOW_DEADLOCKABLE_RESERVE 0 DEFINE_bool(record_mmio_access_exceptions, true, "For guest addresses records whether we caught any mmio accesses " "for them. This info can then be used on a subsequent run to " @@ -78,6 +79,9 @@ class X64HelperEmitter : public X64Emitter { void* EmitFrsqrteHelper(); + void* EmitEmulatedInterruptHelper(); + void* EmitTimedInterruptHelper(); + private: void* EmitCurrentForOffsets(const _code_offsets& offsets, size_t stack_size = 0); @@ -290,6 +294,10 @@ bool X64Backend::Initialize(Processor* processor) { vrsqrtefp_vector_helper = thunk_emitter.EmitVectorVRsqrteHelper(vrsqrtefp_scalar_helper); frsqrtefp_helper = thunk_emitter.EmitFrsqrteHelper(); + + emulated_interrupt_helper_ = thunk_emitter.EmitEmulatedInterruptHelper(); + enqueue_timed_interrupts_helper_ = thunk_emitter.EmitTimedInterruptHelper(); + // Set the code cache to use the ResolveFunction thunk for default // indirections. 
assert_zero(uint64_t(resolve_function_thunk_) & 0xFFFFFFFF00000000ull); @@ -924,7 +932,8 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() { Xbyak::Label L18, L2, L35, L4, L9, L8, L10, L11, L12, L13, L1; Xbyak::Label LC1, _LCPI3_1; Xbyak::Label handle_denormal_input; - Xbyak::Label specialcheck_1, convert_to_signed_inf_and_ret, handle_oddball_denormal; + Xbyak::Label specialcheck_1, convert_to_signed_inf_and_ret, + handle_oddball_denormal; auto emulate_lzcnt_helper_unary_reg = [this](auto& reg, auto& scratch_reg) { inLocalLabel(); @@ -941,19 +950,19 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() { vmovd(r8d, xmm0); vmovaps(xmm1, xmm0); mov(ecx, r8d); - //extract mantissa + // extract mantissa and_(ecx, 0x7fffff); mov(edx, ecx); cmp(r8d, 0xff800000); jz(specialcheck_1, CodeGenerator::T_NEAR); - //is exponent zero? + // is exponent zero? test(r8d, 0x7f800000); jne(L18); test(ecx, ecx); jne(L2); L(L18); - //extract biased exponent and unbias + // extract biased exponent and unbias mov(r9d, r8d); shr(r9d, 23); movzx(r9d, r9b); @@ -988,7 +997,7 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() { vxorps(xmm0, xmm0, xmm0); vcomiss(xmm0, xmm1); jbe(L9); - vmovss(xmm2, ptr[rip+LC1]); + vmovss(xmm2, ptr[rip + LC1]); vandps(xmm1, GetXmmConstPtr(XMMSignMaskF32)); test(edx, edx); @@ -1019,7 +1028,7 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() { L(L11); vxorps(xmm2, xmm2, xmm2); - vmovss(xmm0, ptr[rip+LC1]); + vmovss(xmm0, ptr[rip + LC1]); vcomiss(xmm2, xmm1); ja(L1, CodeGenerator::T_NEAR); mov(ecx, 127); @@ -1080,7 +1089,7 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() { or_(ecx, r8d); or_(ecx, eax); vmovd(xmm0, ecx); - vaddss(xmm0, xmm1);//apply DAZ behavior to output + vaddss(xmm0, xmm1); // apply DAZ behavior to output L(L1); ret(); @@ -1107,7 +1116,8 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() { xchg(ecx, edx); // esi is just the value of xmm0's low word, so we can restore it from there shl(r8d, cl); - mov(ecx, edx); // restore ecx, dont 
xchg because we're going to spoil edx anyway + mov(ecx, + edx); // restore ecx, dont xchg because we're going to spoil edx anyway mov(edx, r8d); vmovd(r8d, xmm0); } @@ -1115,8 +1125,8 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() { jmp(L4); L(specialcheck_1); - //should be extremely rare - vmovss(xmm0, ptr[rip+LC1]); + // should be extremely rare + vmovss(xmm0, ptr[rip + LC1]); ret(); L(handle_oddball_denormal); @@ -1131,7 +1141,8 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() { dd(0xFF800000); dd(0x7F800000); L(LC1); - //the position of 7FC00000 here matters, this address will be indexed in handle_oddball_denormal + // the position of 7FC00000 here matters, this address will be indexed in + // handle_oddball_denormal dd(0x7FC00000); dd(0x5F34FD00); @@ -1143,16 +1154,56 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() { return EmitCurrentForOffsets(code_offsets); } +static void NativeCheckInterrupt(void* ctx) { + ppc::PPCContext::ReallyDoInterrupt(reinterpret_cast(ctx)); +} + +void* X64HelperEmitter::EmitEmulatedInterruptHelper() { + _code_offsets code_offsets = {}; + pop(r9); +#if XE_TRACE_LAST_INTERRUPT_ADDR == 1 + mov(qword[GetContextReg() + + offsetof(ppc::PPCContext, recent_interrupt_addr_)], + r9); +#endif + CallNativeSafe(NativeCheckInterrupt); + jmp(r9); + code_offsets.prolog_stack_alloc = getSize(); + code_offsets.body = getSize(); + code_offsets.epilog = getSize(); + code_offsets.tail = getSize(); + code_offsets.prolog = getSize(); + return EmitCurrentForOffsets(code_offsets); +} + +static void NativeTimedInterruptHelper(void* ctx) { + reinterpret_cast(ctx)->EnqueueTimedInterrupts(); +} +void* X64HelperEmitter::EmitTimedInterruptHelper() { + _code_offsets code_offsets = {}; + pop(r9); + CallNativeSafe(NativeTimedInterruptHelper); + jmp(r9); + code_offsets.prolog_stack_alloc = getSize(); + code_offsets.body = getSize(); + code_offsets.epilog = getSize(); + code_offsets.tail = getSize(); + code_offsets.prolog = getSize(); + return 
EmitCurrentForOffsets(code_offsets); +} + void* X64HelperEmitter::EmitVectorVRsqrteHelper(void* scalar_helper) { _code_offsets code_offsets = {}; Xbyak::Label check_scalar_operation_in_vmx, actual_vector_version; auto result_ptr = GetBackendCtxPtr(offsetof(X64BackendContext, helper_scratch_xmms[0])); - auto counter_ptr = GetBackendCtxPtr(offsetof(X64BackendContext, helper_scratch_u64s[2])); + auto counter_ptr = + GetBackendCtxPtr(offsetof(X64BackendContext, helper_scratch_u64s[2])); counter_ptr.setBit(64); - //shuffle and xor to check whether all lanes are equal - //sadly has to leave the float pipeline for the vptest, which is moderate yikes + // shuffle and xor to check whether all lanes are equal + // sadly has to leave the float pipeline for the vptest, which is moderate + // yikes vmovhlps(xmm2, xmm0, xmm0); vmovsldup(xmm1, xmm0); vxorps(xmm1, xmm1, xmm0); @@ -1160,7 +1211,7 @@ void* X64HelperEmitter::EmitVectorVRsqrteHelper(void* scalar_helper) { vorps(xmm2, xmm1, xmm2); vptest(xmm2, xmm2); jnz(check_scalar_operation_in_vmx); - //jmp(scalar_helper, CodeGenerator::T_NEAR); + // jmp(scalar_helper, CodeGenerator::T_NEAR); call(scalar_helper); vshufps(xmm0, xmm0, xmm0, 0); ret(); @@ -1169,9 +1220,8 @@ void* X64HelperEmitter::EmitVectorVRsqrteHelper(void* scalar_helper) { vptest(xmm0, ptr[backend()->LookupXMMConstantAddress(XMMThreeFloatMask)]); jnz(actual_vector_version); - vshufps(xmm0, xmm0,xmm0, _MM_SHUFFLE(3, 3, 3, 3)); + vshufps(xmm0, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3)); call(scalar_helper); - // this->DebugBreak(); vinsertps(xmm0, xmm0, (3 << 4) | (0 << 6)); vblendps(xmm0, xmm0, ptr[backend()->LookupXMMConstantAddress(XMMFloatInf)], @@ -1189,11 +1239,11 @@ void* X64HelperEmitter::EmitVectorVRsqrteHelper(void* scalar_helper) { L(loop); lea(rax, result_ptr); - vmovss(xmm0, ptr[rax+rcx*4]); + vmovss(xmm0, ptr[rax + rcx * 4]); call(scalar_helper); mov(rcx, counter_ptr); lea(rax, result_ptr); - vmovss(ptr[rax+rcx*4], xmm0); + vmovss(ptr[rax + rcx * 4], xmm0); 
inc(ecx); cmp(ecx, 4); mov(counter_ptr, rcx); @@ -1274,7 +1324,7 @@ void* X64HelperEmitter::EmitFrsqrteHelper() { xor_(eax, 8); sub(edx, ecx); lea(rcx, ptr[rip + frsqrte_table2]); - movzx(eax, byte[rax+rcx]); + movzx(eax, byte[rax + rcx]); sal(rdx, 52); sal(rax, 44); or_(rax, rdx); @@ -1342,6 +1392,70 @@ void* X64HelperEmitter::EmitFrsqrteHelper() { return EmitCurrentForOffsets(code_offsets); } +void ReserveHelper::lock() { + while (!xe::atomic_cas(0, (uint32_t)(uint64_t)this, (uint32_t*)&blocks[0])) { + } +} +void ReserveHelper::unlock() { + auto old = xe::atomic_exchange(0, (uint32_t*)&blocks[0]); + xenia_assert(old == (uint32_t)(uint64_t)this); +} + +void X64Backend::AcquireReservation(cpu::ppc::PPCContext* context, + uint32_t address) { + auto bctx = this->BackendContextForGuestContext((void*)context); + auto r8 = bctx->reserve_helper_; + r8->lock(); + unsigned char has_reserve = _bittestandreset( + reinterpret_cast(&bctx->flags), kX64BackendHasReserveBit); + + if (has_reserve) { + goto already_has_a_reservation; + } + uint32_t ecx = address >> RESERVE_BLOCK_SHIFT; + unsigned r9d = 0; + unsigned edx = ecx; + edx >>= 6; + uint64_t* rdx = &r8->blocks[edx]; + ecx &= 63; + unsigned char r9b = _interlockedbittestandset64( + reinterpret_cast(rdx), ecx) ^ + 1; + + r9b <<= kX64BackendHasReserveBit; + bctx->cached_reserve_offset = reinterpret_cast(rdx); + bctx->cached_reserve_bit = ecx; + bctx->flags |= static_cast(r9b); +#if XE_ALLOW_DEADLOCKABLE_RESERVE == 0 + r8->unlock(); +#endif + return; +already_has_a_reservation: + r8->unlock(); + __debugbreak(); +} + +uint32_t X64Backend::ReservedLoad32(cpu::ppc::PPCContext* context, + uint32_t address) { + auto address_host = context->TranslateVirtual(address); + swcache::PrefetchW(address_host); + AcquireReservation(context, address); + auto bctx = this->BackendContextForGuestContext((void*)context); + uint32_t result_unswapped = *address_host; + bctx->cached_reserve_value_ = static_cast(result_unswapped); + return 
xe::byte_swap(result_unswapped); +} +uint64_t X64Backend::ReservedLoad64(cpu::ppc::PPCContext* context, + uint32_t address) { + auto address_host = context->TranslateVirtual(address); + swcache::PrefetchW(address_host); + AcquireReservation(context, address); + auto bctx = this->BackendContextForGuestContext((void*)context); + uint64_t result_unswapped = *address_host; + bctx->cached_reserve_value_ = result_unswapped; + return xe::byte_swap(result_unswapped); +} + void* X64HelperEmitter::EmitTryAcquireReservationHelper() { _code_offsets code_offsets = {}; code_offsets.prolog = getSize(); @@ -1349,8 +1463,20 @@ void* X64HelperEmitter::EmitTryAcquireReservationHelper() { Xbyak::Label already_has_a_reservation; Xbyak::Label acquire_new_reservation; - btr(GetBackendFlagsPtr(), kX64BackendHasReserveBit); + Xbyak::Label lock_label; + mov(r8, GetBackendCtxPtr(offsetof(X64BackendContext, reserve_helper_))); + push(rax); + L(lock_label); + xor_(r9d, r9d); + mov(rax, r8); + xchg(rax, r9); + lock(); + cmpxchg(ptr[r8], r9d); + jnz(lock_label); + + btr(GetBackendFlagsPtr(), kX64BackendHasReserveBit); + pop(rax); jc(already_has_a_reservation); shr(ecx, RESERVE_BLOCK_SHIFT); @@ -1372,6 +1498,9 @@ void* X64HelperEmitter::EmitTryAcquireReservationHelper() { mov(GetBackendCtxPtr(offsetof(X64BackendContext, cached_reserve_bit)), ecx); or_(GetBackendCtxPtr(offsetof(X64BackendContext, flags)), r9d); +#if XE_ALLOW_DEADLOCKABLE_RESERVE == 0 + mov(dword[r8], 0); +#endif ret(); L(already_has_a_reservation); DebugBreak(); @@ -1382,6 +1511,69 @@ void* X64HelperEmitter::EmitTryAcquireReservationHelper() { code_offsets.tail = getSize(); return EmitCurrentForOffsets(code_offsets); } + +template +bool ReservedStoreHelperHost( + ShiftedPointer + context, + unsigned int address, T* host_address, T value) { + auto reserve_helper = ADJ(context)->reserve_helper_; + // lock carries over from load +#if XE_ALLOW_DEADLOCKABLE_RESERVE == 0 + reserve_helper->lock(); +#endif + + value = 
xe::byte_swap(value); + + unsigned char v4 = + _bittestandreset((long*)&ADJ(context)->flags, kX64BackendHasReserveBit); + + if (!v4) { + reserve_helper->unlock(); + return false; + } + uint32_t address_to_block = address >> RESERVE_BLOCK_SHIFT; + uint64_t* reserve_bitmap_element = + &reserve_helper->blocks[address_to_block >> 6]; + _m_prefetchw(reserve_bitmap_element); + unsigned char result = ADJ(context)->cached_reserve_offset == + reinterpret_cast(reserve_bitmap_element); + if (result) { + uint32_t reserve_bit = static_cast(address_to_block & 0x3F); + result = + ADJ(context)->cached_reserve_bit == static_cast(reserve_bit); + if (result) { + if constexpr (sizeof(T) == 4) { + uint32_t compare_with = + static_cast(ADJ(context)->cached_reserve_value_); + result = + _InterlockedCompareExchange((volatile unsigned int*)host_address, + value, compare_with) == compare_with; + } else { + uint64_t compare_with = ADJ(context)->cached_reserve_value_; + + result = + _InterlockedCompareExchange64((volatile long long*)host_address, + value, compare_with) == compare_with; + } + v4 = _interlockedbittestandreset64( + (volatile long long*)reserve_bitmap_element, reserve_bit); + if (v4) { + reserve_helper->unlock(); + return static_cast(result & v4); + } + } else { + __debugbreak(); + } + } else { + __debugbreak(); + } + //__debugbreak(); + reserve_helper->unlock(); + return false; +} + // ecx=guest addr // r9 = host addr // r8 = value @@ -1393,13 +1585,31 @@ void* X64HelperEmitter::EmitReservedStoreHelper(bool bit64) { Xbyak::Label reservation_isnt_for_our_addr; Xbyak::Label somehow_double_cleared; // carry must be set + zero flag must be set + mov(rax, GetBackendCtxPtr(offsetof(X64BackendContext, reserve_helper_))); +#if XE_ALLOW_DEADLOCKABLE_RESERVE == 0 + Xbyak::Label lock_label; + + push(r8); + push(r9); + push(rax); + mov(r8, rax); + L(lock_label); + xor_(r9d, r9d); + mov(rax, r8); + xchg(rax, r9); + lock(); + cmpxchg(ptr[r8], r9d); + jnz(lock_label); + + pop(rax); + pop(r9); 
+ pop(r8); +#endif btr(GetBackendFlagsPtr(), kX64BackendHasReserveBit); jnc(done); - mov(rax, GetBackendCtxPtr(offsetof(X64BackendContext, reserve_helper_))); - shr(ecx, RESERVE_BLOCK_SHIFT); mov(edx, ecx); shr(edx, 6); // divide by 64 @@ -1425,7 +1635,6 @@ void* X64HelperEmitter::EmitReservedStoreHelper(bool bit64) { lock(); if (bit64) { cmpxchg(ptr[r9], r8); - } else { cmpxchg(ptr[r9], r8d); } @@ -1443,6 +1652,9 @@ void* X64HelperEmitter::EmitReservedStoreHelper(bool bit64) { setz(al); setc(ah); cmp(ax, 0x0101); + mov(rax, GetBackendCtxPtr(offsetof(X64BackendContext, reserve_helper_))); + // release lock + mov(dword[rax], 0); ret(); // could be the same label, but otherwise we don't know where we came from @@ -1460,6 +1672,17 @@ void* X64HelperEmitter::EmitReservedStoreHelper(bool bit64) { return EmitCurrentForOffsets(code_offsets); } +bool X64Backend::ReservedStore32(cpu::ppc::PPCContext* context, + uint32_t address, uint32_t value) { + return ReservedStoreHelperHost( + context, address, context->TranslateVirtual(address), value); +} +bool X64Backend::ReservedStore64(cpu::ppc::PPCContext* context, + uint32_t address, uint64_t value) { + return ReservedStoreHelperHost( + context, address, context->TranslateVirtual(address), value); +} + void X64HelperEmitter::EmitSaveVolatileRegs() { // Save off volatile registers. 
// mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rax); @@ -1696,6 +1919,11 @@ uint32_t X64Backend::CreateGuestTrampoline(GuestTrampolineProc proc, code_cache()->AddIndirection( indirection_guest_addr, static_cast(reinterpret_cast(write_pos))); + auto funct = new BuiltinFunction(nullptr, indirection_guest_addr); + funct->SetupBuiltin(proc, userdata1, userdata2); + funct->set_end_address(indirection_guest_addr + 4); + + processor()->DirectlyInsertFunction(indirection_guest_addr, funct); return indirection_guest_addr; } @@ -1703,9 +1931,13 @@ uint32_t X64Backend::CreateGuestTrampoline(GuestTrampolineProc proc, void X64Backend::FreeGuestTrampoline(uint32_t trampoline_addr) { xenia_assert(trampoline_addr >= GUEST_TRAMPOLINE_BASE && trampoline_addr < GUEST_TRAMPOLINE_END); + auto old_function = processor()->LookupFunction(trampoline_addr); + size_t index = (trampoline_addr - GUEST_TRAMPOLINE_BASE) / GUEST_TRAMPOLINE_MIN_LEN; + processor()->RemoveFunctionByAddress(trampoline_addr); guest_trampoline_address_bitmap_.Release(index); + delete old_function; } } // namespace x64 } // namespace backend diff --git a/src/xenia/cpu/backend/x64/x64_backend.h b/src/xenia/cpu/backend/x64/x64_backend.h index 665b337b0f..f664a3dccb 100644 --- a/src/xenia/cpu/backend/x64/x64_backend.h +++ b/src/xenia/cpu/backend/x64/x64_backend.h @@ -12,8 +12,8 @@ #include -#include "xenia/base/cvar.h" #include "xenia/base/bit_map.h" +#include "xenia/base/cvar.h" #include "xenia/cpu/backend/backend.h" #if XE_PLATFORM_WIN32 == 1 @@ -44,9 +44,10 @@ typedef void* (*GuestToHostThunk)(void* target, void* arg0, void* arg1); typedef void (*ResolveFunctionThunk)(); /* - place guest trampolines in the memory range that the HV normally occupies. 
- This way guests can call in via the indirection table and we don't have to clobber/reuse an existing memory range - The xboxkrnl range is already used by export trampolines (see kernel/kernel_module.cc) + place guest trampolines in the memory range that the HV normally occupies. + This way guests can call in via the indirection table and we don't have to + clobber/reuse an existing memory range The xboxkrnl range is already used by + export trampolines (see kernel/kernel_module.cc) */ static constexpr uint32_t GUEST_TRAMPOLINE_BASE = 0x80000000; static constexpr uint32_t GUEST_TRAMPOLINE_END = 0x80040000; @@ -56,15 +57,19 @@ static constexpr uint32_t GUEST_TRAMPOLINE_MIN_LEN = 8; static constexpr uint32_t MAX_GUEST_TRAMPOLINES = (GUEST_TRAMPOLINE_END - GUEST_TRAMPOLINE_BASE) / GUEST_TRAMPOLINE_MIN_LEN; -#define RESERVE_BLOCK_SHIFT 16 +#define RESERVE_BLOCK_SHIFT 7 #define RESERVE_NUM_ENTRIES \ ((1024ULL * 1024ULL * 1024ULL * 4ULL) >> RESERVE_BLOCK_SHIFT) // https://codalogic.com/blog/2022/12/06/Exploring-PowerPCs-read-modify-write-operations struct ReserveHelper { + // block 0 is used as global reserve helper lock uint64_t blocks[RESERVE_NUM_ENTRIES / 64]; ReserveHelper() { memset(blocks, 0, sizeof(blocks)); } + + void lock(); + void unlock(); }; struct X64BackendStackpoint { @@ -75,11 +80,13 @@ struct X64BackendStackpoint { // use unsigned guest_return_address_; }; -enum : uint32_t { - kX64BackendMXCSRModeBit = 0, - kX64BackendHasReserveBit = 1, - kX64BackendNJMOn = 2, //non-java mode bit is currently set. for use in software fp routines - kX64BackendNonIEEEMode = 3, //non-ieee mode is currently enabled for scalar fpu. +enum : uint32_t { + kX64BackendMXCSRModeBit = 0, + kX64BackendHasReserveBit = 1, + kX64BackendNJMOn = + 2, // non-java mode bit is currently set. for use in software fp routines + kX64BackendNonIEEEMode = + 3, // non-ieee mode is currently enabled for scalar fpu. 
}; // located prior to the ctx register // some things it would be nice to have be per-emulator instance instead of per @@ -170,8 +177,8 @@ class X64Backend : public Backend { reinterpret_cast(ctx) - sizeof(X64BackendContext)); } virtual uint32_t CreateGuestTrampoline(GuestTrampolineProc proc, - void* userdata1, - void* userdata2, bool long_term) override; + void* userdata1, void* userdata2, + bool long_term) override; virtual void FreeGuestTrampoline(uint32_t trampoline_addr) override; virtual void SetGuestRoundingMode(void* ctx, unsigned int mode) override; @@ -184,6 +191,21 @@ class X64Backend : public Backend { void* LookupXMMConstantAddress(unsigned index) { return reinterpret_cast(emitter_data() + sizeof(vec128_t) * index); } + + void AcquireReservation(cpu::ppc::PPCContext* context, uint32_t address); + + virtual uint32_t ReservedLoad32(cpu::ppc::PPCContext* context, + uint32_t address) override; + + virtual uint64_t ReservedLoad64(cpu::ppc::PPCContext* context, + uint32_t address) override; + + virtual bool ReservedStore32(cpu::ppc::PPCContext* context, uint32_t address, + uint32_t value) override; + + virtual bool ReservedStore64(cpu::ppc::PPCContext* context, uint32_t address, + uint64_t value) override; + #if XE_X64_PROFILER_AVAILABLE == 1 uint64_t* GetProfilerRecordForFunction(uint32_t guest_address); #endif @@ -213,6 +235,8 @@ class X64Backend : public Backend { void* vrsqrtefp_vector_helper = nullptr; void* vrsqrtefp_scalar_helper = nullptr; void* frsqrtefp_helper = nullptr; + void* emulated_interrupt_helper_ = nullptr; + void* enqueue_timed_interrupts_helper_ = nullptr; private: #if XE_X64_PROFILER_AVAILABLE == 1 GuestProfilerData profiler_data_; diff --git a/src/xenia/cpu/backend/x64/x64_emitter.cc b/src/xenia/cpu/backend/x64/x64_emitter.cc index 39ee0e25b2..9d79a68063 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.cc +++ b/src/xenia/cpu/backend/x64/x64_emitter.cc @@ -38,6 +38,7 @@ #include "xenia/cpu/processor.h" #include "xenia/cpu/symbol.h" 
#include "xenia/cpu/thread_state.h" +#include "xenia/kernel/kernel_guest_structures.h" DEFINE_bool(debugprint_trap_log, false, "Log debugprint traps to the active debugger", "CPU"); @@ -210,24 +211,27 @@ bool X64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) { // Adding or changing anything here must be matched! /* - pick a page to use as the local base as close to the commonly accessed page that contains most backend fields - the sizes that are checked are chosen based on PTE coalescing sizes. zen does 16k or 32k + pick a page to use as the local base as close to the commonly accessed page + that contains most backend fields the sizes that are checked are chosen + based on PTE coalescing sizes. zen does 16k or 32k */ size_t stack_size = StackLayout::GUEST_STACK_SIZE; if (stack_offset < (4096 - sizeof(X64BackendContext))) { locals_page_delta_ = 4096; - } else if (stack_offset < (16384 - sizeof(X64BackendContext))) {//16k PTE coalescing + } else if (stack_offset < + (16384 - sizeof(X64BackendContext))) { // 16k PTE coalescing locals_page_delta_ = 16384; } else if (stack_offset < (32768 - sizeof(X64BackendContext))) { locals_page_delta_ = 32768; } else if (stack_offset < (65536 - sizeof(X64BackendContext))) { locals_page_delta_ = 65536; } else { - //extremely unlikely, fall back to stack - stack_size = xe::align(StackLayout::GUEST_STACK_SIZE + stack_offset, 16); + // extremely unlikely, fall back to stack + stack_size = StackLayout::GUEST_STACK_SIZE + stack_offset; + locals_page_delta_ = 0; } - + assert_true((stack_size + 8) % 16 == 0); func_info.stack_size = stack_size; stack_size_ = stack_size; @@ -294,7 +298,6 @@ bool X64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) { synchronize_stack_on_next_instruction_ = false; while (block) { ForgetMxcsrMode(); // at start of block, mxcsr mode is undefined - // Mark block labels. 
auto label = block->label_head; while (label) { @@ -307,6 +310,7 @@ bool X64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) { } // Process instructions. const Instr* instr = block->instr_head; + while (instr) { if (synchronize_stack_on_next_instruction_) { if (instr->GetOpcodeNum() != hir::OPCODE_SOURCE_OFFSET) { @@ -398,9 +402,8 @@ void X64Emitter::MarkSourceOffset(const Instr* i) { entry->code_offset = static_cast(getSize()); if (cvars::emit_source_annotations) { - nop(2); + nop(1); mov(eax, entry->guest_address); - nop(2); } if (debug_info_flags_ & DebugInfoFlags::kDebugInfoTraceFunctionCoverage) { @@ -425,24 +428,29 @@ void X64Emitter::DebugBreak() { } uint64_t TrapDebugPrint(void* raw_context, uint64_t address) { - auto thread_state = - reinterpret_cast(raw_context)->thread_state; - uint32_t str_ptr = uint32_t(thread_state->context()->r[3]); + auto context = reinterpret_cast(raw_context); + + uint32_t str_ptr = uint32_t(context->r[3]); + uint32_t str_length = uint32_t(context->r[4]); + // uint16_t str_len = uint16_t(thread_state->context()->r[4]); - auto str = thread_state->memory()->TranslateVirtual(str_ptr); + auto str = context->TranslateVirtual(str_ptr); + char* string_tmp = new char[str_length + 1]; + memcpy(string_tmp, str, str_length); + string_tmp[str_length] = 0; + // TODO(benvanik): truncate to length? - XELOGD("(DebugPrint) {}", str); + XELOGD("(DebugPrint) {}", string_tmp); if (cvars::debugprint_trap_log) { - debugging::DebugPrint("(DebugPrint) {}", str); + debugging::DebugPrint("(DebugPrint) {}", string_tmp); } + delete[] string_tmp; return 0; } uint64_t TrapDebugBreak(void* raw_context, uint64_t address) { - auto thread_state = - reinterpret_cast(raw_context)->thread_state; XELOGE("tw/td forced trap hit! 
This should be a crash!"); if (cvars::break_on_debugbreak) { xe::debugging::Break(); @@ -483,7 +491,7 @@ void X64Emitter::UnimplementedInstr(const hir::Instr* i) { uint64_t ResolveFunction(void* raw_context, uint64_t target_address) { auto guest_context = reinterpret_cast(raw_context); - auto thread_state = guest_context->thread_state; + auto thread_state = guest_context->thread_state(); // TODO(benvanik): required? assert_not_zero(target_address); @@ -504,7 +512,7 @@ uint64_t ResolveFunction(void* raw_context, uint64_t target_address) { */ if (cvars::enable_host_guest_stack_synchronization) { - auto processor = thread_state->processor(); + auto processor = guest_context->processor; auto module_for_address = processor->LookupModule(static_cast(target_address)); @@ -684,7 +692,7 @@ uint64_t ResolveFunction(void* raw_context, uint64_t target_address) { } } } - auto fn = thread_state->processor()->ResolveFunction( + auto fn = guest_context->processor->ResolveFunction( static_cast(target_address)); assert_not_null(fn); auto x64_fn = static_cast(fn); @@ -1002,7 +1010,7 @@ static inline vec128_t v128_setr_bytes(unsigned char v0, unsigned char v1, } static inline vec128_t v128_setr_words(uint32_t v0, uint32_t v1, uint32_t v2, - uint32_t v3) { + uint32_t v3) { vec128_t result; result.u32[0] = v0; result.u32[1] = v1; @@ -1181,7 +1189,7 @@ static const vec128_t xmm_consts[] = { v128_setr_bytes(13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 0x80), // XMMVSRMask vec128b(1), - //XMMVRsqrteTableStart + // XMMVRsqrteTableStart v128_setr_words(0x568B4FD, 0x4F3AF97, 0x48DAAA5, 0x435A618), v128_setr_words(0x3E7A1E4, 0x3A29DFE, 0x3659A5C, 0x32E96F8), v128_setr_words(0x2FC93CA, 0x2D090CE, 0x2A88DFE, 0x2838B57), @@ -1190,8 +1198,8 @@ static const vec128_t xmm_consts[] = { v128_setr_words(0x2C27279, 0x2926FB7, 0x2666D26, 0x23F6AC0), v128_setr_words(0x21D6881, 0x1FD6665, 0x1E16468, 0x1C76287), v128_setr_words(0x1AF60C1, 0x1995F12, 0x1855D79, 0x1735BF4), - //XMMVRsqrteTableBase - 
vec128i(0) //filled in later + // XMMVRsqrteTableBase + vec128i(0) // filled in later }; void* X64Emitter::FindByteConstantOffset(unsigned bytevalue) { @@ -1267,12 +1275,13 @@ uintptr_t X64Emitter::PlaceConstData() { std::memcpy(mem, xmm_consts, sizeof(xmm_consts)); /* - set each 32-bit element of the constant XMMVRsqrteTableBase to be the address of the start of the constant XMMVRsqrteTableStart - this + set each 32-bit element of the constant XMMVRsqrteTableBase to be the + address of the start of the constant XMMVRsqrteTableStart this */ vec128_t* deferred_constants = reinterpret_cast(mem); vec128_t* vrsqrte_table_base = &deferred_constants[XMMVRsqrteTableBase]; - uint32_t ptr_to_vrsqrte_table32 = static_cast(reinterpret_cast(&deferred_constants[XMMVRsqrteTableStart])); + uint32_t ptr_to_vrsqrte_table32 = static_cast( + reinterpret_cast(&deferred_constants[XMMVRsqrteTableStart])); *vrsqrte_table_base = vec128i(ptr_to_vrsqrte_table32); memory::Protect(mem, kConstDataSize, memory::PageAccess::kReadOnly, nullptr); @@ -1288,8 +1297,10 @@ void X64Emitter::FreeConstData(uintptr_t data) { Xbyak::Address X64Emitter::GetXmmConstPtr(XmmConst id) { // Load through fixed constant table setup by PlaceConstData. // It's important that the pointer is not signed, as it will be sign-extended. - void* emitter_data_ptr = backend_->LookupXMMConstantAddress(static_cast(id)); - xenia_assert(reinterpret_cast(emitter_data_ptr) < (1ULL << 31));//must not have signbit set + void* emitter_data_ptr = + backend_->LookupXMMConstantAddress(static_cast(id)); + xenia_assert(reinterpret_cast(emitter_data_ptr) < + (1ULL << 31)); // must not have signbit set return ptr[emitter_data_ptr]; } // Implies possible StashXmm(0, ...)! @@ -1610,8 +1621,8 @@ SimdDomain X64Emitter::DeduceSimdDomain(const hir::Value* for_value) { return SimdDomain::DONTCARE; } -Xbyak::RegExp X64Emitter::GetLocalsBase() const { - return !locals_page_delta_ ? 
rsp : GetContextReg() - locals_page_delta_; +Xbyak::RegExp X64Emitter::GetLocalsBase() const { + return !locals_page_delta_ ? rsp : GetContextReg() - locals_page_delta_; } Xbyak::Address X64Emitter::GetBackendCtxPtr(int offset_in_x64backendctx) const { /* @@ -1832,6 +1843,69 @@ void X64Emitter::EnsureSynchronizedGuestAndHostStack() { L(return_from_sync); } +void X64Emitter::EmitEmulatedInterruptCheck() { + Xbyak::Label& after_interrupt_check = NewCachedLabel(); + Xbyak::Label& after_interrupt_check_pop = NewCachedLabel(); + auto interval_ptr = GetBackendCtxPtr(offsetof(X64BackendContext, flags) + 3); + interval_ptr.setBit(8); + inc(interval_ptr); + + jnz(after_interrupt_check); + + Xbyak::Label& rerun_due_to_timer = NewCachedLabel(); + push(rax); + push(rcx); + push(rdx); + push(rax); + L(rerun_due_to_timer); + rdtsc(); + mov(ecx, dword[GetContextReg() + offsetof(ppc::PPCContext, r[13])]); + shl(rdx, 32); + // assume PCR is never in physical memory! + // add(rax, GetMembaseReg()); + mov(rcx, qword[GetMembaseReg() + rcx + + offsetof(kernel::X_KPCR, emulated_interrupt)]); + or_(rax, rdx); + + cmp(rax, qword[rcx + offsetof(cpu::XenonInterruptController, + next_event_quick_timestamp_)]); + + Xbyak::Label& do_timed_interrupts_label = AddToTail( + [&rerun_due_to_timer](X64Emitter& e, Xbyak::Label& our_tail_label) { + e.L(our_tail_label); + + e.call(e.backend()->enqueue_timed_interrupts_helper_); + + e.jmp(rerun_due_to_timer, e.T_NEAR); + }); + + ja(do_timed_interrupts_label, CodeGenerator::T_NEAR); + + cmp(word[rcx + offsetof(cpu::XenonInterruptController, queued_interrupts_)], + 0); + + // cmp(qword[rax + offsetof(kernel::X_KPCR, emulated_interrupt)], rax); + + Xbyak::Label& do_emulated_interrupt_label = + AddToTail([&after_interrupt_check_pop](X64Emitter& e, + Xbyak::Label& our_tail_label) { + e.L(our_tail_label); + + e.call(e.backend()->emulated_interrupt_helper_); + + e.jmp(after_interrupt_check_pop, e.T_NEAR); + }); + jnz(do_emulated_interrupt_label, 
CodeGenerator::T_NEAR); + + L(after_interrupt_check_pop); + pop(rax); + pop(rdx); + pop(rcx); + pop(rax); + + L(after_interrupt_check); +} + } // namespace x64 } // namespace backend } // namespace cpu diff --git a/src/xenia/cpu/backend/x64/x64_emitter.h b/src/xenia/cpu/backend/x64/x64_emitter.h index 3806e9ee2a..9c8aae7ef1 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.h +++ b/src/xenia/cpu/backend/x64/x64_emitter.h @@ -383,6 +383,7 @@ class X64Emitter : public Xbyak::CodeGenerator { DEFINESHIFTER(vpshld) DEFINESHIFTER(vpshlq) + void EmitEmulatedInterruptCheck(); protected: void* Emplace(const EmitFunctionInfo& func_info, GuestFunction* function = nullptr); diff --git a/src/xenia/cpu/backend/x64/x64_function.cc b/src/xenia/cpu/backend/x64/x64_function.cc index 668726e8fb..bbc96bbb03 100644 --- a/src/xenia/cpu/backend/x64/x64_function.cc +++ b/src/xenia/cpu/backend/x64/x64_function.cc @@ -32,7 +32,7 @@ void X64Function::Setup(uint8_t* machine_code, size_t machine_code_length) { bool X64Function::CallImpl(ThreadState* thread_state, uint32_t return_address) { auto backend = - reinterpret_cast(thread_state->processor()->backend()); + reinterpret_cast(thread_state->context()->processor->backend()); auto thunk = backend->host_to_guest_thunk(); thunk(machine_code_, thread_state->context(), reinterpret_cast(uintptr_t(return_address))); diff --git a/src/xenia/cpu/backend/x64/x64_seq_control.cc b/src/xenia/cpu/backend/x64/x64_seq_control.cc index 2e2d273cc7..a1091d7a40 100644 --- a/src/xenia/cpu/backend/x64/x64_seq_control.cc +++ b/src/xenia/cpu/backend/x64/x64_seq_control.cc @@ -608,6 +608,14 @@ EMITTER_OPCODE_TABLE(OPCODE_BRANCH_FALSE, BRANCH_FALSE_I8, BRANCH_FALSE_I16, BRANCH_FALSE_I32, BRANCH_FALSE_I64, BRANCH_FALSE_F32, BRANCH_FALSE_F64); +struct CHECK_INTERRUPT + : Sequence> { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.EmitEmulatedInterruptCheck(); + } +}; +EMITTER_OPCODE_TABLE(OPCODE_CHECK_INTERRUPT, CHECK_INTERRUPT); + } // namespace x64 } // 
namespace backend } // namespace cpu diff --git a/src/xenia/cpu/backend/x64/x64_seq_memory.cc b/src/xenia/cpu/backend/x64/x64_seq_memory.cc index 61fcbdfb9a..cabdea22ae 100644 --- a/src/xenia/cpu/backend/x64/x64_seq_memory.cc +++ b/src/xenia/cpu/backend/x64/x64_seq_memory.cc @@ -1876,7 +1876,9 @@ EMITTER_OPCODE_TABLE(OPCODE_CACHE_CONTROL, CACHE_CONTROL); // ============================================================================ struct MEMORY_BARRIER : Sequence> { - static void Emit(X64Emitter& e, const EmitArgType& i) { e.mfence(); } + static void Emit(X64Emitter& e, const EmitArgType& i) { + //do nothing. X86 does not reorder writes in the way PPC does, so not necessary + } }; EMITTER_OPCODE_TABLE(OPCODE_MEMORY_BARRIER, MEMORY_BARRIER); diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc index e6b3d156a0..eedf230f61 100644 --- a/src/xenia/cpu/backend/x64/x64_sequences.cc +++ b/src/xenia/cpu/backend/x64/x64_sequences.cc @@ -484,43 +484,15 @@ EMITTER_OPCODE_TABLE(OPCODE_ROUND, ROUND_F32, ROUND_F64, ROUND_V128); // ============================================================================ struct LOAD_CLOCK : Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { - if (cvars::inline_loadclock) { - e.mov(e.rcx, - e.GetBackendCtxPtr(offsetof(X64BackendContext, guest_tick_count))); - e.mov(i.dest, e.qword[e.rcx]); - } else { - // When scaling is disabled and the raw clock source is selected, the code - // in the Clock class is actually just forwarding tick counts after one - // simple multiply and division. In that case we rather bake the scaling - // in here to cut extra function calls with CPU cache misses and stack - // frame overhead. - if (cvars::clock_no_scaling && cvars::clock_source_raw) { - auto ratio = Clock::guest_tick_ratio(); - // The 360 CPU is an in-order CPU, AMD64 usually isn't. 
Without - // mfence/lfence magic the rdtsc instruction can be executed sooner or - // later in the cache window. Since it's resolution however is much - // higher than the 360's mftb instruction this can safely be ignored. - - // Read time stamp in edx (high part) and eax (low part). - e.rdtsc(); - // Make it a 64 bit number in rax. - e.shl(e.rdx, 32); - e.or_(e.rax, e.rdx); - // Apply tick frequency scaling. - e.mov(e.rcx, ratio.first); - e.mul(e.rcx); - // We actually now have a 128 bit number in rdx:rax. - e.mov(e.rcx, ratio.second); - e.div(e.rcx); - e.mov(i.dest, e.rax); - } else { - e.CallNative(LoadClock); - e.mov(i.dest, e.rax); - } - } + e.CallNative(LoadClock); + e.mov(i.dest, e.rax); } static uint64_t LoadClock(void* raw_context) { - return Clock::QueryGuestTickCount(); + auto context = reinterpret_cast(raw_context); + // get hw thread from pcr page number + auto hw_thread = + context->processor->GetCPUThread((context->r[13] >> 12) & 0xF); + return hw_thread->mftb(); } }; EMITTER_OPCODE_TABLE(OPCODE_LOAD_CLOCK, LOAD_CLOCK); @@ -2120,9 +2092,9 @@ struct RSQRT_V128 : Sequence> { e.ChangeMxcsrMode(MXCSRMode::Vmx); Xmm src1 = GetInputRegOrConstant(e, i.src1, e.xmm3); /* - the vast majority of inputs to vrsqrte come from vmsum3 or vmsum4 as part - of a vector normalization sequence. in fact, its difficult to find uses of vrsqrte in titles - that have inputs which do not come from vmsum. + the vast majority of inputs to vrsqrte come from vmsum3 or vmsum4 as + part of a vector normalization sequence. in fact, its difficult to find + uses of vrsqrte in titles that have inputs which do not come from vmsum. 
*/ if (i.src1.value && i.src1.value->AllFloatVectorLanesSameValue()) { e.vmovss(e.xmm0, src1); @@ -3193,8 +3165,7 @@ struct SET_ROUNDING_MODE_I32 if (constant_value & 4) { e.or_(flags_ptr, 1U << kX64BackendNonIEEEMode); - } - else { + } else { e.btr(flags_ptr, kX64BackendNonIEEEMode); } e.mov(e.dword[e.rsp + StackLayout::GUEST_SCRATCH], e.eax); @@ -3202,14 +3173,14 @@ struct SET_ROUNDING_MODE_I32 e.vldmxcsr(e.dword[e.rsp + StackLayout::GUEST_SCRATCH]); } else { - //can andnot, but this is a very infrequently used opcode + // can andnot, but this is a very infrequently used opcode e.mov(e.eax, 1U << kX64BackendNonIEEEMode); e.mov(e.edx, e.eax); e.not_(e.edx); e.mov(e.ecx, flags_ptr); - //edx = flags w/ non ieee cleared + // edx = flags w/ non ieee cleared e.and_(e.edx, e.ecx); - //eax = flags w/ non ieee set + // eax = flags w/ non ieee set e.or_(e.eax, e.ecx); e.bt(i.src1, 2); diff --git a/src/xenia/cpu/compiler/compiler_passes.h b/src/xenia/cpu/compiler/compiler_passes.h index fc58ec7107..03e9a4996a 100644 --- a/src/xenia/cpu/compiler/compiler_passes.h +++ b/src/xenia/cpu/compiler/compiler_passes.h @@ -24,5 +24,6 @@ #include "xenia/cpu/compiler/passes/simplification_pass.h" #include "xenia/cpu/compiler/passes/validation_pass.h" #include "xenia/cpu/compiler/passes/value_reduction_pass.h" +#include "xenia/cpu/compiler/passes/interrupt_check_injection_pass.h" #endif // XENIA_CPU_COMPILER_COMPILER_PASSES_H_ diff --git a/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc b/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc index bc4e91287f..49fa481485 100644 --- a/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc +++ b/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc @@ -266,10 +266,17 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) { auto mmio_range = processor_->memory()->LookupVirtualMappedRange(address); if (cvars::inline_mmio_access && mmio_range) { - i->Replace(&OPCODE_LOAD_MMIO_info, 0); - i->src1.offset 
= reinterpret_cast(mmio_range); - i->src2.offset = address; - result = true; + auto constant_entry = mmio_range->constant_addresses.find(address); + if (constant_entry == mmio_range->constant_addresses.end()) { + i->Replace(&OPCODE_LOAD_MMIO_info, 0); + i->src1.offset = reinterpret_cast(mmio_range); + i->src2.offset = address; + result = true; + } else { + v->set_constant(xe::byte_swap(constant_entry->second)); + i->UnlinkAndNOP(); + result = true; + } } else { auto heap = memory->LookupHeap(address); uint32_t protect; diff --git a/src/xenia/cpu/compiler/passes/context_promotion_pass.cc b/src/xenia/cpu/compiler/passes/context_promotion_pass.cc index f5cc368db6..fb2010659c 100644 --- a/src/xenia/cpu/compiler/passes/context_promotion_pass.cc +++ b/src/xenia/cpu/compiler/passes/context_promotion_pass.cc @@ -104,6 +104,10 @@ void ContextPromotionPass::PromoteBlock(Block* block) { if (i->opcode->flags & OPCODE_FLAG_VOLATILE) { // Volatile instruction - requires all context values be flushed. validity.reset(); + } else if (i->opcode->flags & (OPCODE_FLAG_R13_BARRIER)) { + validity.reset( + offsetof(ppc::PPCContext, r[13]), + offsetof(ppc::PPCContext, r[13]) + sizeof(ppc::PPCContext::r[13])); } else if (i->opcode == &OPCODE_LOAD_CONTEXT_info) { size_t offset = i->src1.offset; if (validity.test(static_cast(offset))) { @@ -139,6 +143,10 @@ void ContextPromotionPass::RemoveDeadStoresBlock(Block* block) { if (i->opcode->flags & (OPCODE_FLAG_VOLATILE | OPCODE_FLAG_BRANCH)) { // Volatile instruction - requires all context values be flushed. 
validity.reset(); + } else if (i->opcode->flags & (OPCODE_FLAG_R13_BARRIER)) { + validity.reset( + offsetof(ppc::PPCContext, r[13]), + offsetof(ppc::PPCContext, r[13]) + sizeof(ppc::PPCContext::r[13])); } else if (i->opcode == &OPCODE_STORE_CONTEXT_info) { size_t offset = i->src1.offset; if (!validity.test(static_cast(offset))) { diff --git a/src/xenia/cpu/compiler/passes/interrupt_check_injection_pass.cc b/src/xenia/cpu/compiler/passes/interrupt_check_injection_pass.cc new file mode 100644 index 0000000000..ce5aec1a46 --- /dev/null +++ b/src/xenia/cpu/compiler/passes/interrupt_check_injection_pass.cc @@ -0,0 +1,53 @@ +/** + ****************************************************************************** + * Xenia Canary : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2024 Xenia Canary. All rights reserved. * Released under the BSD + *license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/cpu/compiler/passes/interrupt_check_injection_pass.h" + +#include "xenia/base/assert.h" +#include "xenia/base/profiling.h" +#include "xenia/cpu/backend/backend.h" +#include "xenia/cpu/compiler/compiler.h" +#include "xenia/cpu/processor.h" + +namespace xe { +namespace cpu { +namespace compiler { +namespace passes { + +using namespace xe::cpu::hir; + +InterruptInjectionPass::InterruptInjectionPass() : CompilerPass() {} + +InterruptInjectionPass::~InterruptInjectionPass() {} + +bool InterruptInjectionPass::Run(HIRBuilder* builder) { + bool added_interrupt_checks = false; + // add interrupt checks to the front of each block + for (auto block = builder->first_block(); block != nullptr; + block = block->next) { + auto first_nonfake = block->instr_head; + for (; first_nonfake && first_nonfake->IsFake(); + first_nonfake = first_nonfake->next) { + } + + if (first_nonfake && + first_nonfake->GetOpcodeNum() 
!= OPCODE_CHECK_INTERRUPT) { + auto interrupt_instruction = builder->CheckInterrupt(); + + interrupt_instruction->MoveBefore(first_nonfake); + added_interrupt_checks = true; + } + } + return added_interrupt_checks; +} + +} // namespace passes +} // namespace compiler +} // namespace cpu +} // namespace xe diff --git a/src/xenia/cpu/compiler/passes/interrupt_check_injection_pass.h b/src/xenia/cpu/compiler/passes/interrupt_check_injection_pass.h new file mode 100644 index 0000000000..df134aeb40 --- /dev/null +++ b/src/xenia/cpu/compiler/passes/interrupt_check_injection_pass.h @@ -0,0 +1,33 @@ +/** + ****************************************************************************** + * Xenia Canary : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2024 Xenia Canary. All rights reserved. * Released under the BSD + *license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_CPU_COMPILER_PASSES_INTERRUPT_CHECK_INJECTION_PASS_H_ +#define XENIA_CPU_COMPILER_PASSES_INTERRUPT_CHECK_INJECTION_PASS_H_ + +#include "xenia/cpu/compiler/compiler_pass.h" + +namespace xe { +namespace cpu { +namespace compiler { +namespace passes { + +class InterruptInjectionPass : public CompilerPass { + public: + InterruptInjectionPass(); + ~InterruptInjectionPass() override; + + bool Run(hir::HIRBuilder* builder) override; +}; + +} // namespace passes +} // namespace compiler +} // namespace cpu +} // namespace xe + +#endif // XENIA_CPU_COMPILER_PASSES_INTERRUPT_CHECK_INJECTION_PASS_H_ diff --git a/src/xenia/cpu/function.cc b/src/xenia/cpu/function.cc index 828c5f94e8..febc4aa41f 100644 --- a/src/xenia/cpu/function.cc +++ b/src/xenia/cpu/function.cc @@ -40,6 +40,7 @@ bool BuiltinFunction::Call(ThreadState* thread_state, uint32_t return_address) { ThreadState* original_thread_state = ThreadState::Get(); if 
(original_thread_state != thread_state) { + xenia_assert(false); ThreadState::Bind(thread_state); } @@ -47,6 +48,7 @@ bool BuiltinFunction::Call(ThreadState* thread_state, uint32_t return_address) { handler_(thread_state->context(), arg0_, arg1_); if (original_thread_state != thread_state) { + xenia_assert(false); ThreadState::Bind(original_thread_state); } @@ -131,12 +133,14 @@ bool GuestFunction::Call(ThreadState* thread_state, uint32_t return_address) { ThreadState* original_thread_state = ThreadState::Get(); if (original_thread_state != thread_state) { + xenia_assert(false); ThreadState::Bind(thread_state); } bool result = CallImpl(thread_state, return_address); if (original_thread_state != thread_state) { + xenia_assert(false); ThreadState::Bind(original_thread_state); } diff --git a/src/xenia/cpu/hir/hir_builder.cc b/src/xenia/cpu/hir/hir_builder.cc index 00634a5e67..026b0d47d4 100644 --- a/src/xenia/cpu/hir/hir_builder.cc +++ b/src/xenia/cpu/hir/hir_builder.cc @@ -20,7 +20,7 @@ #include "xenia/cpu/hir/instr.h" #include "xenia/cpu/hir/label.h" #include "xenia/cpu/symbol.h" - +#include "xenia/cpu/thread.h" // Will scribble arena memory to hopefully find use before clears. 
// #define SCRIBBLE_ARENA_ON_RESET @@ -1237,6 +1237,10 @@ void HIRBuilder::ContextBarrier() { AppendInstr(OPCODE_CONTEXT_BARRIER_info, 0); } +Instr* HIRBuilder::CheckInterrupt() { + return AppendInstr(OPCODE_CHECK_INTERRUPT_info, 0); +} + Value* HIRBuilder::LoadMmio(cpu::MMIORange* mmio_range, uint32_t address, TypeName type) { Instr* i = AppendInstr(OPCODE_LOAD_MMIO_info, 0, AllocValue(type)); @@ -1459,7 +1463,7 @@ Value* HIRBuilder::IsTrue(Value* value) { // opcode or special code path but this could be optimized to not require as // many extracts, we can shuffle and or v128 and then extract the low - return CompareEQ(OrLanes32(*this, value), LoadZeroInt32()); + return CompareNE(OrLanes32(*this, value), LoadZeroInt32()); } if (value->IsConstant()) { diff --git a/src/xenia/cpu/hir/hir_builder.h b/src/xenia/cpu/hir/hir_builder.h index b33f18aaf4..cafe345340 100644 --- a/src/xenia/cpu/hir/hir_builder.h +++ b/src/xenia/cpu/hir/hir_builder.h @@ -179,6 +179,7 @@ class HIRBuilder { Value* LoadContext(size_t offset, TypeName type); void StoreContext(size_t offset, Value* value); void ContextBarrier(); + Instr* CheckInterrupt(); Value* LoadMmio(cpu::MMIORange* mmio_range, uint32_t address, TypeName type); void StoreMmio(cpu::MMIORange* mmio_range, uint32_t address, Value* value); diff --git a/src/xenia/cpu/hir/opcodes.h b/src/xenia/cpu/hir/opcodes.h index 5a1bdc53b4..5588fce965 100644 --- a/src/xenia/cpu/hir/opcodes.h +++ b/src/xenia/cpu/hir/opcodes.h @@ -293,6 +293,7 @@ enum Opcode { OPCODE_DELAY_EXECUTION, // for db16cyc OPCODE_RESERVED_LOAD, OPCODE_RESERVED_STORE, + OPCODE_CHECK_INTERRUPT, __OPCODE_MAX_VALUE, // Keep at end. 
}; @@ -305,7 +306,8 @@ enum OpcodeFlags {   OPCODE_FLAG_IGNORE = (1 << 5),   OPCODE_FLAG_HIDE = (1 << 6),   OPCODE_FLAG_PAIRED_PREV = (1 << 7), -  OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING = (1 << 8) +  OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING = (1 << 8), +  OPCODE_FLAG_R13_BARRIER = (1<<9) };  enum OpcodeSignatureType { diff --git a/src/xenia/cpu/hir/opcodes.inl b/src/xenia/cpu/hir/opcodes.inl index c5c089e852..a30636c14c 100644 --- a/src/xenia/cpu/hir/opcodes.inl +++ b/src/xenia/cpu/hir/opcodes.inl @@ -278,6 +278,12 @@ DEFINE_OPCODE(     OPCODE_SIG_X,     OPCODE_FLAG_MEMORY | OPCODE_FLAG_VOLATILE)  +DEFINE_OPCODE( +    OPCODE_CHECK_INTERRUPT, +    "check_interrupt", +    OPCODE_SIG_X, +    OPCODE_FLAG_R13_BARRIER|OPCODE_FLAG_VOLATILE) +  DEFINE_OPCODE(     OPCODE_MAX,     "max", diff --git a/src/xenia/cpu/hwclock.cc b/src/xenia/cpu/hwclock.cc new file mode 100644 index 0000000000..1b8b0db4fb --- /dev/null +++ b/src/xenia/cpu/hwclock.cc @@ -0,0 +1,71 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2023 Xenia Canary. All rights reserved. * Released under the BSD +*license - see LICENSE in the root for more details.
* + ****************************************************************************** + */ + +#include "xenia/cpu/hwclock.h" +#include "xenia/base/clock.h" +#include "xenia/base/platform_win.h" +#include "xenia/cpu/processor.h" +namespace xe { +namespace cpu { + +void HWClock::SynchronizeToHostClockInterrupt() {} +void HWClock::WorkerThreadMain() { + SynchronizeToHostClockInterrupt(); + + uint64_t millisecond_frequency = + Clock::host_tick_frequency_platform() / 1000LL; + + uint64_t last_tick_count = Clock::host_tick_count_platform(); + + uint64_t rdtsc_endpoint = Clock::HostTickTimestampToQuickTimestamp( + last_tick_count + millisecond_frequency); + + while (true) { + uint64_t new_value; + while (true) { + new_value = Clock::QueryQuickCounter(); + if (new_value >= rdtsc_endpoint) { + break; + } else { + _mm_pause(); + _mm_pause(); + _mm_pause(); + _mm_pause(); + } + } + last_tick_count = Clock::host_tick_count_platform(); + + rdtsc_endpoint = Clock::HostTickTimestampToQuickTimestamp( + last_tick_count + millisecond_frequency); + + // uint64_t num_interrupts_to_trigger = new_value - last_value; + // last_value = new_value; + + // for (uint64_t i = 0; i < num_interrupts_to_trigger; ++i) { + if (interrupt_callback_) { + interrupt_callback_(processor_); + } + + threading::NanoSleep(1000000 / 2); + //} + } +} +HWClock::HWClock(Processor* processor) : processor_(processor) { + threading::Thread::CreationParameters crparams{}; + crparams.stack_size = 65536; + crparams.initial_priority = threading::ThreadPriority::kBelowNormal; + crparams.create_suspended = true; + timer_thread_ = threading::Thread::Create( + crparams, std::bind(&HWClock::WorkerThreadMain, this)); +} +HWClock::~HWClock() {} + +void HWClock::Start() { timer_thread_->Resume(); } +} // namespace cpu +} // namespace xe \ No newline at end of file diff --git a/src/xenia/cpu/hwclock.h b/src/xenia/cpu/hwclock.h new file mode 100644 index 0000000000..3eacf5023d --- /dev/null +++ b/src/xenia/cpu/hwclock.h @@ -0,0 
+1,43 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2023 Xenia Canary. All rights reserved. * Released under the BSD + *license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_CPU_HWCLOCK_H_ +#define XENIA_CPU_HWCLOCK_H_ + +#include "xenia/base/threading.h" +namespace xe { +namespace cpu { + +class Processor; +#define XE_USE_TIMED_INTERRUPTS_FOR_CLOCK 1 + +// raises the clock interrupt on cpu 0 every 1 millisecond +class HWClock { + void SynchronizeToHostClockInterrupt(); + void WorkerThreadMain(); + + public: + HWClock(Processor* processor); + ~HWClock(); + void SetInterruptCallback(void (*callback)(Processor*)) { + interrupt_callback_ = callback; + } + + void Start(); + private: + Processor* processor_; + std::unique_ptr timer_thread_; + + void (*interrupt_callback_)(Processor*) = nullptr; +}; + +} // namespace cpu +} // namespace xe + +#endif // XENIA_CPU_HWCLOCK_H_ diff --git a/src/xenia/cpu/mmio_handler.h b/src/xenia/cpu/mmio_handler.h index 4fc2811347..55acdf10f5 100644 --- a/src/xenia/cpu/mmio_handler.h +++ b/src/xenia/cpu/mmio_handler.h @@ -13,6 +13,7 @@ #include #include #include +#include #include "xenia/base/mutex.h" #include "xenia/base/platform.h" @@ -38,6 +39,7 @@ struct MMIORange { void* callback_context; MMIOReadCallback read; MMIOWriteCallback write; + std::map constant_addresses; }; // NOTE: only one can exist at a time! 
diff --git a/src/xenia/cpu/ppc/ppc_context.cc b/src/xenia/cpu/ppc/ppc_context.cc index 17a609acbc..81135c7dda 100644 --- a/src/xenia/cpu/ppc/ppc_context.cc +++ b/src/xenia/cpu/ppc/ppc_context.cc @@ -13,8 +13,10 @@ #include #include "xenia/base/assert.h" +#include "xenia/base/atomic.h" #include "xenia/base/string_util.h" - +#include "xenia/cpu/thread.h" +#include "xenia/kernel/kernel_guest_structures.h" namespace xe { namespace cpu { namespace ppc { @@ -194,6 +196,203 @@ bool PPCContext::CompareRegWithString(const char* name, const char* value, } } +static PPCInterruptRequest* SelectInterruptRequest( + PPCContext* context, kernel::X_KPCR* kpcr, + cpu::XenonInterruptController* interrupt_controller) { + std::vector requests{}; + requests.reserve(interrupt_controller->queued_interrupts_.depth()); + + auto xchged = interrupt_controller->queued_interrupts_.Flush(); + + if (xchged) { + auto current = xchged; + PPCInterruptRequest* internal_interrupt = nullptr; + while (current) { + auto ireq = reinterpret_cast(current); + requests.push_back(ireq); + + current = current->next_; + } + + if (requests.size() > 1) { + std::sort( + requests.begin(), requests.end(), + [](PPCInterruptRequest* x, PPCInterruptRequest* y) { + auto prio_x = + cpu::XenonInterruptController::KernelIrqlToInterruptPriority( + x->irql_); + auto prio_y = + cpu::XenonInterruptController::KernelIrqlToInterruptPriority( + y->irql_); + + if (prio_x == prio_y) { + return x->interrupt_serial_number_ < y->interrupt_serial_number_; + } else { + return prio_x < prio_y; + } + }); + } + + for (auto&& ireq : requests) { + // if an internal interrupt is available, immediately select it. i'm not + // confident that it runs before any external interrupt, + // but the only internal interrupt is the decrementer one which is very + // simple and wont mess anything up by running at any point in time. 
+ if (ireq->internal_interrupt_) { + if (ireq->may_run_(context)) { + for (auto&& requeue_request : requests) { + if (requeue_request == ireq) { + continue; + } + interrupt_controller->queued_interrupts_.Push( + &requeue_request->list_entry_); + } + return ireq; + } + } + } + + // reinsert to list + for (size_t i = 1; i < requests.size(); ++i) { + interrupt_controller->queued_interrupts_.Push(&requests[i]->list_entry_); + } + + return requests[0]; + + } else { + return nullptr; + } +} + +XE_NOINLINE +bool PPCContext::ReallyDoInterrupt(PPCContext* context) { + auto kpcr = context->TranslateVirtualGPR(context->r[13]); + auto interrupt_controller = context->GetExternalInterruptController(); + if (interrupt_controller->queued_interrupts_.depth()) { + auto ireq = SelectInterruptRequest(context, kpcr, interrupt_controller); + + auto ireq_deref = *ireq; + bool run_interrupt = true; + if (ireq_deref.may_run_) { + run_interrupt = ireq_deref.may_run_(context); + } + if (run_interrupt) { + if (ireq_deref.wait) { + interrupt_controller->SetEOIWriteMirror(ireq_deref.result_out_); + } + uintptr_t result = ireq_deref.func_(context, &ireq_deref, ireq_deref.ud_); + interrupt_controller->FreeInterruptRequest(ireq); + return true; + } else { + // requeue + interrupt_controller->queued_interrupts_.Push(&ireq->list_entry_); + return false; + } + } + return false; +} + +bool PPCContext::CheckInterrupt() { + auto controller = GetExternalInterruptController(); + CheckTimedInterrupt(); + + if (!controller->queued_interrupts_.depth()) { + return false; + } else { + return ReallyDoInterrupt(this); + } +} +void PPCContext::AssertCurrent() { + xenia_assert(this == cpu::ThreadState::GetContext()); +} + +void PPCContext::TakeGPRSnapshot(PPCGprSnapshot* out) { + swcache::PrefetchW(&out->r[13]); + swcache::PrefetchL1(&this->r[14]); + unsigned i; + for (i = 0; i < 8; ++i) { + out->crs[i] = this->crs[i]; + } + for (i = 0; i < 13; ++i) { + out->r[i] = this->r[i]; + } + // skip r13 + for (i = 14; i < 
32; ++i) { + out->r[i - 1] = this->r[i]; + } + out->ctr = this->ctr; + + out->lr = this->lr; + out->msr = this->msr; + + out->xer_ca = xer_ca; + out->xer_ov = xer_ov; + out->xer_so = xer_so; + +#if XE_FULL_CONTEXT_SNAPSHOTS == 1 + out->fpscr = this->fpscr.value; + for (i = 0; i < 32; ++i) { + out->f[i] = this->f[i]; + } + for (i = 0; i < 128; ++i) { + out->v[i] = this->v[i]; + } +#endif +} +void PPCContext::RestoreGPRSnapshot(const PPCGprSnapshot* in) { + swcache::PrefetchW(&this->r[14]); + swcache::PrefetchL1(&in->r[14]); + + unsigned i; + for (i = 0; i < 8; ++i) { + this->crs[i] = in->crs[i]; + } + for (i = 0; i < 13; ++i) { + this->r[i] = in->r[i]; + } + // skip r13 + for (i = 14; i < 32; ++i) { + this->r[i] = in->r[i - 1]; + } + this->ctr = in->ctr; + + this->lr = in->lr; + this->msr = in->msr; + + xer_ca = in->xer_ca; + xer_ov = in->xer_ov; + xer_so = in->xer_so; +#if XE_FULL_CONTEXT_SNAPSHOTS == 1 + this->fpscr.value = in->fpscr; + for (i = 0; i < 32; ++i) { + this->f[i] = in->f[i]; + } + for (i = 0; i < 128; ++i) { + this->v[i] = in->v[i]; + } +#endif +} + +XenonInterruptController* PPCContext::GetExternalInterruptController() { + auto kpcr = this->TranslateVirtualGPR(this->r[13]); + return reinterpret_cast(kpcr->emulated_interrupt); +} + +XE_NOINLINE +void PPCContext::EnqueueTimedInterrupts() { + this->GetExternalInterruptController()->EnqueueTimedInterrupts(); +} +void PPCContext::CheckTimedInterrupt() { + auto eext = GetExternalInterruptController(); + uint64_t cycles = Clock::QueryQuickCounter(); + + if (cycles <= eext->next_event_quick_timestamp_) { + return; + } else { + EnqueueTimedInterrupts(); + } +} + } // namespace ppc } // namespace cpu } // namespace xe diff --git a/src/xenia/cpu/ppc/ppc_context.h b/src/xenia/cpu/ppc/ppc_context.h index 55a44fc226..0fbc2e09e7 100644 --- a/src/xenia/cpu/ppc/ppc_context.h +++ b/src/xenia/cpu/ppc/ppc_context.h @@ -15,10 +15,12 @@ #include #include "xenia/base/mutex.h" +#include "xenia/base/threading.h" #include 
"xenia/base/vec128.h" #include "xenia/guest_pointers.h" namespace xe { namespace cpu { +class XenonInterruptController; class Processor; class ThreadState; } // namespace cpu @@ -31,6 +33,27 @@ namespace xe { namespace cpu { namespace ppc { + #define XE_FULL_CONTEXT_SNAPSHOTS 1 + +struct alignas(64) PPCContext_s; + +#if defined(DEBUG) +#define XE_TRACE_LAST_INTERRUPT_ADDR 1 +#endif +struct PPCInterruptRequest { + threading::AtomicListEntry list_entry_; + bool (*may_run_)(PPCContext_s* in_context); + uintptr_t (*func_)(PPCContext_s* context, PPCInterruptRequest* request, + void* ud); + void* ud_; + uintptr_t* result_out_; + bool wait; + uint8_t irql_; + bool internal_interrupt_; + uint64_t interrupt_serial_number_; + + uintptr_t extra_data_[4]; +}; // Map: // 0-31: GPR // 32-63: FPR @@ -245,84 +268,114 @@ enum class PPCRegister { kVSCR, kCR, }; +struct PPCGprSnapshot { + // snapshot of all gpregs except for r13 + uint32_t crs[8]; + uint64_t r[31]; + uint64_t ctr; + uint64_t lr; + uint64_t msr; + + uint8_t xer_ca; + uint8_t xer_ov; + uint8_t xer_so; +#if XE_FULL_CONTEXT_SNAPSHOTS == 1 + uint32_t fpscr; + double f[32]; + vec128_t v[128];//ouch! +#endif +}; #pragma pack(push, 8) typedef struct alignas(64) PPCContext_s { + // Processor-specific data pointer. Used on callbacks to get access to the + // current runtime and its data. + Processor* processor; + + // Shared kernel state, for easy access from kernel exports. 
+ xe::kernel::KernelState* kernel_state; + unsigned char membase_bit; + bool status_raised; + ThreadState* thread_state() { return reinterpret_cast(this); } union { - uint32_t value; - struct { - uint8_t cr0_lt; // Negative (LT) - result is negative - uint8_t cr0_gt; // Positive (GT) - result is positive (and not zero) - uint8_t cr0_eq; // Zero (EQ) - result is zero or a stwcx/stdcx completed - // successfully - uint8_t cr0_so; // Summary Overflow (SO) - copy of XER[SO] - }; - } cr0; // 0xA24 - union { - uint32_t value; - struct { - uint8_t cr1_fx; // FP exception summary - copy of FPSCR[FX] - uint8_t cr1_fex; // FP enabled exception summary - copy of FPSCR[FEX] - uint8_t - cr1_vx; // FP invalid operation exception summary - copy of FPSCR[VX] - uint8_t cr1_ox; // FP overflow exception - copy of FPSCR[OX] - }; - } cr1; - union { - uint32_t value; - struct { - uint8_t cr2_0; - uint8_t cr2_1; - uint8_t cr2_2; - uint8_t cr2_3; - }; - } cr2; - union { - uint32_t value; - struct { - uint8_t cr3_0; - uint8_t cr3_1; - uint8_t cr3_2; - uint8_t cr3_3; - }; - } cr3; - union { - uint32_t value; - struct { - uint8_t cr4_0; - uint8_t cr4_1; - uint8_t cr4_2; - uint8_t cr4_3; - }; - } cr4; - union { - uint32_t value; - struct { - uint8_t cr5_0; - uint8_t cr5_1; - uint8_t cr5_2; - uint8_t cr5_3; - }; - } cr5; - union { - uint32_t value; - struct { - uint8_t cr6_all_equal; - uint8_t cr6_1; - uint8_t cr6_none_equal; - uint8_t cr6_3; - }; - } cr6; - union { - uint32_t value; struct { - uint8_t cr7_0; - uint8_t cr7_1; - uint8_t cr7_2; - uint8_t cr7_3; + union { + uint32_t value; + struct { + uint8_t cr0_lt; // Negative (LT) - result is negative + uint8_t cr0_gt; // Positive (GT) - result is positive (and not zero) + uint8_t cr0_eq; // Zero (EQ) - result is zero or a stwcx/stdcx + // completed successfully + uint8_t cr0_so; // Summary Overflow (SO) - copy of XER[SO] + }; + } cr0; // 0xA24 + union { + uint32_t value; + struct { + uint8_t cr1_fx; // FP exception summary - copy of 
FPSCR[FX] + uint8_t cr1_fex; // FP enabled exception summary - copy of FPSCR[FEX] + uint8_t cr1_vx; // FP invalid operation exception summary - copy of + // FPSCR[VX] + uint8_t cr1_ox; // FP overflow exception - copy of FPSCR[OX] + }; + } cr1; + union { + uint32_t value; + struct { + uint8_t cr2_0; + uint8_t cr2_1; + uint8_t cr2_2; + uint8_t cr2_3; + }; + } cr2; + union { + uint32_t value; + struct { + uint8_t cr3_0; + uint8_t cr3_1; + uint8_t cr3_2; + uint8_t cr3_3; + }; + } cr3; + union { + uint32_t value; + struct { + uint8_t cr4_0; + uint8_t cr4_1; + uint8_t cr4_2; + uint8_t cr4_3; + }; + } cr4; + union { + uint32_t value; + struct { + uint8_t cr5_0; + uint8_t cr5_1; + uint8_t cr5_2; + uint8_t cr5_3; + }; + } cr5; + union { + uint32_t value; + struct { + uint8_t cr6_all_equal; + uint8_t cr6_1; + uint8_t cr6_none_equal; + uint8_t cr6_3; + }; + } cr6; + union { + uint32_t value; + struct { + uint8_t cr7_0; + uint8_t cr7_1; + uint8_t cr7_2; + uint8_t cr7_3; + }; + } cr7; }; - } cr7; - + uint32_t crs[8]; + }; union { uint32_t value; struct { @@ -374,12 +427,20 @@ typedef struct alignas(64) PPCContext_s { } fpscr; // Floating-point status and control register // Most frequently used registers first. + union { + uint64_t r[32]; // 0x20 General purpose registers +#if XE_COMPARISON_BUILD + struct { + uint64_t rpad[12]; + uint64_t kpcr; + uint64_t rpad2[19]; + }; +#endif + }; + uint64_t ctr; // 0x18 Count register + uint64_t lr; // 0x10 Link register - uint64_t r[32]; // 0x20 General purpose registers - uint64_t ctr; // 0x18 Count register - uint64_t lr; // 0x10 Link register - - uint64_t msr; //machine state register + uint64_t msr; // machine state register double f[32]; // 0x120 Floating-point registers vec128_t v[128]; // 0x220 VMX128 vector registers @@ -416,33 +477,36 @@ typedef struct alignas(64) PPCContext_s { // Used to shuttle data into externs. Contents volatile. uint64_t scratch; - // Processor-specific data pointer. 
Used on callbacks to get access to the - // current runtime and its data. - Processor* processor; - - // Shared kernel state, for easy access from kernel exports. - xe::kernel::KernelState* kernel_state; - uint8_t* physical_membase; // Value of last reserved load uint64_t reserved_val; - ThreadState* thread_state; uint8_t* virtual_membase; - + uint32_t raised_status; +#if XE_TRACE_LAST_INTERRUPT_ADDR == 1 + uint64_t recent_interrupt_addr_; +#endif template inline T TranslateVirtual(uint32_t guest_address) XE_RESTRICT const { static_assert(std::is_pointer_v); +#if XE_COMPARISON_BUILD == 1 + return reinterpret_cast(static_cast(guest_address)); +#else #if XE_PLATFORM_WIN32 == 1 - uint8_t* host_address = virtual_membase + guest_address; - if (guest_address >= - static_cast(reinterpret_cast(this))) { - host_address += 0x1000; + if (guest_address) { + uint8_t* host_address = virtual_membase + guest_address; + if (guest_address >= + static_cast(reinterpret_cast(this))) { + host_address += 0x1000; + } + return reinterpret_cast(host_address); + } else { + return nullptr; } - return reinterpret_cast(host_address); #else return processor->memory()->TranslateVirtual(guest_address); +#endif #endif } template @@ -462,18 +526,37 @@ typedef struct alignas(64) PPCContext_s { inline T* TranslateVirtual(TypedGuestPointer guest_address) { return TranslateVirtual(guest_address.m_ptr); } + template + bool IsNull(T* host) { +#if XE_COMPARISON_BUILD == 1 + return !host; +#else + return host == (T*)virtual_membase; +#endif + } + template inline uint32_t HostToGuestVirtual(T* host_ptr) XE_RESTRICT const { +#if XE_COMPARISON_BUILD == 1 + return static_cast(reinterpret_cast(host_ptr)); +#else #if XE_PLATFORM_WIN32 == 1 - uint32_t guest_tmp = static_cast( + uint64_t guest_tmp64 = static_cast( reinterpret_cast(host_ptr) - virtual_membase); - if (guest_tmp >= static_cast(reinterpret_cast(this))) { - guest_tmp -= 0x1000; + uint32_t guest_tmp32 = static_cast(guest_tmp64); + if 
(static_cast(guest_tmp32) == guest_tmp64) { + if (guest_tmp32 >= + static_cast(reinterpret_cast(this))) { + guest_tmp32 -= 0x1000; + } + return guest_tmp32; + } else { + return 0; } - return guest_tmp; #else return processor->memory()->HostToGuestVirtual( reinterpret_cast(host_ptr)); +#endif #endif } static std::string GetRegisterName(PPCRegister reg); @@ -483,6 +566,43 @@ typedef struct alignas(64) PPCContext_s { void SetRegFromString(const char* name, const char* value); bool CompareRegWithString(const char* name, const char* value, std::string& result) const; + // todo: distinguish between these! + bool RecoverableInterruptsEnabled() { return !!(msr & 2); } + bool ExternalInterruptsEnabled() { return !!(msr & 0x8000); } + + void DisableEI() { msr &= ~0x8000ULL; } + void EnableEI() { msr |= 0x8000ULL; } + + bool CheckInterrupt(); + XE_NOINLINE + static bool ReallyDoInterrupt(PPCContext_s* context); + void TakeGPRSnapshot(PPCGprSnapshot* out); + void RestoreGPRSnapshot(const PPCGprSnapshot* in); + + // assert that the current ppccontext for this thread is this + void AssertCurrent(); + + void AssertInterruptsOff() { xenia_assert(!ExternalInterruptsEnabled()); } + void AssertInterruptsOn() { xenia_assert(ExternalInterruptsEnabled()); } + + // for a very weak emulation of RtlRaiseStatus + void RaiseStatus(uint32_t stat) { + status_raised = true; + raised_status = stat; + } + uint32_t CatchStatus() { + if (!status_raised) { + return 0U; + } else { + status_raised = false; + return raised_status; + } + } + XenonInterruptController* GetExternalInterruptController(); + void CheckTimedInterrupt(); + XE_NOINLINE + void EnqueueTimedInterrupts(); + } PPCContext; #pragma pack(pop) constexpr size_t ppcctx_size = sizeof(PPCContext); diff --git a/src/xenia/cpu/ppc/ppc_emit_altivec.cc b/src/xenia/cpu/ppc/ppc_emit_altivec.cc index 15db03282f..e0adcb6b50 100644 --- a/src/xenia/cpu/ppc/ppc_emit_altivec.cc +++ b/src/xenia/cpu/ppc/ppc_emit_altivec.cc @@ -1393,12 +1393,10 @@ int 
InstrEmit_vsel128(PPCHIRBuilder& f, const InstrData& i) { // chrispy: this is test code for checking whether a game takes advantage of the // VSR/VSL undocumented/undefined variable shift behavior static void AssertShiftElementsOk(PPCHIRBuilder& f, Value* v) { -#if 0 Value* splatted = f.Splat(f.Extract(v, (uint8_t)0, INT8_TYPE), VEC128_TYPE); Value* checkequal = f.Xor(splatted, v); f.DebugBreakTrue(f.IsTrue(checkequal)); -#endif } int InstrEmit_vsl(PPCHIRBuilder& f, const InstrData& i) { Value* va = f.LoadVR(i.VX.VA); diff --git a/src/xenia/cpu/ppc/ppc_emit_control.cc b/src/xenia/cpu/ppc/ppc_emit_control.cc index 241dcf4af8..ff2dcc0e8d 100644 --- a/src/xenia/cpu/ppc/ppc_emit_control.cc +++ b/src/xenia/cpu/ppc/ppc_emit_control.cc @@ -132,23 +132,23 @@ int InstrEmit_branch(PPCHIRBuilder& f, const char* src, uint64_t cia, #else { #endif - // Jump to pointer. - bool likely_return = !lk && nia_is_lr; - if (likely_return) { - call_flags |= CALL_POSSIBLE_RETURN; - } - if (cond) { - if (!expect_true) { - cond = f.IsFalse(cond); + // Jump to pointer. + bool likely_return = !lk && nia_is_lr; + if (likely_return) { + call_flags |= CALL_POSSIBLE_RETURN; + } + if (cond) { + if (!expect_true) { + cond = f.IsFalse(cond); + } + f.CallIndirectTrue(cond, nia, call_flags); + } else { + f.CallIndirect(nia, call_flags); } - f.CallIndirectTrue(cond, nia, call_flags); - } else { - f.CallIndirect(nia, call_flags); } } -} -return 0; + return 0; } // namespace ppc int InstrEmit_bx(PPCHIRBuilder& f, const InstrData& i) { @@ -789,9 +789,8 @@ int InstrEmit_mtspr(PPCHIRBuilder& f, const InstrData& i) { // code requires it. Sequences of mtmsr/lwar/stcw/mtmsr come up a lot, and // without the lock here threads can livelock. 
- -//0x400 = debug singlestep i think -//ive seen 0x8000 used in kernel code +// 0x400 = debug singlestep i think +// ive seen 0x8000 used in kernel code int InstrEmit_mfmsr(PPCHIRBuilder& f, const InstrData& i) { // bit 48 = EE; interrupt enabled // bit 62 = RI; recoverable interrupt @@ -806,7 +805,7 @@ int InstrEmit_mtmsr(PPCHIRBuilder& f, const InstrData& i) { } int InstrEmit_mtmsrd(PPCHIRBuilder& f, const InstrData& i) { - //todo: this is moving msr under a mask, so only writing EE and RI + // todo: this is moving msr under a mask, so only writing EE and RI Value* from = f.LoadGPR(i.X.RT); Value* mtmsrd_mask = f.LoadConstantUint64((1ULL << 15)); diff --git a/src/xenia/cpu/ppc/ppc_translator.cc b/src/xenia/cpu/ppc/ppc_translator.cc index 69b0da4e3a..7ce8190c45 100644 --- a/src/xenia/cpu/ppc/ppc_translator.cc +++ b/src/xenia/cpu/ppc/ppc_translator.cc @@ -52,6 +52,9 @@ PPCTranslator::PPCTranslator(PPCFrontend* frontend) : frontend_(frontend) { compiler_->AddPass(std::make_unique()); compiler_->AddPass(std::make_unique()); + // add interrupt checks after control flow simplification + compiler_->AddPass(std::make_unique()); + // Passes are executed in the order they are added. Multiple of the same // pass type may be used. if (validate) compiler_->AddPass(std::make_unique()); diff --git a/src/xenia/cpu/processor.cc b/src/xenia/cpu/processor.cc index ce69c6b705..311e6bdf8c 100644 --- a/src/xenia/cpu/processor.cc +++ b/src/xenia/cpu/processor.cc @@ -48,6 +48,13 @@ DEFINE_path(trace_function_data_path, "", "File to write trace data to.", "CPU"); DEFINE_bool(break_on_start, false, "Break into the debugger on startup.", "CPU"); +#define XE_DISABLE_RESERVE_IN_HOST_CODE 1 + +DEFINE_bool(use_reserve_in_host_code, false, + "If true try to emulate RESERVE behavior in kernel code, and try " + "to emulate stores cancelling reservations in certain places. 
If " + "false host atomics are used.", + "CPU"); namespace xe { namespace kernel { @@ -81,7 +88,10 @@ class BuiltinModule : public Module { private: std::string name_; }; - +uint32_t Processor::GetPCRForCPU(uint32_t cpu_num) { + xenia_assert(cpu_num < 6); + return protdata_ + (4096 * cpu_num); +} Processor::Processor(xe::Memory* memory, ExportResolver* export_resolver) : memory_(memory), export_resolver_(export_resolver) {} @@ -153,6 +163,42 @@ bool Processor::Setup(std::unique_ptr backend) { functions_trace_file_ = ChunkedMappedMemoryWriter::Open(functions_trace_path_, 32_MiB, true); } + bool protdata_success = + memory_->LookupHeap(0x801B0000) + ->AllocFixed(0x801B0000, 65536, 65536, + MemoryAllocationFlag::kMemoryAllocationCommit, + MemoryProtectFlag::kMemoryProtectRead | + MemoryProtectFlag::kMemoryProtectWrite); + struct ThreadStacks { + uint32_t stackbase; + uint32_t stack_end; + }; + + static constexpr ThreadStacks stacks_for_idle_threads[] = { + {0x801B7000, 0x801B3000}, + {0x3c08b000, 0x3c087000}, + {0x3c095000, 0x3c091000}, + {0x3c09f000, 0x3c09b000}, + {0x3c0a9000, 0x3c0a5000}, + {0x3c0b3000, 0x3c0af000} + + }; + + protdata_ = 0x801B0000; + + /* + set all processors initial stack to be what cpu 0's ought to be. 
cpu 0 is + going to assign them their own stacks during the boot process in the kernel + */ + for (unsigned i = 0; i < 6; ++i) { + // idle threadid must be 0 + cpu::ThreadState* processor_idle_state = + ThreadState::Create(this, i, 0x801B7000, GetPCRForCPU(i)); + + hw_threads_.push_back(std::make_unique(i, processor_idle_state)); + } + + hw_clock_ = std::make_unique(this); return true; } @@ -267,7 +313,7 @@ Function* Processor::ResolveFunction(uint32_t address) { entry->status = Entry::STATUS_FAILED; return nullptr; } - //only add it to the list of resolved functions if resolving succeeded + // only add it to the list of resolved functions if resolving succeeded auto module_for = function->module(); auto xexmod = dynamic_cast(module_for); @@ -361,6 +407,21 @@ bool Processor::DemandFunction(Function* function) { return true; } +static void PerformSavegplr(ppc::PPCContext* context) { + + auto std = [context](uint32_t register_, int offset) { + *context->TranslateVirtualGPR(context->r[1] + offset) = + xe::byte_swap(context->r[register_]); + }; + context->r[12] = static_cast(context->lr); + + for (uint32_t i = 14; i < 32; ++i) { + std(i, -(0x98 - ((static_cast(i) - 14) * 8))); + } + *context->TranslateVirtualGPR(context->r[1] - 8) = + xe::byte_swap(static_cast(context->r[12])); +} + bool Processor::Execute(ThreadState* thread_state, uint32_t address) { SCOPE_profile_cpu_f("cpu"); @@ -407,7 +468,7 @@ bool Processor::ExecuteRaw(ThreadState* thread_state, uint32_t address) { } uint64_t Processor::Execute(ThreadState* thread_state, uint32_t address, - uint64_t args[], size_t arg_count) { + uint64_t args[], size_t arg_count, bool raw) { SCOPE_profile_cpu_f("cpu"); auto context = thread_state->context(); @@ -425,9 +486,12 @@ uint64_t Processor::Execute(ThreadState* thread_state, uint32_t address, (uint32_t)args[i + 8]); } } - - if (!Execute(thread_state, address)) { - return 0xDEADBABE; + if (!raw) { + if (!Execute(thread_state, address)) { + return 0xDEADBABE; + } + } else 
{ + ExecuteRaw(thread_state, address); } return context->r[3]; } @@ -478,70 +542,28 @@ void Processor::OnFunctionDefined(Function* function) { } void Processor::OnThreadCreated(uint32_t thread_handle, - ThreadState* thread_state, Thread* thread) { - auto global_lock = global_critical_region_.Acquire(); - auto thread_info = std::make_unique(); - thread_info->thread_handle = thread_handle; - thread_info->thread_id = thread_state->thread_id(); - thread_info->thread = thread; - thread_info->state = ThreadDebugInfo::State::kAlive; - thread_info->suspended = false; - thread_debug_infos_.emplace(thread_info->thread_id, std::move(thread_info)); -} + ThreadState* thread_state, Thread* thread) {} -void Processor::OnThreadExit(uint32_t thread_id) { - auto global_lock = global_critical_region_.Acquire(); - auto it = thread_debug_infos_.find(thread_id); - assert_true(it != thread_debug_infos_.end()); - auto thread_info = it->second.get(); - thread_info->state = ThreadDebugInfo::State::kExited; -} +void Processor::OnThreadExit(uint32_t thread_id) {} -void Processor::OnThreadDestroyed(uint32_t thread_id) { - auto global_lock = global_critical_region_.Acquire(); - auto it = thread_debug_infos_.find(thread_id); - assert_true(it != thread_debug_infos_.end()); - thread_debug_infos_.erase(it); -} +void Processor::OnThreadDestroyed(uint32_t thread_id) {} -void Processor::OnThreadEnteringWait(uint32_t thread_id) { - auto global_lock = global_critical_region_.Acquire(); - auto it = thread_debug_infos_.find(thread_id); - assert_true(it != thread_debug_infos_.end()); - auto thread_info = it->second.get(); - thread_info->state = ThreadDebugInfo::State::kWaiting; -} +void Processor::OnThreadEnteringWait(uint32_t thread_id) {} -void Processor::OnThreadLeavingWait(uint32_t thread_id) { - auto global_lock = global_critical_region_.Acquire(); - auto it = thread_debug_infos_.find(thread_id); - assert_true(it != thread_debug_infos_.end()); - auto thread_info = it->second.get(); - if 
(thread_info->state == ThreadDebugInfo::State::kWaiting) { - thread_info->state = ThreadDebugInfo::State::kAlive; - } -} +void Processor::OnThreadLeavingWait(uint32_t thread_id) {} std::vector Processor::QueryThreadDebugInfos() { - auto global_lock = global_critical_region_.Acquire(); std::vector result; - for (auto& it : thread_debug_infos_) { - result.push_back(it.second.get()); - } + return result; } ThreadDebugInfo* Processor::QueryThreadDebugInfo(uint32_t thread_id) { - auto global_lock = global_critical_region_.Acquire(); - const auto& it = thread_debug_infos_.find(thread_id); - if (it == thread_debug_infos_.end()) { - return nullptr; - } - return it->second.get(); + return nullptr; } void Processor::AddBreakpoint(Breakpoint* breakpoint) { - auto global_lock = global_critical_region_.Acquire(); + // auto global_lock = global_critical_region_.Acquire(); // Add to breakpoints map. breakpoints_.push_back(breakpoint); @@ -552,7 +574,7 @@ void Processor::AddBreakpoint(Breakpoint* breakpoint) { } void Processor::RemoveBreakpoint(Breakpoint* breakpoint) { - auto global_lock = global_critical_region_.Acquire(); + // auto global_lock = global_critical_region_.Acquire(); // Uninstall (if needed). 
if (execution_state_ == ExecutionState::kRunning) { @@ -565,7 +587,7 @@ void Processor::RemoveBreakpoint(Breakpoint* breakpoint) { } Breakpoint* Processor::FindBreakpoint(uint32_t address) { - auto global_lock = global_critical_region_.Acquire(); + // auto global_lock = global_critical_region_.Acquire(); for (auto breakpoint : breakpoints_) { if (breakpoint->address() == address) { return breakpoint; @@ -1293,60 +1315,173 @@ uint32_t Processor::CalculateNextGuestInstruction(ThreadDebugInfo* thread_info, } uint32_t Processor::GuestAtomicIncrement32(ppc::PPCContext* context, uint32_t guest_address) { - uint32_t* host_address = context->TranslateVirtual(guest_address); - - uint32_t result; - while (true) { - result = *host_address; - // todo: should call a processor->backend function that acquires a - // reservation instead of using host atomics - if (xe::atomic_cas(xe::byte_swap(result), xe::byte_swap(result + 1), - host_address)) { - break; +#if !XE_DISABLE_RESERVE_IN_HOST_CODE + if (cvars::use_reserve_in_host_code) { + uint32_t result; + do { + result = backend()->ReservedLoad32(context, guest_address); + } while (!backend()->ReservedStore32(context, guest_address, result + 1)); + return result; + } else +#endif + { + uint32_t* host_address = + context->TranslateVirtual(guest_address); + + uint32_t result; + while (true) { + result = *host_address; + // todo: should call a processor->backend function that acquires a + // reservation instead of using host atomics + if (xe::atomic_cas(result, xe::byte_swap(xe::byte_swap(result) + 1), + host_address)) { + break; + } } + return xe::byte_swap(result); } - return result; } uint32_t Processor::GuestAtomicDecrement32(ppc::PPCContext* context, uint32_t guest_address) { - uint32_t* host_address = context->TranslateVirtual(guest_address); - - uint32_t result; - while (true) { - result = *host_address; - // todo: should call a processor->backend function that acquires a - // reservation instead of using host atomics - if 
(xe::atomic_cas(xe::byte_swap(result), xe::byte_swap(result - 1), - host_address)) { - break; +#if !XE_DISABLE_RESERVE_IN_HOST_CODE + if (cvars::use_reserve_in_host_code) { + uint32_t result; + do { + result = backend()->ReservedLoad32(context, guest_address); + } while (!backend()->ReservedStore32(context, guest_address, result - 1)); + return result; + } else +#endif + { + uint32_t* host_address = + context->TranslateVirtual(guest_address); + + uint32_t result; + while (true) { + result = *host_address; + // todo: should call a processor->backend function that acquires a + // reservation instead of using host atomics + if (xe::atomic_cas(result, xe::byte_swap(xe::byte_swap(result) - 1), + host_address)) { + break; + } } + return xe::byte_swap(result); } - return result; } uint32_t Processor::GuestAtomicOr32(ppc::PPCContext* context, uint32_t guest_address, uint32_t mask) { - return xe::atomic_or( - context->TranslateVirtual(guest_address), - xe::byte_swap(mask)); +#if !XE_DISABLE_RESERVE_IN_HOST_CODE + if (cvars::use_reserve_in_host_code) { + uint32_t result; + do { + result = backend()->ReservedLoad32(context, guest_address); + } while ( + !backend()->ReservedStore32(context, guest_address, result | mask)); + return result; + } else +#endif + { + return xe::byte_swap(xe::atomic_or( + context->TranslateVirtual(guest_address), + xe::byte_swap(mask))); + } } uint32_t Processor::GuestAtomicXor32(ppc::PPCContext* context, uint32_t guest_address, uint32_t mask) { - return xe::atomic_xor( - context->TranslateVirtual(guest_address), - xe::byte_swap(mask)); +#if !XE_DISABLE_RESERVE_IN_HOST_CODE + if (cvars::use_reserve_in_host_code) { + uint32_t result; + do { + result = backend()->ReservedLoad32(context, guest_address); + } while ( + !backend()->ReservedStore32(context, guest_address, result ^ mask)); + return result; + } else +#endif + { + return xe::byte_swap(xe::atomic_xor( + context->TranslateVirtual(guest_address), + xe::byte_swap(mask))); + } } uint32_t 
Processor::GuestAtomicAnd32(ppc::PPCContext* context, uint32_t guest_address, uint32_t mask) { - return xe::atomic_and( - context->TranslateVirtual(guest_address), - xe::byte_swap(mask)); +#if !XE_DISABLE_RESERVE_IN_HOST_CODE + if (cvars::use_reserve_in_host_code) { + uint32_t result; + do { + result = backend()->ReservedLoad32(context, guest_address); + } while ( + !backend()->ReservedStore32(context, guest_address, result & mask)); + return result; + } else +#endif + { + return xe::byte_swap(xe::atomic_and( + context->TranslateVirtual(guest_address), + xe::byte_swap(mask))); + } } bool Processor::GuestAtomicCAS32(ppc::PPCContext* context, uint32_t old_value, uint32_t new_value, uint32_t guest_address) { - return xe::atomic_cas(xe::byte_swap(old_value), xe::byte_swap(new_value), - context->TranslateVirtual(guest_address)); +#if !XE_DISABLE_RESERVE_IN_HOST_CODE + if (cvars::use_reserve_in_host_code) { + uint32_t result; + do { + result = backend()->ReservedLoad32(context, guest_address); + if (result != old_value) { + // guests seem to do this, presumably its to end the reservation? + // what would be the disadvantage to leaving a reserve hanging? 
+ backend()->ReservedStore32(context, guest_address, result); + return false; + } + } while (!backend()->ReservedStore32(context, guest_address, new_value)); + return true; + } else +#endif + { + return xe::atomic_cas(xe::byte_swap(old_value), xe::byte_swap(new_value), + context->TranslateVirtual(guest_address)); + } +} + +uint32_t Processor::GuestAtomicExchange32(ppc::PPCContext* context, + void* guest_address, + uint32_t new_value) { +#if !XE_DISABLE_RESERVE_IN_HOST_CODE + if (cvars::use_reserve_in_host_code) { + uint32_t result; + uint32_t gaddr = context->HostToGuestVirtual(guest_address); + do { + result = backend()->ReservedLoad32(context, gaddr); + } while (!backend()->ReservedStore32(context, gaddr, new_value)); + return result; + } else +#endif + { + return xe::byte_swap(xe::atomic_exchange(xe::byte_swap(new_value), + (uint32_t*)guest_address)); + } +} +void Processor::NotifyHWThreadBooted(uint32_t i) { + xe::atomic_inc(&num_booted_hwthreads_); +} +bool Processor::AllHWThreadsBooted() { return num_booted_hwthreads_ == 6; } + +void Processor::Suspend() { + xenia_assert(AllHWThreadsBooted()); + for (auto&& thread : hw_threads_) { + thread->Suspend(); + } +} +// resumes all hw threads +void Processor::Resume() { + for (auto&& thread : hw_threads_) { + thread->Resume(); + } } } // namespace cpu } // namespace xe diff --git a/src/xenia/cpu/processor.h b/src/xenia/cpu/processor.h index 782d7e52b6..4143bcc2dd 100644 --- a/src/xenia/cpu/processor.h +++ b/src/xenia/cpu/processor.h @@ -23,6 +23,7 @@ #include "xenia/cpu/entry_table.h" #include "xenia/cpu/export_resolver.h" #include "xenia/cpu/function.h" +#include "xenia/cpu/hwclock.h" #include "xenia/cpu/module.h" #include "xenia/cpu/ppc/ppc_frontend.h" #include "xenia/cpu/thread_debug_info.h" @@ -30,7 +31,7 @@ #include "xenia/memory.h" DECLARE_bool(debug); - +DECLARE_bool(use_reserve_in_host_code); namespace xe { namespace cpu { @@ -40,13 +41,6 @@ class Breakpoint; class StackWalker; class XexModule; -enum class 
Irql : uint32_t { - PASSIVE = 0, - APC = 1, - DISPATCH = 2, - DPC = 3, -}; - // Describes the current state of the emulator as known to the debugger. // This determines which state the debugger is in and what operations are // allowed. @@ -99,7 +93,7 @@ class Processor { void set_debug_info_flags(uint32_t debug_info_flags) { debug_info_flags_ = debug_info_flags; } - + uint32_t GetPCRForCPU(uint32_t cpu_num); bool AddModule(std::unique_ptr module); void RemoveModule(const std::string_view name); Module* GetModule(const std::string_view name); @@ -122,7 +116,7 @@ class Processor { bool Execute(ThreadState* thread_state, uint32_t address); bool ExecuteRaw(ThreadState* thread_state, uint32_t address); uint64_t Execute(ThreadState* thread_state, uint32_t address, uint64_t args[], - size_t arg_count); + size_t arg_count, bool raw = false); bool Save(ByteStream* stream); bool Restore(ByteStream* stream); @@ -188,14 +182,31 @@ class Processor { uint32_t guest_address); uint32_t GuestAtomicDecrement32(ppc::PPCContext* context, uint32_t guest_address); + uint32_t GuestAtomicIncrement32(ppc::PPCContext* context, + void* guest_address) { + return GuestAtomicIncrement32(context, + context->HostToGuestVirtual(guest_address)); + } + uint32_t GuestAtomicDecrement32(ppc::PPCContext* context, + void* guest_address) { + return GuestAtomicDecrement32(context, + context->HostToGuestVirtual(guest_address)); + } uint32_t GuestAtomicOr32(ppc::PPCContext* context, uint32_t guest_address, - uint32_t mask); + uint32_t mask); uint32_t GuestAtomicXor32(ppc::PPCContext* context, uint32_t guest_address, - uint32_t mask); + uint32_t mask); uint32_t GuestAtomicAnd32(ppc::PPCContext* context, uint32_t guest_address, - uint32_t mask); + uint32_t mask); bool GuestAtomicCAS32(ppc::PPCContext* context, uint32_t old_value, uint32_t new_value, uint32_t guest_address); + bool GuestAtomicCAS32(ppc::PPCContext* context, uint32_t old_value, + uint32_t new_value, void* guest_address) { + return 
GuestAtomicCAS32(context, old_value, new_value, + context->HostToGuestVirtual(guest_address)); + } + uint32_t GuestAtomicExchange32(ppc::PPCContext* context, void* guest_address, + uint32_t new_value); public: // TODO(benvanik): hide. @@ -211,6 +222,27 @@ class Processor { uint8_t* AllocateFunctionTraceData(size_t size); + void DirectlyInsertFunction(uint32_t address, Function* function) { + Entry* ent = nullptr; + entry_table_.GetOrCreate(address, &ent); + ent->function = function; + ent->address = function->address(); + ent->end_address = function->end_address(); + ent->status = Entry_t::STATUS_READY; + } + + HWThread* GetCPUThread(uint32_t cpu_num) { + return hw_threads_[cpu_num].get(); + } + + HWClock* GetHWClock() { return hw_clock_.get(); } + void NotifyHWThreadBooted(uint32_t i); + bool AllHWThreadsBooted(); + + //suspends all hw threads + void Suspend(); + //resumes all hw threads + void Resume(); private: // Synchronously demands a debug listener. void DemandDebugListener(); @@ -255,6 +287,11 @@ class Processor { std::function debug_listener_handler_; DebugListener* debug_listener_ = nullptr; + // location of the pcr pages + uint32_t protdata_ = 0; + + std::vector> hw_threads_; + // Which debug features are enabled in generated code. uint32_t debug_info_flags_ = 0; // If specified, the file trace data gets written to when running. @@ -279,7 +316,9 @@ class Processor { // TODO(benvanik): cleanup/change structures. 
std::vector breakpoints_; - Irql irql_; + std::unique_ptr hw_clock_; + + volatile uint32_t num_booted_hwthreads_ = 0; }; } // namespace cpu diff --git a/src/xenia/cpu/thread.cc b/src/xenia/cpu/thread.cc index 5cc9736f17..2268e9d4ba 100644 --- a/src/xenia/cpu/thread.cc +++ b/src/xenia/cpu/thread.cc @@ -8,8 +8,25 @@ */ #include "xenia/cpu/thread.h" +#include "xenia/base/atomic.h" +#include "xenia/cpu/processor.h" #include "xenia/cpu/thread_state.h" +#include "xenia/kernel/kernel_guest_structures.h" +#include "xenia/kernel/kernel_state.h" +DEFINE_bool(threads_aint_cheap, false, "For people with < 8 hardware threads", + "CPU"); + +DEFINE_bool(enable_cpu_timing_fences, false, + "If true, introduce artificial delays to try to better match " + "original cpu/kernel timing", + "CPU"); + +DEFINE_bool( + no_idle_sleeping_for_hw_threads, false, + "If true, do not make the os thread sleep when a hw thread has no work. " + "Reduces latency for interrupts at the cost of much higher cpu usage.", + "CPU"); namespace xe { namespace cpu { @@ -22,7 +39,328 @@ bool Thread::IsInThread() { return current_thread_ != nullptr; } Thread* Thread::GetCurrentThread() { return current_thread_; } uint32_t Thread::GetCurrentThreadId() { - return Thread::GetCurrentThread()->thread_state()->thread_id(); + return cpu::ThreadState::GetContext()->thread_id; +} + +bool HWThread::HandleInterrupts() { return false; } + +void HWThread::RunRunnable(RunnableThread* runnable) { + runnable->fiber_->SwitchTo(); +} + +void HWThread::RunIdleProcess() { + if (idle_process_function_) { + idle_process_function_(cpu::ThreadState::Get()->context()); + } +} + +// thread_local HWThread* this_hw_thread = nullptr; + +HWThread* this_hw_thread(ppc::PPCContext* context) { + return context->processor->GetCPUThread((context->r[13] >> 12) & 0x7); +} +void HWThread::ThreadFunc() { + if (cpu_number_) { + // synchronize to cpu 0 timebase + + uint64_t tbtime = mftb(); + uint64_t systemtime = Clock::QueryHostSystemTime(); + + // 
estimate from difference in systemtime what cpu0's timestamp counter + // currently looks like + uint64_t systemtime_delta = systemtime - mftb_cycle_sync_systemtime_; + constexpr double HUNDREDNANOSECOND_TO_SECOND = 1e-7; + + constexpr double RESCALE_SYSTIME = + static_cast(TIMEBASE_FREQUENCY) * HUNDREDNANOSECOND_TO_SECOND; + + uint64_t current_cpu0_timebase = + mftb_cycle_sync_ + + static_cast( + round(RESCALE_SYSTIME * static_cast(systemtime_delta))); + + if (current_cpu0_timebase > tbtime) { + mftb_delta_sign_ = false; + mftb_delta_ = current_cpu0_timebase - tbtime; + } else { + mftb_delta_sign_ = true; + mftb_delta_ = tbtime - current_cpu0_timebase; + } + } + threading::set_current_thread_id(this->cpu_number_); + interrupt_controller()->Initialize(); + idle_process_fiber_ = threading::Fiber::CreateFromThread(); + cpu::ThreadState::Bind(idle_process_threadstate_); + + if (boot_function_) { + boot_function_(idle_process_threadstate_->context(), boot_ud_); + } + + ready_ = true; + idle_process_threadstate_->context()->processor->NotifyHWThreadBooted( + cpu_number_); + + while (true) { + RunIdleProcess(); + } +} + +HWThread::HWThread(uint32_t cpu_number, cpu::ThreadState* thread_state) + : cpu_number_(cpu_number), + idle_process_threadstate_(thread_state), + runnable_thread_list_() { + threading::Thread::CreationParameters params; + params.create_suspended = true; + params.initial_priority = threading::ThreadPriority::kBelowNormal; + params.stack_size = 16 * 1024 * 1024; + + os_thread_ = + threading::Thread::Create(params, std::bind(&HWThread::ThreadFunc, this)); + + if (!cvars::threads_aint_cheap) { + os_thread_->set_affinity_mask(1ULL << cpu_number_); + } + + os_thread_->set_name(std::string("PPC HW Thread ") + + std::to_string(cpu_number)); + + os_thread_->is_ppc_thread_ = true; + interrupt_controller_ = std::make_unique( + this, thread_state->context()->processor); + host_thread_id_ = os_thread_->system_id(); + wake_idle_event_ = 
threading::Event::CreateAutoResetEvent(false); +} +HWThread::~HWThread() { + xenia_assert(false); // dctor not implemented yet +} + +void HWThread::EnqueueRunnableThread(RunnableThread* rth) { + rth->list_entry_.next_ = nullptr; + runnable_thread_list_.Push(&rth->list_entry_); +} + +void HWThread::YieldToScheduler() { + xenia_assert(cpu::ThreadState::Get() != idle_process_threadstate_); + xenia_assert(threading::Fiber::GetCurrentFiber() != + this->idle_process_fiber_.get()); + // cpu::ThreadState::Bind(idle_process_threadstate_); + this->idle_process_fiber_->SwitchTo(); +} + +struct GuestInterruptWrapper { + void (*ipi_func)(void*); + void* ud; + HWThread* thiz; + bool internal_; +}; +// todo: handle interrupt controller/irql shit, that matters too +// theres a special mmio region 0x7FFF (or 0xFFFF, cant tell) +static bool may_run_interrupt_proc(ppc::PPCContext_s* context) { + return context->ExternalInterruptsEnabled() && + this_hw_thread(context)->interrupt_controller()->GetEOI() != 0; +} +static bool internal_may_run_interrupt_proc(ppc::PPCContext_s* context) { + // despite not using the external interrupt controller, EI still controls + // whether the decrementer interrupt happens + return context->ExternalInterruptsEnabled(); +} + +uintptr_t HWThread::IPIWrapperFunction(ppc::PPCContext_s* context, + ppc::PPCInterruptRequest* request, + void* ud) { + auto interrupt_wrapper = reinterpret_cast(ud); + + ppc::PPCGprSnapshot snap; + context->TakeGPRSnapshot(&snap); + if (!interrupt_wrapper->internal_) { // is it an external interrupt? 
most are + auto kpcr = context->TranslateVirtualGPR(context->r[13]); + + bool cr2 = kpcr->use_alternative_stack == 0; + + auto old_irql = kpcr->current_irql; + bool cr3; + context->DisableEI(); + if (cr2) { + cr3 = 1 < old_irql; + if (!cr3) { + kpcr->current_irql = kernel::IRQL_DISPATCH; + } + kpcr->use_alternative_stack = kpcr->alt_stack_base_ptr; + context->r[1] = kpcr->alt_stack_base_ptr; + } + this_hw_thread(context)->interrupt_controller()->SetEOI(0); + + interrupt_wrapper->ipi_func(interrupt_wrapper->ud); + this_hw_thread(context)->interrupt_controller()->SetEOI(1); + // xenia_assert(interrupt_wrapper->thiz->interrupt_controller()->GetEOI()); + kpcr = context->TranslateVirtualGPR(context->r[13]); + + context->RestoreGPRSnapshot(&snap); + + if (cr2) { + kpcr->use_alternative_stack = 0; + if (!cr3) { + context->kernel_state->GenericExternalInterruptEpilog(context, + old_irql); + } + } + } else { + // internal interrupt, does not get dispatched the same way + interrupt_wrapper->ipi_func(interrupt_wrapper->ud); + context->RestoreGPRSnapshot(&snap); + } + context->AssertInterruptsOn(); + return 2; +} + +void HWThread::ThreadDelay() { + if (cvars::threads_aint_cheap) { + threading::MaybeYield(); + } else { + _mm_pause(); + } +} +bool HWThread::TrySendInterruptFromHost(SendInterruptArguments& arguments) { + auto ipi_func = arguments.ipi_func; + auto ud = arguments.ud; + auto wait_done = arguments.wait_done; + ppc::PPCInterruptRequest* request = + this->interrupt_controller()->AllocateInterruptRequest(); + GuestInterruptWrapper* wrapper = + reinterpret_cast(&request->extra_data_[0]); + + wrapper->ipi_func = ipi_func; + wrapper->ud = ud; + wrapper->thiz = this; + wrapper->internal_ = arguments.internal_interrupt_; + + // ipi wrapper returns 0 if current context has interrupts disabled + volatile uintptr_t result_from_call = 0; + + request->func_ = IPIWrapperFunction; + request->ud_ = (void*)wrapper; + request->may_run_ = arguments.internal_interrupt_ + ? 
internal_may_run_interrupt_proc + : may_run_interrupt_proc; + request->result_out_ = (uintptr_t*)&result_from_call; + request->wait = wait_done; + request->interrupt_serial_number_ = + this->interrupt_controller()->interrupt_serial_number_++; + request->internal_interrupt_ = arguments.internal_interrupt_; + request->irql_ = arguments.irql_; + this->interrupt_controller()->queued_interrupts_.Push(&request->list_entry_); + if (!cvars::no_idle_sleeping_for_hw_threads) { + auto context = cpu::ThreadState::GetContext(); + if (!context || this_hw_thread(context) != this) { + this->wake_idle_event_->Set(); + } + } + if (!wait_done) { + return true; + } else { + while (result_from_call != 2) { + ThreadDelay(); + } + return true; + } + + return true; +} +bool HWThread::SendGuestIPI(SendInterruptArguments& arguments) { + // todo: pool this structure! + return TrySendInterruptFromHost(arguments); +} + +void HWThread::DecrementerInterruptEnqueueProc( + XenonInterruptController* controller, uint32_t slot, void* ud) { + auto thiz = reinterpret_cast(ud); + + cpu::SendInterruptArguments interrupt_arguments{}; + interrupt_arguments.ipi_func = thiz->decrementer_interrupt_callback_; + interrupt_arguments.ud = thiz->decrementer_ud_; + interrupt_arguments.wait_done = false; + interrupt_arguments.irql_ = 0; + interrupt_arguments.internal_interrupt_ = true; + thiz->SendGuestIPI(interrupt_arguments); + + controller->FreeTimedInterruptSlot(slot); + thiz->decrementer_interrupt_slot_ = ~0u; +} +void HWThread::SetDecrementerTicks(int32_t ticks) { + if (decrementer_interrupt_slot_ != ~0u) { + interrupt_controller()->FreeTimedInterruptSlot(decrementer_interrupt_slot_); + decrementer_interrupt_slot_ = ~0u; + } + // 0x7FFFFFFF just means cancel + if (ticks != 0x7FFFFFFF) { + double wait_time_in_microseconds = + (static_cast(ticks) / static_cast(TIMEBASE_FREQUENCY)) * + 1000000.0; + + CpuTimedInterrupt cti; + cti.destination_microseconds_ = + interrupt_controller()->CreateRelativeUsTimestamp( + 
static_cast(wait_time_in_microseconds)); + + cti.ud_ = this; + cti.enqueue_ = DecrementerInterruptEnqueueProc; + + decrementer_interrupt_slot_ = + interrupt_controller()->AllocateTimedInterruptSlot(); + + interrupt_controller()->SetTimedInterruptArgs(decrementer_interrupt_slot_, + &cti); + } + interrupt_controller()->RecomputeNextEventCycles(); +} +void HWThread::SetDecrementerInterruptCallback(void (*decr)(void* ud), + void* ud) { + decrementer_interrupt_callback_ = decr; + decrementer_ud_ = ud; +} +void HWThread::IdleSleep(int64_t nanoseconds) { + if (!cvars::no_idle_sleeping_for_hw_threads) { + threading::NanoWait(wake_idle_event_.get(), false, nanoseconds); + } +} + +uint64_t HWThread::mftb() const { + // need to rescale to TIMEBASE_FREQUENCY + + long long freq = Clock::host_tick_frequency_platform(); + + long long counter = Clock::host_tick_count_platform(); + unsigned long long rem = 0; + + unsigned long long ratio = (49875000ULL << 32) / static_cast(freq); + + unsigned long long result_low = (ratio * counter) >> 32; + + unsigned long long result_high = __umulh(ratio, counter); + + unsigned long long result = result_low | (result_high << 32); + if (mftb_delta_sign_) { + return result - mftb_delta_; + } else { + return result + mftb_delta_; + } +} + +void HWThread::Suspend() { os_thread_->Suspend(); } +void HWThread::Resume() { os_thread_->Resume(); } + +MFTBFence::MFTBFence(uint64_t timebase_cycles) + : desired_timebase_value_( + timebase_cycles + this_hw_thread(ThreadState::GetContext())->mftb()) { +} +MFTBFence::~MFTBFence() { + auto context = ThreadState::GetContext(); + auto hwthread = this_hw_thread(context); + if (cvars::enable_cpu_timing_fences) { + while (hwthread->mftb() < desired_timebase_value_) { + context->CheckInterrupt(); + } + } } } // namespace cpu diff --git a/src/xenia/cpu/thread.h b/src/xenia/cpu/thread.h index 4878e9725d..20cd0adec2 100644 --- a/src/xenia/cpu/thread.h +++ b/src/xenia/cpu/thread.h @@ -9,14 +9,15 @@ #ifndef 
XENIA_CPU_THREAD_H_ #define XENIA_CPU_THREAD_H_ - +#include "xenia/base/cvar.h" #include "xenia/base/threading.h" #include - +#include "xenia/cpu/ppc/ppc_context.h" +#include "xenia/cpu/xenon_interrupt_controller.h" +DECLARE_bool(threads_aint_cheap); namespace xe { namespace cpu { - class ThreadState; // Represents a thread that runs guest code. @@ -49,6 +50,171 @@ class Thread { std::string thread_name_; }; +struct RunnableThread { + threading::AtomicListEntry list_entry_; + + threading::Fiber* fiber_; + cpu::ThreadState* thread_state_; + uint32_t kthread_; +}; +class HWThread; + +/* + decrementer interrupt handler + runs at the same rate as the timebase + + + according to KeQueryPerformanceFrequency, there are 50000000 timebase ticks + per second + + the normal decrementer interrupt does 0x7FFFFFFF ticks + that means that the decrementer has an interval of 42 seconds? that cant be + right + + xeSelectThreadDueToTimesliceExpiration sets the decrementer to 50000, which + is 1 millisecond + + this makes more sense: the decrementer signals the end of the timeslice. in + the interrupt, it gets set to an impossibly large value so it wont trigger + again. the kernel isnt setting it because it wants it to take 42 seconds to + trigger. so lets just treat 0x7FFFFFFF as a special value that disables the + decrementer + + +*/ +// this figure comes courtesy of libxenon. 
turns out 50mhz was not the real +// frequency, so i wonder where we got that figure from +static constexpr uint64_t TIMEBASE_FREQUENCY = 49875000ULL; + +static constexpr int32_t DECREMENTER_DISABLE = 0x7FFFFFFF; + +struct SendInterruptArguments { + void (*ipi_func)(void*); + + void* ud; + bool wait_done = false; + //means the interrupt actually does not use the external interrupt codepath (decrementer does this) + //currently does nothing + bool internal_interrupt_ = false; + uint8_t irql_ = 4; +}; + +class HWThread { + void ThreadFunc(); + + bool HandleInterrupts(); + + void RunRunnable(RunnableThread* runnable); + // dpcs? + void RunIdleProcess(); + + static uintptr_t IPIWrapperFunction(ppc::PPCContext_s* context, + ppc::PPCInterruptRequest* request, + void* ud); + volatile bool ready_ = false; + std::unique_ptr os_thread_; + + std::unique_ptr idle_process_fiber_; + + cpu::ThreadState* idle_process_threadstate_; + uint32_t cpu_number_; + + threading::AtomicListHeader runnable_thread_list_; + + RunnableThread* last_run_thread_ = nullptr; + + // set by kernel + void (*idle_process_function_)(ppc::PPCContext* context) = nullptr; + + void (*boot_function_)(ppc::PPCContext* context, void* ud) = nullptr; + void* boot_ud_ = nullptr; + std::unique_ptr interrupt_controller_; + + void (*external_interrupt_handler_)(cpu::ppc::PPCContext* context, + XenonInterruptController* controller); + + uint32_t decrementer_interrupt_slot_ = ~0u; + + void (*decrementer_interrupt_callback_)(void* ud); + void* decrementer_ud_; + + uint32_t host_thread_id_; + static void DecrementerInterruptEnqueueProc( + XenonInterruptController* controller, uint32_t slot, void* ud); + + std::unique_ptr wake_idle_event_; + + uint64_t mftb_cycle_sync_; + uint64_t mftb_cycle_sync_systemtime_; + //we do a sort of 65-bit counter here + bool mftb_delta_sign_ = 0; + uint64_t mftb_delta_ = 0; + public: + HWThread(uint32_t cpu_number, cpu::ThreadState* thread_state); + ~HWThread(); + + uint32_t cpu_number() 
const { return cpu_number_; } + + void SetBootFunction(void (*f)(ppc::PPCContext*, void*), void* ud) { + boot_function_ = f; + boot_ud_ = ud; + } + bool HasBooted() { return ready_; } + + void SetDecrementerTicks(int32_t ticks); + void SetDecrementerInterruptCallback(void (*decr)(void* ud), void* ud); + + static void ThreadDelay(); + void SetExternalInterruptHandler(void (*handler)( + cpu::ppc::PPCContext* context, XenonInterruptController* controller)) { + external_interrupt_handler_ = handler; + } + + void _CallExternalInterruptHandler(cpu::ppc::PPCContext* context, + XenonInterruptController* controller) { + if (external_interrupt_handler_) { + external_interrupt_handler_(context, controller); + } + } + + void SetIdleProcessFunction( + void (*idle_process_function)(ppc::PPCContext* context)) { + idle_process_function_ = idle_process_function; + } + + void Boot() { os_thread_->Resume(); } + + void EnqueueRunnableThread(RunnableThread* rth); + + void YieldToScheduler(); + + bool TrySendInterruptFromHost(SendInterruptArguments& arguments); + + void IdleSleep(int64_t nanoseconds); + + void Suspend(); + void Resume(); + + uint64_t mftb() const; + + void SetCycleSync(uint64_t timebase_cpu0, uint64_t systemtime) { + mftb_cycle_sync_ = timebase_cpu0; + mftb_cycle_sync_systemtime_ = systemtime; + } + // SendGuestIPI is designed to run on a guest thread + // it ought to be nonblocking, unlike TrySendHostIPI + bool SendGuestIPI(SendInterruptArguments& arguments); + XenonInterruptController* interrupt_controller() { + return interrupt_controller_.get(); + } +}; + +struct MFTBFence { + const uint64_t desired_timebase_value_; + MFTBFence(uint64_t average_timebase_cycles); + ~MFTBFence(); +}; + } // namespace cpu } // namespace xe diff --git a/src/xenia/cpu/thread_state.cc b/src/xenia/cpu/thread_state.cc index 6083d7e9da..618d6a64f5 100644 --- a/src/xenia/cpu/thread_state.cc +++ b/src/xenia/cpu/thread_state.cc @@ -18,10 +18,48 @@ #include "xenia/cpu/processor.h" #include 
"xenia/xbox.h" +// #define THREADSTATE_USE_TEB +#define THREADSTATE_USE_FLS namespace xe { namespace cpu { +#if defined(THREADSTATE_USE_TEB) +#elif defined(THREADSTATE_USE_FLS) +static threading::TlsHandle g_context_fls_handle = threading::kInvalidTlsHandle; +struct initialize_fls_handle_t { + initialize_fls_handle_t() { + g_context_fls_handle = threading::AllocateFlsHandle(); + } +} fls_handle_initializer; + +#else thread_local ThreadState* thread_state_ = nullptr; +#endif + +#define TEB_OFFSET_CONTEXT 0x100 + +#if defined(THREADSTATE_USE_TEB) +static ppc::PPCContext* CurrentContext() { + return reinterpret_cast(__readgsqword(TEB_OFFSET_CONTEXT)); +} + +static void SetCurrentContext(ppc::PPCContext* context) { + __writegsqword(TEB_OFFSET_CONTEXT, reinterpret_cast(context)); +} + +#elif defined(THREADSTATE_USE_FLS) +static ppc::PPCContext* CurrentContext() { + return reinterpret_cast( + threading::GetFlsValue(g_context_fls_handle)); +} + +static void SetCurrentContext(ppc::PPCContext* context) { + threading::SetFlsValue(g_context_fls_handle, + reinterpret_cast(context)); +} +#else + +#endif static void* AllocateContext() { size_t granularity = xe::memory::allocation_granularity(); @@ -62,68 +100,101 @@ static void FreeContext(void* ctx) { memory::DeallocationType::kRelease); } -ThreadState::ThreadState(Processor* processor, uint32_t thread_id, - uint32_t stack_base, uint32_t pcr_address) - : processor_(processor), - memory_(processor->memory()), - thread_id_(thread_id) { - if (thread_id_ == UINT_MAX) { +ThreadState* ThreadState::Create(Processor* processor, uint32_t thread_id, + uint32_t stack_base, uint32_t pcr_address) { + // return new ThreadState(processor, thread_id, stack_base, pcr_address); + if (thread_id == UINT_MAX) { // System thread. Assign the system thread ID with a high bit // set so people know what's up. 
uint32_t system_thread_handle = xe::threading::current_thread_system_id(); - thread_id_ = 0x80000000 | system_thread_handle; + thread_id = 0x80000000 | system_thread_handle; } - backend_data_ = processor->backend()->AllocThreadData(); // Allocate with 64b alignment. - context_ = reinterpret_cast( - AllocateContext()); + auto context_ = reinterpret_cast(AllocateContext()); processor->backend()->InitializeBackendContext(context_); assert_true(((uint64_t)context_ & 0x3F) == 0); std::memset(context_, 0, sizeof(ppc::PPCContext)); // Stash pointers to common structures that callbacks may need. context_->global_mutex = &xe::global_critical_region::mutex(); - context_->virtual_membase = memory_->virtual_membase(); - context_->physical_membase = memory_->physical_membase(); - context_->processor = processor_; - context_->thread_state = this; - context_->thread_id = thread_id_; + auto memory = processor->memory(); + + context_->virtual_membase = memory->virtual_membase(); + context_->membase_bit = memory->membase_bit(); + context_->physical_membase = memory->physical_membase(); + context_->processor = processor; + context_->thread_id = thread_id; // Set initial registers. context_->r[1] = stack_base; + + //constant register, used by hv only i think + context_->r[2] = 0x20000000; + context_->r[13] = pcr_address; // fixme: VSCR must be set here! 
context_->msr = 0x9030; // dumped from a real 360, 0x8000 - //this register can be used for arbitrary data according to the PPC docs - //but the suggested use is to mark which vector registers are in use, for faster save/restore - //it seems unlikely anything uses this, especially since we have way more than 32 vrs, but setting it to all ones seems closer to correct than 0 + // this register can be used for arbitrary data according to the PPC docs + // but the suggested use is to mark which vector registers are in use, for + // faster save/restore it seems unlikely anything uses this, especially since + // we have way more than 32 vrs, but setting it to all ones seems closer to + // correct than 0 context_->vrsave = ~0u; + return reinterpret_cast(context_); } ThreadState::~ThreadState() { - if (backend_data_) { - processor_->backend()->FreeThreadData(backend_data_); - } +#if !defined(THREADSTATE_USE_TEB) && !defined(THREADSTATE_USE_FLS) if (thread_state_ == this) { thread_state_ = nullptr; } - if (context_) { - processor_->backend()->DeinitializeBackendContext(context_); - FreeContext(reinterpret_cast(context_)); +#else + auto cc = CurrentContext(); + if (cc && cc->thread_state() == this) { + SetCurrentContext(nullptr); + } +#endif + if (context()) { + context()->processor->backend()->DeinitializeBackendContext(context()); } } +void ThreadState::operator delete(void* vp) { FreeContext(vp); } + void ThreadState::Bind(ThreadState* thread_state) { +#if defined(THREADSTATE_USE_TEB) || defined(THREADSTATE_USE_FLS) + SetCurrentContext(thread_state->context()); +#else thread_state_ = thread_state; +#endif +} +XE_NOALIAS +ppc::PPCContext* ThreadState::GetContext() { +#if defined(THREADSTATE_USE_TEB) || defined(THREADSTATE_USE_FLS) + return CurrentContext(); +#else + return thread_state_ ? 
thread_state_->context() : nullptr; +#endif +} +ThreadState* ThreadState::Get() { +#if defined(THREADSTATE_USE_TEB) || defined(THREADSTATE_USE_FLS) + auto context = CurrentContext(); + if (context) { + return context->thread_state(); + } + return nullptr; +#else + return thread_state_; +#endif } - -ThreadState* ThreadState::Get() { return thread_state_; } uint32_t ThreadState::GetThreadID() { - return thread_state_ ? thread_state_->thread_id_ : 0xFFFFFFFF; + auto ctx = ThreadState::GetContext(); + + return ctx ? ctx->thread_id : 0xFFFFFFFF; } } // namespace cpu diff --git a/src/xenia/cpu/thread_state.h b/src/xenia/cpu/thread_state.h index 3b7a7c01d9..ff2e54192c 100644 --- a/src/xenia/cpu/thread_state.h +++ b/src/xenia/cpu/thread_state.h @@ -23,30 +23,24 @@ class Processor; class ThreadState { public: - ThreadState(Processor* processor, uint32_t thread_id, uint32_t stack_base = 0, - uint32_t pcr_address = 0); + ~ThreadState(); - Processor* processor() const { return processor_; } - Memory* memory() const { return memory_; } - void* backend_data() const { return backend_data_; } - ppc::PPCContext* context() const { return context_; } - uint32_t thread_id() const { return thread_id_; } + void operator delete(void* vp); + + ppc::PPCContext* context() const { + return reinterpret_cast(const_cast(this)); + } static void Bind(ThreadState* thread_state); static ThreadState* Get(); static uint32_t GetThreadID(); + XE_NOALIAS + static ppc::PPCContext* GetContext(); - private: - Processor* processor_; - Memory* memory_; - void* backend_data_; - - uint32_t pcr_address_ = 0; - uint32_t thread_id_ = 0; - - // NOTE: must be 64b aligned for SSE ops. 
- ppc::PPCContext* context_; + static ThreadState* Create(Processor* processor, uint32_t thread_id, + uint32_t stack_base = 0, uint32_t pcr_address = 0); + }; } // namespace cpu diff --git a/src/xenia/cpu/xenon_interrupt_controller.cc b/src/xenia/cpu/xenon_interrupt_controller.cc new file mode 100644 index 0000000000..d417b1dc60 --- /dev/null +++ b/src/xenia/cpu/xenon_interrupt_controller.cc @@ -0,0 +1,247 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2023 Xenia Canary. All rights reserved. * Released under the BSD + *license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/cpu/xenon_interrupt_controller.h" +#include "xenia/base/logging.h" +#include "xenia/cpu/mmio_handler.h" +#include "xenia/cpu/processor.h" +#include "xenia/cpu/thread.h" +namespace xe { +namespace cpu { +// index is irql >> 2 +static constexpr int interrupt_priorities[32] = { + -1, -1, -1, -1, -1, 3, 13, -1, 2, 1, -1, 4, 5, 6, 7, -1, + 11, 12, -1, 10, -1, 69, 66, -1, -1, 64, 68, 131, -1, 0, -1, -1}; + +int XenonInterruptController::KernelIrqlToInterruptPriority(uint8_t irql) { + return interrupt_priorities[irql >> 2]; +} + +XenonInterruptController::XenonInterruptController(HWThread* thread, + Processor* processor) + : cpu_number_(thread->cpu_number()), + owner_(thread), + processor_(processor) {} + +XenonInterruptController::~XenonInterruptController() {} + +uint32_t XenonInterruptController::GuestMMIOAddress() const { + xenia_assert(cpu_number_ < 6); + return 0x7FFF0000 | (cpu_number_ << 12); +} +static void RaiseMMIOError() { + xe::FatalError( + "MMIO for interrupt controller unimplemented; 64-bit reads and " + "writes unsupported by MMIO subsystem"); +} +static uint32_t ReadRegisterStub(void* ppc_context, 
void* ud, uint32_t addr) { + RaiseMMIOError(); + return 0; +} + +static void WriteRegisterStub(void* ppc_context, void* ud, uint32_t addr, + uint32_t value) { + RaiseMMIOError(); +} + +void XenonInterruptController::Initialize() { + memset(data_, 0, sizeof(data_)); + processor_->memory()->AddVirtualMappedRange(GuestMMIOAddress(), 0xFFFF0000, + 0xFFFF, this, ReadRegisterStub, + WriteRegisterStub); + + tick_microsecond_frequency = + Clock::host_tick_frequency_platform() / (1000ULL * 1000ULL); +} + +void XenonInterruptController::SetInterruptSource(uint64_t src) { + WriteRegisterOffset(0x50, src); +} + +void XenonInterruptController::InterruptFunction(void* ud) { + auto extargs = reinterpret_cast(ud); + auto controller = extargs->controller_; + + controller->SetInterruptSource(extargs->source_); + + controller->owner_->_CallExternalInterruptHandler( + cpu::ThreadState::GetContext(), controller); +} + +void XenonInterruptController::SendExternalInterrupt( + ExternalInterruptArgs& args) { + xenia_assert(false); +} + +void XenonInterruptController::WriteRegisterOffset(uint32_t offset, + uint64_t value) { + xenia_assert(offset + 8 <= sizeof(data_)); + + *reinterpret_cast(reinterpret_cast(&data_[0]) + offset) = + value; + if (offset == 8) { + current_interrupt_priority_ = + KernelIrqlToInterruptPriority(static_cast(value)); + } +} +uint64_t XenonInterruptController::ReadRegisterOffset(uint32_t offset) { + xenia_assert(offset + 8 <= sizeof(data_)); + return *reinterpret_cast(reinterpret_cast(&data_[0]) + + offset); +} + +ppc::PPCInterruptRequest* XenonInterruptController::AllocateInterruptRequest() { + auto head = free_interrupt_requests_.Pop(); + if (head) { + return new (head) ppc::PPCInterruptRequest(); + } else { + return new ppc::PPCInterruptRequest(); + } +} +void XenonInterruptController::FreeInterruptRequest( + ppc::PPCInterruptRequest* request) { + request->list_entry_.next_ = nullptr; + // limit the number of available interrupts in the list to a sane value + // 
if we hit this number, the guest has probably frozen and isn't processing + // the interrupts we're sending + if (free_interrupt_requests_.depth() < 256) { + free_interrupt_requests_.Push(&request->list_entry_); + } else { + delete request; + } +} + +uint32_t XenonInterruptController::AllocateTimedInterruptSlot() { + for (uint32_t i = 0; i < MAX_CPU_TIMED_INTERRUPTS; ++i) { + if (!(timed_event_slots_bitmap_ & (1U << i))) { + timed_event_slots_bitmap_ |= 1U << i; + return i; + } + } + xenia_assert(false); // need to expand free slots! + xe::FatalError("out of timed interrupt slots!"); + return ~0u; +} + +void XenonInterruptController::FreeTimedInterruptSlot(uint32_t slot) { + xenia_assert(slot < MAX_CPU_TIMED_INTERRUPTS); + xenia_assert(timed_event_slots_bitmap_ & (1U << slot)); + timed_event_slots_bitmap_ &= ~(1U << slot); +} +void XenonInterruptController::SetTimedInterruptArgs(uint32_t slot, + CpuTimedInterrupt* data) { + timed_events_[slot] = *data; +} + +void XenonInterruptController::RecomputeNextEventCycles() { + last_qpc_params_ = Clock::GetQpcParams(); + uint64_t lowest_cycles = ~0ull; + for (uint32_t i = 0; i < MAX_CPU_TIMED_INTERRUPTS; ++i) { + if (!(timed_event_slots_bitmap_ & (1U << i))) { + continue; + } + + uint64_t rdtsc_cycles = Clock::HostTickTimestampToQuickTimestamp( + timed_events_[i].destination_microseconds_ * + tick_microsecond_frequency); + + if (rdtsc_cycles < lowest_cycles) { + lowest_cycles = rdtsc_cycles; + } + } + next_event_quick_timestamp_ = lowest_cycles; +} + +void XenonInterruptController::EnqueueTimedInterrupts() { + for (uint32_t timed_interrupt_slot = 0; + timed_interrupt_slot < MAX_CPU_TIMED_INTERRUPTS; + ++timed_interrupt_slot) { + if (!(timed_event_slots_bitmap_ & (1U << timed_interrupt_slot))) { + continue; + } + uint64_t current_time_us = + Clock::host_tick_count_platform() / tick_microsecond_frequency; + if (timed_events_[timed_interrupt_slot].destination_microseconds_ < + current_time_us) { + 
timed_events_[timed_interrupt_slot].enqueue_( + this, timed_interrupt_slot, timed_events_[timed_interrupt_slot].ud_); + } + } + RecomputeNextEventCycles(); +} + +uint64_t XenonInterruptController::CreateRelativeUsTimestamp( + uint64_t microseconds) { + return (Clock::host_tick_count_platform() / tick_microsecond_frequency) + + microseconds; +} + +uint64_t XenonInterruptController::ClampSleepMicrosecondsForTimedInterrupt( + uint64_t sleep_microseconds) { + uint64_t current_microseconds = + Clock::host_tick_count_platform() / tick_microsecond_frequency; + + uint64_t sleep_expiration = current_microseconds + sleep_microseconds; + uint64_t minimum_event_time = sleep_expiration; + for (uint32_t timed_interrupt_slot = 0; + timed_interrupt_slot < MAX_CPU_TIMED_INTERRUPTS; + ++timed_interrupt_slot) { + if (!(timed_event_slots_bitmap_ & (1U << timed_interrupt_slot))) { + continue; + } + + minimum_event_time = std::min( + minimum_event_time, + timed_events_[timed_interrupt_slot].destination_microseconds_); + + } + + if (minimum_event_time == sleep_expiration) { + //input delay is fine, no events would be missed + return sleep_microseconds; + } else { + uint64_t delta_to_event = minimum_event_time - current_microseconds; + + + //compute delta * 0.75 + //onehalf + onefourth + //we do this because most of the time the kernel takes a good deal longer + //than our provided interval + return (delta_to_event >> 2) + (delta_to_event >> 1); + } +} + +void XenonInterruptController::SetEOI(uint64_t value) { + auto context = cpu::ThreadState::GetContext(); + uint32_t cpunum = (context->r[13] >> 12) & 0x7; + auto cpu = context->processor->GetCPUThread(cpunum); + if (cpu->interrupt_controller() != this) { + xenia_assert(false); + } + eoi_written_ = static_cast(value); + if (eoi_written_ && eoi_write_mirror_) { + *eoi_write_mirror_ = 2; + eoi_write_mirror_ = nullptr; + } +} + +bool XenonInterruptController::CanRunInterruptAtIrql(uint8_t irql) { + int32_t prio = 
KernelIrqlToInterruptPriority(irql); + + if (current_interrupt_priority_ == -1 && prio == -1) { + return true; + } else { + return prio > current_interrupt_priority_; + } +} + +uint64_t XenonInterruptController::GetEOI() { return eoi_written_; } + +} // namespace cpu +} // namespace xe diff --git a/src/xenia/cpu/xenon_interrupt_controller.h b/src/xenia/cpu/xenon_interrupt_controller.h new file mode 100644 index 0000000000..2e80e2d81f --- /dev/null +++ b/src/xenia/cpu/xenon_interrupt_controller.h @@ -0,0 +1,138 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2023 Xenia Canary. All rights reserved. * Released under the BSD + *license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_CPU_XENON_INTERRUPT_CONTROLLER_H_ +#define XENIA_CPU_XENON_INTERRUPT_CONTROLLER_H_ +#include "xenia/base/memory.h" +#include "xenia/base/threading.h" +#include "xenia/base/clock.h" +namespace xe { +namespace cpu { +class HWThread; +class Processor; +class XenonInterruptController; +namespace ppc { +struct PPCInterruptRequest; +} +struct ExternalInterruptArgs { + XenonInterruptController* controller_; + uint32_t source_; +}; +static constexpr uint32_t MAX_CPU_TIMED_INTERRUPTS = 4; +using CpuTimedInterruptProc = void (*)(XenonInterruptController* controller, uint32_t slot, void* ud); +struct CpuTimedInterrupt { + //time in nanoseconds that the event should be triggered at + uint64_t destination_microseconds_; + CpuTimedInterruptProc enqueue_; + void* ud_; +}; + +/* + todo: can't LLE this, because the MMIO handler does not support 8-byte loads + and stores, and all accesses to this are 8 bytes +*/ +class XenonInterruptController { + public: + threading::AtomicListHeader queued_interrupts_; + volatile uint64_t 
interrupt_serial_number_ = 0ULL; + uint64_t next_event_quick_timestamp_ = ~0ULL; + int32_t current_interrupt_priority_ = -1; + // technically has a whole page, but I think only a little bit of it (0x100) is used. at least, from kernel space + union { + struct { + uint64_t unk_0; // 0x0 + // only interrupts with a higher irql than current_irql may be triggered + uint64_t current_irql; // 0x8 + + // low 16 bits = value that gets passed to the cpu we're signalling + // xbox kernel uses it to encode an absolute byte offset to the entry in + // the KPCR's interrupts array high 16 bits = bitmask of cpus to send the + // interrupt to + uint64_t ipi_signal; // 0x10 + uint64_t unk_18; // 0x18 + uint64_t unk_20; // 0x20 + uint64_t unk_28; // 0x28 + uint64_t unk_30; // 0x30 + uint64_t unk_38; // 0x38 + uint64_t unk_40; // 0x40 + uint64_t unk_48; // 0x48 + uint64_t unk_50; // 0x50 + uint64_t unk_58; // 0x58 + uint64_t unk_60; // 0x60 + // writing a value to this marks the end of the interrupt + sets + // current_irql + uint64_t eoi_irql; // 0x68 + uint64_t unk_70; // 0x70 + }; + uint64_t data_[32]; // 0x100 bytes + }; + + static int KernelIrqlToInterruptPriority(uint8_t irql); + + private: + const uint32_t cpu_number_; + uint32_t pad_; + HWThread* const owner_; + Processor* const processor_; + uint64_t tick_microsecond_frequency; + threading::AtomicListHeader free_interrupt_requests_; + + uint32_t eoi_written_ = 1; + uint32_t timed_event_slots_bitmap_=0; + CpuTimedInterrupt timed_events_[4]; + + uintptr_t* eoi_write_mirror_ = nullptr; + + + void SetInterruptSource(uint64_t src); + static void InterruptFunction(void* ud); + + public: + Clock::QpcParams last_qpc_params_; + void Initialize(); + ppc::PPCInterruptRequest* AllocateInterruptRequest(); + void FreeInterruptRequest(ppc::PPCInterruptRequest* request); + XenonInterruptController(HWThread* thread, Processor* processor); + ~XenonInterruptController(); + // the address is normally calculated by setting the top bits + // of 
KPCR to 0x7FFF + // kpcr's low bits are always 0 except for the nibble starting at bit 12, + // which contains the hw thread number + uint32_t GuestMMIOAddress() const; + + void SendExternalInterrupt(ExternalInterruptArgs& args); + + void WriteRegisterOffset(uint32_t offset, uint64_t value); + uint64_t ReadRegisterOffset(uint32_t offset); + + uint32_t AllocateTimedInterruptSlot(); + void FreeTimedInterruptSlot(uint32_t slot); + void SetTimedInterruptArgs(uint32_t slot, CpuTimedInterrupt* data); + uint64_t GetSlotUsTimestamp(uint32_t slot) { + return timed_events_[slot].destination_microseconds_; + } + + void RecomputeNextEventCycles(); + void EnqueueTimedInterrupts(); + + uint64_t CreateRelativeUsTimestamp(uint64_t microseconds); + void SetEOI(uint64_t value); + uint64_t GetEOI(); + bool CanRunInterruptAtIrql(uint8_t irql); + void SetEOIWriteMirror(uintptr_t* v) { eoi_write_mirror_ = v; } + + //check whether a sleep would miss a timed interrupt, and if so return a more appropriate time to sleep for + //that won't cause us to miss anything + uint64_t ClampSleepMicrosecondsForTimedInterrupt(uint64_t microseconds); +}; + +} // namespace cpu +} // namespace xe + +#endif // XENIA_CPU_XENON_INTERRUPT_CONTROLLER_H_ diff --git a/src/xenia/emulator.cc b/src/xenia/emulator.cc index dc2990adef..7ee715d260 100644 --- a/src/xenia/emulator.cc +++ b/src/xenia/emulator.cc @@ -53,8 +53,8 @@ #include "xenia/vfs/devices/disc_zarchive_device.h" #include "xenia/vfs/devices/host_path_device.h" #include "xenia/vfs/devices/null_device.h" -#include "xenia/vfs/virtual_file_system.h" #include "xenia/vfs/devices/xcontent_container_device.h" +#include "xenia/vfs/virtual_file_system.h" #if XE_ARCH_AMD64 #include "xenia/cpu/backend/x64/x64_backend.h" @@ -83,7 +83,7 @@ DECLARE_bool(allow_plugins); namespace xe { using namespace xe::literals; - +static Emulator* g_current_emulator = nullptr; Emulator::GameConfigLoadCallback::GameConfigLoadCallback(Emulator& emulator) : emulator_(emulator) { 
emulator_.AddGameConfigLoadCallback(this); @@ -93,6 +93,8 @@ Emulator::GameConfigLoadCallback::~GameConfigLoadCallback() { emulator_.RemoveGameConfigLoadCallback(this); } +Emulator* Emulator::Get() { return g_current_emulator; } + Emulator::Emulator(const std::filesystem::path& command_line, const std::filesystem::path& storage_root, const std::filesystem::path& content_root, @@ -119,6 +121,7 @@ Emulator::Emulator(const std::filesystem::path& command_line, paused_(false), restoring_(false), restore_fence_() { + g_current_emulator = this; #if XE_PLATFORM_WIN32 == 1 // Show a disclaimer that links to the quickstart // guide the first time they ever open the emulator @@ -166,6 +169,7 @@ Emulator::~Emulator() { export_resolver_.reset(); ExceptionHandler::Uninstall(Emulator::ExceptionCallbackThunk, this); + g_current_emulator = nullptr; } X_STATUS Emulator::Setup( @@ -266,6 +270,11 @@ X_STATUS Emulator::Setup( patcher_ = std::make_unique(storage_root_); + // Setup the core components. + result = graphics_system_->Setup( + processor_.get(), nullptr, + display_window_ ? &display_window_->app_context() : nullptr, + display_window_ != nullptr); // Shared kernel state. kernel_state_ = std::make_unique(this); #define LOAD_KERNEL_MODULE(t) \ @@ -278,11 +287,6 @@ X_STATUS Emulator::Setup( plugin_loader_ = std::make_unique( kernel_state_.get(), storage_root() / "plugins"); - // Setup the core components. - result = graphics_system_->Setup( - processor_.get(), kernel_state_.get(), - display_window_ ? &display_window_->app_context() : nullptr, - display_window_ != nullptr); if (result) { return result; } @@ -294,7 +298,6 @@ X_STATUS Emulator::Setup( } } - // Initialize emulator fallback exception handling last. 
ExceptionHandler::Install(Emulator::ExceptionCallbackThunk, this); @@ -333,7 +336,8 @@ const std::unique_ptr Emulator::CreateVfsDeviceBasedOnPath( extension == ".tar" || extension == ".gz") { xe::ShowSimpleMessageBox( xe::SimpleMessageBoxType::Error, - fmt::format("Unsupported format!" + fmt::format( + "Unsupported format!" "Xenia does not support running software in an archived format.")); } return std::make_unique(mount_path, path); @@ -514,6 +518,40 @@ X_STATUS Emulator::InstallContentPackage(const std::filesystem::path& path) { installation_path); } +void Emulator::RegisterGuestHardwareBlockThread(xe::threading::Thread* thread) { + auto lock = global_critical_region::Acquire(); + hw_block_threads_.insert(thread); +} +void Emulator::UnregisterGuestHardwareBlockThread( + xe::threading::Thread* thread) { + auto lock = global_critical_region::Acquire(); + auto iter = hw_block_threads_.find(thread); + xenia_assert(iter != hw_block_threads_.end()); + if (iter != hw_block_threads_.end()) { + hw_block_threads_.erase(iter); + } +} + +void Emulator::Suspend360() { + auto lock = global_critical_region::Acquire(); + + // hardware should be suspended first, so that while the guest threads are + // suspended interrupts don't queue up + for (auto&& hw_block_thread : hw_block_threads_) { + hw_block_thread->Suspend(); + } + processor()->Suspend(); +} +void Emulator::Resume360() { + auto lock = global_critical_region::Acquire(); + + processor()->Resume(); + + for (auto&& hw_block_thread : hw_block_threads_) { + hw_block_thread->Resume(); + } +} + void Emulator::Pause() { if (paused_) { return; @@ -737,6 +775,7 @@ bool Emulator::ExceptionCallback(Exception* ex) { return false; } + xe::FatalError("Exception!"); // Within range. Pause the emulator and eat the exception. 
Pause(); @@ -752,9 +791,12 @@ bool Emulator::ExceptionCallback(Exception* ex) { std::string crash_msg; crash_msg.append("==== CRASH DUMP ====\n"); crash_msg.append(fmt::format("Thread ID (Host: 0x{:08X} / Guest: 0x{:08X})\n", - current_thread->thread()->system_id(), current_thread->thread_id())); - crash_msg.append(fmt::format("Thread Handle: 0x{:08X}\n", current_thread->handle())); - crash_msg.append(fmt::format("PC: 0x{:08X}\n", + current_thread->thread()->system_id(), + current_thread->thread_id())); + crash_msg.append( + fmt::format("Thread Handle: 0x{:08X}\n", current_thread->handle())); + crash_msg.append( + fmt::format("PC: 0x{:08X}\n", guest_function->MapMachineCodeToGuestAddress(ex->pc()))); crash_msg.append("Registers:\n"); for (int i = 0; i < 32; i++) { @@ -797,7 +839,9 @@ bool Emulator::ExceptionCallback(Exception* ex) { void Emulator::WaitUntilExit() { while (true) { if (main_thread_) { - xe::threading::Wait(main_thread_->thread(), false); + if (main_thread_->fiber()) { + xe::threading::Wait(main_thread_->fiber(), false); + } } if (restoring_) { @@ -974,7 +1018,7 @@ X_STATUS Emulator::CompleteLaunch(const std::filesystem::path& path, title_version_ = format_version(title_version); } } - + // Try and load the resource database (xex only). if (module->title_id()) { auto title_id = fmt::format("{:08X}", module->title_id()); diff --git a/src/xenia/emulator.h b/src/xenia/emulator.h index 32070d8a6b..0f47a69d74 100644 --- a/src/xenia/emulator.h +++ b/src/xenia/emulator.h @@ -16,6 +16,7 @@ #include #include #include +#include #include "xenia/base/delegate.h" #include "xenia/base/exception_handler.h" @@ -87,7 +88,7 @@ class Emulator { private: Emulator& emulator_; }; - + static Emulator* Get(); explicit Emulator(const std::filesystem::path& command_line, const std::filesystem::path& storage_root, const std::filesystem::path& content_root, @@ -205,6 +206,15 @@ class Emulator { // Extract content of package to content specific directory. 
X_STATUS InstallContentPackage(const std::filesystem::path& path); + //if a thread is created to emulate some hardware on the 360 it must be registered here so that the system can be fully suspended for debugging + //purposes. can't have hardware writing to memory while we're inspecting it! + void RegisterGuestHardwareBlockThread(xe::threading::Thread* thread); + void UnregisterGuestHardwareBlockThread(xe::threading::Thread* thread); + + void Suspend360(); + void Resume360(); + + //these are different from Suspend360/Resume360, which are intended for debugging void Pause(); void Resume(); bool is_paused() const { return paused_; } @@ -285,6 +295,8 @@ class Emulator { bool paused_; bool restoring_; threading::Fence restore_fence_; // Fired on restore finish. + + std::set hw_block_threads_; }; } // namespace xe diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index 0614bc4c84..18e34504b6 100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -23,6 +23,7 @@ #include "xenia/gpu/texture_info.h" #include "xenia/kernel/kernel_state.h" #include "xenia/kernel/user_module.h" +#include "xenia/emulator.h" #if !defined(NDEBUG) #define XE_ENABLE_GPU_REG_WRITE_LOGGING 1 @@ -100,14 +101,15 @@ bool CommandProcessor::Initialize() { } worker_running_ = true; - worker_thread_ = kernel::object_ref( - new kernel::XHostThread(kernel_state_, 128 * 1024, 0, [this]() { - WorkerThreadMain(); - return 0; - }, kernel_state_->GetIdleProcess())); - worker_thread_->set_name("GPU Commands"); - worker_thread_->Create(); + threading::Thread::CreationParameters crparams{}; + crparams.create_suspended = false; + crparams.stack_size = 16 * 1024 * 1024; + worker_thread_ = threading::Thread::Create( + crparams, std::bind(&CommandProcessor::WorkerThreadMain, this)); + Emulator::Get()->RegisterGuestHardwareBlockThread(worker_thread_.get()); + worker_thread_->set_name("GPU Commands"); + worker_thread_->set_affinity_mask(0b11000000); 
return true; } @@ -116,7 +118,9 @@ void CommandProcessor::Shutdown() { worker_running_ = false; write_ptr_index_event_->Set(); - worker_thread_->Wait(0, 0, 0, nullptr); + + threading::Wait(worker_thread_.get(), false); + Emulator::Get()->UnregisterGuestHardwareBlockThread(worker_thread_.get()); worker_thread_.reset(); } @@ -203,7 +207,7 @@ void CommandProcessor::RestoreGammaRamp( void CommandProcessor::CallInThread(std::function fn) { if (pending_fns_.empty() && - kernel::XThread::IsInThread(worker_thread_.get())) { + worker_thread_.get() == threading::Thread::GetCurrentThread()) { fn(); } else { pending_fns_.push(std::move(fn)); @@ -270,7 +274,8 @@ void CommandProcessor::WorkerThreadMain() { // TODO(benvanik): use reader->Read_update_freq_ and only issue after moving // that many indices. - // Keep in mind that the gpu also updates the cpu-side copy if the write pointer and read pointer would be equal + // Keep in mind that the gpu also updates the cpu-side copy if the write + // pointer and read pointer would be equal if (read_ptr_writeback_ptr_) { xe::store_and_swap( memory_->TranslatePhysical(read_ptr_writeback_ptr_), read_ptr_index_); @@ -304,7 +309,7 @@ void CommandProcessor::Resume() { } paused_ = false; - worker_thread_->thread()->Resume(); + worker_thread_->Resume(); } bool CommandProcessor::Save(ByteStream* stream) { @@ -360,9 +365,8 @@ void CommandProcessor::EnableReadPointerWriteBack(uint32_t ptr, XE_NOINLINE XE_COLD void CommandProcessor::LogKickoffInitator(uint32_t value) { cpu::backend::GuestPseudoStackTrace st; - if (logging::internal::ShouldLog(LogLevel::Debug) && kernel_state_->processor() - ->backend() - ->PopulatePseudoStacktrace(&st)) { + if (logging::internal::ShouldLog(LogLevel::Debug) && + kernel_state_->processor()->backend()->PopulatePseudoStacktrace(&st)) { logging::LoggerBatch log_initiator{}; log_initiator("Updating read ptr to {}, initiator stacktrace below\n", @@ -381,7 +385,7 @@ XE_NOINLINE XE_COLD void 
CommandProcessor::LogKickoffInitator(uint32_t value) { } void CommandProcessor::UpdateWritePointer(uint32_t value) { - XE_UNLIKELY_IF (cvars::log_ringbuffer_kickoff_initiator_bts) { + XE_UNLIKELY_IF(cvars::log_ringbuffer_kickoff_initiator_bts) { LogKickoffInitator(value); } write_ptr_index_ = value; @@ -390,7 +394,8 @@ void CommandProcessor::UpdateWritePointer(uint32_t value) { void CommandProcessor::LogRegisterSet(uint32_t register_index, uint32_t value) { #if XE_ENABLE_GPU_REG_WRITE_LOGGING == 1 - if (cvars::log_guest_driven_gpu_register_written_values && logging::internal::ShouldLog(LogLevel::Debug)) { + if (cvars::log_guest_driven_gpu_register_written_values && + logging::internal::ShouldLog(LogLevel::Debug)) { const RegisterInfo* reginfo = RegisterFile::GetRegisterInfo(register_index); if (!reginfo) { @@ -734,7 +739,6 @@ void CommandProcessor::PrepareForWait() { trace_writer_.Flush(); } void CommandProcessor::ReturnFromWait() {} - void CommandProcessor::InitializeTrace() { // Write the initial register values, to be loaded directly into the // RegisterFile since all registers, including those that may have side diff --git a/src/xenia/gpu/command_processor.h b/src/xenia/gpu/command_processor.h index 281e608e86..e2d6412284 100644 --- a/src/xenia/gpu/command_processor.h +++ b/src/xenia/gpu/command_processor.h @@ -148,6 +148,8 @@ class CommandProcessor { bool Save(ByteStream* stream); bool Restore(ByteStream* stream); + void SetKernelState(xe::kernel::KernelState* ks) { kernel_state_ = ks; + } protected: struct IndexBufferInfo { xenos::IndexFormat format = xenos::IndexFormat::kInt16; @@ -268,7 +270,7 @@ class CommandProcessor { std::filesystem::path trace_frame_path_; std::atomic worker_running_; - kernel::object_ref worker_thread_; + std::unique_ptr worker_thread_; std::queue> pending_fns_; diff --git a/src/xenia/gpu/graphics_system.cc b/src/xenia/gpu/graphics_system.cc index cc06c1390f..e1b590c9f8 100644 --- a/src/xenia/gpu/graphics_system.cc +++ 
b/src/xenia/gpu/graphics_system.cc @@ -23,12 +23,13 @@ #include "xenia/base/math.h" #include "xenia/base/profiling.h" #include "xenia/base/threading.h" +#include "xenia/emulator.h" #include "xenia/gpu/command_processor.h" #include "xenia/gpu/gpu_flags.h" +#include "xenia/kernel/kernel_state.h" #include "xenia/ui/graphics_provider.h" #include "xenia/ui/window.h" #include "xenia/ui/windowed_app_context.h" -#include "xenia/kernel/kernel_state.h" DEFINE_bool( store_shaders, true, "Store shaders persistently and load them when loading games to avoid " @@ -100,51 +101,15 @@ X_STATUS GraphicsSystem::Setup(cpu::Processor* processor, reinterpret_cast(ReadRegisterThunk), reinterpret_cast(WriteRegisterThunk)); - // 60hz vsync timer. - vsync_worker_running_ = true; - vsync_worker_thread_ = kernel::object_ref( - new kernel::XHostThread(kernel_state_, 128 * 1024, 0, [this]() { - const double vsync_duration_d = - cvars::vsync - ? std::max( - 5.0, 1000.0 / static_cast(cvars::vsync_fps)) - : 1.0; - uint64_t last_frame_time = Clock::QueryGuestTickCount(); - // Sleep for 90% of the vblank duration, spin for 10% - const double duration_scalar = 0.90; - - while (vsync_worker_running_) { - const uint64_t current_time = Clock::QueryGuestTickCount(); - const uint64_t tick_freq = Clock::guest_tick_frequency(); - const uint64_t time_delta = current_time - last_frame_time; - const double elapsed_d = static_cast(time_delta) / - (static_cast(tick_freq) / 1000.0); - if (elapsed_d >= vsync_duration_d) { - last_frame_time = current_time; - - // TODO(disjtqz): should recalculate the remaining time to a vblank - // after MarkVblank, no idea how long the guest code normally takes - MarkVblank(); - if (cvars::vsync) { - const uint64_t estimated_nanoseconds = static_cast( - (vsync_duration_d * 1000000.0) * - duration_scalar); // 1000 microseconds = 1 ms - - threading::NanoSleep(estimated_nanoseconds); - } - } - if (!cvars::vsync) { - xe::threading::Sleep(std::chrono::milliseconds(1)); - } - } - return 
0; - }, kernel_state->GetIdleProcess())); - // As we run vblank interrupts the debugger must be able to suspend us. - vsync_worker_thread_->set_can_debugger_suspend(true); - vsync_worker_thread_->set_name("GPU VSync"); - vsync_worker_thread_->Create(); - vsync_worker_thread_->thread()->set_priority( - threading::ThreadPriority::kLowest); + AddConstantRegisterValue(0x0F00, 0x08100748); // RB_EDRAM_TIMING + AddConstantRegisterValue(0x0F01, 0x0000200E); // RB_BC_CONTROL + + AddConstantRegisterValue(0x194C, 0x000002D0); // R500_D1MODE_V_COUNTER + AddConstantRegisterValue(0x1951, 1); // interrupt status, vblank + AddConstantRegisterValue( + 0x1961, 0x050002D0); // AVIVO_D1MODE_VIEWPORT_SIZE + // Screen res - 1280x720 + // maximum [width(0x0FFF), height(0x0FFF)] if (cvars::trace_gpu_stream) { BeginTracing(); } @@ -152,6 +117,93 @@ X_STATUS GraphicsSystem::Setup(cpu::Processor* processor, return X_STATUS_SUCCESS; } +void GraphicsSystem::AddConstantRegisterValue(uint32_t gpu_register, + uint32_t value) { + auto range = memory_->LookupVirtualMappedRange(0x7FC80000); + + range->constant_addresses[0x7FC80000 + (gpu_register * 4)] = value; +} +void GraphicsSystem::SetupVsync() { +#if XE_USE_TIMED_INTERRUPTS_FOR_VSYNC + //1000 microseconds = one millisecond, 1000 milliseconds = 1 second + vsync_relative_ts_ = cvars::vsync ? 
(1000ULL * 1000ULL) / cvars::vsync_fps + : (1000ULL * 1000ULL); + auto vsync_target_thread = processor()->GetCPUThread(2); + + auto interrupt_controller = vsync_target_thread->interrupt_controller(); + + cpu::CpuTimedInterrupt vsync_cti; + vsync_cti.destination_microseconds_ = + interrupt_controller->CreateRelativeUsTimestamp( + vsync_relative_ts_); // one second / vsync_fps + + vsync_cti.ud_ = reinterpret_cast(this); + vsync_cti.enqueue_ = &GraphicsSystem::VsyncInterruptEnqueueProcedure; + uint32_t clock_slot = interrupt_controller->AllocateTimedInterruptSlot(); + interrupt_controller->SetTimedInterruptArgs(clock_slot, &vsync_cti); + interrupt_controller->RecomputeNextEventCycles(); +#else + // 60hz vsync timer. + vsync_worker_running_ = true; + threading::Thread::CreationParameters crparams{}; + crparams.create_suspended = false; + crparams.stack_size = 16 * 1024 * 1024; + vsync_worker_thread_ = threading::Thread::Create(crparams, [this]() { + const double vsync_duration_d = + cvars::vsync ? 
std::max( + 5.0, 1000.0 / static_cast(cvars::vsync_fps)) + : 1.0; + uint64_t last_frame_time = Clock::QueryGuestTickCount(); + // Sleep for 90% of the vblank duration, spin for 10% + const double duration_scalar = 0.90; + + while (vsync_worker_running_) { + const uint64_t current_time = Clock::QueryGuestTickCount(); + const uint64_t tick_freq = Clock::guest_tick_frequency(); + const uint64_t time_delta = current_time - last_frame_time; + const double elapsed_d = static_cast(time_delta) / + (static_cast(tick_freq) / 1000.0); + if (elapsed_d >= vsync_duration_d) { + last_frame_time = current_time; + + // TODO(disjtqz): should recalculate the remaining time to a vblank + // after MarkVblank, no idea how long the guest code normally takes + MarkVblank(); + if (cvars::vsync) { + const uint64_t estimated_nanoseconds = static_cast( + (vsync_duration_d * 1000000.0) * + duration_scalar); // 1000 microseconds = 1 ms + + threading::NanoSleep(estimated_nanoseconds); + } + } + if (!cvars::vsync) { + xe::threading::Sleep(std::chrono::milliseconds(1)); + } + } + return 0; + }); + Emulator::Get()->RegisterGuestHardwareBlockThread(vsync_worker_thread_.get()); + // As we run vblank interrupts the debugger must be able to suspend us. 
+ vsync_worker_thread_->set_name("GPU VSync"); +#endif +} +#if XE_USE_TIMED_INTERRUPTS_FOR_VSYNC + +void GraphicsSystem::VsyncInterruptEnqueueProcedure( + cpu::XenonInterruptController* controller, uint32_t slot, void* ud) { + GraphicsSystem* thiz = reinterpret_cast(ud); + + cpu::CpuTimedInterrupt reschedule_args{}; + reschedule_args.destination_microseconds_ = + controller->GetSlotUsTimestamp(slot) + thiz->vsync_relative_ts_; + reschedule_args.ud_ = ud; + reschedule_args.enqueue_ = &GraphicsSystem::VsyncInterruptEnqueueProcedure; + controller->SetTimedInterruptArgs(slot, &reschedule_args); + + thiz->MarkVblank(); +} +#endif void GraphicsSystem::Shutdown() { if (command_processor_) { EndTracing(); @@ -161,7 +213,9 @@ void GraphicsSystem::Shutdown() { if (vsync_worker_thread_) { vsync_worker_running_ = false; - vsync_worker_thread_->Wait(0, 0, 0, nullptr); + threading::Wait(vsync_worker_thread_.get(), false); + Emulator::Get()->UnregisterGuestHardwareBlockThread( + vsync_worker_thread_.get()); vsync_worker_thread_.reset(); } @@ -208,7 +262,7 @@ void GraphicsSystem::WriteRegisterThunk(void* ppc_context, GraphicsSystem* gs, uint32_t GraphicsSystem::ReadRegister(uint32_t addr) { uint32_t r = (addr & 0xFFFF) / 4; - + // most of these are handled by AddConstantRegisterValue switch (r) { case 0x0F00: // RB_EDRAM_TIMING return 0x08100748; @@ -267,7 +321,8 @@ void GraphicsSystem::SetInterruptCallback(uint32_t callback, } void GraphicsSystem::DispatchInterruptCallback(uint32_t source, uint32_t cpu) { - kernel_state()->EmulateCPInterruptDPC(interrupt_callback_,interrupt_callback_data_, source, cpu); + kernel_state()->EmulateCPInterrupt(interrupt_callback_, + interrupt_callback_data_, source, cpu); } void GraphicsSystem::MarkVblank() { @@ -348,5 +403,10 @@ bool GraphicsSystem::Restore(ByteStream* stream) { return command_processor_->Restore(stream); } +void GraphicsSystem::SetKernelState(xe::kernel::KernelState* ks) { + kernel_state_ = ks; + 
command_processor()->SetKernelState(ks); +} + } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/graphics_system.h b/src/xenia/gpu/graphics_system.h index ef58d45691..8807b55600 100644 --- a/src/xenia/gpu/graphics_system.h +++ b/src/xenia/gpu/graphics_system.h @@ -27,6 +27,8 @@ #include "xenia/ui/windowed_app_context.h" #include "xenia/xbox.h" +#define XE_USE_TIMED_INTERRUPTS_FOR_VSYNC 1 + namespace xe { class Emulator; } // namespace xe @@ -85,10 +87,13 @@ class GraphicsSystem { bool Save(ByteStream* stream); bool Restore(ByteStream* stream); + void SetupVsync(); + void SetKernelState(xe::kernel::KernelState* ks); + protected: GraphicsSystem(); - + void AddConstantRegisterValue(uint32_t register_, uint32_t value); virtual std::unique_ptr CreateCommandProcessor() = 0; static uint32_t ReadRegisterThunk(void* ppc_context, GraphicsSystem* gs, @@ -110,7 +115,7 @@ class GraphicsSystem { uint32_t interrupt_callback_data_ = 0; std::atomic vsync_worker_running_; - kernel::object_ref vsync_worker_thread_; + std::unique_ptr vsync_worker_thread_; RegisterFile* register_file_; std::unique_ptr command_processor_; @@ -121,6 +126,12 @@ class GraphicsSystem { std::unique_ptr presenter_; std::atomic_flag host_gpu_loss_reported_; +#if XE_USE_TIMED_INTERRUPTS_FOR_VSYNC + int64_t vsync_relative_ts_; + + static void VsyncInterruptEnqueueProcedure( + cpu::XenonInterruptController* controller, uint32_t slot, void* ud); +#endif }; } // namespace gpu diff --git a/src/xenia/guest_pointers.h b/src/xenia/guest_pointers.h index 6794e11f53..098fe9264f 100644 --- a/src/xenia/guest_pointers.h +++ b/src/xenia/guest_pointers.h @@ -10,6 +10,7 @@ #ifndef XENIA_GUEST_POINTERS_H_ #define XENIA_GUEST_POINTERS_H_ +#define offsetof32(...) 
static_cast(__builtin_offsetof(__VA_ARGS__)) namespace xe { template struct ShiftedPointer { @@ -22,6 +23,7 @@ struct ShiftedPointer { m_base = base; return *this; } + ShiftedPointer(TBase* base) { m_base = base; } inline this_type& operator=(this_type other) { m_base = other.m_base; @@ -41,11 +43,19 @@ struct TypedGuestPointer { m_ptr = ptr; return *this; } - inline bool operator==(uint32_t ptr) const { return m_ptr == ptr; } - inline bool operator!=(uint32_t ptr) const { return m_ptr != ptr; } + //inline bool operator==(uint32_t ptr) const { return m_ptr == ptr; } +// inline bool operator!=(uint32_t ptr) const { return m_ptr != ptr; } // use value directly, no endian swap needed - inline bool operator!() const { return !m_ptr.value; } + //inline bool operator!() const { return !m_ptr.value; } + inline operator uint32_t() const { return m_ptr; } }; + +//matches hexrays' ADJ operator +template +inline auto ADJ(TShiftedPointer ptr) { + return ptr.GetAdjacent(); +} + } // namespace xe #endif // XENIA_GUEST_POINTERS_H_ \ No newline at end of file diff --git a/src/xenia/kernel/kernel_guest_structures.h b/src/xenia/kernel/kernel_guest_structures.h new file mode 100644 index 0000000000..f8c3517cb2 --- /dev/null +++ b/src/xenia/kernel/kernel_guest_structures.h @@ -0,0 +1,594 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2023 Xenia Canary. All rights reserved. * Released under the BSD + *license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_KERNEL_KERNEL_GUEST_STRUCTURES_H_ +#define XENIA_KERNEL_KERNEL_GUEST_STRUCTURES_H_ +#include "xenia/base/memory.h" +#include "xenia/kernel/util/native_list.h" +#include "xenia/xbox.h" +namespace xe { +namespace kernel { +static constexpr uint32_t kKernelAuxstackSize = 65536; +enum Irql : uint8_t { + IRQL_PASSIVE = 0, + IRQL_APC = 1, + IRQL_DISPATCH = 2, + IRQL_DPC = 3, + IRQL_AUDIO = 68, // used a few times in the audio driver + IRQL_CLOCK = 116, //irql used by the clock interrupt + IRQL_HIGHEST = 124 +}; + +enum { + DISPATCHER_MANUAL_RESET_EVENT = 0, + DISPATCHER_AUTO_RESET_EVENT = 1, + DISPATCHER_MUTANT = 2, + DISPATCHER_QUEUE = 4, + DISPATCHER_SEMAPHORE = 5, + DISPATCHER_THREAD = 6, + DISPATCHER_MANUAL_RESET_TIMER = 8, + DISPATCHER_AUTO_RESET_TIMER = 9, +}; + +// https://www.geoffchappell.com/studies/windows/km/ntoskrnl/inc/ntos/ke/kthread_state.htm +enum : uint8_t { + KTHREAD_STATE_INITIALIZED = 0, + KTHREAD_STATE_READY = 1, + KTHREAD_STATE_RUNNING = 2, + KTHREAD_STATE_STANDBY = 3, + KTHREAD_STATE_TERMINATED = 4, + KTHREAD_STATE_WAITING = 5, + KTHREAD_STATE_UNKNOWN = 6, //"Transition" except that makes no sense here, so + //6 likely has a different meaning on xboxkrnl +}; + +static constexpr uint32_t XE_FLAG_THREAD_INITIALLY_SUSPENDED = 1, + XE_FLAG_SYSTEM_THREAD = 2, + XE_FLAG_PRIORITY_CLASS1 = 0x20, + XE_FLAG_PRIORITY_CLASS2 = 0x40, + XE_FLAG_RETURN_KTHREAD_PTR = 0x80, + XE_FLAG_AFFINITY_CPU0 = 1U << 24, + XE_FLAG_AFFINITY_CPU1 = 1U << 25, + XE_FLAG_AFFINITY_CPU2 = 1U << 26, + XE_FLAG_AFFINITY_CPU3 = 1U << 27, + XE_FLAG_AFFINITY_CPU4 = 1U << 28, + XE_FLAG_AFFINITY_CPU5 = 1U << 29; + +struct X_KTHREAD; +struct X_KPROCESS; +struct X_KPCR; +struct X_KPRCB; +#pragma pack(push, 1) + +enum X_OBJECT_HEADER_FLAGS : uint16_t { + OBJECT_HEADER_FLAG_NAMED_OBJECT = + 1, // if set, has X_OBJECT_HEADER_NAME_INFO prior to X_OBJECT_HEADER + OBJECT_HEADER_FLAG_IS_PERMANENT 
= 2, + OBJECT_HEADER_FLAG_CONTAINED_IN_DIRECTORY = + 4, // this object resides in an X_OBJECT_DIRECTORY + OBJECT_HEADER_IS_TITLE_OBJECT = 0x10, // used in obcreateobject + +}; + +// https://www.nirsoft.net/kernel_struct/vista/OBJECT_HEADER.html +struct X_OBJECT_HEADER { + xe::be pointer_count; + xe::be handle_count; + xe::be object_type_ptr; // -0x8 POBJECT_TYPE + xe::be flags; + uint8_t unknownE; + uint8_t unknownF; + // Object lives after this header. + // (There's actually a body field here which is the object itself) +}; +static_assert_size(X_OBJECT_HEADER, 0x10); + +struct X_OBJECT_DIRECTORY { + // each is a pointer to X_OBJECT_HEADER_NAME_INFO + // i believe offset 0 = pointer to next in bucket + xe::be name_buckets[13]; +}; +static_assert_size(X_OBJECT_DIRECTORY, 0x34); + +// https://www.geoffchappell.com/studies/windows/km/ntoskrnl/inc/ntos/ob/object_header_name_info.htm +// quite different, though +struct X_OBJECT_HEADER_NAME_INFO { + // i think that this is the next link in an X_OBJECT_DIRECTORY's buckets + xe::be next_in_directory; + xe::be object_directory; // pointer to X_OBJECT_DIRECTORY + X_ANSI_STRING name; +}; +struct X_OBJECT_ATTRIBUTES { + xe::be root_directory; // 0x0 + xe::be name_ptr; // 0x4 PANSI_STRING + xe::be attributes; // 0xC +}; +struct X_OBJECT_TYPE { + xe::be allocate_proc; // 0x0 + xe::be free_proc; // 0x4 + xe::be close_proc; // 0x8 + xe::be delete_proc; // 0xC + xe::be unknown_proc; // 0x10 + xe::be + unknown_size_or_object_; // this seems to be a union, it can be a pointer + // or it can be the size of the object + xe::be pool_tag; // 0x18 +}; +static_assert_size(X_OBJECT_TYPE, 0x1C); + +struct X_KSYMLINK { + xe::be refed_object_maybe; + X_ANSI_STRING refed_object_name_maybe; +}; +static_assert_size(X_KSYMLINK, 0xC); +// https://msdn.microsoft.com/en-us/library/windows/desktop/aa363082.aspx +typedef struct { + // Renamed due to a collision with exception_code from Windows excpt.h. 
+ xe::be code; + xe::be exception_flags; + xe::be exception_record; + xe::be exception_address; + xe::be number_parameters; + xe::be exception_information[15]; +} X_EXCEPTION_RECORD; +static_assert_size(X_EXCEPTION_RECORD, 0x50); + +struct X_KSPINLOCK { + xe::be pcr_of_owner; +}; +static_assert_size(X_KSPINLOCK, 4); + +struct XDPC { + xe::be type; + uint8_t selected_cpu_number; + uint8_t desired_cpu_number; + X_LIST_ENTRY list_entry; + xe::be routine; + xe::be context; + xe::be arg1; + xe::be arg2; + + void Initialize(uint32_t guest_func, uint32_t guest_context) { + type = 19; + selected_cpu_number = 0; + desired_cpu_number = 0; + routine = guest_func; + context = guest_context; + } +}; + +struct XAPC { + static const uint32_t kSize = 40; + + // KAPC is 0x28(40) bytes? (what's passed to ExAllocatePoolWithTag) + // This is 4b shorter than NT - looks like the reserved dword at +4 is gone. + // NOTE: stored in guest memory. + uint16_t type; // +0 + uint8_t apc_mode; // +2 + uint8_t enqueued; // +3 + EZPointer thread_ptr; // +4 + X_LIST_ENTRY list_entry; // +8 + xe::be kernel_routine; // +16 + xe::be rundown_routine; // +20 + xe::be normal_routine; // +24 + xe::be normal_context; // +28 + xe::be arg1; // +32 + xe::be arg2; // +36 +}; +// https://www.nirsoft.net/kernel_struct/vista/DISPATCHER_HEADER.html +struct X_DISPATCH_HEADER { + struct { + uint8_t type; + + union { + uint8_t abandoned; + uint8_t absolute; + }; + uint8_t process_type; // X_PROCTYPE_ + uint8_t inserted; + }; + xe::be signal_state; + X_LIST_ENTRY wait_list; +}; +static_assert_size(X_DISPATCH_HEADER, 0x10); + +enum : uint16_t { + WAIT_ALL = 0, + WAIT_ANY = 1, +}; + +// https://www.geoffchappell.com/studies/windows/km/ntoskrnl/inc/ntos/ke_x/kwait_block.htm +// pretty much the vista KWAIT_BLOCK verbatim, except that sparebyte is gone +// and WaitType is 2 bytes instead of 1 +struct X_KWAIT_BLOCK { + X_LIST_ENTRY wait_list_entry; // 0x0 + EZPointer thread; + EZPointer object; + EZPointer next_wait_block; 
+ // this isnt the official vista name, but i think its better. + // this value is what will be returned to the waiter if this particular wait + // is satisfied + xe::be wait_result_xstatus; + // WAIT_ALL or WAIT_ANY + xe::be wait_type; +}; + +static_assert_size(X_KWAIT_BLOCK, 0x18); + +struct X_KSEMAPHORE { + X_DISPATCH_HEADER header; + xe::be limit; +}; +static_assert_size(X_KSEMAPHORE, 0x14); + +struct X_KMUTANT { + X_DISPATCH_HEADER header; // 0x0 + X_LIST_ENTRY unk_list; // 0x10 + EZPointer owner; // 0x18 + bool abandoned; // 0x1C + // these might just be padding + uint8_t unk_1D; // 0x1D + uint8_t unk_1E; // 0x1E + uint8_t unk_1F; // 0x1F +}; + +static_assert_size(X_KMUTANT, 0x20); + +// https://www.nirsoft.net/kernel_struct/vista/KEVENT.html +struct X_KEVENT { + X_DISPATCH_HEADER header; +}; +static_assert_size(X_KEVENT, 0x10); + +struct X_KTHREAD; +struct X_KPROCESS; +// https://learn.microsoft.com/en-us/windows-hardware/drivers/ddi/wdm/ns-wdm-_file_object +struct X_KFILE_OBJECT { + uint8_t unk_0[0x68]; +}; +static_assert_size(X_KFILE_OBJECT, 0x68); +struct X_KPRCB { + EZPointer current_thread; // 0x0 + EZPointer next_thread; // 0x4 + EZPointer idle_thread; // 0x8 + uint8_t current_cpu; // 0xC + uint8_t unk_D[3]; // 0xD + // should only have 1 bit set, used for ipis + xe::be processor_mask; // 0x10 + // incremented in clock interrupt + xe::be dpc_clock; // 0x14 + xe::be interrupt_clock; // 0x18 + xe::be unk_1C; // 0x1C + xe::be unk_20; // 0x20 + // various fields used by KeIpiGenericCall + xe::be ipi_args[3]; // 0x24 + // looks like the target cpus clear their corresponding bit + // in this mask to signal completion to the initiator + xe::be targeted_ipi_cpus_mask; // 0x30 + xe::be ipi_function; // 0x34 + // used to synchronize? 
+ TypedGuestPointer ipi_initiator_prcb; // 0x38 + xe::be unk_3C; // 0x3C + xe::be dpc_related_40; // 0x40 + // must be held to modify any dpc-related fields in the kprcb + X_KSPINLOCK dpc_lock; // 0x44 + util::X_TYPED_LIST queued_dpcs_list_head; + // // 0x48 + xe::be dpc_active; // 0x50 + X_KSPINLOCK enqueued_processor_threads_lock; // 0x54 + // if the idle thread is running, this is set to point to it, else 0 + EZPointer running_idle_thread; // 0x58 + // definitely scheduler related + X_SINGLE_LIST_ENTRY enqueued_threads_list; // 0x5C + // if bit 0 set, have a thread at priority 0, etc + xe::be has_ready_thread_by_priority; // 0x60 + // i think the following mask has something to do with the array that comes + // after + xe::be unk_mask_64; // 0x64 + // have to hardcode this offset, KTHREAD not defined yet + util::X_TYPED_LIST ready_threads_by_priority[32]; // 0x68 + // ExTerminateThread tail calls a function that does KeInsertQueueDpc of this + // dpc + XDPC thread_exit_dpc; // 0x168 + // thread_exit_dpc's routine drains this list and frees each threads threadid, + // kernel stack and dereferences the thread + X_LIST_ENTRY terminating_threads_list; // 0x184 + XDPC switch_thread_processor_dpc; // 0x18C +}; +// Processor Control Region +struct X_KPCR { + xe::be tls_ptr; // 0x0 + xe::be msr_mask; // 0x4 + union { + xe::be software_interrupt_state; // 0x8 + struct { + // covers timers, dpcs, thread switches + uint8_t generic_software_interrupt; // 0x 8 // 0x8 + uint8_t apc_software_interrupt_state; // 0x9 + }; + }; + xe::be unk_0A; // 0xA + uint8_t processtype_value_in_dpc; // 0xC + uint8_t timeslice_ended; // 0xD + uint8_t timer_pending; // 0xE + uint8_t unk_0F; // 0xF + // used in KeSaveFloatingPointState / its vmx counterpart + xe::be thread_fpu_related; // 0x10 + xe::be thread_vmx_related; // 0x14 + uint8_t current_irql; // 0x18 + uint8_t background_scheduling_active; // 0x19 + uint8_t background_scheduling_1A; // 0x1A + uint8_t background_scheduling_1B; // 0x1B 
+ xe::be timer_related; // 0x1C + uint8_t unk_20[0x10]; // 0x20 + xe::be pcr_ptr; // 0x30 + + // this seems to be just garbage data? we can stash a pointer to context here + // as a hack for now + union { + uint8_t unk_38[8]; // 0x38 + // points to XenonInterruptController + uint64_t emulated_interrupt; // 0x38 + }; + uint8_t unk_40[28]; // 0x40 + xe::be unk_stack_5c; // 0x5C + uint8_t unk_60[12]; // 0x60 + xe::be use_alternative_stack; // 0x6C + xe::be stack_base_ptr; // 0x70 Stack base address (high addr) + xe::be stack_end_ptr; // 0x74 Stack end (low addr) + + // maybe these are the stacks used in apcs? + // i know they're stacks, RtlGetStackLimits returns them if another var here + // is set + + xe::be alt_stack_base_ptr; // 0x78 + xe::be alt_stack_end_ptr; // 0x7C + // if bit 1 is set in a handler pointer, it actually points to a KINTERRUPT + // otherwise, it points to a function to execute + xe::be interrupt_handlers[32]; // 0x80 + X_KPRCB prcb_data; // 0x100 + // pointer to KPCRB? + TypedGuestPointer prcb; // 0x2A8 + uint8_t unk_2AC[0x2C]; // 0x2AC +}; + +/* + there must be two timer structures, because the size passed to + ObCreateObject does not make sense if we apply that structure size to the + timer embedded in KTHREAD +*/ +struct X_KTIMER { + X_DISPATCH_HEADER header; // 0x0 + xe::be due_time; // 0x10 + X_LIST_ENTRY table_bucket_entry; // 0x18 + TypedGuestPointer dpc; // 0x20 + xe::be period; // 0x24 +}; +static_assert_size(X_KTIMER, 0x28); + +struct X_EXTIMER { + X_KTIMER ktimer; // 0x0 + XDPC dpc; // 0x28 + XAPC apc; // 0x44 + X_LIST_ENTRY thread_timer_list_entry; // 0x6C + X_KSPINLOCK timer_lock; // 0x74 + // not confident in this name + xe::be period; // 0x78 + bool has_apc; // 0x7C + uint8_t unk_7D[3]; // 0x7D +}; + +static_assert_size(X_EXTIMER, 0x80); + +// iocompletions appear to just be a KQUEUE under another name +// seems to exactly match normal nt structure! 
+// https://learn.microsoft.com/en-us/windows-hardware/drivers/ddi/ntifs/nf-ntifs-keinitializequeue +// todo: figure out where thread_list_head links into 360 KTHREAD +struct X_KQUEUE { + X_DISPATCH_HEADER header; // 0x0 + X_LIST_ENTRY entry_list_head; // 0x10 + xe::be current_count; // 0x18 + xe::be maximum_count; // 0x1C + util::X_TYPED_LIST thread_list_head; // 0x20 +}; +static_assert_size(X_KQUEUE, 0x28); + +// just an alias, they are identical structures +using X_KIO_COMPLETION = X_KQUEUE; + +struct X_KINTERRUPT { + xe::be service_routine; // 0x0 + xe::be service_context; // 0x4 + X_KSPINLOCK spinlock; // 0x8 + xe::be service_count; // 0xC + uint8_t unk_10; // 0x10 + uint8_t irql; // 0x11 + uint8_t unk_12; // 0x12 + uint8_t unk_13; // 0x13 +}; + +static_assert_size(X_KINTERRUPT, 0x14); + +struct X_KTHREAD { + X_DISPATCH_HEADER header; // 0x0 + util::X_TYPED_LIST + mutants_list; // 0x10 + X_KTIMER wait_timeout_timer; // 0x18 + X_KWAIT_BLOCK wait_timeout_block; // 0x40 + uint8_t unk_58[0x4]; // 0x58 + xe::be stack_base; // 0x5C + xe::be stack_limit; // 0x60 + xe::be stack_kernel; // 0x64 + xe::be tls_address; // 0x68 + // state = is thread running, suspended, etc + uint8_t thread_state; // 0x6C + // 0x70 = priority? 
+ + uint8_t alerted[2]; // 0x6D + uint8_t alertable; // 0x6F + uint8_t priority; // 0x70 + uint8_t fpu_exceptions_on; // 0x71 + // these two process types both get set to the same thing, process_type is + // referenced most frequently, however process_type_dup gets referenced a few + // times while the process is being created + uint8_t process_type_dup; + uint8_t process_type; + // apc_mode determines which list an apc goes into + util::X_TYPED_LIST apc_lists[2]; + EZPointer process; // 0x84 + uint8_t executing_kernel_apc; // 0x88 + // when context switch happens, this is copied into + // apc_software_interrupt_state for kpcr + uint8_t deferred_apc_software_interrupt_state; // 0x89 + uint8_t user_apc_pending; // 0x8A + uint8_t may_queue_apcs; // 0x8B + X_KSPINLOCK apc_lock; // 0x8C + xe::be num_context_switches_to; // 0x90 + X_LIST_ENTRY ready_prcb_entry; // 0x94 + xe::be msr_mask; // 0x9C + xe::be wait_result; // 0xA0 + uint8_t wait_irql; // 0xA4 + uint8_t processor_mode; // 0xA5 + uint8_t wait_next; // 0xA6 + uint8_t wait_reason; // 0xA7 + EZPointer wait_blocks; // 0xA8 + uint8_t unk_AC[4]; // 0xAC + xe::be apc_disable_count; // 0xB0 + xe::be quantum; // 0xB4 + uint8_t unk_B8; // 0xB8 + uint8_t unk_B9; // 0xB9 + uint8_t unk_BA; // 0xBA + uint8_t boost_disabled; // 0xBB + uint8_t suspend_count; // 0xBC + uint8_t was_preempted; // 0xBD + uint8_t terminated; // 0xBE + uint8_t current_cpu; // 0xBF + EZPointer a_prcb_ptr; // 0xC0 + EZPointer another_prcb_ptr; // 0xC4 + uint8_t unk_C8; // 0xC8 + uint8_t unk_C9; // 0xC9 + uint8_t unk_CA; // 0xCA + uint8_t unk_CB; // 0xCB + X_KSPINLOCK timer_list_lock; // 0xCC + xe::be stack_alloc_base; // 0xD0 + XAPC on_suspend; // 0xD4 + X_KSEMAPHORE suspend_sema; // 0xFC + X_LIST_ENTRY process_threads; // 0x110 + EZPointer queue; // 0x118 + X_LIST_ENTRY queue_related; // 0x11c + xe::be unk_124; // 0x124 + xe::be unk_128; // 0x128 + xe::be unk_12C; // 0x12C + xe::be create_time; // 0x130 + xe::be exit_time; // 0x138 + xe::be 
exit_status; // 0x140 + // tracks all pending timers that have apcs which target this thread + X_LIST_ENTRY timer_list; // 0x144 + xe::be thread_id; // 0x14C + xe::be start_address; // 0x150 + X_LIST_ENTRY unk_154; // 0x154 + uint8_t unk_15C[0x4]; // 0x15C + xe::be last_error; // 0x160 + xe::be fiber_ptr; // 0x164 + uint8_t unk_168[0x4]; // 0x168 + xe::be creation_flags; // 0x16C + + // we handle context differently from a native kernel, so we can stash extra + // data here! the first 8 bytes of vscr are unused anyway + union { + vec128_t vscr; // 0x170 + struct { + void* host_xthread_stash; + uintptr_t vscr_remainder; + }; + }; + union { + // 2048 bytes + vec128_t vmx_context[128]; // 0x180 + struct { + // 1536 bytes + X_KWAIT_BLOCK scratch_waitblock_memory[65]; + // space for some more data! + uint32_t kernel_aux_stack_base_; + uint32_t kernel_aux_stack_current_; + uint32_t kernel_aux_stack_limit_; + }; + }; + xe::be fpscr; // 0x980 + xe::be fpu_context[32]; // 0x988 + + XAPC unk_A88; // 0xA88 + + // This struct is actually quite long... so uh, not filling this out! +}; +static_assert_size(X_KTHREAD, 0xAB0); + +static_assert(offsetof(X_KTHREAD, apc_lists[0]) == 0x74); +struct alignas(4096) X_KPCR_PAGE { + X_KPCR pcr; // 0x0 + char unk_2D8[40]; // 0x2D8 + X_KTHREAD idle_process_thread; +}; + +// (?), used by KeGetCurrentProcessType +constexpr uint32_t X_PROCTYPE_IDLE = 0; +constexpr uint32_t X_PROCTYPE_TITLE = 1; +constexpr uint32_t X_PROCTYPE_SYSTEM = 2; + +struct X_KPROCESS { + X_KSPINLOCK thread_list_spinlock; + // list of threads in this process, guarded by the spinlock above + util::X_TYPED_LIST + thread_list; + // quantum value assigned to each thread of the process + xe::be quantum; + // kernel sets this to point to a section of size 0x2F700 called CLRDATAA, + // except it clears bit 31 of the pointer. 
in 17559 the address is 0x801C0000, + // so it sets this ptr to 0x1C0000 + xe::be clrdataa_masked_ptr; + xe::be thread_count; + uint8_t unk_18; + uint8_t unk_19; + uint8_t unk_1A; + uint8_t unk_1B; + xe::be kernel_stack_size; + xe::be tls_static_data_address; + xe::be tls_data_size; + xe::be tls_raw_data_size; + xe::be tls_slot_size; + // ExCreateThread calls a subfunc references this field, returns + // X_STATUS_PROCESS_IS_TERMINATING if true + uint8_t is_terminating; + // one of X_PROCTYPE_ + uint8_t process_type; + xe::be tls_slot_bitmap[8]; + xe::be unk_50; + X_LIST_ENTRY unk_54; + xe::be unk_5C; +}; +static_assert_size(X_KPROCESS, 0x60); + +struct X_EVENT_INFORMATION { + xe::be type; + xe::be signal_state; +}; + +struct X_RTL_CRITICAL_SECTION { + X_DISPATCH_HEADER header; + xe::be lock_count; // 0x10 -1 -> 0 on first lock + xe::be recursion_count; // 0x14 0 -> 1 on first lock + xe::be owning_thread; // 0x18 PKTHREAD 0 unless locked +}; +static_assert_size(X_RTL_CRITICAL_SECTION, 28); +#pragma pack(pop) + +} // namespace kernel +} // namespace xe + +#endif // XENIA_KERNEL_KERNEL_GUEST_STRUCTURES_H_ diff --git a/src/xenia/kernel/kernel_state.cc b/src/xenia/kernel/kernel_state.cc index b4d461fbd6..fd4f915b30 100644 --- a/src/xenia/kernel/kernel_state.cc +++ b/src/xenia/kernel/kernel_state.cc @@ -36,7 +36,11 @@ DEFINE_bool(apply_title_update, true, "Apply title updates.", "Kernel"); namespace xe { namespace kernel { - +struct DispatchQueueEntry : public threading::AtomicListEntry { + std::function function; + DispatchQueueEntry(std::function fn) + : threading::AtomicListEntry(), function(std::move(fn)) {} +}; constexpr uint32_t kDeferredOverlappedDelayMillis = 100; // This is a global object initialized with the XboxkrnlModule. 
@@ -50,7 +54,6 @@ KernelState::KernelState(Emulator* emulator) : emulator_(emulator), memory_(emulator->memory()), dispatch_thread_running_(false), - dpc_list_(emulator->memory()), kernel_trampoline_group_(emulator->processor()->backend()) { assert_null(shared_kernel_state_); shared_kernel_state_ = this; @@ -60,8 +63,7 @@ KernelState::KernelState(Emulator* emulator) app_manager_ = std::make_unique(); achievement_manager_ = std::make_unique(); user_profiles_.emplace(0, std::make_unique(0)); - - InitializeKernelGuestGlobals(); + BootKernel(); auto content_root = emulator_->content_root(); if (!content_root.empty()) { @@ -69,9 +71,6 @@ KernelState::KernelState(Emulator* emulator) } content_manager_ = std::make_unique(this, content_root); - // Hardcoded maximum of 2048 TLS slots. - tls_bitmap_.Resize(2048); - auto hc_loc_heap = memory_->LookupHeap(strange_hardcoded_page_); bool fixed_alloc_worked = hc_loc_heap->AllocFixed( strange_hardcoded_page_, 65536, 0, @@ -88,7 +87,6 @@ KernelState::~KernelState() { if (dispatch_thread_running_) { dispatch_thread_running_ = false; - dispatch_cond_.notify_all(); dispatch_thread_->Wait(0, 0, 0, nullptr); } @@ -141,18 +139,57 @@ util::XdbfGameData KernelState::module_xdbf( return util::XdbfGameData(nullptr, resource_size); } -uint32_t KernelState::AllocateTLS() { return uint32_t(tls_bitmap_.Acquire()); } - -void KernelState::FreeTLS(uint32_t slot) { - const std::vector> threads = - object_table()->GetObjectsByType(); +uint32_t KernelState::AllocateTLS(cpu::ppc::PPCContext* context) { + auto tls_lock = &GetKernelGuestGlobals(context)->tls_lock; + auto old_irql = xboxkrnl::xeKeKfAcquireSpinLock(context, tls_lock); + int result = -1; + { + auto current_process = + context->TranslateVirtual(GetKThread(context)->process); + + for (xe::be* i = ¤t_process->tls_slot_bitmap[0]; + i < ¤t_process->tls_slot_bitmap[8]; ++i) { + uint32_t lowest_allocated_bit = xe::lzcnt(*i); + if (lowest_allocated_bit != 32) { + // todo: figure out what this 
pointer arith is doing + result = + ((8 * ((char*)i - (char*)current_process) - 384) & 0xFFFFFFE0) + + lowest_allocated_bit; + *i &= ~(1 << (31 - lowest_allocated_bit)); + break; + } + } + } + xboxkrnl::xeKeKfReleaseSpinLock(context, tls_lock, old_irql); + return result; +} - for (const object_ref& thread : threads) { - if (thread->is_guest_thread()) { - thread->SetTLSValue(slot, 0); +void KernelState::FreeTLS(cpu::ppc::PPCContext* context, uint32_t slot) { + auto current_process = + context->TranslateVirtual(GetKThread(context)->process); + + auto old_irql = xboxkrnl::xeKeKfAcquireSpinLock( + context, ¤t_process->thread_list_spinlock); + // zero out all the values in this slot in each thread of the current process + for (auto&& process_thread : + current_process->thread_list.IterateForward(context)) { + uint32_t tls_address = process_thread.tls_address; + if (tls_address) { + context->TranslateVirtualBE( + tls_address)[-static_cast(slot) - 1] = 0; } } - tls_bitmap_.Release(slot); + + // release spinlock, but keep the irql elevated + xboxkrnl::xeKeKfReleaseSpinLock( + context, ¤t_process->thread_list_spinlock, 0, false); + auto tls_lock = &GetKernelGuestGlobals(context)->tls_lock; + + xboxkrnl::xeKeKfAcquireSpinLock(context, tls_lock, false); + // set the free bit for this slot + current_process->tls_slot_bitmap[slot / 32] |= 1U << (31 - (slot % 32)); + // NOW we can lower the irql + xboxkrnl::xeKeKfReleaseSpinLock(context, tls_lock, old_irql); } void KernelState::RegisterTitleTerminateNotification(uint32_t routine, @@ -229,6 +266,11 @@ object_ref KernelState::GetKernelModule( return nullptr; } +// very slow! 
+void KernelState::XamCall(cpu::ppc::PPCContext* context, uint16_t ordinal) { + uint32_t address = this->GetModule("xam")->GetProcAddressByOrdinal(ordinal); + context->processor->Execute(context->thread_state(), address); +} object_ref KernelState::GetModule(const std::string_view name, bool user_only) { @@ -266,39 +308,53 @@ object_ref KernelState::GetModule(const std::string_view name, } return nullptr; } +struct LaunchInterrupt { + object_ref* module; + XThread* volatile thread; +}; + +void KernelState::LaunchModuleInterrupt(void* ud) { + LaunchInterrupt* launch = reinterpret_cast(ud); + auto kernel = kernel_state(); + kernel->SetExecutableModule(*launch->module); + kernel->CreateDispatchThread(); + launch->thread = + new XThread(kernel_state(), (*launch->module)->stack_size(), 0, + (*launch->module)->entry_point(), 0, 0x1000100, true, true); + + X_STATUS result = launch->thread->Create(); + // launch->thread->set_name("Main XThread"); + if (XFAILED(result)) { + XELOGE("Could not create launch thread: {:08X}", result); -object_ref KernelState::LaunchModule(object_ref module) { - if (!module->is_executable()) { - return nullptr; + delete launch->thread; + launch->thread = nullptr; } +} - SetExecutableModule(module); - XELOGI("KernelState: Launching module..."); - - // Create a thread to run in. - // We start suspended so we can run the debugger prep. - auto thread = object_ref( - new XThread(kernel_state(), module->stack_size(), 0, - module->entry_point(), 0, X_CREATE_SUSPENDED, true, true)); - - // We know this is the 'main thread'. 
- thread->set_name("Main XThread"); +LaunchInterrupt* volatile g_launchinterrupt = nullptr; - X_STATUS result = thread->Create(); - if (XFAILED(result)) { - XELOGE("Could not create launch thread: {:08X}", result); +void KernelState::CPU0WaitForLaunch(cpu::ppc::PPCContext* context) { + while (!g_launchinterrupt) { + threading::NanoSleep(1000 * 500); + } + LaunchModuleInterrupt((void*)g_launchinterrupt); +} +object_ref KernelState::LaunchModule(object_ref module) { + if (!module->is_executable()) { return nullptr; } - // Waits for a debugger client, if desired. - emulator()->processor()->PreLaunch(); - - // Resume the thread now. - // If the debugger has requested a suspend this will just decrement the - // suspend count without resuming it until the debugger wants. - thread->Resume(); + // this is pretty bad + LaunchInterrupt li; + li.module = &module; + li.thread = nullptr; - return thread; + g_launchinterrupt = &li; + while (g_launchinterrupt->thread == nullptr) { + threading::NanoSleep(10000); + } + return object_ref(li.thread); } object_ref KernelState::GetExecutableModule() { @@ -366,38 +422,36 @@ void KernelState::SetExecutableModule(object_ref module) { variable_ptr, executable_module_->path(), xboxkrnl::XboxkrnlModule::kExLoadedImageNameSize); } +} +void KernelState::CreateDispatchThread() { // Spin up deferred dispatch worker. - // TODO(benvanik): move someplace more appropriate (out of ctor, but around - // here). if (!dispatch_thread_running_) { dispatch_thread_running_ = true; - dispatch_thread_ = object_ref(new XHostThread( - this, 128 * 1024, 0, - [this]() { - // As we run guest callbacks the debugger must be able to suspend us. 
- dispatch_thread_->set_can_debugger_suspend(true); - - auto global_lock = global_critical_region_.AcquireDeferred(); + dispatch_thread_ = object_ref( + new XHostThread(this, 128 * 1024, XE_FLAG_AFFINITY_CPU2, [this]() { + auto context = cpu::ThreadState::GetContext(); while (dispatch_thread_running_) { - global_lock.lock(); - if (dispatch_queue_.empty()) { - dispatch_cond_.wait(global_lock); - if (!dispatch_thread_running_) { - global_lock.unlock(); - break; - } + context->CheckInterrupt(); + xboxkrnl::xeKeWaitForSingleObject( + context, + &context->kernel_state->GetKernelGuestGlobals(context) + ->dispatch_queue_event_.header, + 3, 0, false, nullptr); + DispatchQueueEntry* entry = + reinterpret_cast(dispatch_queue_.Pop()); + + if (!entry) { + xenia_assert(false); + continue; + } else { + entry->function(); + delete entry; } - auto fn = std::move(dispatch_queue_.front()); - dispatch_queue_.pop_front(); - global_lock.unlock(); - - fn(); } return 0; - }, - GetSystemProcess())); // don't think an equivalent exists on real hw - dispatch_thread_->set_name("Kernel Dispatch"); + })); // don't think an equivalent exists on real hw dispatch_thread_->Create(); + dispatch_thread_->set_name("Kernel Dispatch"); } } @@ -619,9 +673,6 @@ void KernelState::TerminateTitle() { // Unregister all notify listeners. notify_listeners_.clear(); - // Clear the TLS map. - tls_bitmap_.Reset(); - // Unset the executable module. 
executable_module_ = nullptr; @@ -841,22 +892,29 @@ void KernelState::CompleteOverlappedDeferredEx( ev.get()->Reset(); } } - auto global_lock = global_critical_region_.Acquire(); - dispatch_queue_.push_back([this, completion_callback, overlapped_ptr, - pre_callback, post_callback]() { - if (pre_callback) { - pre_callback(); - } - xe::threading::Sleep( - std::chrono::milliseconds(kDeferredOverlappedDelayMillis)); - uint32_t extended_error, length; - auto result = completion_callback(extended_error, length); - CompleteOverlappedEx(overlapped_ptr, result, extended_error, length); - if (post_callback) { - post_callback(); - } - }); - dispatch_cond_.notify_all(); + + DispatchQueueEntry* new_entry = + new DispatchQueueEntry([this, completion_callback, overlapped_ptr, + pre_callback, post_callback]() { + auto context = cpu::ThreadState::GetContext(); + context->CheckInterrupt(); + if (pre_callback) { + pre_callback(); + } + context->CheckInterrupt(); + uint32_t extended_error, length; + auto result = completion_callback(extended_error, length); + context->CheckInterrupt(); + CompleteOverlappedEx(overlapped_ptr, result, extended_error, length); + context->CheckInterrupt(); + if (post_callback) { + post_callback(); + } + }); + auto context = cpu::ThreadState::GetContext(); + dispatch_queue_.Push(new_entry); + xboxkrnl::xeKeSetEvent( + context, &GetKernelGuestGlobals(context)->dispatch_queue_event_, 1, 0); } bool KernelState::Save(ByteStream* stream) { @@ -866,13 +924,6 @@ bool KernelState::Save(ByteStream* stream) { // Save the object table object_table_.Save(stream); - // Write the TLS allocation bitmap - auto tls_bitmap = tls_bitmap_.data(); - stream->Write(uint32_t(tls_bitmap.size())); - for (size_t i = 0; i < tls_bitmap.size(); i++) { - stream->Write(tls_bitmap[i]); - } - // We save XThreads absolutely first, as they will execute code upon save // (which could modify the kernel state) auto threads = object_table_.GetObjectsByType(); @@ -929,53 +980,208 @@ bool 
KernelState::Save(ByteStream* stream) { return true; } -// this only gets triggered once per ms at most, so fields other than tick count -// will probably not be updated in a timely manner for guest code that uses them -void KernelState::UpdateKeTimestampBundle() { - X_TIME_STAMP_BUNDLE* lpKeTimeStampBundle = - memory_->TranslateVirtual(ke_timestamp_bundle_ptr_); - uint32_t uptime_ms = Clock::QueryGuestUptimeMillis(); - xe::store_and_swap(&lpKeTimeStampBundle->interrupt_time, - Clock::QueryGuestInterruptTime()); - xe::store_and_swap(&lpKeTimeStampBundle->system_time, - Clock::QueryGuestSystemTime()); - xe::store_and_swap(&lpKeTimeStampBundle->tick_count, uptime_ms); +cpu::HWThread* KernelState::HWThreadFor(PPCContext* context) { + return context->processor->GetCPUThread( + context->kernel_state->GetPCRCpuNum(GetKPCR(context))); } -uint32_t KernelState::GetKeTimestampBundle() { - XE_LIKELY_IF(ke_timestamp_bundle_ptr_) { return ke_timestamp_bundle_ptr_; } - else { - global_critical_region::PrepareToAcquire(); - return CreateKeTimestampBundle(); +// length of a guest timer tick is normally 1 millisecond +void KernelState::SystemClockInterrupt() { + // todo: set interrupt priority, irql + auto context = cpu::ThreadState::GetContext(); + + auto kpcr = GetKPCR(context); + + auto cpu_num = GetPCRCpuNum(kpcr); + + auto ic = context->kernel_state->InterruptControllerFromPCR(context, kpcr); + + uint32_t old_irql = kpcr->current_irql; + kpcr->current_irql = IRQL_CLOCK; + ic->WriteRegisterOffset(0x8, IRQL_CLOCK); + + // only cpu 0 updates timestamp bundle + timers + if (cpu_num == 0) { + auto globals = GetKernelGuestGlobals(context); + X_TIME_STAMP_BUNDLE* lpKeTimeStampBundle = &globals->KeTimestampBundle; + + uint64_t time_imprecise = (lpKeTimeStampBundle->interrupt_time += 10000ULL); + lpKeTimeStampBundle->system_time += 10000ULL; + lpKeTimeStampBundle->tick_count += 1; + + /* + check timers! 
+ */ + + /* + on real hw, how does the kernel guarantee that no other thread is + writing the timers at this point? this lock acquire is a hack + */ + xboxkrnl::xeKeKfAcquireSpinLock(context, &globals->timer_table_spinlock, + false); + + if (!kpcr->timer_pending && !globals->running_timers.empty(context)) { + for (auto& timer : globals->running_timers.IterateForward(context)) { + if (&timer != nullptr) { + if (timer.due_time <= time_imprecise) { + kpcr->timer_pending = + IRQL_DISPATCH; // actual clock interrupt does a lot more + kpcr->generic_software_interrupt = IRQL_DISPATCH; + break; + } + } + } + } + xboxkrnl::xeKeKfReleaseSpinLock(context, &globals->timer_table_spinlock, 0, + false); } -} -XE_NOINLINE -XE_COLD -uint32_t KernelState::CreateKeTimestampBundle() { - auto crit = global_critical_region::Acquire(); + auto current_thread = + context->TranslateVirtual(kpcr->prcb_data.current_thread); + + auto idle_thread = &reinterpret_cast(kpcr)->idle_process_thread; + if (idle_thread != current_thread) { + auto quantum_decremented = current_thread->quantum - 3; + current_thread->quantum = quantum_decremented; + if (quantum_decremented <= 0) { + kpcr->timeslice_ended = IRQL_DISPATCH; + kpcr->generic_software_interrupt = IRQL_DISPATCH; + } + } - uint32_t pKeTimeStampBundle = - memory_->SystemHeapAlloc(sizeof(X_TIME_STAMP_BUNDLE)); - X_TIME_STAMP_BUNDLE* lpKeTimeStampBundle = - memory_->TranslateVirtual(pKeTimeStampBundle); + ic->WriteRegisterOffset(0x8, old_irql); + kpcr->current_irql = old_irql; +} +void KernelState::GenericExternalInterruptEpilog(cpu::ppc::PPCContext* context, + uint32_t r3) { + auto kpcr = GetKPCR(context); + uint32_t r4 = kpcr->software_interrupt_state; + if (r3 < r4) { + xboxkrnl::xeDispatchProcedureCallInterrupt(r3, r4, context); + } else { + kpcr->current_irql = r3; + } +} - xe::store_and_swap(&lpKeTimeStampBundle->interrupt_time, - Clock::QueryGuestInterruptTime()); +void KernelState::TriggerTrueExternalInterrupt(cpu::ppc::PPCContext* context) 
{ + auto kpcr = GetKPCR(context); + auto ic = context->kernel_state->InterruptControllerFromPCR(context, kpcr); + + uint32_t r7 = kpcr->use_alternative_stack; + uint32_t r4 = kpcr->current_irql; + uint32_t r1 = (uint32_t)context->r[1]; + uint8_t* r1_ptr = context->TranslateVirtual(r1); + uint32_t r3 = r1; + uint32_t r9; + uint32_t CTR; + uint32_t r0; + bool cr3; + auto r5 = ic; + + bool cr2 = r7 == 0; + if (!cr2) { + goto loc_8009BC44; + } - xe::store_and_swap(&lpKeTimeStampBundle->system_time, - Clock::QueryGuestSystemTime()); + uint32_t r8 = kpcr->alt_stack_base_ptr; + r1_ptr[0x150] = r4; + cr3 = r4 > 1; + if (cr3) { + goto loc_8009BC30; + } + r4 = 2; + kpcr->current_irql = 2; +loc_8009BC30: + kpcr->use_alternative_stack = r1; + r8 -= 0x140; + r8 = r1 - r8; + r9 = r1 + 0x700; + // stwux + store_and_swap(context->TranslateVirtual(r1 + r8), r9); +loc_8009BC44: + uint32_t r6 = (uint32_t)ic->ReadRegisterOffset(0x50); + r6 += offsetof(X_KPCR, interrupt_handlers); + + r6 = *reinterpret_cast*>(reinterpret_cast(kpcr) + r6); + r8 = r6 & 1; + if (r8 != 0) { + goto handle_kinterrupt_external; + } + CTR = r6; + context->r[3] = r3; + context->r[4] = r4; + context->r[5] = r5->GuestMMIOAddress(); + // bctr + context->processor->ExecuteRaw(context->thread_state(), CTR); + + r1 = r3; + context->r[1] = r1; +loc_8009BC68: + r8 = -1; + + if (!cr2) { + goto loc_8009BC98; + } + r3 = r1_ptr[0x150]; + r4 = kpcr->software_interrupt_state; + r0 = 0; + kpcr->use_alternative_stack = r0; - xe::store_and_swap(&lpKeTimeStampBundle->tick_count, - Clock::QueryGuestUptimeMillis()); + if (cr3) { + goto loc_8009BC98; + } + /* + cmplw r3, r4 + bge+ loc_8009BC94 + */ - xe::store_and_swap(&lpKeTimeStampBundle->padding, 0); + if (r3 >= r4) { + goto loc_8009BC94; + } + xboxkrnl::xeDispatchProcedureCallInterrupt(r3, r4, context); + goto loc_8009BC98; + +loc_8009BC94: + kpcr->current_irql = r3; + +loc_8009BC98:; + +handle_kinterrupt_external: + r6 &= ~3; + auto kinterrupt = context->TranslateVirtual(r6); 
+ r7 = kinterrupt->irql; + r8 = kinterrupt->service_routine; + store_and_swap(context->TranslateVirtual(r3 + 0x154), r6); + *context->TranslateVirtual(r3 + 0x158) = r4; + r4 = kinterrupt->service_context; + auto r9_spin = &kinterrupt->spinlock; + kpcr->current_irql = r7; + r5->WriteRegisterOffset(0x8, r7); + + r7 = (uint32_t)r5->ReadRegisterOffset(0x8); + context->EnableEI(); + + xboxkrnl::xeKeKfAcquireSpinLock(context, r9_spin, false); + CTR = r8; + r3 = r6; + context->processor->ExecuteRaw(context->thread_state(), CTR); + r7 = *context->TranslateVirtualBE(r1); + r0 = 0; + r1 = r7 - 0x700; + r6 = load_and_swap(context->TranslateVirtual(r1 + 0x154)); + r4 = *context->TranslateVirtual(r3 + 0x158); + r9 = r6 + 8; + r9_spin->pcr_of_owner.value = 0; + context->DisableEI(); + kpcr->current_irql = r4; + r5->WriteRegisterOffset(0x68, r4); + r4 = (uint32_t)r5->ReadRegisterOffset(0x8); + goto loc_8009BC68; +} - timestamp_timer_ = xe::threading::HighResolutionTimer::CreateRepeating( - std::chrono::milliseconds(1), - [this]() { this->UpdateKeTimestampBundle(); }); - ke_timestamp_bundle_ptr_ = pKeTimeStampBundle; - return pKeTimeStampBundle; +uint32_t KernelState::GetKeTimestampBundle() { + return this->GetKernelGuestGlobals() + + offsetof(KernelGuestGlobals, KeTimestampBundle); } bool KernelState::Restore(ByteStream* stream) { @@ -989,11 +1195,6 @@ bool KernelState::Restore(ByteStream* stream) { // Read the TLS allocation bitmap auto num_bitmap_entries = stream->Read(); - auto& tls_bitmap = tls_bitmap_.data(); - tls_bitmap.resize(num_bitmap_entries); - for (uint32_t i = 0; i < num_bitmap_entries; i++) { - tls_bitmap[i] = stream->Read(); - } uint32_t num_threads = stream->Read(); XELOGD("Loading {} threads...", num_threads); @@ -1029,63 +1230,154 @@ uint8_t KernelState::GetConnectedUsers() const { return input_sys->GetConnectedSlots(); } -// todo: definitely need to do more to pretend to be in a dpc -void KernelState::BeginDPCImpersonation(cpu::ppc::PPCContext* context, - 
DPCImpersonationScope& scope) { - auto kpcr = context->TranslateVirtualGPR(context->r[13]); - xenia_assert(kpcr->prcb_data.dpc_active == 0); - scope.previous_irql_ = kpcr->current_irql; - - kpcr->current_irql = 2; - kpcr->prcb_data.dpc_active = 1; -} -void KernelState::EndDPCImpersonation(cpu::ppc::PPCContext* context, - DPCImpersonationScope& end_scope) { - auto kpcr = context->TranslateVirtualGPR(context->r[13]); - xenia_assert(kpcr->prcb_data.dpc_active == 1); - kpcr->current_irql = end_scope.previous_irql_; - kpcr->prcb_data.dpc_active = 0; -} -void KernelState::EmulateCPInterruptDPC(uint32_t interrupt_callback, - uint32_t interrupt_callback_data, - uint32_t source, uint32_t cpu) { - if (!interrupt_callback) { - return; +struct IPIParams { + cpu::Processor* processor_; + uint32_t source_; + uint32_t interrupt_callback_data_; + uint32_t interrupt_callback_; +}; +void KernelState::GraphicsInterruptDPC(PPCContext* context) { + uint32_t callback = static_cast(context->r[5]); + uint64_t callback_data[] = {context->r[4], context->r[6]}; + auto kpcr = GetKPCR(context); + // xenia_assert(kpcr->processtype_value_in_dpc == X_PROCTYPE_IDLE); + xenia_assert(kpcr->prcb_data.dpc_active != 0); + xenia_assert(context->msr == 0x9030); + + xenia_assert(context->kernel_state->GetPCRCpuNum(kpcr) == 2 || + callback_data[0] == 1); + if (callback) { + xboxkrnl::xeKeSetCurrentProcessType(X_PROCTYPE_TITLE, context); + context->processor->Execute(context->thread_state(), callback, + callback_data, countof(callback_data), true); + // xenia_assert(GetKPCR(context)->prcb_data.dpc_active != 0); + xboxkrnl::xeKeSetCurrentProcessType(X_PROCTYPE_IDLE, context); + } + // from markvblank + if (callback_data[0] == 0) { + xboxkrnl::xeKeEnterBackgroundMode(context); } +} - auto thread = kernel::XThread::GetCurrentThread(); - assert_not_null(thread); +void KernelState::CPInterruptIPI(void* ud) { + auto current_ts = cpu::ThreadState::Get(); + auto current_context = current_ts->context(); + auto pcr = 
+ current_context->TranslateVirtualGPR(current_context->r[13]); + auto ic = current_context->kernel_state->InterruptControllerFromPCR( + current_context, pcr); + // 88 is level for vsync interrupt, 84 is level for cp interrupt + IPIParams* params = reinterpret_cast(ud); + + uint32_t old_irql = pcr->current_irql; + pcr->current_irql = params->interrupt_callback_data_ == 0 ? 88 : 84; + ic->WriteRegisterOffset(8, pcr->current_irql); + + auto kernel_state = current_context->kernel_state; + + auto kthread = + current_context->TranslateVirtual(pcr->prcb_data.current_thread); + + auto guest_globals = kernel_state->GetKernelGuestGlobals(current_context); + + auto dpc_to_use = + params->interrupt_callback_data_ == 0 + ? &guest_globals->graphics_interrupt_dpc + : &guest_globals + ->command_processor_interrupt_dpcs[pcr->prcb_data.current_cpu]; + + // in real xboxkrnl, it passes 0 for both args to the dpc, + // but its more convenient for us to pass the interrupt + dpc_to_use->context = params->source_; + xboxkrnl::xeKeInsertQueueDpc(dpc_to_use, params->interrupt_callback_, + params->interrupt_callback_data_, + current_context); + + delete params; + + ic->WriteRegisterOffset(8, old_irql); + KernelState::HWThreadFor(current_context)->interrupt_controller()->SetEOI(1); + pcr->current_irql = old_irql; +} + +void KernelState::EmulateCPInterrupt(uint32_t interrupt_callback, + uint32_t interrupt_callback_data, + uint32_t source, uint32_t cpu) { + // auto thread = kernel::XThread::GetCurrentThread(); + // assert_not_null(thread); // Pick a CPU, if needed. We're going to guess 2. Because. 
if (cpu == 0xFFFFFFFF) { cpu = 2; } - thread->SetActiveCpu(cpu); - /* in reality, our interrupt is a callback that is called in a dpc which is scheduled by the actual interrupt we need to impersonate a dpc */ - auto current_context = thread->thread_state()->context(); - auto kthread = memory()->TranslateVirtual(thread->guest_object()); - auto pcr = memory()->TranslateVirtual(thread->pcr_ptr()); + IPIParams* params = new IPIParams(); + params->processor_ = processor(); + params->source_ = source; + params->interrupt_callback_ = interrupt_callback; + params->interrupt_callback_data_ = interrupt_callback_data; + auto hwthread = processor_->GetCPUThread(cpu); + // while (!hwthread->TrySendInterruptFromHost(CPInterruptIPI, params)) { + // } + cpu::SendInterruptArguments interrupt_args{}; + interrupt_args.ipi_func = CPInterruptIPI; + interrupt_args.ud = params; + interrupt_args.wait_done = source != 0; + interrupt_args.irql_ = source == 0 ? 88 : 84; + hwthread->TrySendInterruptFromHost( + interrupt_args); // do not block if we're the vsync interrupt and on cpu 2! + // we will freeze +} + +X_KSPINLOCK* KernelState::GetDispatcherLock(cpu::ppc::PPCContext* context) { + return &context + ->TranslateVirtual(GetKernelGuestGlobals()) + ->dispatcher_lock; +} + +uint32_t KernelState::LockDispatcher(cpu::ppc::PPCContext* context) { + return xboxkrnl::xeKeKfAcquireSpinLock(context, GetDispatcherLock(context), + true); +} - DPCImpersonationScope dpc_scope{}; - BeginDPCImpersonation(current_context, dpc_scope); +void KernelState::UnlockDispatcher(cpu::ppc::PPCContext* context, + uint32_t irql) { + xboxkrnl::xeKeKfReleaseSpinLock(context, GetDispatcherLock(context), irql, + true); +} - // todo: check VdGlobalXamDevice here. 
if VdGlobalXamDevice is nonzero, should - // set X_PROCTYPE_SYSTEM - xboxkrnl::xeKeSetCurrentProcessType(X_PROCTYPE_TITLE, current_context); +void KernelState::LockDispatcherAtIrql(cpu::ppc::PPCContext* context) { + xboxkrnl::xeKeKfAcquireSpinLock(context, GetDispatcherLock(context), false); +} - uint64_t args[] = {source, interrupt_callback_data}; - processor_->Execute(thread->thread_state(), interrupt_callback, args, - xe::countof(args)); - xboxkrnl::xeKeSetCurrentProcessType(X_PROCTYPE_IDLE, current_context); +void KernelState::UnlockDispatcherAtIrql(cpu::ppc::PPCContext* context) { + xboxkrnl::xeKeKfReleaseSpinLock(context, GetDispatcherLock(context), 0, + false); +} + +uint32_t KernelState::ReferenceObjectByHandle(cpu::ppc::PPCContext* context, + uint32_t handle, + uint32_t guest_object_type, + uint32_t* object_out) { + return xboxkrnl::xeObReferenceObjectByHandle(handle, guest_object_type, + object_out); +} +void KernelState::DereferenceObject(cpu::ppc::PPCContext* context, + uint32_t object) { + xboxkrnl::xeObDereferenceObject(context, object); +} - EndDPCImpersonation(current_context, dpc_scope); +void KernelState::AssertDispatcherLocked(cpu::ppc::PPCContext* context) { + xenia_assert( + context->TranslateVirtual(GetKernelGuestGlobals()) + ->dispatcher_lock.pcr_of_owner == + static_cast(context->r[13])); } void KernelState::UpdateUsedUserProfiles() { @@ -1106,236 +1398,259 @@ void KernelState::UpdateUsedUserProfiles() { } } -void KernelState::InitializeProcess(X_KPROCESS* process, uint32_t type, - char unk_18, char unk_19, char unk_1A) { - uint32_t guest_kprocess = memory()->HostToGuestVirtual(process); +uint32_t KernelState::AllocateInternalHandle(void* ud) { + std::unique_lock lock{this->internal_handle_table_mutex_}; - uint32_t thread_list_guest_ptr = - guest_kprocess + offsetof(X_KPROCESS, thread_list); + uint32_t new_id = current_internal_handle_++; - process->unk_18 = unk_18; - process->unk_19 = unk_19; - process->unk_1A = unk_1A; - 
util::XeInitializeListHead(&process->thread_list, thread_list_guest_ptr); - process->unk_0C = 60; - // doubt any guest code uses this ptr, which i think probably has something to - // do with the page table - process->clrdataa_masked_ptr = 0; - // clrdataa_ & ~(1U << 31); - process->thread_count = 0; - process->unk_1B = 0x06; - process->kernel_stack_size = 16 * 1024; - process->tls_slot_size = 0x80; - - process->process_type = type; - uint32_t unk_list_guest_ptr = guest_kprocess + offsetof(X_KPROCESS, unk_54); - // TODO(benvanik): figure out what this list is. - util::XeInitializeListHead(&process->unk_54, unk_list_guest_ptr); + while (internal_handles_.count(new_id) == 1) { + new_id = current_internal_handle_++; + } + internal_handles_[new_id] = ud; + return new_id; +} +void* KernelState::_FreeInternalHandle(uint32_t id) { + std::unique_lock lock{this->internal_handle_table_mutex_}; + auto iter = internal_handles_.find(id); + xenia_assert(iter != internal_handles_.end()); + auto result = iter->second; + internal_handles_.erase(iter); + return result; +} +X_KPCR_PAGE* KernelState::KPCRPageForCpuNumber(uint32_t i) { + return memory()->TranslateVirtual(processor()->GetPCRForCPU(i)); } -void KernelState::SetProcessTLSVars(X_KPROCESS* process, int num_slots, - int tls_data_size, - int tls_static_data_address) { - uint32_t slots_padded = (num_slots + 3) & 0xFFFFFFFC; - process->tls_data_size = tls_data_size; - process->tls_raw_data_size = tls_data_size; - process->tls_static_data_address = tls_static_data_address; - process->tls_slot_size = 4 * slots_padded; - uint32_t count_div32 = slots_padded / 32; - for (unsigned word_index = 0; word_index < count_div32; ++word_index) { - process->bitmap[word_index] = -1; +X_STATUS KernelState::ContextSwitch(PPCContext* context, X_KTHREAD* guest, + bool from_idle_loop) { + // todo: disable interrupts here! 
+ // this is incomplete + auto pre_swap = [this, context, guest]() { + auto kpcr = GetKPCR(); + + guest->thread_state = KTHREAD_STATE_RUNNING; + auto stkbase = guest->stack_base; + auto stklim = guest->stack_limit; + // it sets r1 to this? i dont think we need, because we have different + // contexts + auto kstask = guest->stack_kernel; + + auto thrd_tls = guest->tls_address; + + uint64_t old_msr = context->msr; + context->DisableEI(); + + kpcr->stack_base_ptr = stkbase; + kpcr->stack_end_ptr = stklim; + kpcr->tls_ptr = thrd_tls; + + guest->num_context_switches_to += 1; + xenia_assert(kpcr->prcb_data.enqueued_processor_threads_lock.pcr_of_owner == + context->HostToGuestVirtual(kpcr)); + context->msr = old_msr; + kpcr->prcb_data.enqueued_processor_threads_lock.pcr_of_owner = 0; + kpcr->apc_software_interrupt_state = + guest->deferred_apc_software_interrupt_state; + }; + + xenia_assert(GetKPCR(context)->prcb_data.current_thread.xlat() == guest); + + auto old_kpcr = GetKPCR(context); + auto xthrd = XThread::FromGuest(guest); + + if (!xthrd) { + xenia_assert(GetKPCR(context)->prcb_data.idle_thread.xlat() == guest); + // if theres no host object for this guest thread, its definitely the idle + // thread for this processor + xenia_assert(guest->process_type == X_PROCTYPE_IDLE && + guest->process_type_dup == X_PROCTYPE_IDLE && + guest->process == GetIdleProcess()); + auto prcb = context->TranslateVirtual(guest->a_prcb_ptr); + + xenia_assert(prcb == &GetKPCR(context)->prcb_data); + + auto hw_thread = processor()->GetCPUThread(prcb->current_cpu); + pre_swap(); + XThread::SetCurrentThread(nullptr); + GetKPCR(context)->prcb_data.current_thread = guest; + GetKPCR(context)->use_alternative_stack = 1; + hw_thread->YieldToScheduler(); + } else { + pre_swap(); + + // wait, what if we're switching threads because we changed the affinity of + // the current thread? would that break this? 
+ xthrd->thread_state()->context()->r[13] = context->r[13]; + GetKPCR(context)->use_alternative_stack = 0; + xthrd->SwitchToDirect(); + } + if (GetKPCR(context) != old_kpcr) { + XELOGE("Thread was switched from one HW thread to another."); + } + xenia_assert(GetKPCR(context)->prcb_data.current_cpu == + GetKThread(context)->current_cpu); + // XThread::SetCurrentThread(saved_currthread); + + // r31 after the swap = our thread + if (!from_idle_loop) { + X_KTHREAD* thread_to_load_from = GetKThread(context); + xenia_assert(thread_to_load_from != guest); + uint32_t r3 = thread_to_load_from->wait_irql; + auto wait_result = thread_to_load_from->wait_result; + GetKPCR(context)->current_irql = r3; + uint32_t intstate = GetKPCR(context)->software_interrupt_state; + if (r3 < intstate) { + xboxkrnl::xeDispatchProcedureCallInterrupt(r3, intstate, context); + } + return wait_result; } + return 0; +} +cpu::XenonInterruptController* KernelState::InterruptControllerFromPCR( + cpu::ppc::PPCContext* context, X_KPCR* pcr) { + uint32_t cpunum = kernel_state()->GetPCRCpuNum(pcr); + auto hwthread = processor()->GetCPUThread(cpunum); + return hwthread->interrupt_controller(); +} - // set remainder of bitset - if (((num_slots + 3) & 0x1C) != 0) - process->bitmap[count_div32] = -1 << (32 - ((num_slots + 3) & 0x1C)); +void KernelState::SetCurrentInterruptPriority(cpu::ppc::PPCContext* context, + X_KPCR* pcr, uint32_t priority) { + auto ic = kernel_state()->InterruptControllerFromPCR(context, pcr); + ic->WriteRegisterOffset(8, static_cast(priority)); + uint64_t ack = ic->ReadRegisterOffset(8); } -void AllocateThread(PPCContext* context) { - uint32_t thread_mem_size = static_cast(context->r[3]); - uint32_t a2 = static_cast(context->r[4]); - uint32_t a3 = static_cast(context->r[5]); - if (thread_mem_size <= 0xFD8) thread_mem_size += 8; - uint32_t result = - xboxkrnl::xeAllocatePoolTypeWithTag(context, thread_mem_size, a2, a3); - if (((unsigned short)result & 0xFFF) != 0) { - result += 2; - } 
+uint32_t KernelState::GetKernelTickCount() { + return memory() + ->TranslateVirtual(GetKeTimestampBundle()) + ->tick_count; +} +uint64_t KernelState::GetKernelSystemTime() { + return memory() + ->TranslateVirtual(GetKeTimestampBundle()) + ->system_time; +} +uint64_t KernelState::GetKernelInterruptTime() { + return memory() + ->TranslateVirtual(GetKeTimestampBundle()) + ->interrupt_time; +} +void KernelState::KernelIdleProcessFunction(cpu::ppc::PPCContext* context) { + context->kernel_state = kernel_state(); + auto kpcr = GetKPCR(context); + auto kthread = GetKThread(context); + auto cpu_thread = context->processor->GetCPUThread( + context->kernel_state->GetPCRCpuNum(kpcr)); + auto interrupt_controller = cpu_thread->interrupt_controller(); + + // cpus 0 and 2 both have some very high priority timing related tasks to do + // (clock interrupt, gpu interrupt) + uint64_t microsleep_interval = kernel_state()->GetPCRCpuNum(kpcr) == 0 || + kernel_state()->GetPCRCpuNum(kpcr) == 2 + ? 20 + : 200; + while (true) { + kpcr->prcb_data.running_idle_thread = kpcr->prcb_data.idle_thread; + while (!kpcr->generic_software_interrupt) { + Clock::QpcParams current_qpc_params = Clock::GetQpcParams(); + auto current_quick_timestamp = Clock::QueryQuickCounter(); + xenia_assert(current_qpc_params == + interrupt_controller->last_qpc_params_); + xenia_assert(context->ExternalInterruptsEnabled()); + + xenia_assert(context->processor + ->GetCPUThread(context->kernel_state->GetPCRCpuNum(kpcr)) + ->interrupt_controller() + ->GetEOI() == 1); + xenia_assert(GetKThread(context) == kthread); + xenia_assert(kpcr->current_irql == IRQL_DISPATCH); + //if we don't execute any interrupt functions, and we don't have a software interrupt available, go to sleep + if (!context->CheckInterrupt()) { + if (!kpcr->generic_software_interrupt) { + // todo: check whether a timed interrupt would be missed due to wait + auto cpu_thread = context->processor->GetCPUThread( + context->kernel_state->GetPCRCpuNum(kpcr)); + 
+ uint64_t appropriate_sleep = + interrupt_controller->ClampSleepMicrosecondsForTimedInterrupt( + microsleep_interval); + + //if less than 5 microseconds just skip sleeping + if (appropriate_sleep > 5) { + cpu_thread->IdleSleep(appropriate_sleep*1000ULL); + } + } + } + } - context->r[3] = static_cast(result); + /* + it doesnt call this function in normal kernel, but the code just looks to + be it inlined + pass true so that the function does not reinstert the idle thread into the + ready list + */ + xboxkrnl::xeHandleDPCsAndThreadSwapping(context, true); + } } -void FreeThread(PPCContext* context) { - uint32_t thread_memory = static_cast(context->r[3]); - if ((thread_memory & 0xFFF) != 0) { - thread_memory -= 8; + +void KernelState::KernelDecrementerInterrupt(void* ud) { + auto context = cpu::ThreadState::GetContext(); + auto kpcr = GetKPCR(context); + uint32_t r5 = kpcr->background_scheduling_active; + uint32_t r3 = kpcr->current_irql; + uint32_t r6 = 0x7FFFFFFF; + uint32_t r7 = 2; + auto cpu = context->processor->GetCPUThread( + context->kernel_state->GetPCRCpuNum(kpcr)); + cpu->SetDecrementerTicks(r6); + if (r5 == 0) { + return; + } + kpcr->generic_software_interrupt = r7; + kpcr->background_scheduling_1B = r7; + kpcr->timeslice_ended = r7; + uint32_t r4 = kpcr->software_interrupt_state; + if (r3 < IRQL_DISPATCH && r3 < r4) { + xboxkrnl::xeDispatchProcedureCallInterrupt(r3, r4, context); } - xboxkrnl::xeFreePool(context, thread_memory); } -void SimpleForwardAllocatePoolTypeWithTag(PPCContext* context) { - uint32_t a1 = static_cast(context->r[3]); - uint32_t a2 = static_cast(context->r[4]); - uint32_t a3 = static_cast(context->r[5]); - context->r[3] = static_cast( - xboxkrnl::xeAllocatePoolTypeWithTag(context, a1, a2, a3)); -} -void SimpleForwardFreePool(PPCContext* context) { - xboxkrnl::xeFreePool(context, static_cast(context->r[3])); -} - -void DeleteMutant(PPCContext* context) { - // todo: this should call kereleasemutant with some specific args - - 
xe::FatalError("DeleteMutant - need KeReleaseMutant(mutant, 1, 1, 0) "); -} -void DeleteTimer(PPCContext* context) { - // todo: this should call KeCancelTimer - xe::FatalError("DeleteTimer - need KeCancelTimer(mutant, 1, 1, 0) "); -} - -void DeleteIoCompletion(PPCContext* context) {} - -void UnknownProcIoDevice(PPCContext* context) {} - -void CloseFileProc(PPCContext* context) {} - -void DeleteFileProc(PPCContext* context) {} - -void UnknownFileProc(PPCContext* context) {} - -void DeleteSymlink(PPCContext* context) { - X_KSYMLINK* lnk = context->TranslateVirtualGPR(context->r[3]); - - context->r[3] = lnk->refed_object_maybe; - xboxkrnl::xeObDereferenceObject(context, lnk->refed_object_maybe); -} -void KernelState::InitializeKernelGuestGlobals() { - kernel_guest_globals_ = memory_->SystemHeapAlloc(sizeof(KernelGuestGlobals)); - - KernelGuestGlobals* block = - memory_->TranslateVirtual(kernel_guest_globals_); - memset(block, 0, sizeof(block)); - - auto idle_process = memory()->TranslateVirtual(GetIdleProcess()); - InitializeProcess(idle_process, X_PROCTYPE_IDLE, 0, 0, 0); - idle_process->unk_0C = 0x7F; - auto system_process = - memory()->TranslateVirtual(GetSystemProcess()); - InitializeProcess(system_process, X_PROCTYPE_SYSTEM, 2, 5, 9); - SetProcessTLSVars(system_process, 32, 0, 0); - - uint32_t oddobject_offset = - kernel_guest_globals_ + offsetof(KernelGuestGlobals, OddObj); - - // init unknown object - - block->OddObj.field0 = 0x1000000; - block->OddObj.field4 = 1; - block->OddObj.points_to_self = - oddobject_offset + offsetof(X_UNKNOWN_TYPE_REFED, points_to_self); - block->OddObj.points_to_prior = block->OddObj.points_to_self; - - // init thread object - block->ExThreadObjectType.pool_tag = 0x65726854; - block->ExThreadObjectType.allocate_proc = - kernel_trampoline_group_.NewLongtermTrampoline(AllocateThread); - - block->ExThreadObjectType.free_proc = - kernel_trampoline_group_.NewLongtermTrampoline(FreeThread); - - // several object types just call 
freepool/allocatepool - uint32_t trampoline_allocatepool = - kernel_trampoline_group_.NewLongtermTrampoline( - SimpleForwardAllocatePoolTypeWithTag); - uint32_t trampoline_freepool = - kernel_trampoline_group_.NewLongtermTrampoline(SimpleForwardFreePool); - - // init event object - block->ExEventObjectType.pool_tag = 0x76657645; - block->ExEventObjectType.allocate_proc = trampoline_allocatepool; - block->ExEventObjectType.free_proc = trampoline_freepool; - - // init mutant object - block->ExMutantObjectType.pool_tag = 0x6174754D; - block->ExMutantObjectType.allocate_proc = trampoline_allocatepool; - block->ExMutantObjectType.free_proc = trampoline_freepool; - - block->ExMutantObjectType.delete_proc = - kernel_trampoline_group_.NewLongtermTrampoline(DeleteMutant); - // init semaphore obj - block->ExSemaphoreObjectType.pool_tag = 0x616D6553; - block->ExSemaphoreObjectType.allocate_proc = trampoline_allocatepool; - block->ExSemaphoreObjectType.free_proc = trampoline_freepool; - // init timer obj - block->ExTimerObjectType.pool_tag = 0x656D6954; - block->ExTimerObjectType.allocate_proc = trampoline_allocatepool; - block->ExTimerObjectType.free_proc = trampoline_freepool; - block->ExTimerObjectType.delete_proc = - kernel_trampoline_group_.NewLongtermTrampoline(DeleteTimer); - // iocompletion object - block->IoCompletionObjectType.pool_tag = 0x706D6F43; - block->IoCompletionObjectType.allocate_proc = trampoline_allocatepool; - block->IoCompletionObjectType.free_proc = trampoline_freepool; - block->IoCompletionObjectType.delete_proc = - kernel_trampoline_group_.NewLongtermTrampoline(DeleteIoCompletion); - block->IoCompletionObjectType.unknown_size_or_object_ = oddobject_offset; - - // iodevice object - block->IoDeviceObjectType.pool_tag = 0x69766544; - block->IoDeviceObjectType.allocate_proc = trampoline_allocatepool; - block->IoDeviceObjectType.free_proc = trampoline_freepool; - block->IoDeviceObjectType.unknown_size_or_object_ = oddobject_offset; - 
block->IoDeviceObjectType.unknown_proc = - kernel_trampoline_group_.NewLongtermTrampoline(UnknownProcIoDevice); - - // file object - block->IoFileObjectType.pool_tag = 0x656C6946; - block->IoFileObjectType.allocate_proc = trampoline_allocatepool; - block->IoFileObjectType.free_proc = trampoline_freepool; - block->IoFileObjectType.unknown_size_or_object_ = - 0x38; // sizeof fileobject, i believe - block->IoFileObjectType.close_proc = - kernel_trampoline_group_.NewLongtermTrampoline(CloseFileProc); - block->IoFileObjectType.delete_proc = - kernel_trampoline_group_.NewLongtermTrampoline(DeleteFileProc); - block->IoFileObjectType.unknown_proc = - kernel_trampoline_group_.NewLongtermTrampoline(UnknownFileProc); - - // directory object - block->ObDirectoryObjectType.pool_tag = 0x65726944; - block->ObDirectoryObjectType.allocate_proc = trampoline_allocatepool; - block->ObDirectoryObjectType.free_proc = trampoline_freepool; - block->ObDirectoryObjectType.unknown_size_or_object_ = oddobject_offset; - - // symlink object - block->ObSymbolicLinkObjectType.pool_tag = 0x626D7953; - block->ObSymbolicLinkObjectType.allocate_proc = trampoline_allocatepool; - block->ObSymbolicLinkObjectType.free_proc = trampoline_freepool; - block->ObSymbolicLinkObjectType.unknown_size_or_object_ = oddobject_offset; - block->ObSymbolicLinkObjectType.delete_proc = - kernel_trampoline_group_.NewLongtermTrampoline(DeleteSymlink); - -#define offsetof32(s, m) static_cast( offsetof(s, m) ) - - host_object_type_enum_to_guest_object_type_ptr_ = { - {XObject::Type::Event, - kernel_guest_globals_ + - offsetof32(KernelGuestGlobals, ExEventObjectType)}, - {XObject::Type::Semaphore, - kernel_guest_globals_ + - offsetof32(KernelGuestGlobals, ExSemaphoreObjectType)}, - {XObject::Type::Thread, - kernel_guest_globals_ + - offsetof32(KernelGuestGlobals, ExThreadObjectType)}, - {XObject::Type::File, - kernel_guest_globals_ + - offsetof32(KernelGuestGlobals, IoFileObjectType)}, - {XObject::Type::Mutant, - 
kernel_guest_globals_ + - offsetof32(KernelGuestGlobals, ExMutantObjectType)}, - {XObject::Type::Device, - kernel_guest_globals_ + - offsetof32(KernelGuestGlobals, IoDeviceObjectType)}}; - xboxkrnl::xeKeSetEvent(&block->UsbdBootEnumerationDoneEvent, 1, 0); +KernelGuestGlobals* KernelState::GetKernelGuestGlobals( + cpu::ppc::PPCContext* context) { + return context->TranslateVirtual( + GetKernelGuestGlobals()); +} + +void KernelState::AudioInterruptDPC(cpu::ppc::PPCContext* context) { + xboxkrnl::xeKeSetEvent(context, + &context->kernel_state->GetKernelGuestGlobals(context) + ->audio_interrupt_dpc_event_, + 1, 0); +} +// this executes at IRQL_AUDIO +void KernelState::AudioInterrupt(void* v) { + auto context = cpu::ThreadState::GetContext(); + auto kpcr = GetKPCR(context); + auto ic = context->kernel_state->InterruptControllerFromPCR(context, kpcr); + auto old_irql = kpcr->current_irql; + kpcr->current_irql = IRQL_AUDIO; + ic->WriteRegisterOffset(8, IRQL_AUDIO); + + xboxkrnl::xeKeInsertQueueDpc( + &context->kernel_state->GetKernelGuestGlobals(context) + ->audio_interrupt_dpc_, + 0, 0, context); + + kpcr->current_irql = old_irql; + ic->WriteRegisterOffset(8, old_irql); + ic->SetEOI(old_irql); +} + +void KernelState::InitKernelAuxstack(X_KTHREAD* thread) { + thread->kernel_aux_stack_base_ = + memory()->SystemHeapAlloc(kKernelAuxstackSize); + thread->kernel_aux_stack_current_ = thread->kernel_aux_stack_base_; + thread->kernel_aux_stack_limit_ = + thread->kernel_aux_stack_current_ + kKernelAuxstackSize; } } // namespace kernel } // namespace xe diff --git a/src/xenia/kernel/kernel_state.h b/src/xenia/kernel/kernel_state.h index dde6bd02b6..adabe28235 100644 --- a/src/xenia/kernel/kernel_state.h +++ b/src/xenia/kernel/kernel_state.h @@ -23,6 +23,9 @@ #include "xenia/base/mutex.h" #include "xenia/cpu/backend/backend.h" #include "xenia/cpu/export_resolver.h" +#include "xenia/kernel/kernel_guest_structures.h" +#include "xenia/kernel/util/guest_object_table.h" +#include 
"xenia/kernel/util/guest_timer_list.h" #include "xenia/kernel/util/kernel_fwd.h" #include "xenia/kernel/util/native_list.h" #include "xenia/kernel/util/object_table.h" @@ -33,8 +36,6 @@ #include "xenia/kernel/xevent.h" #include "xenia/memory.h" #include "xenia/vfs/virtual_file_system.h" -#include "xenia/xbox.h" - namespace xe { class ByteStream; class Emulator; @@ -48,43 +49,6 @@ namespace kernel { constexpr fourcc_t kKernelSaveSignature = make_fourcc("KRNL"); -// (?), used by KeGetCurrentProcessType -constexpr uint32_t X_PROCTYPE_IDLE = 0; -constexpr uint32_t X_PROCTYPE_TITLE = 1; -constexpr uint32_t X_PROCTYPE_SYSTEM = 2; - -struct X_KPROCESS { - X_KSPINLOCK thread_list_spinlock; - // list of threads in this process, guarded by the spinlock above - X_LIST_ENTRY thread_list; - - xe::be unk_0C; - // kernel sets this to point to a section of size 0x2F700 called CLRDATAA, - // except it clears bit 31 of the pointer. in 17559 the address is 0x801C0000, - // so it sets this ptr to 0x1C0000 - xe::be clrdataa_masked_ptr; - xe::be thread_count; - uint8_t unk_18; - uint8_t unk_19; - uint8_t unk_1A; - uint8_t unk_1B; - xe::be kernel_stack_size; - xe::be tls_static_data_address; - xe::be tls_data_size; - xe::be tls_raw_data_size; - xe::be tls_slot_size; - // ExCreateThread calls a subfunc references this field, returns - // X_STATUS_PROCESS_IS_TERMINATING if true - uint8_t is_terminating; - // one of X_PROCTYPE_ - uint8_t process_type; - xe::be bitmap[8]; - xe::be unk_50; - X_LIST_ENTRY unk_54; - xe::be unk_5C; -}; -static_assert_size(X_KPROCESS, 0x60); - struct TerminateNotification { uint32_t guest_routine; uint32_t priority; @@ -94,21 +58,13 @@ struct TerminateNotification { // a bit like the timers on KUSER_SHARED on normal win32 // https://www.geoffchappell.com/studies/windows/km/ntoskrnl/inc/api/ntexapi_x/kuser_shared_data/index.htm struct X_TIME_STAMP_BUNDLE { - uint64_t interrupt_time; + xe::be interrupt_time; // i assume system_time is in 100 ns intervals like on 
win32 - uint64_t system_time; - uint32_t tick_count; - uint32_t padding; + xe::be system_time; + xe::be tick_count; + xe::be padding; }; -struct X_UNKNOWN_TYPE_REFED { - xe::be field0; - xe::be field4; - // this is definitely a LIST_ENTRY? - xe::be points_to_self; // this field points to itself - xe::be - points_to_prior; // points to the previous field, which points to itself -}; -static_assert_size(X_UNKNOWN_TYPE_REFED, 16); + struct KernelGuestGlobals { X_OBJECT_TYPE ExThreadObjectType; X_OBJECT_TYPE ExEventObjectType; @@ -120,9 +76,16 @@ struct KernelGuestGlobals { X_OBJECT_TYPE IoFileObjectType; X_OBJECT_TYPE ObDirectoryObjectType; X_OBJECT_TYPE ObSymbolicLinkObjectType; + + // these are Xam object types, and not exported + X_OBJECT_TYPE XamNotifyListenerObjectType; + X_OBJECT_TYPE XamEnumeratorObjectType; + + X_DISPATCH_HEADER XamDefaultObject; + // a constant buffer that some object types' "unknown_size_or_object" field // points to - X_UNKNOWN_TYPE_REFED OddObj; + X_DISPATCH_HEADER XboxKernelDefaultObject; X_KPROCESS idle_process; // X_PROCTYPE_IDLE. runs in interrupt contexts. is // also the context the kernel starts in? X_KPROCESS title_process; // X_PROCTYPE_TITLE @@ -131,16 +94,60 @@ struct KernelGuestGlobals { // ExCreateThread and the thread flag 0x2 // locks. - X_KSPINLOCK dispatcher_lock; // called the "dispatcher lock" in nt 3.5 ppc - // .dbg file. Used basically everywhere that - // DISPATCHER_HEADER'd objects appear + alignas(128) + X_KSPINLOCK dispatcher_lock; // called the "dispatcher lock" in nt 3.5 + // ppc .dbg file. Used basically everywhere + // that DISPATCHER_HEADER'd objects appear + + X_KSPINLOCK timer_table_spinlock;//does not exist on real hw + // this lock is only used in some Ob functions. It's odd that it is used at // all, as each table already has its own spinlock. 
- X_KSPINLOCK ob_lock; + alignas(128) X_KSPINLOCK ob_lock; // if LLE emulating Xam, this is needed or you get an immediate freeze X_KEVENT UsbdBootEnumerationDoneEvent; + util::X_HANDLE_TABLE TitleObjectTable; + util::X_HANDLE_TABLE SystemObjectTable; + // threadids use a different table + util::X_HANDLE_TABLE TitleThreadIdTable; + util::X_HANDLE_TABLE SystemThreadIdTable; + + // util::X_TIMER_TABLE timer_table; + + // for very bad timer impl + util::X_TYPED_LIST + running_timers; + + X_TIME_STAMP_BUNDLE KeTimestampBundle; + + uint32_t guest_nullsub; + uint32_t suspendthread_apc_routine; + uint32_t extimer_dpc_routine; + uint32_t extimer_apc_kernel_routine; + XDPC graphics_interrupt_dpc; + + X_KSPINLOCK tls_lock; + + uint32_t background_processors; + + XDPC command_processor_interrupt_dpcs[6]; //one per hw thread + + X_KEVENT title_terminated_event; + xe::be VdGlobalDevice; + xe::be VdGlobalXamDevice; + xe::be VdGpuClockInMHz; + + X_RTL_CRITICAL_SECTION VdHSIOCalibrationLock; + + X_KEVENT dispatch_queue_event_; + + X_KEVENT audio_interrupt_dpc_event_; + XDPC audio_interrupt_dpc_; + }; + +struct X_KPCR_PAGE; struct DPCImpersonationScope { uint8_t previous_irql_; }; @@ -211,8 +218,8 @@ class KernelState { return kernel_guest_globals_ + offsetof(KernelGuestGlobals, idle_process); } - uint32_t AllocateTLS(); - void FreeTLS(uint32_t slot); + uint32_t AllocateTLS(cpu::ppc::PPCContext* context); + void FreeTLS(cpu::ppc::PPCContext* context, uint32_t slot); void RegisterTitleTerminateNotification(uint32_t routine, uint32_t priority); void RemoveTitleTerminateNotification(uint32_t routine); @@ -235,6 +242,8 @@ class KernelState { void UnloadUserModule(const object_ref& module, bool call_entry = true); + void XamCall(cpu::ppc::PPCContext* context, uint16_t ordinal); + object_ref GetKernelModule(const std::string_view name); template object_ref LoadKernelModule() { @@ -263,8 +272,6 @@ class KernelState { void UnregisterNotifyListener(XNotifyListener* listener); void 
BroadcastNotification(XNotificationID id, uint32_t data); - util::NativeList* dpc_list() { return &dpc_list_; } - void CompleteOverlapped(uint32_t overlapped_ptr, X_RESULT result); void CompleteOverlappedEx(uint32_t overlapped_ptr, X_RESULT result, uint32_t extended_error, uint32_t length); @@ -302,22 +309,97 @@ class KernelState { XE_NOINLINE XE_COLD uint32_t CreateKeTimestampBundle(); - void UpdateKeTimestampBundle(); + void SystemClockInterrupt(); - void BeginDPCImpersonation(cpu::ppc::PPCContext* context, DPCImpersonationScope& scope); - void EndDPCImpersonation(cpu::ppc::PPCContext* context, - DPCImpersonationScope& end_scope); - - void EmulateCPInterruptDPC(uint32_t interrupt_callback,uint32_t interrupt_callback_data, uint32_t source, + void EmulateCPInterrupt(uint32_t interrupt_callback, + uint32_t interrupt_callback_data, uint32_t source, uint32_t cpu); + uint32_t LockDispatcher(cpu::ppc::PPCContext* context); + void UnlockDispatcher(cpu::ppc::PPCContext* context, uint32_t irql); + X_KSPINLOCK* GetDispatcherLock(cpu::ppc::PPCContext* context); + + void LockDispatcherAtIrql(cpu::ppc::PPCContext* context); + void UnlockDispatcherAtIrql(cpu::ppc::PPCContext* context); + uint32_t ReferenceObjectByHandle(cpu::ppc::PPCContext* context, + uint32_t handle, uint32_t guest_object_type, + uint32_t* object_out); + void DereferenceObject(cpu::ppc::PPCContext* context, uint32_t object); + + void AssertDispatcherLocked(cpu::ppc::PPCContext* context); + uint32_t AllocateInternalHandle(void* ud); + void* _FreeInternalHandle(uint32_t id); + template + T* LookupInternalHandle(uint32_t id) { + std::unique_lock lock{internal_handle_table_mutex_}; + return reinterpret_cast(internal_handles_.find(id)->second); + } + template + T* FreeInternalHandle(uint32_t id) { + return reinterpret_cast(_FreeInternalHandle(id)); + } + + uint32_t GetKernelTickCount(); + uint64_t GetKernelSystemTime(); + uint64_t GetKernelInterruptTime(); + X_KPCR_PAGE* KPCRPageForCpuNumber(uint32_t i); + 
X_STATUS ContextSwitch(cpu::ppc::PPCContext* context, X_KTHREAD* guest, + bool from_idle_loop = false); + // the cpu number is encoded in the pcr address + uint32_t GetPCRCpuNum(X_KPCR* pcr) { + return (memory_->HostToGuestVirtual(pcr) >> 12) & 0xF; + } + cpu::XenonInterruptController* InterruptControllerFromPCR( + cpu::ppc::PPCContext* context, X_KPCR* pcr); + void SetCurrentInterruptPriority(cpu::ppc::PPCContext* context, X_KPCR* pcr, + uint32_t priority); + static void GenericExternalInterruptEpilog(cpu::ppc::PPCContext* context, uint32_t r3); + + static void GraphicsInterruptDPC(cpu::ppc::PPCContext* context); + static void CPInterruptIPI(void* ud); + + static cpu::HWThread* HWThreadFor(cpu::ppc::PPCContext* context); + + static void TriggerTrueExternalInterrupt(cpu::ppc::PPCContext* context); + + static void AudioInterruptDPC(cpu::ppc::PPCContext* context); + static void AudioInterrupt(void* v); + void InitKernelAuxstack(X_KTHREAD* thread); private: + static void LaunchModuleInterrupt(void* ud); void LoadKernelModule(object_ref kernel_module); void InitializeProcess(X_KPROCESS* process, uint32_t type, char unk_18, char unk_19, char unk_1A); void SetProcessTLSVars(X_KPROCESS* process, int num_slots, int tls_data_size, int tls_static_data_address); - void InitializeKernelGuestGlobals(); + + void CPU0WaitForLaunch(cpu::ppc::PPCContext* context); + void BootKernel(); + void CreateDispatchThread(); + /* + initializes objects/data that is normally pre-initialized in the rdata + section of the kernel, or any other data that does not require execution on + a PPCContext to init + */ + void BootInitializeStatics(); + + static void ForwardBootInitializeCPU0InSystemThread( + cpu::ppc::PPCContext* context); + + //system thread gets created by cpu0 to perform additional init + void BootInitializeCPU0InSystemThread(cpu::ppc::PPCContext* context); + // runs on cpu0 + void BootInitializeXam(cpu::ppc::PPCContext* context); + + void BootCPU0(cpu::ppc::PPCContext* context, 
X_KPCR* kpcr); + void BootCPU1Through5(cpu::ppc::PPCContext* context, X_KPCR* kpcr); + + static void HWThreadBootFunction(cpu::ppc::PPCContext* context, void* ud); + + void SetupProcessorPCR(uint32_t which_processor_index); + void SetupProcessorIdleThread(uint32_t which_processor_index); + void InitProcessorStack(X_KPCR* pcr); + Emulator* emulator_; Memory* memory_; cpu::Processor* processor_; @@ -344,25 +426,28 @@ class KernelState { std::atomic dispatch_thread_running_; object_ref dispatch_thread_; - // Must be guarded by the global critical region. - util::NativeList dpc_list_; - std::condition_variable_any dispatch_cond_; - std::list> dispatch_queue_; - - BitMap tls_bitmap_; - uint32_t ke_timestamp_bundle_ptr_ = 0; - std::unique_ptr timestamp_timer_; + threading::AtomicListHeader dispatch_queue_; cpu::backend::GuestTrampolineGroup kernel_trampoline_group_; - //fixed address referenced by dashboards. Data is currently unknown + // fixed address referenced by dashboards. Data is currently unknown uint32_t strange_hardcoded_page_ = 0x8E038634 & (~0xFFFF); uint32_t strange_hardcoded_location_ = 0x8E038634; + // assign integer ids to arbitrary data, for stuffing threading::WaitHandle + // into header flink_ptr + std::unordered_map internal_handles_; + uint32_t current_internal_handle_ = 0x66180000; + xe_mutex internal_handle_table_mutex_; + static void KernelIdleProcessFunction(cpu::ppc::PPCContext* context); + static void KernelDecrementerInterrupt(void* ud); + void SetupKPCRPageForCPU(uint32_t cpunum); friend class XObject; + public: uint32_t dash_context_ = 0; std::unordered_map host_object_type_enum_to_guest_object_type_ptr_; uint32_t GetKernelGuestGlobals() const { return kernel_guest_globals_; } + KernelGuestGlobals* GetKernelGuestGlobals(cpu::ppc::PPCContext* context); }; } // namespace kernel diff --git a/src/xenia/kernel/kernel_state_boot.cc b/src/xenia/kernel/kernel_state_boot.cc new file mode 100644 index 0000000000..b4918c4afd --- /dev/null +++ 
b/src/xenia/kernel/kernel_state_boot.cc @@ -0,0 +1,799 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/kernel/kernel_state.h" + +#include + +#include "third_party/fmt/include/fmt/format.h" +#include "xenia/apu/audio_system.h" +#include "xenia/base/assert.h" +#include "xenia/base/byte_stream.h" +#include "xenia/base/logging.h" +#include "xenia/base/string.h" +#include "xenia/cpu/processor.h" +#include "xenia/emulator.h" +#include "xenia/gpu/graphics_system.h" +#include "xenia/hid/input_system.h" +#include "xenia/kernel/user_module.h" +#include "xenia/kernel/util/shim_utils.h" +#include "xenia/kernel/xam/xam_module.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_memory.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_module.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_ob.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h" +#include "xenia/kernel/xevent.h" +#include "xenia/kernel/xmodule.h" +#include "xenia/kernel/xnotifylistener.h" +#include "xenia/kernel/xobject.h" +#include "xenia/kernel/xthread.h" +namespace xe { +namespace kernel { + +void KernelState::InitializeProcess(X_KPROCESS* process, uint32_t type, + char unk_18, char unk_19, char unk_1A) { + uint32_t guest_kprocess = memory()->HostToGuestVirtual(process); + + uint32_t thread_list_guest_ptr = + guest_kprocess + offsetof(X_KPROCESS, thread_list); + + process->unk_18 = unk_18; + process->unk_19 = unk_19; + process->unk_1A = unk_1A; + util::XeInitializeListHead(&process->thread_list, thread_list_guest_ptr); + process->quantum = 60; + // doubt any guest code uses this ptr, which i think probably has something 
to + // do with the page table + process->clrdataa_masked_ptr = 0; + // clrdataa_ & ~(1U << 31); + process->thread_count = 0; + // process->unk_1B = 0x06; + process->kernel_stack_size = 16 * 1024; + process->tls_slot_size = 0x80; + + process->process_type = type; + uint32_t unk_list_guest_ptr = guest_kprocess + offsetof(X_KPROCESS, unk_54); + // TODO(benvanik): figure out what this list is. + util::XeInitializeListHead(&process->unk_54, unk_list_guest_ptr); +} + +void KernelState::SetProcessTLSVars(X_KPROCESS* process, int num_slots, + int tls_data_size, + int tls_static_data_address) { + uint32_t slots_padded = (num_slots + 3) & 0xFFFFFFFC; + process->tls_data_size = tls_data_size; + process->tls_raw_data_size = tls_data_size; + process->tls_static_data_address = tls_static_data_address; + process->tls_slot_size = 4 * slots_padded; + uint32_t count_div32 = slots_padded / 32; + for (unsigned word_index = 0; word_index < count_div32; ++word_index) { + process->tls_slot_bitmap[word_index] = -1; + } + + // set remainder of bitset + if (((num_slots + 3) & 0x1C) != 0) + process->tls_slot_bitmap[count_div32] = static_cast(-1) + << (32 - ((num_slots + 3) & 0x1C)); +} +void AllocateThread(PPCContext* context) { + uint32_t thread_mem_size = static_cast(context->r[3]); + uint32_t a2 = static_cast(context->r[4]); + uint32_t a3 = static_cast(context->r[5]); + if (thread_mem_size <= 0xFD8) { + thread_mem_size += 8; + } + uint32_t result = + xboxkrnl::xeAllocatePoolTypeWithTag(context, thread_mem_size, a2, a3); + if ((result & 0xFFF) != 0) { + result += 2; + } + + context->r[3] = static_cast(result); +} +void FreeThread(PPCContext* context) { + uint32_t thread_memory = static_cast(context->r[3]); + if ((thread_memory & 0xFFF) != 0) { + thread_memory -= 8; + } + xboxkrnl::xeFreePool(context, thread_memory); +} + +void SimpleForwardAllocatePoolTypeWithTag(PPCContext* context) { + uint32_t size = static_cast(context->r[3]); + uint32_t tag = static_cast(context->r[4]); + uint32_t 
pool_selector = static_cast(context->r[5]); + context->r[3] = static_cast( + xboxkrnl::xeAllocatePoolTypeWithTag(context, size, tag, pool_selector)); +} +void SimpleForwardFreePool(PPCContext* context) { + xboxkrnl::xeFreePool(context, static_cast(context->r[3])); +} + +void DeleteMutant(PPCContext* context) { + xboxkrnl::xeKeReleaseMutant( + context, context->TranslateVirtualGPR(context->r[3]), 1, true, + 0); +} +void DeleteTimer(PPCContext* context) { + xboxkrnl::xeKeCancelTimer( + context, context->TranslateVirtualGPR(context->r[3])); +} + +void DeleteIoCompletion(PPCContext* context) {} + +void UnknownProcIoDevice(PPCContext* context) {} + +void CloseFileProc(PPCContext* context) {} + +void DeleteFileProc(PPCContext* context) {} + +void UnknownFileProc(PPCContext* context) {} + +void DeleteSymlink(PPCContext* context) { + X_KSYMLINK* lnk = context->TranslateVirtualGPR(context->r[3]); + + context->r[3] = lnk->refed_object_maybe; + xboxkrnl::xeObDereferenceObject(context, lnk->refed_object_maybe); +} + +static void InitializeHandleTable(util::X_HANDLE_TABLE* result, + unsigned char poolarg, + unsigned char handle_high_byte, + unsigned char unk_36) { + result->unk_pool_arg_34 = poolarg; + result->handle_high_byte = handle_high_byte; + result->unk_36 = unk_36; + result->num_handles = 0; + result->free_offset = 0; + result->highest_allocated_offset = 0; + result->table_dynamic_buckets = 0; +} + +static void GuestClockInterruptForwarder(void* ud) { + reinterpret_cast(ud)->SystemClockInterrupt(); +} +// called by HWClock on hw clock thread. 
sends an interrupt to guest cpu 0 to +// run the kernels clock interrupt function +static void HWClockCallback(cpu::Processor* processor) { + for (unsigned thrd = 0; thrd < 6; ++thrd) { + auto thrd0 = processor->GetCPUThread(thrd); + cpu::SendInterruptArguments interrupt_arguments; + interrupt_arguments.ipi_func = GuestClockInterruptForwarder; + interrupt_arguments.ud = kernel_state(); + interrupt_arguments.wait_done = false; + interrupt_arguments.irql_ = IRQL_CLOCK; + + while (!thrd0->SendGuestIPI(interrupt_arguments) ) { + } + } +} +static void DefaultInterruptProc(PPCContext* context) {} + +static void IPIInterruptProc(PPCContext* context) {} + +// ues _KTHREAD list_entry field at 0x94 +// this dpc uses none of the routine args +static void DestroyThreadDpc(PPCContext* context) { + ShiftedPointer v10 = nullptr; + + context->kernel_state->LockDispatcherAtIrql(context); + auto v6 = &GetKPCR(context)->prcb_data.terminating_threads_list; + while (1) { + v10 = context->TranslateVirtual(v6->flink_ptr); + if (v10.m_base == v6) { + break; + } + auto v7 = ADJ(v10)->ready_prcb_entry.flink_ptr; + auto thrd = ADJ(v10); + auto v9 = ADJ(v10)->ready_prcb_entry.blink_ptr; + v9->flink_ptr = v7; + v7->blink_ptr = v9; + --context->TranslateVirtual(thrd->process)->thread_count; + context->kernel_state->UnlockDispatcherAtIrql(context); + context->kernel_state->object_table()->RemoveHandle(thrd->thread_id); + + if (!thrd->unk_CB) { + xboxkrnl::xeMmDeleteKernelStack(thrd->stack_alloc_base, + thrd->stack_limit); + context->kernel_state->memory()->SystemHeapFree( + thrd->kernel_aux_stack_base_); + thrd->kernel_aux_stack_base_ = 0U; + } else { + xenia_assert(false); + } + // todo: this needs to be kept uncommented for now, until object rework + // xboxkrnl::xeObDereferenceObject(context, + // context->HostToGuestVirtual(thrd)); + xboxkrnl::xeObDereferenceObject(context, thrd); + context->kernel_state->LockDispatcherAtIrql(context); + } + auto kgb = 
context->kernel_state->GetKernelGuestGlobals(context); + auto title_process = &kgb->title_process; + + if (title_process->is_terminating) { + if (!title_process->thread_count) { + auto term_event = &kgb->title_terminated_event; + + term_event->header.signal_state = 1; + + if (!util::XeIsListEmpty(&term_event->header.wait_list, context)) { + xboxkrnl::xeDispatchSignalStateChange(context, &term_event->header, 1); + } + } + } + xboxkrnl::xeDispatcherSpinlockUnlock(context, &kgb->dispatcher_lock, + IRQL_DISPATCH); +} + +static void ThreadSwitchHelper(PPCContext* context, X_KPROCESS* process) { + xboxkrnl::xeKeKfAcquireSpinLock(context, &process->thread_list_spinlock, + false); + + context->kernel_state->LockDispatcherAtIrql(context); + + auto v3 = &GetKPCR(context)->prcb_data; + for (auto&& i : process->thread_list.IterateForward(context)) { + if (i.a_prcb_ptr.xlat() == v3 && i.another_prcb_ptr.xlat() != v3 && + i.thread_state != KTHREAD_STATE_UNKNOWN) { + xboxkrnl::xeHandleReadyThreadOnDifferentProcessor(context, &i); + } + } + context->kernel_state->UnlockDispatcherAtIrql(context); + + xboxkrnl::xeKeKfReleaseSpinLock(context, &process->thread_list_spinlock, 0, + false); +} + +static void ThreadSwitchRelatedDpc(PPCContext* context) { + // iterates over threads in the game process + threads in the system process + auto kgg = context->kernel_state->GetKernelGuestGlobals(context); + + ThreadSwitchHelper(context, &kgg->title_process); + ThreadSwitchHelper(context, &kgg->system_process); +} + +void KernelState::InitProcessorStack(X_KPCR* pcr) { + pcr->unk_stack_5c = xboxkrnl::xeMmCreateKernelStack(0x4000, 2); + uint32_t other_stack = xboxkrnl::xeMmCreateKernelStack(0x4000, 2); + pcr->stack_base_ptr = other_stack; + pcr->alt_stack_base_ptr = other_stack; + pcr->use_alternative_stack = other_stack; + pcr->stack_end_ptr = other_stack - 0x4000; + pcr->alt_stack_end_ptr = other_stack - 0x4000; +} + +void KernelState::SetupProcessorPCR(uint32_t which_processor_index) { + 
XELOGD("Setting up processor {} pcr", which_processor_index); + X_KPCR_PAGE* page_for = this->KPCRPageForCpuNumber(which_processor_index); + memset(page_for, 0, 4096); + + auto pcr = &page_for->pcr; + pcr->prcb_data.current_cpu = static_cast(which_processor_index); + pcr->prcb_data.processor_mask = 1U << which_processor_index; + pcr->prcb = memory()->HostToGuestVirtual(&pcr->prcb_data); + + XeInitializeListHead(&pcr->prcb_data.queued_dpcs_list_head, memory()); + for (uint32_t i = 0; i < 32; ++i) { + util::XeInitializeListHead(&pcr->prcb_data.ready_threads_by_priority[i], + memory()); + } + pcr->prcb_data.unk_mask_64 = 0xF6DBFC03; + pcr->prcb_data.thread_exit_dpc.Initialize( + kernel_trampoline_group_.NewLongtermTrampoline(DestroyThreadDpc), 0); + // remember, DPC cpu indices start at 1 + pcr->prcb_data.thread_exit_dpc.desired_cpu_number = which_processor_index + 1; + util::XeInitializeListHead(&pcr->prcb_data.terminating_threads_list, + memory()); + + pcr->prcb_data.switch_thread_processor_dpc.Initialize( + kernel_trampoline_group_.NewLongtermTrampoline(ThreadSwitchRelatedDpc), + 0); + + pcr->prcb_data.switch_thread_processor_dpc.desired_cpu_number = + which_processor_index + 1; + + // this cpu needs special handling, its initializing the kernel + // InitProcessorStack gets called for it later, after all kernel init + if (which_processor_index == 0) { + uint32_t protdata = processor()->GetPCRForCPU(0); + uint32_t protdata_stackbase = processor()->GetPCRForCPU(0) + 0x7000; + + pcr->stack_base_ptr = protdata_stackbase; + pcr->alt_stack_base_ptr = protdata_stackbase; + pcr->use_alternative_stack = protdata_stackbase; + // it looks like it actually sets it to pcr3?? 
that seems wrong + // probably a hexrays/ida bug or even a kernel bug + + // we are only giving it a page of stack though + pcr->alt_stack_end_ptr = protdata + 0x6000; + pcr->stack_end_ptr = protdata + 0x6000; + } else { + this->InitProcessorStack(pcr); + } + uint32_t default_interrupt = + kernel_trampoline_group_.NewLongtermTrampoline(DefaultInterruptProc); + for (uint32_t i = 0; i < 32; ++i) { + pcr->interrupt_handlers[i] = default_interrupt; + } + + // todo: missing some interrupts here + + pcr->interrupt_handlers[0x1E] = + kernel_trampoline_group_.NewLongtermTrampoline(IPIInterruptProc); + + pcr->current_irql = IRQL_PASSIVE; + pcr->thread_fpu_related = -1; + pcr->msr_mask = -1; + pcr->thread_vmx_related = -1; +} +// need to implement "initialize thread" function! +// this gets called after initial pcr +void KernelState::SetupProcessorIdleThread(uint32_t which_processor_index) { + XELOGD("Setting up processor {} idle thread", which_processor_index); + X_KPCR_PAGE* page_for = this->KPCRPageForCpuNumber(which_processor_index); + X_KTHREAD* thread = &page_for->idle_process_thread; + thread->thread_state = KTHREAD_STATE_RUNNING; + + thread->priority = 31; + thread->wait_irql = IRQL_DISPATCH; + thread->may_queue_apcs = true; + + auto prcb_guest = memory()->HostToGuestVirtual(&page_for->pcr.prcb_data); + thread->a_prcb_ptr = prcb_guest; + thread->another_prcb_ptr = prcb_guest; + thread->current_cpu = page_for->pcr.prcb_data.current_cpu; + auto idle_process_ptr = GetIdleProcess(); + thread->process = idle_process_ptr; + auto guest_thread = memory()->HostToGuestVirtual(thread); + page_for->pcr.prcb_data.current_thread = guest_thread; + page_for->pcr.prcb_data.idle_thread = guest_thread; + + auto process = memory()->TranslateVirtual(idle_process_ptr); + // priority related values + thread->unk_C8 = process->unk_18; + auto v19 = process->unk_19; + thread->unk_C9 = v19; + auto v20 = process->unk_1A; + thread->unk_B9 = v19; + thread->unk_CA = v20; + // timeslice related + 
thread->quantum = process->quantum; + thread->msr_mask = 0xFDFFD7FF; + InitKernelAuxstack(thread); +} + +void KernelState::SetupKPCRPageForCPU(uint32_t cpunum) { + XELOGD("SetupKPCRPageForCpu - cpu {}", cpunum); + SetupProcessorPCR(cpunum); + SetupProcessorIdleThread(cpunum); +} + +static void KernelNullsub(PPCContext* context) {} +void KernelState::BootInitializeStatics() { + XELOGD("Initializing kernel statics"); + kernel_guest_globals_ = memory_->SystemHeapAlloc(sizeof(KernelGuestGlobals)); + + KernelGuestGlobals* block = + memory_->TranslateVirtual(kernel_guest_globals_); + memset(block, 0, sizeof(block)); + + block->background_processors = 0x3C; + + auto idle_process = memory()->TranslateVirtual(GetIdleProcess()); + InitializeProcess(idle_process, X_PROCTYPE_IDLE, 0, 0, 0); + idle_process->quantum = 0x7F; + auto system_process = + memory()->TranslateVirtual(GetSystemProcess()); + InitializeProcess(system_process, X_PROCTYPE_SYSTEM, 2, 5, 9); + SetProcessTLSVars(system_process, 32, 0, 0); + + InitializeHandleTable(&block->TitleObjectTable, X_PROCTYPE_TITLE, 0xF8, 0); + InitializeHandleTable(&block->TitleThreadIdTable, X_PROCTYPE_TITLE, 0xF9, 0); + + // i cant find where these get initialized on 17559, but we already know what + // to fill in here + InitializeHandleTable(&block->SystemObjectTable, X_PROCTYPE_SYSTEM, 0xFA, 0); + InitializeHandleTable(&block->SystemThreadIdTable, X_PROCTYPE_SYSTEM, 0xFB, + 0); + + block->running_timers.Initialize(memory()); + uint32_t oddobject_offset = + kernel_guest_globals_ + + offsetof(KernelGuestGlobals, XboxKernelDefaultObject); + + // init unknown object + + block->XboxKernelDefaultObject.type = DISPATCHER_AUTO_RESET_EVENT; + + block->XboxKernelDefaultObject.signal_state = 1; + util::XeInitializeListHead( + &block->XboxKernelDefaultObject.wait_list, + oddobject_offset + offsetof32(X_DISPATCH_HEADER, wait_list)); + + // several object types just call freepool/allocatepool + uint32_t trampoline_allocatepool = + 
kernel_trampoline_group_.NewLongtermTrampoline( + SimpleForwardAllocatePoolTypeWithTag); + uint32_t trampoline_freepool = + kernel_trampoline_group_.NewLongtermTrampoline(SimpleForwardFreePool); + // init thread object + block->ExThreadObjectType.pool_tag = 0x65726854; + block->ExThreadObjectType.allocate_proc = + trampoline_allocatepool; // kernel_trampoline_group_.NewLongtermTrampoline(AllocateThread); + + block->ExThreadObjectType.free_proc = trampoline_freepool; + // kernel_trampoline_group_.NewLongtermTrampoline(FreeThread); + + // init event object + block->ExEventObjectType.pool_tag = 0x76657645; + block->ExEventObjectType.allocate_proc = trampoline_allocatepool; + block->ExEventObjectType.free_proc = trampoline_freepool; + + // init mutant object + block->ExMutantObjectType.pool_tag = 0x6174754D; + block->ExMutantObjectType.allocate_proc = trampoline_allocatepool; + block->ExMutantObjectType.free_proc = trampoline_freepool; + + block->ExMutantObjectType.delete_proc = + kernel_trampoline_group_.NewLongtermTrampoline(DeleteMutant); + // init semaphore obj + block->ExSemaphoreObjectType.pool_tag = 0x616D6553; + block->ExSemaphoreObjectType.allocate_proc = trampoline_allocatepool; + block->ExSemaphoreObjectType.free_proc = trampoline_freepool; + // init timer obj + block->ExTimerObjectType.pool_tag = 0x656D6954; + block->ExTimerObjectType.allocate_proc = trampoline_allocatepool; + block->ExTimerObjectType.free_proc = trampoline_freepool; + block->ExTimerObjectType.delete_proc = + kernel_trampoline_group_.NewLongtermTrampoline(DeleteTimer); + // iocompletion object + block->IoCompletionObjectType.pool_tag = 0x706D6F43; + block->IoCompletionObjectType.allocate_proc = trampoline_allocatepool; + block->IoCompletionObjectType.free_proc = trampoline_freepool; + block->IoCompletionObjectType.delete_proc = + kernel_trampoline_group_.NewLongtermTrampoline(DeleteIoCompletion); + block->IoCompletionObjectType.unknown_size_or_object_ = oddobject_offset; + + // iodevice 
object + block->IoDeviceObjectType.pool_tag = 0x69766544; + block->IoDeviceObjectType.allocate_proc = trampoline_allocatepool; + block->IoDeviceObjectType.free_proc = trampoline_freepool; + block->IoDeviceObjectType.unknown_size_or_object_ = oddobject_offset; + block->IoDeviceObjectType.unknown_proc = + kernel_trampoline_group_.NewLongtermTrampoline(UnknownProcIoDevice); + + // file object + block->IoFileObjectType.pool_tag = 0x656C6946; + block->IoFileObjectType.allocate_proc = trampoline_allocatepool; + block->IoFileObjectType.free_proc = trampoline_freepool; + block->IoFileObjectType.unknown_size_or_object_ = + 0x38; // sizeof fileobject, i believe + block->IoFileObjectType.close_proc = + kernel_trampoline_group_.NewLongtermTrampoline(CloseFileProc); + block->IoFileObjectType.delete_proc = + kernel_trampoline_group_.NewLongtermTrampoline(DeleteFileProc); + block->IoFileObjectType.unknown_proc = + kernel_trampoline_group_.NewLongtermTrampoline(UnknownFileProc); + + // directory object + block->ObDirectoryObjectType.pool_tag = 0x65726944; + block->ObDirectoryObjectType.allocate_proc = trampoline_allocatepool; + block->ObDirectoryObjectType.free_proc = trampoline_freepool; + block->ObDirectoryObjectType.unknown_size_or_object_ = oddobject_offset; + + // symlink object + block->ObSymbolicLinkObjectType.pool_tag = 0x626D7953; + block->ObSymbolicLinkObjectType.allocate_proc = trampoline_allocatepool; + block->ObSymbolicLinkObjectType.free_proc = trampoline_freepool; + block->ObSymbolicLinkObjectType.unknown_size_or_object_ = oddobject_offset; + block->ObSymbolicLinkObjectType.delete_proc = + kernel_trampoline_group_.NewLongtermTrampoline(DeleteSymlink); + + host_object_type_enum_to_guest_object_type_ptr_ = { + {XObject::Type::Event, + kernel_guest_globals_ + + offsetof32(KernelGuestGlobals, ExEventObjectType)}, + {XObject::Type::Semaphore, + kernel_guest_globals_ + + offsetof32(KernelGuestGlobals, ExSemaphoreObjectType)}, + {XObject::Type::Thread, + 
kernel_guest_globals_ + + offsetof32(KernelGuestGlobals, ExThreadObjectType)}, + {XObject::Type::File, + kernel_guest_globals_ + + offsetof32(KernelGuestGlobals, IoFileObjectType)}, + {XObject::Type::Mutant, + kernel_guest_globals_ + + offsetof32(KernelGuestGlobals, ExMutantObjectType)}, + {XObject::Type::Device, + kernel_guest_globals_ + + offsetof32(KernelGuestGlobals, IoDeviceObjectType)}}; + + block->guest_nullsub = + kernel_trampoline_group_.NewLongtermTrampoline(KernelNullsub); + block->suspendthread_apc_routine = + kernel_trampoline_group_.NewLongtermTrampoline( + xboxkrnl::xeSuspendThreadApcRoutine); + block->extimer_dpc_routine = kernel_trampoline_group_.NewLongtermTrampoline( + xboxkrnl::xeEXTimerDPCRoutine); + + block->extimer_apc_kernel_routine = + kernel_trampoline_group_.NewLongtermTrampoline( + xboxkrnl::xeEXTimerAPCKernelRoutine); + + block->graphics_interrupt_dpc.Initialize( + kernel_trampoline_group_.NewLongtermTrampoline( + &KernelState::GraphicsInterruptDPC), + 0); + // cpu2,remember all dpc cpu numbers are +1, because 0 means "any cpu" + block->graphics_interrupt_dpc.desired_cpu_number = 3; + + uint32_t dpc_routine_for_cp = block->graphics_interrupt_dpc.routine; + for (unsigned i = 0; i < 6; ++i) { + block->command_processor_interrupt_dpcs[i].Initialize(dpc_routine_for_cp, + 0); + block->command_processor_interrupt_dpcs[i].desired_cpu_number = i + 1; + } + block->VdGlobalDevice = 0; + block->VdGlobalXamDevice = 0; + block->VdGpuClockInMHz = 500; + block->VdHSIOCalibrationLock.header.type = DISPATCHER_AUTO_RESET_EVENT; + block->VdHSIOCalibrationLock.header.absolute = 4; + util::XeInitializeListHead(&block->VdHSIOCalibrationLock.header.wait_list, + memory_); + + block->VdHSIOCalibrationLock.lock_count = ~0u; + + block->audio_interrupt_dpc_.Initialize( + kernel_trampoline_group_.NewLongtermTrampoline( + &KernelState::AudioInterruptDPC), + 0U); + block->audio_interrupt_dpc_event_.header.type = DISPATCHER_AUTO_RESET_EVENT; + 
util::XeInitializeListHead( + &block->audio_interrupt_dpc_event_.header.wait_list, memory_); +} +static void SetupIdleThreadPriority(cpu::ppc::PPCContext* context, + X_KPCR* kpcr) { + xboxkrnl::xeKeSetPriorityThread(context, context->TranslateVirtual(kpcr->prcb_data.idle_thread), + 0); + kpcr->prcb_data.idle_thread->priority = 18; + if (!kpcr->prcb_data.next_thread) { + kpcr->prcb_data.running_idle_thread.m_ptr = 1; + } +} +void KernelState::ForwardBootInitializeCPU0InSystemThread( + cpu::ppc::PPCContext* context) { + context->kernel_state->BootInitializeCPU0InSystemThread(context); +} + +void KernelState::BootCPU0(cpu::ppc::PPCContext* context, X_KPCR* kpcr) { + KernelGuestGlobals* block = + memory_->TranslateVirtual(kernel_guest_globals_); + + util::XeInitializeListHead( + &block->UsbdBootEnumerationDoneEvent.header.wait_list, context); + xboxkrnl::xeKeSetEvent(context, &block->UsbdBootEnumerationDoneEvent, 1, 0); + + block->title_terminated_event.header.type = DISPATCHER_AUTO_RESET_EVENT; + util::XeInitializeListHead(&block->title_terminated_event.header.wait_list, + context); + + xe::be handle_ptr; + + X_STATUS create_res = xboxkrnl::ExCreateThread( + &handle_ptr, 0x8000u, nullptr, 0, + kernel_trampoline_group_.NewLongtermTrampoline( + &KernelState::ForwardBootInitializeCPU0InSystemThread), + 0, 0x422); + xenia_assert(create_res == 0); + xboxkrnl::NtClose(handle_ptr); + // this is deliberate, does not change the interrupt priority! 
+ kpcr->current_irql = IRQL_DISPATCH; + SetupIdleThreadPriority(context, kpcr); +} + +static void XamNotifyListenerDeleteProc(PPCContext* context) { + uint32_t a1 = static_cast(context->r[3]); + if (a1) { + uint32_t deref1 = *context->TranslateVirtualBE(a1); + uint32_t deref2 = *context->TranslateVirtualBE(deref1); + context->processor->ExecuteRaw(context->thread_state(), deref2); + return; + } +} + +void KernelState::BootInitializeXam(cpu::ppc::PPCContext* context) { + XELOGD("BootInitializeXam"); + auto globals = context->kernel_state->GetKernelGuestGlobals(context); + + uint32_t trampoline_allocatepool = + kernel_trampoline_group_.NewLongtermTrampoline( + SimpleForwardAllocatePoolTypeWithTag); + uint32_t trampoline_freepool = + kernel_trampoline_group_.NewLongtermTrampoline(SimpleForwardFreePool); + + globals->XboxKernelDefaultObject.type = DISPATCHER_AUTO_RESET_EVENT; + globals->XboxKernelDefaultObject.signal_state = 1; + + util::XeInitializeListHead(&globals->XboxKernelDefaultObject.wait_list, + context); + + globals->XamNotifyListenerObjectType.allocate_proc = trampoline_allocatepool; + globals->XamNotifyListenerObjectType.free_proc = trampoline_freepool; + globals->XamNotifyListenerObjectType.delete_proc = + kernel_trampoline_group_.NewLongtermTrampoline( + XamNotifyListenerDeleteProc); + globals->XamNotifyListenerObjectType.unknown_size_or_object_ = 0xC; + globals->XamNotifyListenerObjectType.pool_tag = 0x66746F4E; + + globals->XamEnumeratorObjectType.allocate_proc = trampoline_allocatepool; + globals->XamEnumeratorObjectType.free_proc = trampoline_freepool; + globals->XamEnumeratorObjectType.unknown_size_or_object_ = + context->HostToGuestVirtual(&globals->XamDefaultObject); + globals->dispatch_queue_event_.header.type = DISPATCHER_AUTO_RESET_EVENT; + util::XeInitializeListHead(&globals->dispatch_queue_event_.header.wait_list, + context); + + // todo: Enumerator! 
+} + +void KernelState::BootCPU1Through5(cpu::ppc::PPCContext* context, + X_KPCR* kpcr) { + // todo: sets priority here! need to fill that in + + xboxkrnl::xeKfLowerIrql(context, IRQL_DISPATCH); + SetupIdleThreadPriority(context, kpcr); +} + +void ClockInterruptEnqueueProc(cpu::XenonInterruptController* controller, + uint32_t slot, void* ud) { + // immediately reschedule ourselves to keep drift at a minimum + + cpu::CpuTimedInterrupt reschedule_args{}; + reschedule_args.destination_microseconds_ = + controller->GetSlotUsTimestamp(slot) + 1000ULL; + reschedule_args.ud_ = ud; + reschedule_args.enqueue_ = ClockInterruptEnqueueProc; + controller->SetTimedInterruptArgs(slot, &reschedule_args); + + auto thiz = reinterpret_cast(ud); + + cpu::SendInterruptArguments interrupt_arguments; + interrupt_arguments.ipi_func = GuestClockInterruptForwarder; + interrupt_arguments.ud = kernel_state(); + interrupt_arguments.irql_ = IRQL_CLOCK; + interrupt_arguments.wait_done = false; + thiz->SendGuestIPI(interrupt_arguments); + + // don't free our slot, we repeat forever +} + +void KernelState::HWThreadBootFunction(cpu::ppc::PPCContext* context, + void* ud) { + KernelState* ks = reinterpret_cast(ud); + context->kernel_state = ks; + + /* + todo: the hypervisor or bootloader does some initialization before this + point + + */ + + auto kpcr = GetKPCR(context); + auto cpunum = ks->GetPCRCpuNum(kpcr); + auto hwthread = context->processor->GetCPUThread(cpunum); + auto interrupt_controller = hwthread->interrupt_controller(); + kpcr->emulated_interrupt = reinterpret_cast(interrupt_controller); + + kpcr->prcb_data.current_cpu = cpunum; + kpcr->prcb_data.processor_mask = 1U << cpunum; + + if (cpunum == 0) { + ks->InitProcessorStack(kpcr); + ks->BootCPU0(context, kpcr); + } else { + ks->BootCPU1Through5(context, kpcr); + } + // todo: all cpus won't be executing this at exactly the same time, so they'll + // all be a bit off, but that may not matter much +#if XE_USE_TIMED_INTERRUPTS_FOR_CLOCK == 1 
+ cpu::CpuTimedInterrupt clock_cti; + clock_cti.destination_microseconds_ = + interrupt_controller->CreateRelativeUsTimestamp(1000ULL); // one millisecond + + clock_cti.ud_ = reinterpret_cast(hwthread); + clock_cti.enqueue_ = ClockInterruptEnqueueProc; + + // this slot stays allocated forever + uint32_t clock_slot = interrupt_controller->AllocateTimedInterruptSlot(); + interrupt_controller->SetTimedInterruptArgs(clock_slot, &clock_cti); + interrupt_controller->RecomputeNextEventCycles(); +#endif + + if (cpunum == 2) { + auto graphics_system = ks->emulator()->graphics_system(); + graphics_system->SetKernelState(context->kernel_state); + graphics_system->SetupVsync(); + } +} +void KernelState::BootKernel() { + XELOGD("Booting kernel"); + BootInitializeStatics(); + + // initialize the idle process' thread list prior to startup for convenience + + auto idle_process_ptr = GetIdleProcess(); + auto idle_process = memory()->TranslateVirtual(idle_process_ptr); + idle_process->thread_count = 6; + + for (unsigned i = 0; i < 6; ++i) { + auto cpu_thread = processor()->GetCPUThread(i); + cpu_thread->SetIdleProcessFunction(&KernelState::KernelIdleProcessFunction); + cpu_thread->SetBootFunction(&KernelState::HWThreadBootFunction, this); + cpu_thread->SetDecrementerInterruptCallback( + &KernelState::KernelDecrementerInterrupt, nullptr); + // dont need the thread list lock, because no guest code is running atm + util::XeInsertTailList( + &idle_process->thread_list, + &this->KPCRPageForCpuNumber(i)->idle_process_thread.process_threads, + memory()); + } + SetupKPCRPageForCPU(0); + // cpu 0 boots all other cpus + processor()->GetCPUThread(0)->Boot(); + + while (!processor()->AllHWThreadsBooted()) { + threading::NanoSleep(10000); // 10 microseconds + } + XELOGD("All processor HW threads have booted up"); + + processor()->GetHWClock()->SetInterruptCallback(HWClockCallback); + + auto bundle = + memory()->TranslateVirtual(GetKeTimestampBundle()); + uint32_t initial_ms = 
static_cast(Clock::QueryGuestTickCount()); + uint64_t initial_systemtime = Clock::QueryGuestSystemTime(); + + bundle->interrupt_time = initial_systemtime; + bundle->system_time = initial_systemtime; + bundle->tick_count = initial_ms; +#if XE_USE_TIMED_INTERRUPTS_FOR_CLOCK == 0 + processor()->GetHWClock()->Start(); +#endif +} + +void KernelState::BootInitializeCPU0InSystemThread( + cpu::ppc::PPCContext* context) { + for (unsigned i = 1; i < 6; ++i) { + SetupKPCRPageForCPU(i); + } + + xboxkrnl::xeKfLowerIrql(context, IRQL_APC); + for (unsigned i = 1; i < 6; ++i) { + auto cpu_thread = processor()->GetCPUThread(i); + uint64_t mftb_time = processor()->GetCPUThread(0)->mftb(); + uint64_t systemtime = Clock::QueryHostSystemTime(); + cpu_thread->SetCycleSync(mftb_time, systemtime); + + cpu_thread->Boot(); + } + emulator()->audio_system()->StartGuestWorkerThread(this); + BootInitializeXam(context); + uint32_t prev_affinity; + xboxkrnl::xeKeSetAffinityThread(context, GetKThread(context), 4, + &prev_affinity); + xenia_assert(prev_affinity == 1); + xenia_assert(context->kernel_state->GetPCRCpuNum(GetKPCR(context)) == 2); + + xboxkrnl::xeKeSetPriorityThread(context, GetKThread(context), 20); + CPU0WaitForLaunch(context); +} + +} // namespace kernel +} // namespace xe diff --git a/src/xenia/kernel/kernel_state_debug.cc b/src/xenia/kernel/kernel_state_debug.cc new file mode 100644 index 0000000000..f94131b694 --- /dev/null +++ b/src/xenia/kernel/kernel_state_debug.cc @@ -0,0 +1,99 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/kernel/kernel_state.h" + +#include + +#include "third_party/fmt/include/fmt/format.h" +#include "xenia/base/assert.h" +#include "xenia/base/byte_stream.h" +#include "xenia/base/logging.h" +#include "xenia/base/string.h" +#include "xenia/cpu/processor.h" +#include "xenia/emulator.h" +#include "xenia/hid/input_system.h" +#include "xenia/kernel/user_module.h" +#include "xenia/kernel/util/shim_utils.h" +#include "xenia/kernel/xam/xam_module.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_memory.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_module.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_ob.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h" +#include "xenia/kernel/xevent.h" +#include "xenia/kernel/xmodule.h" +#include "xenia/kernel/xnotifylistener.h" +#include "xenia/kernel/xobject.h" +#include "xenia/kernel/xthread.h" +namespace xe { +namespace kernel { + +struct KernelDebugStringBuffer : public StringBuffer { + KernelState* const kernel_state_; + static const char* ProcessTypeToString(uint8_t proctype); + KernelDebugStringBuffer(KernelState* kernel_state) + : kernel_state_(kernel_state) { + this->Reserve(65536); + } +}; + +static void DumpSpinlock(KernelState* ks, X_KSPINLOCK* lock, + KernelDebugStringBuffer* sbuffer) { + uint32_t held_by_pcr = lock->pcr_of_owner; + + sbuffer->AppendFormat("(Owner = CPU {})", (held_by_pcr >> 12) & 0xF); +} + +const char* KernelDebugStringBuffer::ProcessTypeToString(uint8_t proctype) { + switch (proctype) { + case X_PROCTYPE_IDLE: + return "idle"; + case X_PROCTYPE_SYSTEM: + return "system"; + case X_PROCTYPE_TITLE: + return "title"; + default: + xenia_assert(false); + return "unknown"; + } +} + +static void DumpProcess(KernelState* ks, X_KPROCESS* process, + KernelDebugStringBuffer* sbuffer) { + sbuffer->Append("thread_list_spinlock = "); + DumpSpinlock(ks, &process->thread_list_spinlock, sbuffer); +#define 
SIMPF(field_name) \ + sbuffer->AppendFormat("\n" #field_name " = {:X}", \ + static_cast(process->field_name)) + + SIMPF(quantum); + SIMPF(clrdataa_masked_ptr); + SIMPF(thread_count); + SIMPF(unk_18); + SIMPF(unk_19); + SIMPF(unk_1A); + SIMPF(unk_1B); + + SIMPF(kernel_stack_size); + SIMPF(tls_static_data_address); + SIMPF(tls_data_size); + + SIMPF(tls_raw_data_size); + SIMPF(tls_slot_size); + SIMPF(is_terminating); + SIMPF(process_type); + sbuffer->AppendFormat("\nprocess_type = {}", + KernelDebugStringBuffer::ProcessTypeToString(process->process_type)); +} + +static void DumpThread(KernelState* ks, X_KTHREAD* kthread, + KernelDebugStringBuffer* sbuffer) {} + +} // namespace kernel +} // namespace xe diff --git a/src/xenia/kernel/util/guest_object_table.cc b/src/xenia/kernel/util/guest_object_table.cc index 843643e855..d8eeb2a184 100644 --- a/src/xenia/kernel/util/guest_object_table.cc +++ b/src/xenia/kernel/util/guest_object_table.cc @@ -6,7 +6,7 @@ * Released under the BSD license - see LICENSE in the root for more details. 
* ****************************************************************************** */ - +#include "xenia/cpu/ppc/ppc_context.h" #include "xenia/kernel/util/guest_object_table.h" #include "xenia/base/atomic.h" #include "xenia/cpu/processor.h" diff --git a/src/xenia/kernel/util/guest_object_table.h b/src/xenia/kernel/util/guest_object_table.h index c2995ef4ce..2dbd6e07ac 100644 --- a/src/xenia/kernel/util/guest_object_table.h +++ b/src/xenia/kernel/util/guest_object_table.h @@ -10,8 +10,7 @@ #ifndef XENIA_KERNEL_UTIL_GUEST_OBJECT_TABLE_H_ #define XENIA_KERNEL_UTIL_GUEST_OBJECT_TABLE_H_ -#include "xenia/kernel/kernel_state.h" -#include "xenia/xbox.h" +#include "xenia/kernel/kernel_guest_structures.h" namespace xe { namespace kernel { namespace util { diff --git a/src/xenia/kernel/util/guest_timer_list.cc b/src/xenia/kernel/util/guest_timer_list.cc new file mode 100644 index 0000000000..b19e89f5cb --- /dev/null +++ b/src/xenia/kernel/util/guest_timer_list.cc @@ -0,0 +1,42 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2023 Xenia Canary. All rights reserved. * Released under the BSD + *license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ +#include "xenia/kernel/util/guest_timer_list.h" +#include "xenia/base/atomic.h" +#include "xenia/cpu/ppc/ppc_context.h" +#include "xenia/cpu/processor.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_memory.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h" +namespace xe { +namespace kernel { +namespace util { + +void InitializeTimerTable(uint32_t table_ptr, cpu::ppc::PPCContext* context) { + X_TIMER_TABLE* table = context->TranslateVirtual(table_ptr); + for (uint32_t i = 0; i < countof(table->buckets); ++i) { + table->buckets[i].Initialize(context); + } +} + +void TimersExpire(uint32_t table_ptr, uint32_t scratch_list, + cpu::ppc::PPCContext* context) { + X_TIMER_TABLE* table = context->TranslateVirtual(table_ptr); + X_LIST_ENTRY* expired_head = + context->TranslateVirtual(scratch_list); + util::XeInitializeListHead(expired_head, scratch_list); +} + +void TimersComputeHighestExpiredIndex(uint32_t table_ptr, + uint32_t ktimestamp_bundle, + uint32_t& highest_bucket, + uint8_t& lowest_bucket, + cpu::ppc::PPCContext* context) {} + +} // namespace util +} // namespace kernel +} // namespace xe diff --git a/src/xenia/kernel/util/guest_timer_list.h b/src/xenia/kernel/util/guest_timer_list.h new file mode 100644 index 0000000000..971b268317 --- /dev/null +++ b/src/xenia/kernel/util/guest_timer_list.h @@ -0,0 +1,44 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2023 Xenia Canary. All rights reserved. * Released under the BSD + *license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_KERNEL_UTIL_GUEST_TIMER_LIST_H_ +#define XENIA_KERNEL_UTIL_GUEST_TIMER_LIST_H_ + +#include "xenia/kernel/kernel_guest_structures.h" +#include "xenia/cpu/ppc/ppc_context.h" +namespace xe { +namespace kernel { +namespace util { + +/* + xboxkrnl uses a 32 entry table. each entry is an X_LIST_ENTRY which + is X_KTIMER.table_bucket_entry +*/ + +struct X_TIMER_TABLE { + util::X_TYPED_LIST + buckets[32]; + +}; + +void InitializeTimerTable(uint32_t table_ptr, cpu::ppc::PPCContext* context); + +//must be under dispatcher lock! +void TimersExpire(uint32_t table_ptr, uint32_t scratch_list, cpu::ppc::PPCContext* context); + +void TimersComputeHighestExpiredIndex(uint32_t table_ptr, uint32_t ktimestamp_bundle, + uint32_t& highest_bucket, + uint8_t& lowest_bucket, + cpu::ppc::PPCContext* context); + +} // namespace util +} // namespace kernel +} // namespace xe + +#endif // XENIA_KERNEL_UTIL_GUEST_TIMER_LIST_H_ diff --git a/src/xenia/kernel/util/native_list.h b/src/xenia/kernel/util/native_list.h index 96e950ed25..c82f4f3c81 100644 --- a/src/xenia/kernel/util/native_list.h +++ b/src/xenia/kernel/util/native_list.h @@ -168,8 +168,8 @@ static void XeInsertHeadList(uint32_t list_head, X_LIST_ENTRY* entry, template static void XeInsertHeadList(X_LIST_ENTRY* list_head, X_LIST_ENTRY* entry, VirtualTranslator context) { - XeInsertHeadList(list_head, XeGuestList(list_head, context), - entry, XeGuestList(entry, context), context); + XeInsertHeadList(list_head, XeGuestList(list_head, context), entry, + XeGuestList(entry, context), context); } template @@ -216,7 +216,7 @@ struct X_TYPED_LIST : public X_LIST_ENTRY { uint32_t end() { return vt->HostToGuestVirtual(thiz); } }; - template + template ForwardIteratorBegin IterateForward(VirtualTranslator vt) { return ForwardIteratorBegin{vt, this}; } @@ -227,13 +227,14 @@ struct X_TYPED_LIST : public X_LIST_ENTRY { } template void 
InsertHead(TObject* entry, VirtualTranslator translator) { - XeInsertHeadList(static_cast(this), ObjectListEntry(entry), translator); + XeInsertHeadList(static_cast(this), ObjectListEntry(entry), + translator); } template void InsertTail(TObject* entry, VirtualTranslator translator) { XeInsertTailList(this, ObjectListEntry(entry), translator); } - template + template bool empty(VirtualTranslator vt) const { return vt->TranslateVirtual(flink_ptr) == this; } @@ -241,6 +242,21 @@ struct X_TYPED_LIST : public X_LIST_ENTRY { TObject* HeadObject(VirtualTranslator vt) { return ListEntryObject(vt->TranslateVirtual(flink_ptr)); } + + template + TObject* UnlinkHeadObject(VirtualTranslator vt) { + uint32_t old = util::XeRemoveHeadList(this, vt); + + return ListEntryObject(vt->TranslateVirtual(old)); + } + + template + TObject* UnlinkTailObject(VirtualTranslator vt) { + uint32_t old = util::XeRemoveTailList(this, vt); + + return ListEntryObject(vt->TranslateVirtual(old)); + } + template TObject* TailObject(VirtualTranslator vt) { return ListEntryObject(vt->TranslateVirtual(blink_ptr)); diff --git a/src/xenia/kernel/util/object_table.cc b/src/xenia/kernel/util/object_table.cc index ad3652e167..99c8c750c5 100644 --- a/src/xenia/kernel/util/object_table.cc +++ b/src/xenia/kernel/util/object_table.cc @@ -221,7 +221,7 @@ X_STATUS ObjectTable::RemoveHandle(X_HANDLE handle) { if (entry->object) { auto object = entry->object; entry->object = nullptr; - assert_zero(entry->handle_ref_count); + //assert_zero(entry->handle_ref_count); entry->handle_ref_count = 0; // Walk the object's handles and remove this one. 
@@ -503,6 +503,19 @@ void ObjectTable::UnmapGuestObjectHostHandle(uint32_t guest_object) { guest_to_host_handle_.erase(iter); } } +void ObjectTable::FlushGuestToHostMapping(uint32_t base_address, + uint32_t length) { + auto global_lock = global_critical_region_.Acquire(); + auto iterator = guest_to_host_handle_.lower_bound(base_address); + + while (iterator !=guest_to_host_handle_.end() && iterator->first >= base_address && iterator->first < (base_address + length)) { + auto old_mapping = iterator; + + iterator++; + auto node_handle = guest_to_host_handle_.extract(old_mapping); + + } +} } // namespace util } // namespace kernel diff --git a/src/xenia/kernel/util/object_table.h b/src/xenia/kernel/util/object_table.h index 7223f15487..54bf4fbdc1 100644 --- a/src/xenia/kernel/util/object_table.h +++ b/src/xenia/kernel/util/object_table.h @@ -83,6 +83,7 @@ class ObjectTable { void MapGuestObjectToHostHandle(uint32_t guest_object, X_HANDLE host_handle); void UnmapGuestObjectHostHandle(uint32_t guest_object); bool HostHandleForGuestObject(uint32_t guest_object, X_HANDLE& out); + void FlushGuestToHostMapping(uint32_t base_address, uint32_t length); private: struct ObjectTableEntry { int handle_ref_count = 0; @@ -110,7 +111,7 @@ class ObjectTable { uint32_t last_free_entry_ = 0; uint32_t last_free_host_entry_ = 0; std::unordered_map name_table_; - std::unordered_map guest_to_host_handle_; + std::map guest_to_host_handle_; }; // Generic lookup diff --git a/src/xenia/kernel/util/shim_utils.h b/src/xenia/kernel/util/shim_utils.h index adb8bf450f..cab68d66d3 100644 --- a/src/xenia/kernel/util/shim_utils.h +++ b/src/xenia/kernel/util/shim_utils.h @@ -413,10 +413,10 @@ inline void AppendParam(StringBuffer* string_buffer, word_t param) { string_buffer->AppendFormat("{:04X}", uint16_t(param)); } inline void AppendParam(StringBuffer* string_buffer, dword_t param) { - string_buffer->AppendFormat("{:08X}", uint32_t(param)); + string_buffer->AppendHexUInt32(uint32_t(param)); } inline 
void AppendParam(StringBuffer* string_buffer, qword_t param) { - string_buffer->AppendFormat("{:016X}", uint64_t(param)); + string_buffer->AppendHexUInt64(uint64_t(param)); } inline void AppendParam(StringBuffer* string_buffer, float_t param) { string_buffer->AppendFormat("{:G}", static_cast(param)); @@ -425,28 +425,28 @@ inline void AppendParam(StringBuffer* string_buffer, double_t param) { string_buffer->AppendFormat("{:G}", static_cast(param)); } inline void AppendParam(StringBuffer* string_buffer, lpvoid_t param) { - string_buffer->AppendFormat("{:08X}", uint32_t(param)); + string_buffer->AppendHexUInt32(uint32_t(param)); } inline void AppendParam(StringBuffer* string_buffer, lpdword_t param) { - string_buffer->AppendFormat("{:08X}", param.guest_address()); + string_buffer->AppendHexUInt32(param.guest_address()); if (param) { - string_buffer->AppendFormat("({:08X})", param.value()); + string_buffer->AppendParenthesizedHexUInt32(param.value()); } } inline void AppendParam(StringBuffer* string_buffer, lpqword_t param) { - string_buffer->AppendFormat("{:08X}", param.guest_address()); + string_buffer->AppendHexUInt32(param.guest_address()); if (param) { - string_buffer->AppendFormat("({:016X})", param.value()); + string_buffer->AppendParenthesizedHexUInt64(param.value()); } } inline void AppendParam(StringBuffer* string_buffer, lpfloat_t param) { - string_buffer->AppendFormat("{:08X}", param.guest_address()); + string_buffer->AppendHexUInt32(param.guest_address()); if (param) { string_buffer->AppendFormat("({:G})", param.value()); } } inline void AppendParam(StringBuffer* string_buffer, lpdouble_t param) { - string_buffer->AppendFormat("{:08X}", param.guest_address()); + string_buffer->AppendHexUInt32(param.guest_address()); if (param) { string_buffer->AppendFormat("({:G})", param.value()); } @@ -455,20 +455,20 @@ inline void AppendParam(StringBuffer* string_buffer, ppc_context_t param) { string_buffer->Append("ContextArg"); } inline void AppendParam(StringBuffer* 
string_buffer, lpstring_t param) { - string_buffer->AppendFormat("{:08X}", param.guest_address()); + string_buffer->AppendHexUInt32(param.guest_address()); if (param) { string_buffer->AppendFormat("({})", param.value()); } } inline void AppendParam(StringBuffer* string_buffer, lpu16string_t param) { - string_buffer->AppendFormat("{:08X}", param.guest_address()); + string_buffer->AppendHexUInt32(param.guest_address()); if (param) { string_buffer->AppendFormat("({})", xe::to_utf8(param.value())); } } inline void AppendParam(StringBuffer* string_buffer, pointer_t record) { - string_buffer->AppendFormat("{:08X}", record.guest_address()); + string_buffer->AppendHexUInt32(record.guest_address()); if (record) { auto name_string = kernel_memory()->TranslateVirtual(record->name_ptr); @@ -494,7 +494,7 @@ inline void AppendParam(StringBuffer* string_buffer, } template void AppendParam(StringBuffer* string_buffer, pointer_t param) { - string_buffer->AppendFormat("{:08X}", param.guest_address()); + string_buffer->AppendHexUInt32(param.guest_address()); } enum class KernelModuleId { @@ -567,6 +567,7 @@ struct ExportRegistrerHelper { new cpu::Export(ORDINAL, xe::cpu::Export::Type::kFunction, name, TAGS); struct X { static void Trampoline(PPCContext* ppc_context) { + ppc_context->CheckInterrupt(); Param::Init init = { ppc_context, 0, @@ -589,6 +590,7 @@ struct ExportRegistrerHelper { KernelTrampoline(fn, std::forward>(params), std::make_index_sequence()); result.Store(ppc_context); + ppc_context->CheckInterrupt(); if (TAGS & (xe::cpu::ExportTag::kLog | xe::cpu::ExportTag::kLogResult)) { // TODO(benvanik): log result. 
@@ -598,6 +600,7 @@ struct ExportRegistrerHelper { }; struct Y { static void Trampoline(PPCContext* ppc_context) { + ppc_context->CheckInterrupt(); Param::Init init = { ppc_context, 0, @@ -612,6 +615,7 @@ struct ExportRegistrerHelper { std::make_index_sequence()); result.Store(ppc_context); } + ppc_context->CheckInterrupt(); } }; export_entry->function_data.trampoline = &X::Trampoline; diff --git a/src/xenia/kernel/xam/apps/xmp_app.cc b/src/xenia/kernel/xam/apps/xmp_app.cc index 2f42c1a12b..6d707ebcf3 100644 --- a/src/xenia/kernel/xam/apps/xmp_app.cc +++ b/src/xenia/kernel/xam/apps/xmp_app.cc @@ -12,7 +12,7 @@ #include "xenia/base/logging.h" #include "xenia/base/threading.h" - +#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h" namespace xe { namespace kernel { namespace xam { @@ -35,7 +35,8 @@ X_HRESULT XmpApp::XMPGetStatus(uint32_t state_ptr) { if (!XThread::GetCurrentThread()->main_thread()) { // Some stupid games will hammer this on a thread - induce a delay // here to keep from starving real threads. - xe::threading::Sleep(std::chrono::milliseconds(1)); + int64_t interval = -10000LL; + xboxkrnl::xeKeDelayExecutionThread(cpu::ThreadState::GetContext(), 0, false, &interval); } XELOGD("XMPGetStatus({:08X})", state_ptr); @@ -428,9 +429,11 @@ X_HRESULT XmpApp::DispatchMessageSync(uint32_t message, uint32_t buffer_ptr, xe::store_and_swap(memory_->TranslateVirtual(args->locked_ptr), 0); - if (!XThread::GetCurrentThread()->main_thread()) { + if (!XThread::GetCurrentThread()->main_thread()) { // Atrain spawns a thread 82437FD0 to call this in a tight loop forever. 
- xe::threading::Sleep(std::chrono::milliseconds(10)); + int64_t interval = -100000LL; + xboxkrnl::xeKeDelayExecutionThread(cpu::ThreadState::GetContext(), 0, + false, &interval); } return X_E_SUCCESS; diff --git a/src/xenia/kernel/xam/xam_content.cc b/src/xenia/kernel/xam/xam_content.cc index a08da87bd8..b9b20b558a 100644 --- a/src/xenia/kernel/xam/xam_content.cc +++ b/src/xenia/kernel/xam/xam_content.cc @@ -449,7 +449,7 @@ dword_result_t XamSwapDisc_entry( } auto completion_event = [completion_handle]() -> void { - auto kevent = xboxkrnl::xeKeSetEvent(completion_handle, 1, 0); + auto kevent = xboxkrnl::xeKeSetEvent(cpu::ThreadState::GetContext(), completion_handle, 1, 0); // Release the completion handle auto object = diff --git a/src/xenia/kernel/xam/xam_enum.cc b/src/xenia/kernel/xam/xam_enum.cc index fe4d0a4579..4a02fa53b5 100644 --- a/src/xenia/kernel/xam/xam_enum.cc +++ b/src/xenia/kernel/xam/xam_enum.cc @@ -94,7 +94,7 @@ dword_result_t XamCreateEnumeratorHandle_entry( new XStaticUntypedEnumerator(kernel_state(), item_count, extra_size)); auto result = - e->Initialize(user_index, app_id, open_message, close_message, flags); + e->Initialize(user_index, app_id, open_message, close_message, flags, extra_size, nullptr); if (XFAILED(result)) { return result; } diff --git a/src/xenia/kernel/xam/xam_guest_structures.h b/src/xenia/kernel/xam/xam_guest_structures.h new file mode 100644 index 0000000000..d820ba9973 --- /dev/null +++ b/src/xenia/kernel/xam/xam_guest_structures.h @@ -0,0 +1,30 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2023 Xenia Canary. All rights reserved. * Released under the BSD + *license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_KERNEL_XAM_XAM_GUEST_STRUCTURES_H_ +#define XENIA_KERNEL_XAM_XAM_GUEST_STRUCTURES_H_ +#include "xenia/kernel/kernel_guest_structures.h" +namespace xe { +namespace kernel { +struct X_XAMNOTIFY { + char field_0[12]; + X_KEVENT event; + X_KSPINLOCK spinlock; + char an_irql; + char field_21[3]; + int a_pointer; + int field_28; + int process_type_related; +}; +static_assert_size(X_XAMNOTIFY, 48); + +} // namespace kernel +} // namespace xe + +#endif // XENIA_KERNEL_XAM_XAM_GUEST_STRUCTURES_H_ diff --git a/src/xenia/kernel/xam/xam_info.cc b/src/xenia/kernel/xam/xam_info.cc index aad29c5c3f..84cbe6bef7 100644 --- a/src/xenia/kernel/xam/xam_info.cc +++ b/src/xenia/kernel/xam/xam_info.cc @@ -459,7 +459,7 @@ void Sleep_entry(dword_t dwMilliseconds) { DECLARE_XAM_EXPORT1(Sleep, kNone, kImplemented); // https://learn.microsoft.com/en-us/windows/win32/api/sysinfoapi/nf-sysinfoapi-gettickcount -dword_result_t GetTickCount_entry() { return Clock::QueryGuestUptimeMillis(); } +dword_result_t GetTickCount_entry(const ppc_context_t& context) { return context->kernel_state->GetKernelTickCount(); } DECLARE_XAM_EXPORT1(GetTickCount, kNone, kImplemented); dword_result_t RtlSetLastNTError_entry(dword_t error_code) { diff --git a/src/xenia/kernel/xam/xam_input.cc b/src/xenia/kernel/xam/xam_input.cc index dc1b8cb537..0072614cb0 100644 --- a/src/xenia/kernel/xam/xam_input.cc +++ b/src/xenia/kernel/xam/xam_input.cc @@ -93,6 +93,7 @@ DECLARE_XAM_EXPORT1(XamInputGetCapabilities, kInput, kSketchy); // https://msdn.microsoft.com/en-us/library/windows/desktop/microsoft.directx_sdk.reference.xinputgetstate(v=vs.85).aspx dword_result_t XamInputGetState_entry(dword_t user_index, dword_t flags, pointer_t input_state) { + cpu::MFTBFence timing_fence{24}; if (input_state) { memset((void*)input_state.host_address(), 0, sizeof(X_INPUT_STATE)); } diff --git a/src/xenia/kernel/xam/xam_net.cc 
b/src/xenia/kernel/xam/xam_net.cc index 68c829d497..107817e60c 100644 --- a/src/xenia/kernel/xam/xam_net.cc +++ b/src/xenia/kernel/xam/xam_net.cc @@ -381,7 +381,7 @@ dword_result_t NetDll_WSAWaitForMultipleEvents_entry(dword_t num_events, lpdword_t events, dword_t wait_all, dword_t timeout, - dword_t alertable) { + dword_t alertable, const ppc_context_t& context) { if (num_events > 64) { XThread::SetLastError(uint32_t(X_WSAError::X_WSA_INVALID_PARAMETER)); return ~0u; @@ -393,7 +393,7 @@ dword_result_t NetDll_WSAWaitForMultipleEvents_entry(dword_t num_events, do { result = xboxkrnl::xeNtWaitForMultipleObjectsEx( num_events, events, wait_all, 1, alertable, - timeout != -1 ? &timeout_wait : nullptr); + timeout != -1 ? &timeout_wait : nullptr, context); } while (result == X_STATUS_ALERTED); if (XFAILED(result)) { diff --git a/src/xenia/kernel/xam/xam_task.cc b/src/xenia/kernel/xam/xam_task.cc index 0a809b3fed..b2f1661c06 100644 --- a/src/xenia/kernel/xam/xam_task.cc +++ b/src/xenia/kernel/xam/xam_task.cc @@ -15,8 +15,10 @@ #include "xenia/kernel/util/shim_utils.h" #include "xenia/kernel/xam/xam_module.h" #include "xenia/kernel/xam/xam_private.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_modules.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_ob.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h" #include "xenia/kernel/xthread.h" -#include "xenia/xbox.h" #if XE_PLATFORM_WIN32 #include "xenia/base/platform_win.h" @@ -39,48 +41,152 @@ struct XTASK_MESSAGE { }; struct XAM_TASK_ARGS { - be value1; + be flags; be value2; // i think there might be another value here, it might be padding }; static_assert_size(XTASK_MESSAGE, 0x1C); +// handles defaults of various bitfields +static uint32_t unk_encode(uint32_t flags) { + auto v1 = flags; + if ((v1 & 0x1F) == 0) { + flags = v1 | 4; + } + auto v2 = flags; + if ((v2 & 0x700000) == 0) { + flags = v2 | 0x200000; + } + auto v3 = flags; + if ((v3 & 0x880000) == 0) { + flags = v3 | 0x800000; + } + auto v4 = flags; + if ((v4 & 
0x6000000) == 0) { + if ((v4 & 0x10) != 0) { + flags = v4 | 0x4000000; + } else { + // todo: a lot more happens here! + flags |= 0x4000000u; + } + } + return flags; +} +static uint32_t g_thread_incrementer = 0; + +static uint32_t get_cpunum_from_arg1(uint32_t dword8) { + auto v1 = dword8 & 0xF1000000; + switch (v1) { + case 0x10000000u: + return 5; + case 0x20000000u: + return (g_thread_incrementer++ & 1) + 3; + case 0x40000000u: + case 0x80000000: + return 2; + } + return 5; +} +/* + used for XamTaskSchedule, but on initialization Xam also calls this to create 8 different threads. + two seem to be for specific tasks, and the other 6 probably execute pooled tasks (task type 4, used by h4) +*/ +static X_KTHREAD* XamThreadCreate(PPCContext* context, uint32_t arg1_from_flags, + uint32_t callback, uint32_t message, XAM_TASK_ARGS* optional_args=nullptr) { + uint32_t dword8 = arg1_from_flags; + xe::be kthreaad_u; + + uint32_t create_result = xboxkrnl::ExCreateThread( + &kthreaad_u, 65536, nullptr, 0, callback, message, + XE_FLAG_THREAD_INITIALLY_SUSPENDED | XE_FLAG_RETURN_KTHREAD_PTR | + XE_FLAG_SYSTEM_THREAD); + + auto resulting_kthread = context->TranslateVirtual(kthreaad_u); + + if (XFAILED(create_result)) { + // Failed! 
+ XELOGE("XAM task creation failed: {:08X}", create_result); + xboxkrnl::xeKeLeaveCriticalRegion(context); + return nullptr; + } + uint32_t cpunum; + if (optional_args && optional_args->flags & 0x80000000) { + cpunum = optional_args->value2; + } else { + cpunum = get_cpunum_from_arg1(arg1_from_flags); + } + + if (arg1_from_flags & 0x80000) { + xboxkrnl::xeKeSetPriorityClassThread(context, resulting_kthread, 1); + } + + xboxkrnl::xeKeSetBasePriorityThread(context, resulting_kthread, 0); + uint32_t old_aff; + xboxkrnl::xeKeSetAffinityThread(context, resulting_kthread, 1U << cpunum, + &old_aff); + + xboxkrnl::xeKeResumeThread(context, resulting_kthread); + return resulting_kthread; +} + dword_result_t XamTaskSchedule_entry(lpvoid_t callback, pointer_t message, dword_t optional_ptr, lpdword_t handle_ptr, const ppc_context_t& ctx) { + xboxkrnl::xeKeEnterCriticalRegion(ctx); // TODO(gibbed): figure out what this is for *handle_ptr = 12345; + XAM_TASK_ARGS args{}; + if (optional_ptr) { auto option = ctx->TranslateVirtual(optional_ptr); - auto v1 = option->value1; - auto v2 = option->value2; //typically 0? + args = *option; + auto v1 = option->flags; + auto v2 = option->value2; // typically 0? XELOGI("Got xam task args: v1 = {:08X}, v2 = {:08X}", v1, v2); + } else { + args.flags = 0; + args.value2 = 0; } - uint32_t stack_size = kernel_state()->GetExecutableModule()->stack_size(); - - // Stack must be aligned to 16kb pages - stack_size = std::max((uint32_t)0x4000, ((stack_size + 0xFFF) & 0xFFFFF000)); + args.flags = unk_encode(args.flags); - auto thread = - object_ref(new XThread(kernel_state(), stack_size, 0, callback, - message.guest_address(), 0, true, false, kernel_state()->GetSystemProcess())); - - X_STATUS result = thread->Create(); + if ((args.flags & 0x80000) != 0 && (args.flags & 0x20000000) == 0 && + !xboxkrnl::xeXexCheckExecutablePrivilege(1)) { + args.flags &= 0xFFF7FFFF; + } - if (XFAILED(result)) { - // Failed! 
- XELOGE("XAM task creation failed: {:08X}", result); - return result; + uint32_t what = args.flags & 0x1F; + if (what == 2) { + uint32_t arg1_from_flags = + xe::rotate_right(1, 4) & 0xFF07FFFF | args.flags & 0xF80000; + + X_KTHREAD* resulting_kthread = + XamThreadCreate(ctx, arg1_from_flags, callback, message, &args); + if (!resulting_kthread) { + return X_STATUS_UNSUCCESSFUL; + } + + // this is done in the destructor of the thread param in xam + xboxkrnl::xeObDereferenceObject(ctx, + ctx->HostToGuestVirtual(resulting_kthread)); + + xboxkrnl::NtClose(XThread::FromGuest(resulting_kthread)->handle()); + } + else if (what == 4) { + //pooled task? goes on the xam worker threads i think + xenia_assert(false && "pooled tasks are unsupported"); + } + else { + xenia_assert(false); // unhandled task type } XELOGD("XAM task ({:08X}) scheduled asynchronously", callback.guest_address()); - + xboxkrnl::xeKeLeaveCriticalRegion(ctx); return X_STATUS_SUCCESS; } DECLARE_XAM_EXPORT2(XamTaskSchedule, kNone, kImplemented, kSketchy); diff --git a/src/xenia/kernel/xam/xam_ui.cc b/src/xenia/kernel/xam/xam_ui.cc index 7f274920c4..d1f3d31bab 100644 --- a/src/xenia/kernel/xam/xam_ui.cc +++ b/src/xenia/kernel/xam/xam_ui.cc @@ -94,7 +94,7 @@ X_RESULT xeXamDispatchDialog(T* dialog, return result; }; auto post = []() { - xe::threading::Sleep(std::chrono::milliseconds(100)); + // xe::threading::Sleep(std::chrono::milliseconds(100)); // Broadcast XN_SYS_UI = false kernel_state()->BroadcastNotification(0x9, false); }; @@ -138,7 +138,7 @@ X_RESULT xeXamDispatchDialogEx( return result; }; auto post = []() { - xe::threading::Sleep(std::chrono::milliseconds(100)); + //xe::threading::Sleep(std::chrono::milliseconds(100)); // Broadcast XN_SYS_UI = false kernel_state()->BroadcastNotification(0x9, false); }; @@ -162,7 +162,7 @@ X_RESULT xeXamDispatchHeadless(std::function run_callback, kernel_state()->BroadcastNotification(0x9, true); }; auto post = []() { - 
xe::threading::Sleep(std::chrono::milliseconds(100)); + // xe::threading::Sleep(std::chrono::milliseconds(100)); // Broadcast XN_SYS_UI = false kernel_state()->BroadcastNotification(0x9, false); }; @@ -186,7 +186,7 @@ X_RESULT xeXamDispatchHeadlessEx( kernel_state()->BroadcastNotification(0x9, true); }; auto post = []() { - xe::threading::Sleep(std::chrono::milliseconds(100)); + //xe::threading::Sleep(std::chrono::milliseconds(100)); // Broadcast XN_SYS_UI = false kernel_state()->BroadcastNotification(0x9, false); }; diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_audio.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_audio.cc index 0cf538ffae..f702fc9211 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_audio.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_audio.cc @@ -29,7 +29,7 @@ dword_result_t XAudioGetVoiceCategoryVolumeChangeMask_entry( lpunknown_t driver_ptr, lpdword_t out_ptr) { assert_true((driver_ptr.guest_address() & 0xFFFF0000) == 0x41550000); - xe::threading::MaybeYield(); + //xe::threading::MaybeYield(); // Checking these bits to see if any voice volume changed. // I think. 
diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_audio_xma.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_audio_xma.cc index b0ea18c508..8c7bbaecc5 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_audio_xma.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_audio_xma.cc @@ -379,7 +379,7 @@ dword_result_t XMADisableContext_entry(lpvoid_t context_ptr, dword_t wait) { DECLARE_XBOXKRNL_EXPORT2(XMADisableContext, kAudio, kImplemented, kHighFrequency); -dword_result_t XMABlockWhileInUse_entry(lpvoid_t context_ptr) { +dword_result_t XMABlockWhileInUse_entry(lpvoid_t context_ptr, const ppc_context_t& ppc_context) { do { XMA_CONTEXT_DATA context(context_ptr); if (!context.input_buffer_0_valid && !context.input_buffer_1_valid) { @@ -388,7 +388,8 @@ dword_result_t XMABlockWhileInUse_entry(lpvoid_t context_ptr) { if (!context.work_buffer_ptr) { break; } - xe::threading::Sleep(std::chrono::milliseconds(1)); + // xe::threading::Sleep(std::chrono::milliseconds(1)); + ppc_context->CheckInterrupt(); } while (true); return 0; } diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_debug.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_debug.cc index cc362476a0..ac7081aa42 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_debug.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_debug.cc @@ -37,6 +37,7 @@ void HandleSetThreadName(pointer_t record) { // TODO(benvanik): check record->number_parameters to make sure it's a // correct size. + #if 0 auto thread_info = reinterpret_cast(&record->exception_information[0]); @@ -70,7 +71,7 @@ void HandleSetThreadName(pointer_t record) { XELOGD("SetThreadName({}, {})", thread->thread_id(), name); thread->set_name(name); } - + #endif // TODO(benvanik): unwinding required here? 
} diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_io_info.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_io_info.cc index d042a723a2..fb4ced8d72 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_io_info.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_io_info.cc @@ -248,6 +248,11 @@ dword_result_t NtSetInformationFile_entry( } break; } + case XFileBasicInformation: { + XELOGE("SetInformationFile called with basic information; ignoring"); + out_length = 0; + break; + } default: // Unsupported, for now. assert_always(); diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc index 8cc80bc8fb..1b942bc007 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc @@ -16,6 +16,7 @@ #include "xenia/kernel/util/shim_utils.h" #include "xenia/kernel/xboxkrnl/xboxkrnl_memory.h" #include "xenia/kernel/xboxkrnl/xboxkrnl_private.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h" #include "xenia/xbox.h" DEFINE_bool( ignore_offset_for_ranged_allocations, false, @@ -63,11 +64,9 @@ uint32_t FromXdkProtectFlags(uint32_t protect) { return result; } -dword_result_t NtAllocateVirtualMemory_entry(lpdword_t base_addr_ptr, - lpdword_t region_size_ptr, - dword_t alloc_type, - dword_t protect_bits, - dword_t debug_memory) { +dword_result_t NtAllocateVirtualMemory_entry( + lpdword_t base_addr_ptr, lpdword_t region_size_ptr, dword_t alloc_type, + dword_t protect_bits, dword_t debug_memory, const ppc_context_t& context) { // NTSTATUS // _Inout_ PVOID *BaseAddress, // _Inout_ PSIZE_T RegionSize, @@ -79,7 +78,7 @@ dword_result_t NtAllocateVirtualMemory_entry(lpdword_t base_addr_ptr, assert_not_null(region_size_ptr); // Set to TRUE when allocation is from devkit memory area. - assert_true(debug_memory == 0); + // assert_true(debug_memory == 0); // This allocates memory from the kernel heap, which is initialized on startup // and shared by both the kernel implementation and user code. 
@@ -148,10 +147,13 @@ dword_result_t NtAllocateVirtualMemory_entry(lpdword_t base_addr_ptr, HeapAllocationInfo prev_alloc_info = {}; bool was_commited = false; + xboxkrnl::xeKeEnterCriticalRegion(context); + if (adjusted_base != 0) { heap = kernel_memory()->LookupHeap(adjusted_base); if (heap->page_size() != page_size) { // Specified the wrong page size for the wrong heap. + xboxkrnl::xeKeLeaveCriticalRegion(context); return X_STATUS_ACCESS_DENIED; } was_commited = heap->QueryRegionInfo(adjusted_base, &prev_alloc_info) && @@ -169,6 +171,7 @@ dword_result_t NtAllocateVirtualMemory_entry(lpdword_t base_addr_ptr, } if (!address) { // Failed - assume no memory available. + xboxkrnl::xeKeLeaveCriticalRegion(context); return X_STATUS_NO_MEMORY; } @@ -187,7 +190,7 @@ dword_result_t NtAllocateVirtualMemory_entry(lpdword_t base_addr_ptr, } } } - + xboxkrnl::xeKeLeaveCriticalRegion(context); XELOGD("NtAllocateVirtualMemory = {:08X}", address); // Stash back. @@ -263,7 +266,7 @@ dword_result_t NtFreeVirtualMemory_entry(lpdword_t base_addr_ptr, // _In_ BOOLEAN DebugMemory // Set to TRUE when freeing external devkit memory. 
- assert_true(debug_memory == 0); + // assert_true(debug_memory == 0); if (!base_addr_value) { return X_STATUS_MEMORY_NOT_ALLOCATED; @@ -286,6 +289,8 @@ dword_result_t NtFreeVirtualMemory_entry(lpdword_t base_addr_ptr, if (!result) { return X_STATUS_UNSUCCESSFUL; } + kernel_state()->object_table()->FlushGuestToHostMapping(base_addr_value, + region_size_value); *base_addr_ptr = base_addr_value; *region_size_ptr = region_size_value; @@ -473,7 +478,11 @@ void MmFreePhysicalMemory_entry(dword_t type, dword_t base_address) { assert_true((base_address & 0x1F) == 0); auto heap = kernel_state()->memory()->LookupHeap(base_address); - heap->Release(base_address); + uint32_t region_size = 0; + if (heap->Release(base_address, ®ion_size)) { + kernel_state()->object_table()->FlushGuestToHostMapping(base_address, + region_size); + } } DECLARE_XBOXKRNL_EXPORT1(MmFreePhysicalMemory, kMemory, kImplemented); @@ -616,6 +625,7 @@ DECLARE_XBOXKRNL_EXPORT1(MmQueryStatistics, kMemory, kImplemented); // https://msdn.microsoft.com/en-us/library/windows/hardware/ff554547(v=vs.85).aspx dword_result_t MmGetPhysicalAddress_entry(dword_t base_address) { + cpu::MFTBFence timing_fence{16}; // PHYSICAL_ADDRESS MmGetPhysicalAddress( // _In_ PVOID BaseAddress // ); @@ -651,7 +661,7 @@ struct X_POOL_ALLOC_HEADER { }; uint32_t xeAllocatePoolTypeWithTag(PPCContext* context, uint32_t size, - uint32_t tag, uint32_t zero) { + uint32_t tag, uint32_t pool_selector) { if (size <= 0xFD8) { uint32_t adjusted_size = size + sizeof(X_POOL_ALLOC_HEADER); @@ -665,7 +675,7 @@ uint32_t xeAllocatePoolTypeWithTag(PPCContext* context, uint32_t size, return addr + sizeof(X_POOL_ALLOC_HEADER); } else { return kernel_state()->memory()->SystemHeapAlloc(size, 4096); - } +} } dword_result_t ExAllocatePoolTypeWithTag_entry(dword_t size, dword_t tag, @@ -690,12 +700,18 @@ DECLARE_XBOXKRNL_EXPORT1(ExAllocatePool, kMemory, kImplemented); void xeFreePool(PPCContext* context, uint32_t base_address) { auto memory = 
context->kernel_state->memory(); - //if 4kb aligned, there is no pool header! + // if 4kb aligned, there is no pool header! + uint32_t released_region_size = 0; if ((base_address & (4096 - 1)) == 0) { - memory->SystemHeapFree(base_address); + memory->SystemHeapFree(base_address, &released_region_size); } else { - memory->SystemHeapFree(base_address - sizeof(X_POOL_ALLOC_HEADER)); + memory->SystemHeapFree(base_address - sizeof(X_POOL_ALLOC_HEADER), + &released_region_size); } + xenia_assert(released_region_size != 0); + + kernel_state()->object_table()->FlushGuestToHostMapping(base_address, + released_region_size); } void ExFreePool_entry(lpvoid_t base_address, const ppc_context_t& context) { @@ -757,15 +773,25 @@ dword_result_t MmCreateKernelStack_entry(dword_t stack_size, dword_t r4) { } DECLARE_XBOXKRNL_EXPORT1(MmCreateKernelStack, kMemory, kImplemented); -dword_result_t MmDeleteKernelStack_entry(lpvoid_t stack_base, - lpvoid_t stack_end) { +uint32_t xeMmDeleteKernelStack(uint32_t stack_base, uint32_t stack_end) { + uint32_t released_region_size = 0; // Release the stack (where stack_end is the low address) - if (kernel_memory()->LookupHeap(0x70000000)->Release(stack_end)) { + if (kernel_memory() + ->LookupHeap(0x70000000) + ->Release(stack_end, &released_region_size)) { + xenia_assert(released_region_size); + kernel_state()->object_table()->FlushGuestToHostMapping( + stack_end, released_region_size); return X_STATUS_SUCCESS; } return X_STATUS_UNSUCCESSFUL; } + +dword_result_t MmDeleteKernelStack_entry(lpvoid_t stack_base, + lpvoid_t stack_end) { + return xeMmDeleteKernelStack(stack_base, stack_end); +} DECLARE_XBOXKRNL_EXPORT1(MmDeleteKernelStack, kMemory, kImplemented); dword_result_t MmIsAddressValid_entry(dword_t address, diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.h b/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.h index 839aaababe..521c3d1d8c 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.h +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.h @@ 
-11,7 +11,7 @@ #define XENIA_KERNEL_XBOXKRNL_XBOXKRNL_MEMORY_H_ #include "xenia/kernel/util/shim_utils.h" -#include "xenia/xbox.h" +#include "xenia/kernel/kernel_guest_structures.h" namespace xe { namespace kernel { @@ -30,6 +30,7 @@ uint32_t xeAllocatePoolTypeWithTag(PPCContext* context, uint32_t size, void xeFreePool(PPCContext* context, uint32_t base_address); uint32_t xeMmCreateKernelStack(uint32_t size, uint32_t r4); +uint32_t xeMmDeleteKernelStack(uint32_t stack_base, uint32_t stack_end); } // namespace xboxkrnl } // namespace kernel } // namespace xe diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_misc.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_misc.cc index ca345ccd26..d5bf119d20 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_misc.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_misc.cc @@ -13,6 +13,7 @@ #include "xenia/kernel/kernel_state.h" #include "xenia/kernel/util/shim_utils.h" #include "xenia/kernel/xboxkrnl/xboxkrnl_private.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h" #include "xenia/kernel/xthread.h" #include "xenia/xbox.h" namespace xe { @@ -26,106 +27,44 @@ void KeEnableFpuExceptions_entry( // has to be saved to kthread, the irql changes, the machine state register is // changed to enable exceptions - X_KTHREAD* kthread = ctx->TranslateVirtual( - ctx->TranslateVirtualGPR(ctx->r[13])->prcb_data.current_thread); - kthread->fpu_exceptions_on = static_cast(ctx->r[3]) != 0; -} -DECLARE_XBOXKRNL_EXPORT1(KeEnableFpuExceptions, kNone, kStub); -#if 0 -struct __declspec(align(8)) fpucontext_ptr_t { - char unknown_data[158]; - __int16 field_9E; - char field_A0[2272]; - unsigned __int64 saved_FPSCR; - double saved_fpu_regs[32]; -}; -#pragma pack(push, 1) -struct __declspec(align(1)) r13_struct_t { - char field_0[6]; - __int16 field_6; - char field_8[2]; - char field_A; - char field_B[5]; - int field_10; - char field_14[315]; - char field_14F; - unsigned int field_150; - char field_154[427]; - char field_2FF; - char field_300; -}; -#pragma pack(pop) + 
auto old_irql = GetKPCR(ctx)->current_irql; + GetKPCR(ctx)->current_irql = 2; + GetKThread(ctx)->fpu_exceptions_on = static_cast(ctx->r[3]) != 0; -static uint64_t Do_mfmsr(ppc_context_t& ctx) { - auto frontend = ctx->thread_state->processor()->frontend(); - cpu::ppc::CheckGlobalLock( - ctx, reinterpret_cast(&xe::global_critical_region::mutex()), - reinterpret_cast(&frontend->builtins()->global_lock_count)); - return ctx->scratch; + xboxkrnl::xeKfLowerIrql(ctx, old_irql); } +DECLARE_XBOXKRNL_EXPORT1(KeEnableFpuExceptions, kNone, kStub); +//these are not properly implemented void KeSaveFloatingPointState_entry(ppc_context_t& ctx) { - xe::Memory* memory = ctx->thread_state->memory(); - unsigned int r13 = static_cast(ctx->r[13]); - - - - - r13_struct_t* st = memory->TranslateVirtual(r13); - /* - lwz r10, 0x150(r13) - lbz r11, 0xA(r13) - tweqi r10, 0 - twnei r11, 0 - */ - - unsigned int r10 = st->field_150; - unsigned char r11 = st->field_A; + auto kthread = GetKThread(ctx); - if (r10 == 0 || r11 != 0) { - //trap! 
+ for (unsigned i = 0; i < 32; ++i) { + kthread->fpu_context[i] = ctx->f[i]; } +} - //should do mfmsr here - - unsigned int r3 = xe::load_and_swap(&st->field_10); - - //too much work to do the mfmsr/mtmsr stuff right now - int to_store = -2049; - xe::store_and_swap(&st->field_10, (unsigned int)to_store); - xe::store_and_swap(&st->field_6, (short)to_store); - +DECLARE_XBOXKRNL_EXPORT1(KeSaveFloatingPointState, kNone, kImplemented); +void KeRestoreFloatingPointState_entry(ppc_context_t& ctx) { + auto kthread = GetKThread(ctx); - if (r3 != ~0u) { - fpucontext_ptr_t* fpucontext = - memory->TranslateVirtual(r3); - xe::store_and_swap(&fpucontext->saved_FPSCR, ctx->fpscr.value); - - for (unsigned int i = 0; i < 32; ++i) { - xe::store_and_swap(&fpucontext->saved_fpu_regs[i], ctx->f[i]); - } - xe::store_and_swap(&fpucontext->field_9E, 0xD7FF); + for (unsigned i = 0; i < 32; ++i) { + ctx->f[i] = kthread->fpu_context[i]; } - ctx->processor->backend()->SetGuestRoundingMode(ctx.value(), 0); - ctx->fpscr.value = 0; - st->field_A = 1; - - xe::store_and_swap(&st->field_10, r13 + 0x300); - ctx->r[3] = r3; - } -DECLARE_XBOXKRNL_EXPORT1(KeSaveFloatingPointState, kNone, kImplemented); -#endif +DECLARE_XBOXKRNL_EXPORT1(KeRestoreFloatingPointState, kNone, kImplemented); + static qword_result_t KeQueryInterruptTime_entry(const ppc_context_t& ctx) { auto kstate = ctx->kernel_state; uint32_t ts_bundle = kstate->GetKeTimestampBundle(); X_TIME_STAMP_BUNDLE* bundle = ctx->TranslateVirtual(ts_bundle); - return xe::load_and_swap(&bundle->interrupt_time); + uint64_t int_time = bundle->interrupt_time; + return int_time; } DECLARE_XBOXKRNL_EXPORT1(KeQueryInterruptTime, kNone, kImplemented); } // namespace xboxkrnl diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_module.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_module.cc index 78f82db06a..fd5f2ebd84 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_module.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_module.cc @@ -234,6 +234,10 @@ 
XboxkrnlModule::XboxkrnlModule(Emulator* emulator, KernelState* kernel_state) EXPORT_KVAR(ObDirectoryObjectType); EXPORT_KVAR(ObSymbolicLinkObjectType); EXPORT_KVAR(UsbdBootEnumerationDoneEvent); + EXPORT_KVAR(VdGlobalDevice); + EXPORT_KVAR(VdGlobalXamDevice); + EXPORT_KVAR(VdGpuClockInMHz); + EXPORT_KVAR(VdHSIOCalibrationLock); #undef EXPORT_KVAR } diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_modules.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_modules.cc index 607d81c8e1..1bf364a095 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_modules.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_modules.cc @@ -21,7 +21,7 @@ namespace xe { namespace kernel { namespace xboxkrnl { -dword_result_t XexCheckExecutablePrivilege_entry(dword_t privilege) { +uint32_t xeXexCheckExecutablePrivilege(uint32_t privilege) { // BOOL // DWORD Privilege @@ -39,6 +39,10 @@ dword_result_t XexCheckExecutablePrivilege_entry(dword_t privilege) { return (flags & mask) > 0; } + +dword_result_t XexCheckExecutablePrivilege_entry(dword_t privilege) { + return xeXexCheckExecutablePrivilege(privilege); +} DECLARE_XBOXKRNL_EXPORT1(XexCheckExecutablePrivilege, kModules, kImplemented); dword_result_t XexGetModuleHandle(std::string module_name, diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_modules.h b/src/xenia/kernel/xboxkrnl/xboxkrnl_modules.h index e6c9f9f53b..07dc7227d1 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_modules.h +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_modules.h @@ -18,7 +18,7 @@ namespace xboxkrnl { dword_result_t XexGetModuleHandle(std::string module_name, xe::be* hmodule_ptr); - +uint32_t xeXexCheckExecutablePrivilege(uint32_t privilege); } // namespace xboxkrnl } // namespace kernel } // namespace xe diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_ob.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_ob.cc index 1f07530613..643018c074 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_ob.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_ob.cc @@ -7,21 +7,187 @@ 
****************************************************************************** */ +#include "xenia/kernel/xboxkrnl/xboxkrnl_ob.h" #include "xenia/base/assert.h" +#include "xenia/base/atomic.h" #include "xenia/base/logging.h" +#include "xenia/base/utf8.h" +#include "xenia/cpu/processor.h" #include "xenia/kernel/kernel_state.h" #include "xenia/kernel/util/shim_utils.h" #include "xenia/kernel/xboxkrnl/xboxkrnl_private.h" -#include "xenia/kernel/xboxkrnl/xboxkrnl_ob.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h" #include "xenia/kernel/xobject.h" #include "xenia/kernel/xsemaphore.h" #include "xenia/kernel/xthread.h" #include "xenia/xbox.h" - namespace xe { namespace kernel { namespace xboxkrnl { +void xeObSplitName(X_ANSI_STRING input_string, + X_ANSI_STRING* leading_path_component, + X_ANSI_STRING* remaining_path_components, + PPCContext* context) { + xe::FatalError("xeObSplitName unimplemented!"); +} + +uint32_t xeObHashObjectName(X_ANSI_STRING* ElementName, PPCContext* context) { + uint8_t* current_character_ptr = + context->TranslateVirtual(ElementName->pointer); + uint32_t result = 0; + uint8_t* name_span_end = ¤t_character_ptr[ElementName->length]; + while (current_character_ptr < name_span_end) { + uint32_t current_character = *current_character_ptr++; + if (current_character < 0x80) { + result = (current_character | 0x20) + (result >> 1) + 3 * result; + } + } + return result % 0xD; +} + +uint32_t xeObCreateObject(X_OBJECT_TYPE* object_factory, + X_OBJECT_ATTRIBUTES* optional_attributes, + uint32_t object_size_without_headers, + uint32_t* out_object, cpu::ppc::PPCContext* context) { + cpu::ppc::PPCGprSnapshot savegplr; + context->TakeGPRSnapshot(&savegplr); + unsigned int resulting_header_flags = 0; + *out_object = 0; + unsigned int poolarg = 0; + + auto get_flags_and_poolarg_for_process_type = [&resulting_header_flags, + &poolarg, context]() { + uint32_t process_type = xboxkrnl::xeKeGetCurrentProcessType(context); + if (process_type == 
X_PROCTYPE_TITLE) { + poolarg = 1; + resulting_header_flags = OBJECT_HEADER_IS_TITLE_OBJECT; + } else { + poolarg = 2; + } + }; + if (!optional_attributes) { + get_flags_and_poolarg_for_process_type(); + } + + else if ((optional_attributes->attributes & 0x1000) == 0) { + if ((optional_attributes->attributes & 0x2000) != 0) { + poolarg = 2; + } else { + get_flags_and_poolarg_for_process_type(); + } + } else { + poolarg = 1; + resulting_header_flags = OBJECT_HEADER_IS_TITLE_OBJECT; + } + uint32_t desired_object_path_ptr; + + if (!optional_attributes || + (desired_object_path_ptr = optional_attributes->name_ptr) == 0) { + /* + object has no name provided, just allocate an object with a basic header + */ + uint64_t allocate_args[] = { + object_size_without_headers + sizeof(X_OBJECT_HEADER), + object_factory->pool_tag, poolarg}; + context->processor->Execute(context->thread_state(), + object_factory->allocate_proc, allocate_args, 3, + true); + + uint32_t allocation = static_cast(context->r[3]); + + if (allocation) { + X_OBJECT_HEADER* new_object_header = + context->TranslateVirtual(allocation); + new_object_header->pointer_count = 1; + new_object_header->handle_count = 0; + new_object_header->object_type_ptr = + context->HostToGuestVirtual(object_factory); + new_object_header->flags = resulting_header_flags; + + *out_object = allocation + sizeof(X_OBJECT_HEADER); + context->RestoreGPRSnapshot(&savegplr); + return X_STATUS_SUCCESS; + } + context->RestoreGPRSnapshot(&savegplr); + return X_STATUS_INSUFFICIENT_RESOURCES; + } + /* + iterate through all path components until we obtain the final one, which is + the objects actual name + */ + X_ANSI_STRING trailing_path_component; + X_ANSI_STRING remaining_path; + X_ANSI_STRING loaded_object_name; + loaded_object_name = + *context->TranslateVirtual(desired_object_path_ptr); + trailing_path_component.pointer = 0; + trailing_path_component.length = 0; + remaining_path = loaded_object_name; + while (remaining_path.length) { + 
xeObSplitName(remaining_path, &trailing_path_component, &remaining_path, + context); + if (remaining_path.length) { + if (*context->TranslateVirtual(remaining_path.pointer) == '\\') { + context->RestoreGPRSnapshot(&savegplr); + return X_STATUS_OBJECT_NAME_INVALID; + } + } + } + if (!trailing_path_component.length) { + context->RestoreGPRSnapshot(&savegplr); + return X_STATUS_OBJECT_NAME_INVALID; + } + // the object and its name are all created in a single allocation + + unsigned int aligned_object_size = + xe::align(object_size_without_headers, 4); + { + uint64_t allocate_args[] = { + trailing_path_component.length + aligned_object_size + + sizeof(X_OBJECT_HEADER_NAME_INFO) + sizeof(X_OBJECT_HEADER), + object_factory->pool_tag, poolarg}; + + context->processor->Execute(context->thread_state(), + object_factory->allocate_proc, allocate_args, 3, + true); + } + uint32_t named_object_allocation = static_cast(context->r[3]); + if (!named_object_allocation) { + context->RestoreGPRSnapshot(&savegplr); + return X_STATUS_INSUFFICIENT_RESOURCES; + } + + X_OBJECT_HEADER_NAME_INFO* nameinfo = + context->TranslateVirtual( + named_object_allocation); + nameinfo->next_in_directory = 0; + nameinfo->object_directory = 0; + + X_OBJECT_HEADER* header_for_named_object = + reinterpret_cast(nameinfo + 1); + + char* name_string_memory_for_named_object = &reinterpret_cast( + header_for_named_object + 1)[aligned_object_size]; + nameinfo->name.pointer = + context->HostToGuestVirtual(name_string_memory_for_named_object); + nameinfo->name.length = trailing_path_component.length; + nameinfo->name.maximum_length = trailing_path_component.length; + + memcpy(name_string_memory_for_named_object, + context->TranslateVirtual(trailing_path_component.pointer), + trailing_path_component.length); + + header_for_named_object->pointer_count = 1; + header_for_named_object->handle_count = 0; + header_for_named_object->object_type_ptr = + context->HostToGuestVirtual(object_factory); + 
header_for_named_object->flags = + resulting_header_flags & 0xFFFE | OBJECT_HEADER_FLAG_NAMED_OBJECT; + *out_object = context->HostToGuestVirtual(&header_for_named_object[1]); + context->RestoreGPRSnapshot(&savegplr); + return X_STATUS_SUCCESS; +} dword_result_t ObOpenObjectByName_entry(lpunknown_t obj_attributes_ptr, lpunknown_t object_type_ptr, dword_t unk, lpdword_t handle_ptr) { @@ -91,9 +257,8 @@ dword_result_t ObLookupAnyThreadByThreadId_entry(dword_t thread_id, } DECLARE_XBOXKRNL_EXPORT1(ObLookupAnyThreadByThreadId, kNone, kImplemented); -dword_result_t ObReferenceObjectByHandle_entry(dword_t handle, - dword_t object_type_ptr, - lpdword_t out_object_ptr) { +uint32_t xeObReferenceObjectByHandle(uint32_t handle, uint32_t object_type_ptr, + uint32_t* out_object_ptr) { // chrispy: gotta preinit this to 0, kernel is expected to do that *out_object_ptr = 0; @@ -116,14 +281,26 @@ dword_result_t ObReferenceObjectByHandle_entry(dword_t handle, } // Caller takes the reference. // It's released in ObDereferenceObject. 
- object->RetainHandle(); + object->Retain(); xenia_assert(native_ptr != 0); - if (out_object_ptr.guest_address()) { + if (out_object_ptr) { *out_object_ptr = native_ptr; } return X_STATUS_SUCCESS; } + +dword_result_t ObReferenceObjectByHandle_entry(dword_t handle, + dword_t object_type_ptr, + lpdword_t out_object_ptr) { + // chrispy: gotta preinit this to 0, kernel is expected to do that + uint32_t output = 0; + auto result = xeObReferenceObjectByHandle(handle, object_type_ptr, &output); + if (out_object_ptr.guest_address()) { + *out_object_ptr = output; + } + return result; +} DECLARE_XBOXKRNL_EXPORT1(ObReferenceObjectByHandle, kNone, kImplemented); dword_result_t ObReferenceObjectByName_entry(pointer_t name, @@ -146,42 +323,61 @@ dword_result_t ObReferenceObjectByName_entry(pointer_t name, } DECLARE_XBOXKRNL_EXPORT1(ObReferenceObjectByName, kNone, kImplemented); -void xeObDereferenceObject(PPCContext* context, uint32_t native_ptr) { - // Check if a dummy value from ObReferenceObjectByHandle. 
- if (native_ptr == 0xDEADF00D) { - return; - } - if (!native_ptr) { - XELOGE("Null native ptr in ObDereferenceObject!"); - return; - } +void xeObDereferenceObject(PPCContext* context, void* ptr) { + XObject* object = nullptr; + auto header = &reinterpret_cast(ptr)[-1]; - auto object = XObject::GetNativeObject( - kernel_state(), kernel_memory()->TranslateVirtual(native_ptr)); + if (header->object_type_ptr == + context->kernel_state->GetKernelGuestGlobals() + + offsetof32(KernelGuestGlobals, ExThreadObjectType)) { + object = XThread::FromGuest(reinterpret_cast(ptr)); + } else { + object = XObject::GetNativeObject(kernel_state(), ptr).release(); + } if (object) { - object->ReleaseHandle(); + object->Release(); } else { - if (native_ptr) { + if (ptr) { XELOGW("Unregistered guest object provided to ObDereferenceObject {:08X}", - native_ptr); + (uintptr_t)ptr); } } return; } +void xeObDereferenceObject(PPCContext* context, uint32_t native_ptr) { + // Check if a dummy value from ObReferenceObjectByHandle. + if (native_ptr == 0xDEADF00D) { + return; + } + if (!native_ptr) { + XELOGE("Null native ptr in ObDereferenceObject!"); + return; + } + xeObDereferenceObject(context, context->TranslateVirtual(native_ptr)); +} void ObDereferenceObject_entry(dword_t native_ptr, const ppc_context_t& ctx) { xeObDereferenceObject(ctx, native_ptr); } DECLARE_XBOXKRNL_EXPORT1(ObDereferenceObject, kNone, kImplemented); -void ObReferenceObject_entry(dword_t native_ptr) { - // Check if a dummy value from ObReferenceObjectByHandle. 
- auto object = XObject::GetNativeObject( - kernel_state(), kernel_memory()->TranslateVirtual(native_ptr)); +void ObReferenceObject_entry(dword_t native_ptr, const ppc_context_t& context) { + + XObject* object = nullptr; + auto hdr = context->TranslateVirtual(native_ptr); + auto header = &hdr[-1]; + + if (header->object_type_ptr == + context->kernel_state->GetKernelGuestGlobals() + + offsetof32(KernelGuestGlobals, ExThreadObjectType)) { + object = XThread::FromGuest(reinterpret_cast(hdr)); + } else { + object = XObject::GetNativeObject(kernel_state(), hdr).release(); + } if (object) { - object->RetainHandle(); + object->Retain(); } else { if (native_ptr) { XELOGW("Unregistered guest object provided to ObReferenceObject {:08X}", @@ -253,6 +449,21 @@ uint32_t NtClose(uint32_t handle) { dword_result_t NtClose_entry(dword_t handle) { return NtClose(handle); } DECLARE_XBOXKRNL_EXPORT1(NtClose, kNone, kImplemented); +dword_result_t ObCreateObject_entry( + pointer_t object_factory, + pointer_t optional_attributes, + dword_t object_size_sans_headers, lpdword_t out_object, + const ppc_context_t& context) { + uint32_t out_object_tmp = 0; + + uint32_t result = + xeObCreateObject(object_factory, optional_attributes, + object_size_sans_headers, &out_object_tmp, context); + *out_object = out_object_tmp; + return result; +} +DECLARE_XBOXKRNL_EXPORT1(ObCreateObject, kNone, kImplemented); + } // namespace xboxkrnl } // namespace kernel } // namespace xe diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_ob.h b/src/xenia/kernel/xboxkrnl/xboxkrnl_ob.h index 7d5afb4d95..c16d7db995 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_ob.h +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_ob.h @@ -11,12 +11,24 @@ #define XENIA_KERNEL_XBOXKRNL_XBOXKRNL_OB_H_ #include "xenia/kernel/util/shim_utils.h" - +#include "xenia/kernel/kernel_guest_structures.h" namespace xe { namespace kernel { namespace xboxkrnl { -void xeObDereferenceObject(PPCContext* context, uint32_t native_ptr); +uint32_t 
xeObReferenceObjectByHandle(uint32_t handle, uint32_t object_type_ptr, + uint32_t* out_object_ptr); +void xeObDereferenceObject(PPCContext* context, void* ptr); +void xeObDereferenceObject(PPCContext* context, uint32_t native_ptr); +void xeObSplitName(X_ANSI_STRING input_string, + X_ANSI_STRING* leading_path_component, + X_ANSI_STRING* remaining_path_components, + PPCContext* context); +uint32_t xeObHashObjectName(X_ANSI_STRING* ElementName, PPCContext* context); +uint32_t xeObCreateObject(X_OBJECT_TYPE* object_factory, + X_OBJECT_ATTRIBUTES* optional_attributes, + uint32_t object_size_sans_headers, + uint32_t* out_object, cpu::ppc::PPCContext* context); } // namespace xboxkrnl } // namespace kernel } // namespace xe diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_rtl.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_rtl.cc index b537eccaf7..d18a4fbe75 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_rtl.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_rtl.cc @@ -6,7 +6,6 @@ * Released under the BSD license - see LICENSE in the root for more details. 
* ****************************************************************************** */ - #include "xenia/kernel/xboxkrnl/xboxkrnl_rtl.h" #include @@ -18,6 +17,7 @@ #include "xenia/base/logging.h" #include "xenia/base/string.h" #include "xenia/base/threading.h" +#include "xenia/cpu/processor.h" #include "xenia/kernel/kernel_state.h" #include "xenia/kernel/user_module.h" #include "xenia/kernel/util/shim_utils.h" @@ -434,9 +434,11 @@ pointer_result_t RtlImageNtHeader_entry(lpvoid_t module) { } DECLARE_XBOXKRNL_EXPORT1(RtlImageNtHeader, kNone, kImplemented); // https://learn.microsoft.com/en-us/windows/win32/api/dbghelp/nf-dbghelp-imagedirectoryentrytodata -dword_result_t RtlImageDirectoryEntryToData_entry(dword_t Base, dword_t MappedAsImage_, - word_t DirectoryEntry, dword_t Size, - const ppc_context_t& ctx) { +dword_result_t RtlImageDirectoryEntryToData_entry(dword_t Base, + dword_t MappedAsImage_, + word_t DirectoryEntry, + dword_t Size, + const ppc_context_t& ctx) { bool MappedAsImage = static_cast(MappedAsImage_); uint32_t aligned_base = Base; if ((Base & 1) != 0) { @@ -510,39 +512,23 @@ pointer_result_t RtlImageXexHeaderField_entry(pointer_t xex_header, } DECLARE_XBOXKRNL_EXPORT1(RtlImageXexHeaderField, kNone, kImplemented); -// Unfortunately the Windows RTL_CRITICAL_SECTION object is bigger than the one -// on the 360 (32b vs. 28b). This means that we can't do in-place splatting of -// the critical sections. Also, the 360 never calls RtlDeleteCriticalSection -// so we can't clean up the native handles. -// -// Because of this, we reimplement it poorly. Hooray. -// We have 28b to work with so we need to be careful. We map our struct directly -// into guest memory, as it should be opaque and so long as our size is right -// the user code will never know. 
-// -// Ref: -// https://web.archive.org/web/20161214022602/https://msdn.microsoft.com/en-us/magazine/cc164040.aspx -// Ref: -// https://github.com/reactos/reactos/blob/master/sdk/lib/rtl/critical.c - -// This structure tries to match the one on the 360 as best I can figure out. -// Unfortunately some games have the critical sections pre-initialized in -// their embedded data and InitializeCriticalSection will never be called. -#pragma pack(push, 1) -struct X_RTL_CRITICAL_SECTION { - X_DISPATCH_HEADER header; - int32_t lock_count; // 0x10 -1 -> 0 on first lock - xe::be recursion_count; // 0x14 0 -> 1 on first lock - xe::be owning_thread; // 0x18 PKTHREAD 0 unless locked -}; -#pragma pack(pop) -static_assert_size(X_RTL_CRITICAL_SECTION, 28); +static void AssertValidCriticalSection(X_RTL_CRITICAL_SECTION* cs) { + // must be 4-byte aligned + xenia_assert(!(reinterpret_cast(cs) & 3)); + xenia_assert(cs->header.type == 1); + xenia_assert(cs->header.wait_list.blink_ptr != 0 && + cs->header.wait_list.flink_ptr != 0); + xenia_assert(cs->recursion_count >= 0); + xenia_assert(static_cast(cs->lock_count) > -2); +} void xeRtlInitializeCriticalSection(X_RTL_CRITICAL_SECTION* cs, uint32_t cs_ptr) { cs->header.type = 1; // EventSynchronizationObject (auto reset) cs->header.absolute = 0; // spin count div 256 cs->header.signal_state = 0; + // todo: context should be arg + util::XeInitializeListHead(&cs->header.wait_list, kernel_memory()); cs->lock_count = -1; cs->recursion_count = 0; cs->owning_thread = 0; @@ -566,6 +552,8 @@ X_STATUS xeRtlInitializeCriticalSectionAndSpinCount(X_RTL_CRITICAL_SECTION* cs, cs->header.type = 1; // EventSynchronizationObject (auto reset) cs->header.absolute = spin_count_div_256; cs->header.signal_state = 0; + // todo: context should be arg + util::XeInitializeListHead(&cs->header.wait_list, kernel_memory()); cs->lock_count = -1; cs->recursion_count = 0; cs->owning_thread = 0; @@ -589,62 +577,78 @@ static void CriticalSectionPrefetchW(const void* vp) { 
#endif } -void RtlEnterCriticalSection_entry(pointer_t cs) { - if (!cs.guest_address()) { +void xeRtlEnterCriticalSection(PPCContext* context, + X_RTL_CRITICAL_SECTION* cs) { + if (!cs) { XELOGE("Null critical section in RtlEnterCriticalSection!"); return; } + AssertValidCriticalSection(cs); CriticalSectionPrefetchW(&cs->lock_count); - uint32_t cur_thread = XThread::GetCurrentThread()->guest_object(); + uint32_t cur_thread = GetKPCR(context)->prcb_data.current_thread; uint32_t spin_count = cs->header.absolute * 256; if (cs->owning_thread == cur_thread) { // We already own the lock. - xe::atomic_inc(&cs->lock_count); + // xe::atomic_inc(&cs->lock_count); + context->processor->GuestAtomicIncrement32(context, &cs->lock_count); cs->recursion_count++; return; } // Spin loop while (spin_count--) { - if (xe::atomic_cas(-1, 0, &cs->lock_count)) { + if (context->processor->GuestAtomicCAS32(context, 0xFFFFFFFFU, 0, + &cs->lock_count)) { + assert_true(cs->owning_thread == 0); // Acquired. cs->owning_thread = cur_thread; cs->recursion_count = 1; return; + } else { + context->CheckInterrupt(); } } - if (xe::atomic_inc(&cs->lock_count) != 0) { + if (context->processor->GuestAtomicIncrement32(context, &cs->lock_count) + + 1 != + 0) { + xenia_assert(cs->owning_thread != cur_thread); // Create a full waiter. 
- xeKeWaitForSingleObject(reinterpret_cast(cs.host_address()), 8, 0, 0, - nullptr); + xeKeWaitForSingleObject(context, &cs->header, 8, 0, 0, nullptr); } assert_true(cs->owning_thread == 0); cs->owning_thread = cur_thread; cs->recursion_count = 1; } + +void RtlEnterCriticalSection_entry(pointer_t cs, + const ppc_context_t& context) { + return xeRtlEnterCriticalSection(context, cs); +} DECLARE_XBOXKRNL_EXPORT2(RtlEnterCriticalSection, kNone, kImplemented, kHighFrequency); -dword_result_t RtlTryEnterCriticalSection_entry( - pointer_t cs) { - if (!cs.guest_address()) { +uint32_t xeRtlTryEnterCriticalSection(PPCContext* context, + X_RTL_CRITICAL_SECTION* cs) { + if (!cs) { XELOGE("Null critical section in RtlTryEnterCriticalSection!"); return 1; // pretend we got the critical section. } + AssertValidCriticalSection(cs); CriticalSectionPrefetchW(&cs->lock_count); - uint32_t thread = XThread::GetCurrentThread()->guest_object(); + uint32_t thread = GetKPCR(context)->prcb_data.current_thread; - if (xe::atomic_cas(-1, 0, &cs->lock_count)) { + if (context->processor->GuestAtomicCAS32(context, 0xFFFFFFFFU, 0, + &cs->lock_count)) { // Able to steal the lock right away. cs->owning_thread = thread; cs->recursion_count = 1; return 1; } else if (cs->owning_thread == thread) { // Already own the lock. - xe::atomic_inc(&cs->lock_count); + context->processor->GuestAtomicIncrement32(context, &cs->lock_count); ++cs->recursion_count; return 1; } @@ -652,32 +656,44 @@ dword_result_t RtlTryEnterCriticalSection_entry( // Failed to acquire lock. 
return 0; } + +dword_result_t RtlTryEnterCriticalSection_entry( + pointer_t cs, const ppc_context_t& context) { + return xeRtlTryEnterCriticalSection(context, cs); +} DECLARE_XBOXKRNL_EXPORT2(RtlTryEnterCriticalSection, kNone, kImplemented, kHighFrequency); -void RtlLeaveCriticalSection_entry(pointer_t cs) { - if (!cs.guest_address()) { +void xeRtlLeaveCriticalSection(PPCContext* context, + X_RTL_CRITICAL_SECTION* cs) { + if (!cs) { XELOGE("Null critical section in RtlLeaveCriticalSection!"); return; } - assert_true(cs->owning_thread == XThread::GetCurrentThread()->guest_object()); + AssertValidCriticalSection(cs); + assert_true(cs->owning_thread == GetKPCR(context)->prcb_data.current_thread); // Drop recursion count - if it isn't zero we still have the lock. assert_true(cs->recursion_count > 0); if (--cs->recursion_count != 0) { - assert_true(cs->recursion_count >= 0); - - xe::atomic_dec(&cs->lock_count); + assert_true(cs->recursion_count > 0); + context->processor->GuestAtomicDecrement32(context, &cs->lock_count); return; } // Not owned - unlock! cs->owning_thread = 0; - if (xe::atomic_dec(&cs->lock_count) != -1) { + if ((context->processor->GuestAtomicDecrement32(context, &cs->lock_count) - + 1) != -1) { // There were waiters - wake one of them. - xeKeSetEvent(reinterpret_cast(cs.host_address()), 1, 0); + xeKeSetEvent(context, reinterpret_cast(cs), 1, 0); } } + +void RtlLeaveCriticalSection_entry(pointer_t cs, + const ppc_context_t& context) { + xeRtlLeaveCriticalSection(context, cs); +} DECLARE_XBOXKRNL_EXPORT2(RtlLeaveCriticalSection, kNone, kImplemented, kHighFrequency); @@ -830,7 +846,7 @@ static void RtlRip_entry(const ppc_context_t& ctx) { XELOGE("RtlRip called, arg1 = {}, arg2 = {}\n", msg_str1, msg_str2); - //we should break here... not sure what to do exactly + // we should break here... 
not sure what to do exactly } DECLARE_XBOXKRNL_EXPORT1(RtlRip, kNone, kImportant); diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_rtl.h b/src/xenia/kernel/xboxkrnl/xboxkrnl_rtl.h index 755bc5e120..1f2d0847f1 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_rtl.h +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_rtl.h @@ -10,20 +10,23 @@ #ifndef XENIA_KERNEL_XBOXKRNL_XBOXKRNL_RTL_H_ #define XENIA_KERNEL_XBOXKRNL_XBOXKRNL_RTL_H_ -#include "xenia/xbox.h" +#include "xenia/kernel/kernel_guest_structures.h" namespace xe { namespace kernel { namespace xboxkrnl { -struct X_RTL_CRITICAL_SECTION; - void xeRtlInitializeCriticalSection(X_RTL_CRITICAL_SECTION* cs, uint32_t cs_ptr); X_STATUS xeRtlInitializeCriticalSectionAndSpinCount(X_RTL_CRITICAL_SECTION* cs, uint32_t cs_ptr, uint32_t spin_count); - +void xeRtlEnterCriticalSection(cpu::ppc::PPCContext* context, + X_RTL_CRITICAL_SECTION* cs); +uint32_t xeRtlTryEnterCriticalSection(cpu::ppc::PPCContext* context, + X_RTL_CRITICAL_SECTION* cs); +void xeRtlLeaveCriticalSection(cpu::ppc::PPCContext* context, + X_RTL_CRITICAL_SECTION* cs); } // namespace xboxkrnl } // namespace kernel } // namespace xe diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc index 511f25bd9b..4804516119 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc @@ -113,11 +113,11 @@ uint32_t ExCreateThread(xe::be* handle_ptr, uint32_t stack_size, auto kernel_state_var = kernel_state(); // xenia_assert((creation_flags & 2) == 0); // creating system thread? - if (creation_flags & 2) { + if (creation_flags & XE_FLAG_SYSTEM_THREAD) { XELOGE("Guest is creating a system thread!"); } - uint32_t thread_process = (creation_flags & 2) + uint32_t thread_process = (creation_flags & XE_FLAG_SYSTEM_THREAD) ? 
kernel_state_var->GetSystemProcess() : kernel_state_var->GetTitleProcess(); X_KPROCESS* target_process = @@ -146,8 +146,9 @@ uint32_t ExCreateThread(xe::be* handle_ptr, uint32_t stack_size, if (XSUCCEEDED(result)) { if (handle_ptr) { - if (creation_flags & 0x80) { + if (creation_flags & XE_FLAG_RETURN_KTHREAD_PTR) { *handle_ptr = thread->guest_object(); + thread->Retain(); } else { *handle_ptr = thread->handle(); } @@ -191,7 +192,10 @@ uint32_t NtResumeThread(uint32_t handle, uint32_t* suspend_count_ptr) { if (thread) { if (thread->type() == XObject::Type::Thread) { - result = thread->Resume(&suspend_count); + // result = thread->Resume(&suspend_count); + result = X_STATUS_SUCCESS; + suspend_count = xeKeResumeThread(cpu::ThreadState::GetContext(), + thread->guest_object()); } else { return X_STATUS_OBJECT_TYPE_MISMATCH; } @@ -221,16 +225,9 @@ dword_result_t NtResumeThread_entry(dword_t handle, } DECLARE_XBOXKRNL_EXPORT1(NtResumeThread, kThreading, kImplemented); -dword_result_t KeResumeThread_entry(pointer_t thread_ptr) { - X_STATUS result = X_STATUS_SUCCESS; - auto thread = XObject::GetNativeObject(kernel_state(), thread_ptr); - if (thread) { - result = thread->Resume(); - } else { - result = X_STATUS_INVALID_HANDLE; - } - - return result; +dword_result_t KeResumeThread_entry(pointer_t thread_ptr, + const ppc_context_t& context) { + return xeKeResumeThread(context, thread_ptr); } DECLARE_XBOXKRNL_EXPORT1(KeResumeThread, kThreading, kImplemented); @@ -243,11 +240,12 @@ dword_result_t NtSuspendThread_entry(dword_t handle, auto thread = kernel_state()->object_table()->LookupObject(handle); if (thread) { if (thread->type() == XObject::Type::Thread) { - auto current_pcr = context->TranslateVirtualGPR(context->r[13]); + auto current_pcr = GetKPCR(context); if (current_pcr->prcb_data.current_thread == thread->guest_object() || !thread->guest_object()->terminated) { - result = thread->Suspend(&suspend_count); + suspend_count = + xeKeSuspendThread(context, 
thread->guest_object()); } else { return X_STATUS_THREAD_IS_TERMINATING; } @@ -268,18 +266,7 @@ DECLARE_XBOXKRNL_EXPORT1(NtSuspendThread, kThreading, kImplemented); dword_result_t KeSuspendThread_entry(pointer_t kthread, const ppc_context_t& context) { - auto thread = - XObject::GetNativeObject(context->kernel_state, kthread); - uint32_t suspend_count_out = 0; - - if (thread) { - suspend_count_out = thread->suspend_count(); - - uint32_t discarded_new_suspend_count = 0; - thread->Suspend(&discarded_new_suspend_count); - } - - return suspend_count_out; + return xeKeSuspendThread(context, kthread); } DECLARE_XBOXKRNL_EXPORT1(KeSuspendThread, kThreading, kImplemented); @@ -290,7 +277,7 @@ void KeSetCurrentStackPointers_entry(lpvoid_t stack_ptr, const ppc_context_t& context) { auto current_thread = XThread::GetCurrentThread(); - auto pcr = context->TranslateVirtualGPR(context->r[13]); + auto pcr = GetKPCR(context); // also supposed to load msr mask, and the current msr with that, and store thread->stack_alloc_base = stack_alloc_base.value(); thread->stack_base = stack_base.value(); @@ -310,8 +297,10 @@ void KeSetCurrentStackPointers_entry(lpvoid_t stack_ptr, DECLARE_XBOXKRNL_EXPORT2(KeSetCurrentStackPointers, kThreading, kImplemented, kHighFrequency); -dword_result_t KeSetAffinityThread_entry(lpvoid_t thread_ptr, dword_t affinity, - lpdword_t previous_affinity_ptr) { +dword_result_t KeSetAffinityThread_entry(pointer_t thread_ptr, + dword_t affinity, + lpdword_t previous_affinity_ptr, + const ppc_context_t& context) { // The Xbox 360, according to disassembly of KeSetAffinityThread, unlike // Windows NT, stores the previous affinity via the pointer provided as an // argument, not in the return value - the return value is used for the @@ -319,66 +308,49 @@ dword_result_t KeSetAffinityThread_entry(lpvoid_t thread_ptr, dword_t affinity, if (!affinity) { return X_STATUS_INVALID_PARAMETER; } - auto thread = XObject::GetNativeObject(kernel_state(), thread_ptr); - if (thread) { 
- if (previous_affinity_ptr) { - *previous_affinity_ptr = uint32_t(1) << thread->active_cpu(); - } - thread->SetAffinity(affinity); - } + uint32_t prev_affinity = 0; + xeKeSetAffinityThread(context, thread_ptr, affinity, &prev_affinity); + *previous_affinity_ptr = prev_affinity; return X_STATUS_SUCCESS; } DECLARE_XBOXKRNL_EXPORT1(KeSetAffinityThread, kThreading, kImplemented); -dword_result_t KeQueryBasePriorityThread_entry(lpvoid_t thread_ptr) { - int32_t priority = 0; - - auto thread = XObject::GetNativeObject(kernel_state(), thread_ptr); - if (thread) { - priority = thread->QueryPriority(); - } - - return priority; +dword_result_t KeQueryBasePriorityThread_entry(pointer_t thread_ptr, + const ppc_context_t& context) { + return xeKeQueryBasePriorityThread(context, thread_ptr); } DECLARE_XBOXKRNL_EXPORT1(KeQueryBasePriorityThread, kThreading, kImplemented); -dword_result_t KeSetBasePriorityThread_entry(lpvoid_t thread_ptr, - dword_t increment) { - int32_t prev_priority = 0; - auto thread = XObject::GetNativeObject(kernel_state(), thread_ptr); - - if (thread) { - prev_priority = thread->QueryPriority(); - thread->SetPriority(increment); - } - - return prev_priority; +dword_result_t KeSetBasePriorityThread_entry(pointer_t thread_ptr, + dword_t increment, + const ppc_context_t& context) { + return xeKeSetBasePriorityThread(context, thread_ptr, increment); } DECLARE_XBOXKRNL_EXPORT1(KeSetBasePriorityThread, kThreading, kImplemented); dword_result_t KeSetDisableBoostThread_entry(pointer_t thread_ptr, - dword_t disabled) { - // supposed to acquire dispatcher lock + a prcb lock, all just to exchange - // this char there is no other special behavior going on in this function, - // just acquiring locks to do this exchange - auto old_boost_disabled = - reinterpret_cast(&thread_ptr->boost_disabled) - ->exchange(static_cast(disabled)); - - return old_boost_disabled; + dword_t disabled, + const ppc_context_t& context) { + return xeKeSetDisableBoostThread(context, thread_ptr, 
disabled); } DECLARE_XBOXKRNL_EXPORT1(KeSetDisableBoostThread, kThreading, kImplemented); +dword_result_t KeSetPriorityThread_entry(pointer_t thread_ptr, + dword_t priority, + const ppc_context_t& context) { + return xeKeSetPriorityThread(context, thread_ptr, priority); +} + +DECLARE_XBOXKRNL_EXPORT1(KeSetPriorityThread, kThreading, kImplemented); + uint32_t xeKeGetCurrentProcessType(cpu::ppc::PPCContext* context) { - auto pcr = context->TranslateVirtualGPR(context->r[13]); + auto pcr = GetKPCR(context); - if (!pcr->prcb_data.dpc_active) - return context->TranslateVirtual(pcr->prcb_data.current_thread) - ->process_type; + if (!pcr->prcb_data.dpc_active) return GetKThread(context, pcr)->process_type; return pcr->processtype_value_in_dpc; } void xeKeSetCurrentProcessType(uint32_t type, cpu::ppc::PPCContext* context) { - auto pcr = context->TranslateVirtualGPR(context->r[13]); + auto pcr = GetKPCR(context); if (pcr->prcb_data.dpc_active) { pcr->processtype_value_in_dpc = type; } @@ -395,169 +367,126 @@ void KeSetCurrentProcessType_entry(dword_t type, const ppc_context_t& context) { } DECLARE_XBOXKRNL_EXPORT1(KeSetCurrentProcessType, kThreading, kImplemented); -dword_result_t KeQueryPerformanceFrequency_entry() { - uint64_t result = Clock::guest_tick_frequency(); - return static_cast(result); -} +dword_result_t KeQueryPerformanceFrequency_entry() { return 50000000ULL; } DECLARE_XBOXKRNL_EXPORT2(KeQueryPerformanceFrequency, kThreading, kImplemented, kHighFrequency); uint32_t KeDelayExecutionThread(uint32_t processor_mode, uint32_t alertable, uint64_t* interval_ptr, cpu::ppc::PPCContext* ctx) { - XThread* thread = XThread::GetCurrentThread(); - - if (alertable) { - X_STATUS stat = xeProcessUserApcs(ctx); - if (stat == X_STATUS_USER_APC) { - return stat; - } - } - X_STATUS result = thread->Delay(processor_mode, alertable, *interval_ptr); - - if (result == X_STATUS_USER_APC) { - result = xeProcessUserApcs(ctx); - if (result == X_STATUS_USER_APC) { - return result; - } - } 
- - return result; + return xeKeDelayExecutionThread(ctx, processor_mode, alertable, + (int64_t*)interval_ptr); } dword_result_t KeDelayExecutionThread_entry(dword_t processor_mode, dword_t alertable, lpqword_t interval_ptr, const ppc_context_t& context) { - uint64_t interval = interval_ptr ? static_cast(*interval_ptr) : 0u; - return KeDelayExecutionThread(processor_mode, alertable, - interval_ptr ? &interval : nullptr, context); + uint64_t interval = *interval_ptr; + return KeDelayExecutionThread(processor_mode, alertable, &interval, context); } DECLARE_XBOXKRNL_EXPORT3(KeDelayExecutionThread, kThreading, kImplemented, kBlocking, kHighFrequency); -dword_result_t NtYieldExecution_entry() { - auto thread = XThread::GetCurrentThread(); - thread->Delay(0, 0, 0); - return 0; +dword_result_t NtYieldExecution_entry(const ppc_context_t& context) { + return xeNtYieldExecution(context); } DECLARE_XBOXKRNL_EXPORT2(NtYieldExecution, kThreading, kImplemented, kHighFrequency); void KeQuerySystemTime_entry(lpqword_t time_ptr, const ppc_context_t& ctx) { - if (time_ptr) { - // update the timestamp bundle to the time we queried. - // this is a race, but i don't of any sw that requires it, it just seems - // like we ought to keep it consistent with ketimestampbundle in case - // something uses this function, but also reads it directly - uint32_t ts_bundle = ctx->kernel_state->GetKeTimestampBundle(); - uint64_t time = Clock::QueryGuestSystemTime(); - // todo: cmpxchg? 
- xe::store_and_swap( - &ctx->TranslateVirtual(ts_bundle)->system_time, - time); - *time_ptr = time; - } + uint32_t ts_bundle = ctx->kernel_state->GetKeTimestampBundle(); + + *time_ptr = + ctx->TranslateVirtual(ts_bundle)->system_time; } DECLARE_XBOXKRNL_EXPORT1(KeQuerySystemTime, kThreading, kImplemented); +static void do_tls_asserts(PPCContext* context) { + auto kpcr = GetKPCR(context); + auto kthread = GetKThread(context, kpcr); + + xenia_assert(kpcr->tls_ptr == kthread->tls_address); + xenia_assert(kpcr->tls_ptr != 0); +} + // https://msdn.microsoft.com/en-us/library/ms686801 -dword_result_t KeTlsAlloc_entry() { - uint32_t slot = kernel_state()->AllocateTLS(); - XThread::GetCurrentThread()->SetTLSValue(slot, 0); +dword_result_t KeTlsAlloc_entry(const ppc_context_t& context) { + do_tls_asserts(context); + uint32_t slot = kernel_state()->AllocateTLS(context); return slot; } DECLARE_XBOXKRNL_EXPORT1(KeTlsAlloc, kThreading, kImplemented); // https://msdn.microsoft.com/en-us/library/ms686804 -dword_result_t KeTlsFree_entry(dword_t tls_index) { +dword_result_t KeTlsFree_entry(dword_t tls_index, + const ppc_context_t& context) { + do_tls_asserts(context); if (tls_index == X_TLS_OUT_OF_INDEXES) { return 0; } - kernel_state()->FreeTLS(tls_index); + kernel_state()->FreeTLS(context, tls_index); return 1; } DECLARE_XBOXKRNL_EXPORT1(KeTlsFree, kThreading, kImplemented); // https://msdn.microsoft.com/en-us/library/ms686812 -dword_result_t KeTlsGetValue_entry(dword_t tls_index) { +dword_result_t KeTlsGetValue_entry(dword_t tls_index, + const ppc_context_t& context) { + do_tls_asserts(context); + context->AssertCurrent(); // xboxkrnl doesn't actually have an error branch - it always succeeds, even // if it overflows the TLS. 
- uint32_t value = 0; - if (XThread::GetCurrentThread()->GetTLSValue(tls_index, &value)) { - return value; - } - - return 0; + return static_cast( + *(context->TranslateVirtualBE(GetKPCR(context)->tls_ptr) - + static_cast(tls_index) - 1)); } DECLARE_XBOXKRNL_EXPORT2(KeTlsGetValue, kThreading, kImplemented, kHighFrequency); // https://msdn.microsoft.com/en-us/library/ms686818 -dword_result_t KeTlsSetValue_entry(dword_t tls_index, dword_t tls_value) { +dword_result_t KeTlsSetValue_entry(dword_t tls_index, dword_t tls_value, + const ppc_context_t& context) { + do_tls_asserts(context); + context->AssertCurrent(); // xboxkrnl doesn't actually have an error branch - it always succeeds, even // if it overflows the TLS. - if (XThread::GetCurrentThread()->SetTLSValue(tls_index, tls_value)) { - return 1; - } - - return 0; + *(context->TranslateVirtualBE(GetKPCR(context)->tls_ptr) - + tls_index - 1) = (uint32_t)tls_value; + return 1; } DECLARE_XBOXKRNL_EXPORT1(KeTlsSetValue, kThreading, kImplemented); void KeInitializeEvent_entry(pointer_t event_ptr, dword_t event_type, - dword_t initial_state) { + dword_t initial_state, + const ppc_context_t& context) { event_ptr.Zero(); event_ptr->header.type = event_type; event_ptr->header.signal_state = (uint32_t)initial_state; - auto ev = - XObject::GetNativeObject(kernel_state(), event_ptr, event_type); - if (!ev) { - assert_always(); - return; - } + util::XeInitializeListHead(&event_ptr->header.wait_list, context); } DECLARE_XBOXKRNL_EXPORT1(KeInitializeEvent, kThreading, kImplemented); -uint32_t xeKeSetEvent(X_KEVENT* event_ptr, uint32_t increment, uint32_t wait) { - auto ev = XObject::GetNativeObject(kernel_state(), event_ptr); - if (!ev) { - assert_always(); - return 0; - } - - return ev->Set(increment, !!wait); -} - dword_result_t KeSetEvent_entry(pointer_t event_ptr, - dword_t increment, dword_t wait) { - return xeKeSetEvent(event_ptr, increment, wait); + dword_t increment, dword_t wait, + const ppc_context_t& context) { + return 
xeKeSetEvent(context, event_ptr, increment, wait); } DECLARE_XBOXKRNL_EXPORT2(KeSetEvent, kThreading, kImplemented, kHighFrequency); dword_result_t KePulseEvent_entry(pointer_t event_ptr, - dword_t increment, dword_t wait) { - auto ev = XObject::GetNativeObject(kernel_state(), event_ptr); - if (!ev) { - assert_always(); - return 0; - } - - return ev->Pulse(increment, !!wait); + dword_t increment, dword_t wait, + const ppc_context_t& context) { + return xeKePulseEvent(context, event_ptr, increment, wait); } DECLARE_XBOXKRNL_EXPORT2(KePulseEvent, kThreading, kImplemented, kHighFrequency); -dword_result_t KeResetEvent_entry(pointer_t event_ptr) { - auto ev = XObject::GetNativeObject(kernel_state(), event_ptr); - if (!ev) { - assert_always(); - return 0; - } - - return ev->Reset(); +dword_result_t KeResetEvent_entry(pointer_t event_ptr, + const ppc_context_t& context) { + return xeKeResetEvent(context, event_ptr); } DECLARE_XBOXKRNL_EXPORT1(KeResetEvent, kThreading, kImplemented); @@ -580,7 +509,7 @@ dword_result_t NtCreateEvent_entry( } auto ev = object_ref(new XEvent(kernel_state())); - ev->Initialize(!event_type, !!initial_state); + ev->Initialize(event_type == 0, !!initial_state); // obj_attributes may have a name inside of it, if != NULL. 
if (obj_attributes_ptr) { @@ -603,7 +532,8 @@ uint32_t xeNtSetEvent(uint32_t handle, xe::be* previous_state_ptr) { if (ev->type() != XObject::Type::Event) { return X_STATUS_OBJECT_TYPE_MISMATCH; } - int32_t was_signalled = ev->Set(0, false); + int32_t was_signalled = xeKeSetEvent(cpu::ThreadState::GetContext(), + ev->guest_object(), 1, 0); if (previous_state_ptr) { *previous_state_ptr = static_cast(was_signalled); } @@ -619,40 +549,43 @@ dword_result_t NtSetEvent_entry(dword_t handle, lpdword_t previous_state_ptr) { } DECLARE_XBOXKRNL_EXPORT2(NtSetEvent, kThreading, kImplemented, kHighFrequency); -dword_result_t NtPulseEvent_entry(dword_t handle, - lpdword_t previous_state_ptr) { - X_STATUS result = X_STATUS_SUCCESS; - +dword_result_t NtPulseEvent_entry(dword_t handle, lpdword_t previous_state_ptr, + const ppc_context_t& context) { auto ev = kernel_state()->object_table()->LookupObject(handle); - if (ev) { - int32_t was_signalled = ev->Pulse(0, false); - if (previous_state_ptr) { - *previous_state_ptr = static_cast(was_signalled); - } - } else { - result = X_STATUS_INVALID_HANDLE; + auto prev_state = xeKePulseEvent(context, ev->guest_object(), 1, 0); + if (previous_state_ptr) { + *previous_state_ptr = prev_state; } - return result; + return X_STATUS_SUCCESS; } DECLARE_XBOXKRNL_EXPORT2(NtPulseEvent, kThreading, kImplemented, kHighFrequency); -dword_result_t NtQueryEvent_entry(dword_t handle, lpdword_t out_struc) { +dword_result_t NtQueryEvent_entry(dword_t handle, + pointer_t out_struc, + const ppc_context_t& context) { X_STATUS result = X_STATUS_SUCCESS; - auto ev = kernel_state()->object_table()->LookupObject(handle); - if (ev) { - uint32_t type_tmp, state_tmp; - - ev->Query(&type_tmp, &state_tmp); + xenia_assert(false); + auto kernel = context->kernel_state; + uint32_t object_ptr = 0; + result = kernel->ReferenceObjectByHandle( + context, handle, + kernel->GetKernelGuestGlobals() + + offsetof(KernelGuestGlobals, ExEventObjectType), + &object_ptr); - out_struc[0] 
= type_tmp; - out_struc[1] = state_tmp; - } else { - result = X_STATUS_INVALID_HANDLE; + if (result < X_STATUS_SUCCESS) { + return result; } + X_KEVENT* event = context->TranslateVirtual(object_ptr); + int32_t local_signalstate = event->header.signal_state; + auto local_type = event->header.type; + kernel->DereferenceObject(context, object_ptr); + out_struc->type = local_type; + out_struc->signal_state = local_signalstate; - return result; + return X_STATUS_SUCCESS; } DECLARE_XBOXKRNL_EXPORT2(NtQueryEvent, kThreading, kImplemented, kHighFrequency); @@ -661,12 +594,12 @@ uint32_t xeNtClearEvent(uint32_t handle) { auto ev = kernel_state()->object_table()->LookupObject(handle); if (ev) { - ev->Reset(); + ev->guest_object()->header.signal_state = 0; + + return result; } else { - result = X_STATUS_INVALID_HANDLE; + return X_STATUS_INVALID_HANDLE; } - - return result; } dword_result_t NtClearEvent_entry(dword_t handle) { @@ -675,35 +608,25 @@ dword_result_t NtClearEvent_entry(dword_t handle) { DECLARE_XBOXKRNL_EXPORT2(NtClearEvent, kThreading, kImplemented, kHighFrequency); +void xeKeInitializeSemaphore(X_KSEMAPHORE* semaphore, int count, int limit) { + semaphore->header.type = 5; // SemaphoreObject + semaphore->header.signal_state = (uint32_t)count; + semaphore->limit = (uint32_t)limit; + util::XeInitializeListHead( + + &semaphore->header.wait_list, kernel_memory()); +} // https://msdn.microsoft.com/en-us/library/windows/hardware/ff552150(v=vs.85).aspx void KeInitializeSemaphore_entry(pointer_t semaphore_ptr, dword_t count, dword_t limit) { - semaphore_ptr->header.type = 5; // SemaphoreObject - semaphore_ptr->header.signal_state = (uint32_t)count; - semaphore_ptr->limit = (uint32_t)limit; - - auto sem = XObject::GetNativeObject(kernel_state(), semaphore_ptr, - 5 /* SemaphoreObject */); - if (!sem) { - assert_always(); - return; - } + xeKeInitializeSemaphore(semaphore_ptr, count, limit); } DECLARE_XBOXKRNL_EXPORT1(KeInitializeSemaphore, kThreading, kImplemented); 
uint32_t xeKeReleaseSemaphore(X_KSEMAPHORE* semaphore_ptr, uint32_t increment, uint32_t adjustment, uint32_t wait) { - auto sem = - XObject::GetNativeObject(kernel_state(), semaphore_ptr); - if (!sem) { - assert_always(); - return 0; - } - - // TODO(benvanik): increment thread priority? - // TODO(benvanik): wait? - - return sem->ReleaseSemaphore(adjustment); + return xeKeReleaseSemaphore(cpu::ThreadState::GetContext(), semaphore_ptr, + increment, adjustment, wait); } dword_result_t KeReleaseSemaphore_entry(pointer_t semaphore_ptr, @@ -755,14 +678,20 @@ DECLARE_XBOXKRNL_EXPORT1(NtCreateSemaphore, kThreading, kImplemented); dword_result_t NtReleaseSemaphore_entry(dword_t sem_handle, dword_t release_count, - lpdword_t previous_count_ptr) { + lpdword_t previous_count_ptr, + const ppc_context_t& context) { X_STATUS result = X_STATUS_SUCCESS; int32_t previous_count = 0; auto sem = kernel_state()->object_table()->LookupObject(sem_handle); if (sem) { - previous_count = sem->ReleaseSemaphore((int32_t)release_count); + previous_count = xeKeReleaseSemaphore( + context, sem->guest_object(), 1, release_count, 0); + X_STATUS caught_status = context->CatchStatus(); + if (caught_status) { + return caught_status; + } } else { result = X_STATUS_INVALID_HANDLE; } @@ -794,7 +723,7 @@ dword_result_t NtCreateMutant_entry( } auto mutant = object_ref(new XMutant(kernel_state())); - mutant->Initialize(initial_owner ? true : false); + mutant->Initialize(initial_owner ? true : false, obj_attributes); // obj_attributes may have a name inside of it, if != NULL. if (obj_attributes) { @@ -809,32 +738,122 @@ dword_result_t NtCreateMutant_entry( } DECLARE_XBOXKRNL_EXPORT1(NtCreateMutant, kThreading, kImplemented); -dword_result_t NtReleaseMutant_entry(dword_t mutant_handle, dword_t unknown) { - // This doesn't seem to be supported. 
- // int32_t previous_count_ptr = SHIM_GET_ARG_32(2); +dword_result_t NtReleaseMutant_entry(dword_t mutant_handle, + lpdword_t previous_count, + const ppc_context_t& context) { + auto kernel = context->kernel_state; + uint32_t object = 0; + uint32_t ref_res = kernel->ReferenceObjectByHandle( + context, mutant_handle, + kernel->GetKernelGuestGlobals() + + offsetof(KernelGuestGlobals, ExMutantObjectType), + &object); + if ((int)ref_res < 0) { + return ref_res; + } + + X_KMUTANT* mutant = context->TranslateVirtual(object); - // Whatever arg 1 is all games seem to set it to 0, so whether it's - // abandon or wait we just say false. Which is good, cause they are - // both ignored. - assert_zero(unknown); - uint32_t priority_increment = 0; - bool abandon = false; - bool wait = false; + auto prev_count = xeKeReleaseMutant(context, mutant, 1, 0, 0); - X_STATUS result = X_STATUS_SUCCESS; + X_STATUS caught_status = context->CatchStatus(); + if (!caught_status) { + kernel->DereferenceObject(context, object); - auto mutant = - kernel_state()->object_table()->LookupObject(mutant_handle); - if (mutant) { - mutant->ReleaseMutant(priority_increment, abandon, wait); + if (previous_count) { + *previous_count = prev_count; + } + return 0; } else { - result = X_STATUS_INVALID_HANDLE; + kernel->DereferenceObject(context, object); + return caught_status; } - - return result; } DECLARE_XBOXKRNL_EXPORT1(NtReleaseMutant, kThreading, kImplemented); +dword_result_t KeReleaseMutant_entry(pointer_t mutant, + dword_t increment, dword_t abandoned, + dword_t wait, + const ppc_context_t& context) { + return xeKeReleaseMutant(context, mutant, increment, abandoned & 0xff, + wait & 0xff); +} + +DECLARE_XBOXKRNL_EXPORT1(KeReleaseMutant, kThreading, kImplemented); +void xeKeInitializeMutant(X_KMUTANT* mutant, bool initially_owned, + xe::cpu::ppc::PPCContext* context) { + mutant->header.type = 2; + + if (initially_owned) { + auto kpcr = GetKPCR(context); + + auto v4 = GetKThread(context, kpcr); + 
mutant->header.signal_state = 0; + mutant->owner = context->HostToGuestVirtual(v4); + auto old_irql = kernel_state()->LockDispatcher(context); + + util::XeInsertHeadList(v4->mutants_list.blink_ptr, &mutant->unk_list, + context); + + xboxkrnl::xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), old_irql); + } else { + mutant->owner = 0U; + mutant->header.signal_state = 1; + } + util::XeInitializeListHead(&mutant->header.wait_list, context); + mutant->abandoned = 0; +} +void KeInitializeMutant_entry(pointer_t mutant, + dword_t initially_owned, + const ppc_context_t& context) { + xeKeInitializeMutant(mutant, static_cast(initially_owned), + context); +} + +DECLARE_XBOXKRNL_EXPORT1(KeInitializeMutant, kThreading, kImplemented); + +void xeKeInitializeTimerEx(X_KTIMER* timer, uint32_t type, uint32_t proctype, + PPCContext* context) { + xenia_assert(proctype < 3); + xenia_assert(type == 0 || type == 1); + // other fields are unmodified, they must carry through multiple calls of + // initialize + timer->header.process_type = proctype; + timer->header.inserted = 0; + timer->header.type = type + 8; + timer->header.signal_state = 0; + util::XeInitializeListHead(&timer->header.wait_list, context); + timer->due_time = 0; + timer->period = 0; +} + +void KeInitializeTimerEx_entry(pointer_t timer, dword_t type, + dword_t proctype, const ppc_context_t& context) { + xeKeInitializeTimerEx(timer, type, proctype & 0xFF, context); +} +DECLARE_XBOXKRNL_EXPORT1(KeInitializeTimerEx, kThreading, kImplemented); + +dword_result_t KeSetTimerEx_entry(pointer_t timer, qword_t due_time, + dword_t period, pointer_t dpc, + const ppc_context_t& context) { + return xeKeSetTimerEx(context, timer, due_time, period, dpc); +} +DECLARE_XBOXKRNL_EXPORT1(KeSetTimerEx, kThreading, kImplemented); + +dword_result_t KeSetTimer_entry(pointer_t timer, qword_t due_time, + pointer_t dpc, + const ppc_context_t& context) { + return xeKeSetTimerEx(context, timer, due_time, 0, dpc); 
+} +DECLARE_XBOXKRNL_EXPORT1(KeSetTimer, kThreading, kImplemented); + +dword_result_t KeCancelTimer_entry(pointer_t timer, + const ppc_context_t& context) { + return xeKeCancelTimer(context, timer); +} +DECLARE_XBOXKRNL_EXPORT1(KeCancelTimer, kThreading, kImplemented); + dword_result_t NtCreateTimer_entry(lpdword_t handle_ptr, lpvoid_t obj_attributes_ptr, dword_t timer_type) { @@ -873,12 +892,10 @@ DECLARE_XBOXKRNL_EXPORT1(NtCreateTimer, kThreading, kImplemented); dword_result_t NtSetTimerEx_entry(dword_t timer_handle, lpqword_t due_time_ptr, lpvoid_t routine_ptr /*PTIMERAPCROUTINE*/, - dword_t unk_one, lpvoid_t routine_arg, + dword_t mode, lpvoid_t routine_arg, dword_t resume, dword_t period_ms, - dword_t unk_zero) { - assert_true(unk_one == 1); - assert_true(unk_zero == 0); - + lpdword_t unk_zero, + const ppc_context_t& context) { uint64_t due_time = *due_time_ptr; X_STATUS result = X_STATUS_SUCCESS; @@ -886,9 +903,12 @@ dword_result_t NtSetTimerEx_entry(dword_t timer_handle, lpqword_t due_time_ptr, auto timer = kernel_state()->object_table()->LookupObject(timer_handle); if (timer) { - result = - timer->SetTimer(due_time, period_ms, routine_ptr.guest_address(), - routine_arg.guest_address(), resume ? 
true : false); + int prev_state = + xeKeSetExTimer(context, timer->guest_object(), due_time, + routine_ptr, routine_arg, period_ms, mode); + if (unk_zero) { + *unk_zero = prev_state; + } } else { result = X_STATUS_INVALID_HANDLE; } @@ -898,53 +918,37 @@ dword_result_t NtSetTimerEx_entry(dword_t timer_handle, lpqword_t due_time_ptr, DECLARE_XBOXKRNL_EXPORT1(NtSetTimerEx, kThreading, kImplemented); dword_result_t NtCancelTimer_entry(dword_t timer_handle, - lpdword_t current_state_ptr) { + lpdword_t current_state_ptr, + const ppc_context_t& context) { X_STATUS result = X_STATUS_SUCCESS; auto timer = kernel_state()->object_table()->LookupObject(timer_handle); if (timer) { - result = timer->Cancel(); + result = X_STATUS_SUCCESS; + auto current_state = + xeKeCancelExTimer(context, timer->guest_object()); + if (current_state_ptr) { + *current_state_ptr = current_state; + } } else { result = X_STATUS_INVALID_HANDLE; } - if (current_state_ptr) { - *current_state_ptr = 0; - } return result; } DECLARE_XBOXKRNL_EXPORT1(NtCancelTimer, kThreading, kImplemented); -uint32_t xeKeWaitForSingleObject(void* object_ptr, uint32_t wait_reason, - uint32_t processor_mode, uint32_t alertable, - uint64_t* timeout_ptr) { - auto object = XObject::GetNativeObject(kernel_state(), object_ptr); - - if (!object) { - // The only kind-of failure code (though this should never happen) - assert_always(); - return X_STATUS_ABANDONED_WAIT_0; - } - - X_STATUS result = - object->Wait(wait_reason, processor_mode, alertable, timeout_ptr); - if (alertable) { - if (result == X_STATUS_USER_APC) { - result = xeProcessUserApcs(nullptr); - } - } - return result; -} - dword_result_t KeWaitForSingleObject_entry(lpvoid_t object_ptr, dword_t wait_reason, dword_t processor_mode, dword_t alertable, lpqword_t timeout_ptr) { - uint64_t timeout = timeout_ptr ? static_cast(*timeout_ptr) : 0u; - return xeKeWaitForSingleObject(object_ptr, wait_reason, processor_mode, - alertable, timeout_ptr ? 
&timeout : nullptr); + int64_t timeout = timeout_ptr ? static_cast(*timeout_ptr) : 0u; + return xeKeWaitForSingleObject(cpu::ThreadState::GetContext(), + (X_DISPATCH_HEADER*)object_ptr.host_address(), + wait_reason, processor_mode, alertable, + timeout_ptr ? &timeout : nullptr); } DECLARE_XBOXKRNL_EXPORT3(KeWaitForSingleObject, kThreading, kImplemented, kBlocking, kHighFrequency); @@ -957,13 +961,12 @@ uint32_t NtWaitForSingleObjectEx(uint32_t object_handle, uint32_t wait_mode, kernel_state()->object_table()->LookupObject(object_handle); if (object) { uint64_t timeout = timeout_ptr ? static_cast(*timeout_ptr) : 0u; - result = - object->Wait(3, wait_mode, alertable, timeout_ptr ? &timeout : nullptr); - if (alertable) { - if (result == X_STATUS_USER_APC) { - result = xeProcessUserApcs(nullptr); - } - } + + return xeKeWaitForSingleObjectEx(cpu::ThreadState::GetContext(), + object->guest_object(), + wait_mode, alertable, + (int64_t*)timeout_ptr); + } else { result = X_STATUS_INVALID_HANDLE; } @@ -985,142 +988,119 @@ DECLARE_XBOXKRNL_EXPORT3(NtWaitForSingleObjectEx, kThreading, kImplemented, dword_result_t KeWaitForMultipleObjects_entry( dword_t count, lpdword_t objects_ptr, dword_t wait_type, dword_t wait_reason, dword_t processor_mode, dword_t alertable, - lpqword_t timeout_ptr, lpvoid_t wait_block_array_ptr) { - assert_true(wait_type <= 1); - - assert_true(count <= 64); - object_ref objects[64]; - { - auto crit = global_critical_region::AcquireDirect(); - for (uint32_t n = 0; n < count; n++) { - auto object_ptr = kernel_memory()->TranslateVirtual(objects_ptr[n]); - auto object_ref = XObject::GetNativeObject(kernel_state(), - object_ptr, -1, true); - if (!object_ref) { - return X_STATUS_INVALID_PARAMETER; - } + lpqword_t timeout_ptr, pointer_t wait_block_array_ptr, + const ppc_context_t& context) { + X_DISPATCH_HEADER* objects_tmp[64]; - objects[n] = std::move(object_ref); - } - } - uint64_t timeout = timeout_ptr ? 
static_cast(*timeout_ptr) : 0u; - X_STATUS result = XObject::WaitMultiple( - uint32_t(count), reinterpret_cast(&objects[0]), wait_type, - wait_reason, processor_mode, alertable, timeout_ptr ? &timeout : nullptr); - if (alertable) { - if (result == X_STATUS_USER_APC) { - result = xeProcessUserApcs(nullptr); - } + for (unsigned i = 0; i < count; ++i) { + objects_tmp[i] = + context->TranslateVirtual(objects_ptr[i]); } - return result; + + int64_t timeout = timeout_ptr ? static_cast(*timeout_ptr) : 0u; + return xeKeWaitForMultipleObjects( + context, count, objects_tmp, wait_type, wait_reason, processor_mode, + alertable, timeout_ptr ? &timeout : nullptr, wait_block_array_ptr); } DECLARE_XBOXKRNL_EXPORT3(KeWaitForMultipleObjects, kThreading, kImplemented, kBlocking, kHighFrequency); uint32_t xeNtWaitForMultipleObjectsEx(uint32_t count, xe::be* handles, uint32_t wait_type, uint32_t wait_mode, - uint32_t alertable, - uint64_t* timeout_ptr) { - assert_true(wait_type <= 1); - - assert_true(count <= 64); + uint32_t alertable, uint64_t* timeout_ptr, + cpu::ppc::PPCContext* context) { + X_DISPATCH_HEADER* objects_tmp[64]; object_ref objects[64]; - - /* - Reserving to squash the constant reallocations, in a benchmark of one - particular game over a period of five minutes roughly 11% of CPU time was - spent inside a helper function to Windows' heap allocation function. 
7% of - that time was traced back to here - - edit: actually switched to fixed size array, as there can never be more - than 64 events specified - */ - { - auto crit = global_critical_region::AcquireDirect(); - for (uint32_t n = 0; n < count; n++) { - uint32_t object_handle = handles[n]; - auto object = kernel_state()->object_table()->LookupObject( - object_handle, true); - if (!object) { - return X_STATUS_INVALID_PARAMETER; - } - objects[n] = std::move(object); + for (uint32_t n = 0; n < count; n++) { + uint32_t object_handle = handles[n]; + auto object = kernel_state()->object_table()->LookupObject( + object_handle, true); + if (!object) { + return X_STATUS_INVALID_PARAMETER; } + objects[n] = std::move(object); } - - auto result = - XObject::WaitMultiple(count, reinterpret_cast(&objects[0]), - wait_type, 6, wait_mode, alertable, timeout_ptr); - if (alertable) { - if (result == X_STATUS_USER_APC) { - result = xeProcessUserApcs(nullptr); - } + for (uint32_t n = 0; n < count; ++n) { + objects_tmp[n] = + xeObGetWaitableObject(context, objects[n]->guest_object()); } + + // return 0; + int64_t timeout = timeout_ptr ? static_cast(*timeout_ptr) : 0u; + // im not sure wait reason is right + auto result = xeKeWaitForMultipleObjects( + context, count, objects_tmp, wait_type, 3, wait_mode, alertable, + timeout_ptr ? &timeout : nullptr, + &GetKThread(context)->scratch_waitblock_memory[0]); + return result; } dword_result_t NtWaitForMultipleObjectsEx_entry( dword_t count, lpdword_t handles, dword_t wait_type, dword_t wait_mode, - dword_t alertable, lpqword_t timeout_ptr) { + dword_t alertable, lpqword_t timeout_ptr, const ppc_context_t& context) { uint64_t timeout = timeout_ptr ? static_cast(*timeout_ptr) : 0u; if (!count || count > 64 || (wait_type != 1 && wait_type)) { return X_STATUS_INVALID_PARAMETER; } - return xeNtWaitForMultipleObjectsEx(count, handles, wait_type, wait_mode, - alertable, - timeout_ptr ? 
&timeout : nullptr); + return xeNtWaitForMultipleObjectsEx( + count, handles, wait_type, wait_mode, alertable, + timeout_ptr ? &timeout : nullptr, context); } DECLARE_XBOXKRNL_EXPORT3(NtWaitForMultipleObjectsEx, kThreading, kImplemented, kBlocking, kHighFrequency); -dword_result_t NtSignalAndWaitForSingleObjectEx_entry(dword_t signal_handle, - dword_t wait_handle, - dword_t alertable, - dword_t r6, - lpqword_t timeout_ptr) { +dword_result_t NtSignalAndWaitForSingleObjectEx_entry( + dword_t signal_handle, dword_t wait_handle, dword_t mode, dword_t alertable, + lpqword_t timeout_ptr, const ppc_context_t& context) { X_STATUS result = X_STATUS_SUCCESS; - // pre-lock for these two handle lookups - global_critical_region::mutex().lock(); auto signal_object = kernel_state()->object_table()->LookupObject( signal_handle, true); auto wait_object = kernel_state()->object_table()->LookupObject(wait_handle, true); - global_critical_region::mutex().unlock(); if (signal_object && wait_object) { - uint64_t timeout = timeout_ptr ? static_cast(*timeout_ptr) : 0u; - result = - XObject::SignalAndWait(signal_object.get(), wait_object.get(), 3, 1, - alertable, timeout_ptr ? &timeout : nullptr); + int64_t timeout = timeout_ptr ? static_cast(*timeout_ptr) : 0u; + + result = xeKeSignalAndWaitForSingleObjectEx( + context, signal_object->guest_object(), + wait_object->guest_object(), mode, alertable, + timeout_ptr ? &timeout : nullptr); + } else { result = X_STATUS_INVALID_HANDLE; } - - if (alertable) { - if (result == X_STATUS_USER_APC) { - result = xeProcessUserApcs(nullptr); - } - } return result; } DECLARE_XBOXKRNL_EXPORT3(NtSignalAndWaitForSingleObjectEx, kThreading, kImplemented, kBlocking, kHighFrequency); static void PrefetchForCAS(const void* value) { swcache::PrefetchW(value); } - +XE_COMPARISON_NOINLINE uint32_t xeKeKfAcquireSpinLock(PPCContext* ctx, X_KSPINLOCK* lock, bool change_irql) { - auto old_irql = change_irql ? 
xeKfRaiseIrql(ctx, 2) : 0; - PrefetchForCAS(lock); - assert_true(lock->prcb_of_owner != static_cast(ctx->r[13])); - // Lock. - while (!xe::atomic_cas(0, xe::byte_swap(static_cast(ctx->r[13])), - &lock->prcb_of_owner.value)) { - // Spin! - // TODO(benvanik): error on deadlock? - xe::threading::MaybeYield(); + auto old_irql = 0; + + if (change_irql) { + auto kpcr = GetKPCR(ctx); + old_irql = kpcr->current_irql; + kpcr->current_irql = IRQL_DISPATCH; + xenia_assert(old_irql <= IRQL_DISPATCH); + } + if (lock->pcr_of_owner == static_cast(ctx->r[13])) { + // lock is already held! + xenia_assert(false); + return old_irql; } + // Lock. + auto processor = ctx->processor; + + while (!processor->GuestAtomicCAS32( + ctx, 0, static_cast(ctx->r[13]), + ctx->HostToGuestVirtual(&lock->pcr_of_owner.value))) { + ctx->CheckInterrupt(); + } return old_irql; } @@ -1131,21 +1111,37 @@ dword_result_t KfAcquireSpinLock_entry(pointer_t lock_ptr, } DECLARE_XBOXKRNL_EXPORT3(KfAcquireSpinLock, kThreading, kImplemented, kBlocking, kHighFrequency); - +XE_COMPARISON_NOINLINE void xeKeKfReleaseSpinLock(PPCContext* ctx, X_KSPINLOCK* lock, uint32_t old_irql, bool change_irql) { - assert_true(lock->prcb_of_owner == static_cast(ctx->r[13])); + assert_true(lock->pcr_of_owner == static_cast(ctx->r[13])); // Unlock. - lock->prcb_of_owner.value = 0; + // if someone has a reservation on this address, make sure its been cancelled + // before we store + + // lock->pcr_of_owner.value = 0; + + // this is just to cancel adjacent reserves + + // ctx->processor->GuestAtomicExchange32(ctx, &lock->pcr_of_owner, 0); + lock->pcr_of_owner = 0U; + //_mm_mfence(); + // ctx->processor->CancelReservationOnAddress(ctx, &lock->pcr_of_owner); if (change_irql) { // Unlock. - if (old_irql >= 2) { + if (old_irql >= IRQL_DISPATCH) { return; } - // Restore IRQL. 
- xeKfLowerIrql(ctx, old_irql); + auto kpcr = GetKPCR(ctx); + + kpcr->current_irql = old_irql; + + unsigned int swint = GetKPCR(ctx)->software_interrupt_state; + if (old_irql < swint) { + xeDispatchProcedureCallInterrupt(old_irql, swint, ctx); + } } } @@ -1168,7 +1164,7 @@ dword_result_t KeTryToAcquireSpinLockAtRaisedIrql_entry( pointer_t lock_ptr, const ppc_context_t& ppc_ctx) { // Lock. auto lock = reinterpret_cast(lock_ptr.host_address()); - assert_true(lock_ptr->prcb_of_owner != static_cast(ppc_ctx->r[13])); + assert_true(lock_ptr->pcr_of_owner != static_cast(ppc_ctx->r[13])); PrefetchForCAS(lock); if (!ppc_ctx->processor->GuestAtomicCAS32( ppc_ctx, 0, static_cast(ppc_ctx->r[13]), @@ -1188,14 +1184,35 @@ void KeReleaseSpinLockFromRaisedIrql_entry(pointer_t lock_ptr, DECLARE_XBOXKRNL_EXPORT2(KeReleaseSpinLockFromRaisedIrql, kThreading, kImplemented, kHighFrequency); -void KeEnterCriticalRegion_entry() { - XThread::GetCurrentThread()->EnterCriticalRegion(); +void xeKeEnterCriticalRegion(PPCContext* context) { + GetKThread()->apc_disable_count--; +} + +void KeEnterCriticalRegion_entry(const ppc_context_t& context) { + xeKeEnterCriticalRegion(context); } DECLARE_XBOXKRNL_EXPORT2(KeEnterCriticalRegion, kThreading, kImplemented, kHighFrequency); -void KeLeaveCriticalRegion_entry() { - XThread::GetCurrentThread()->LeaveCriticalRegion(); +void xeKeLeaveCriticalRegion(PPCContext* context) { + auto enable_count = ++GetKThread(context)->apc_disable_count; + if (!enable_count) { + if (!GetKThread(context)->apc_lists[0].empty(context)) { + // kernel apc list not empty + GetKThread(context)->deferred_apc_software_interrupt_state = 1; + GetKPCR(context)->apc_software_interrupt_state = 1; + + // not very confident in this + if (GetKPCR(context)->current_irql <= IRQL_APC) { + xeDispatchProcedureCallInterrupt( + GetKPCR(context)->current_irql, + GetKPCR(context)->software_interrupt_state, context); + } + } + } +} +void KeLeaveCriticalRegion_entry(const ppc_context_t& context) { 
+ xeKeLeaveCriticalRegion(context); } DECLARE_XBOXKRNL_EXPORT2(KeLeaveCriticalRegion, kThreading, kImplemented, kHighFrequency); @@ -1204,26 +1221,36 @@ dword_result_t KeRaiseIrqlToDpcLevel_entry(const ppc_context_t& ctx) { auto pcr = ctx.GetPCR(); uint32_t old_irql = pcr->current_irql; - if (old_irql > 2) { + if (old_irql > IRQL_DISPATCH) { XELOGE("KeRaiseIrqlToDpcLevel - old_irql > 2"); } - pcr->current_irql = 2; + // oddly, this does not set the current interrupt priority + pcr->current_irql = IRQL_DISPATCH; return old_irql; } DECLARE_XBOXKRNL_EXPORT2(KeRaiseIrqlToDpcLevel, kThreading, kImplemented, kHighFrequency); +XE_COMPARISON_NOINLINE void xeKfLowerIrql(PPCContext* ctx, unsigned char new_irql) { - X_KPCR* kpcr = ctx->TranslateVirtualGPR(ctx->r[13]); + X_KPCR* kpcr = GetKPCR(ctx); if (new_irql > kpcr->current_irql) { XELOGE("KfLowerIrql : new_irql > kpcr->current_irql!"); } kpcr->current_irql = new_irql; - if (new_irql < 2) { + + kernel_state()->SetCurrentInterruptPriority(ctx, kpcr, new_irql); + + if (new_irql < IRQL_DISPATCH) { // the called function does a ton of other stuff including changing the // irql and interrupt_related + + uint16_t swint = GetKPCR(ctx)->software_interrupt_state; + if (new_irql < swint) { + xeDispatchProcedureCallInterrupt(new_irql, swint, ctx); + } } } // irql is supposed to be per thread afaik... 
@@ -1231,9 +1258,9 @@ void KfLowerIrql_entry(dword_t new_irql, const ppc_context_t& ctx) { xeKfLowerIrql(ctx, static_cast(new_irql)); } DECLARE_XBOXKRNL_EXPORT2(KfLowerIrql, kThreading, kImplemented, kHighFrequency); - +XE_COMPARISON_NOINLINE unsigned char xeKfRaiseIrql(PPCContext* ctx, unsigned char new_irql) { - X_KPCR* v1 = ctx->TranslateVirtualGPR(ctx->r[13]); + X_KPCR* v1 = GetKPCR(ctx); uint32_t old_irql = v1->current_irql; v1->current_irql = new_irql; @@ -1241,6 +1268,7 @@ unsigned char xeKfRaiseIrql(PPCContext* ctx, unsigned char new_irql) { if (old_irql > (unsigned int)new_irql) { XELOGE("KfRaiseIrql - old_irql > new_irql!"); } + kernel_state()->SetCurrentInterruptPriority(ctx, v1, new_irql); return old_irql; } // used by aurora's nova plugin @@ -1253,6 +1281,294 @@ dword_result_t KfRaiseIrql_entry(dword_t new_irql, const ppc_context_t& ctx) { DECLARE_XBOXKRNL_EXPORT2(KfRaiseIrql, kThreading, kImplemented, kHighFrequency); +void AddTimer(PPCContext* context, X_KTIMER* timer) { + auto globals = context->kernel_state->GetKernelGuestGlobals(context); + + auto lock = &globals->timer_table_spinlock; + + xboxkrnl::xeKeKfAcquireSpinLock(context, lock, false); + auto& timers = globals->running_timers; + timers.InsertHead(timer, context); + xboxkrnl::xeKeKfReleaseSpinLock(context, lock, 0, false); +} + +int XeInsertGlobalTimer(PPCContext* context, X_KTIMER* timer, int64_t time) { + X_TIME_STAMP_BUNDLE* KeTimeStampBundle = + &context->kernel_state->GetKernelGuestGlobals(context)->KeTimestampBundle; + int v3 = timer->period; + + timer->header.inserted = 1; + timer->header.absolute = 0; + if (!v3) { + timer->header.signal_state = 0; + } + + // todo: logic here is not so simple + + if (time < 0) { + // i dont actually know where it sets due_time + timer->due_time = KeTimeStampBundle->system_time - time; + AddTimer(context, timer); + return 1; + + } else { + // huh? + if (static_cast(KeTimeStampBundle->system_time - time) >= 0) { + // already done! 
+ timer->header.signal_state = 1; + timer->header.inserted = 0; + return 0; + } + + timer->header.absolute = 1; + timer->due_time = time; + AddTimer(context, timer); + return 1; + } +} + +struct queued_timer_dpc_t { + XDPC* dpc; + uint32_t routine; + uint32_t context; +}; +void xeHandleTimers(PPCContext* context, uint32_t timer_related) { + xenia_assert(GetKPCR(context)->current_irql == IRQL_DISPATCH); + + auto kernel = context->kernel_state; + std::vector expired_timers; + expired_timers.reserve(32); + auto globals = kernel->GetKernelGuestGlobals(context); + + uint32_t original_irql = kernel->LockDispatcher(context); + uint64_t current_interrupt_time = globals->KeTimestampBundle.interrupt_time; + + xboxkrnl::xeKeKfAcquireSpinLock(context, &globals->timer_table_spinlock, + false); + for (auto&& timer : globals->running_timers.IterateForward(context)) { + if (timer.due_time <= current_interrupt_time) { + expired_timers.push_back(&timer); + } + } + + for (auto&& timer_to_remove : expired_timers) { + util::XeRemoveEntryList(&timer_to_remove->table_bucket_entry, context); + } + xboxkrnl::xeKeKfReleaseSpinLock(context, &globals->timer_table_spinlock, 0, + false); + + // make sure we run timers in order of their expiration. 
i think this ordering + // is guaranteed by the kernel + std::sort(expired_timers.begin(), expired_timers.end(), + [](X_KTIMER* a, X_KTIMER* b) { return a->due_time < b->due_time; }); + + // 360 xboxkrnl uses an array of 16 of these, and loops collecting dpcs + // until the array is filled, then executes them, then checks for more timers + // and so on until all timers have been handled + // this does alter the ordering somewhat, but i doubt anything relies on that + std::vector qdpcs{}; + qdpcs.reserve(expired_timers.size()); + uint64_t current_systemtime = globals->KeTimestampBundle.system_time; + + for (auto&& timer : expired_timers) { + timer->header.inserted = 0; + timer->table_bucket_entry.Zero(); + timer->header.signal_state = 1; + if (context->TranslateVirtual( + timer->header.wait_list.flink_ptr) != &timer->header.wait_list) { + xeDispatchSignalStateChange(context, &timer->header, 0); + } + + if (timer->period) { + int timer_insert_result = XeInsertGlobalTimer(context, timer, -10000LL * timer->period); + xenia_assert(timer_insert_result != 0); + } + + auto dpc = context->TranslateVirtual(timer->dpc); + // check if null + if (dpc) { + if (dpc->desired_cpu_number || + dpc->desired_cpu_number == + GetKPCR(context)->prcb_data.current_cpu + 1) { + // dpc is intended for us to execute + // save all data, i guess other dpcs might modify it? 
+ qdpcs.push_back(queued_timer_dpc_t{dpc, dpc->routine, dpc->context}); + } else { + xeKeInsertQueueDpc(dpc, static_cast(current_systemtime), + static_cast(current_systemtime >> 32), + context); + } + } + } + // todo: control flow is a bit weird here, not 100% sure of this + if (qdpcs.size()) { + xeDispatcherSpinlockUnlock(context, kernel->GetDispatcherLock(context), + IRQL_DISPATCH); + + for (auto&& queued_dpc : qdpcs) { + uint64_t dpc_args[] = {context->HostToGuestVirtual(queued_dpc.dpc), + queued_dpc.context, + static_cast(current_systemtime), + static_cast(current_systemtime >> 32)}; + + context->processor->Execute(context->thread_state(), queued_dpc.routine, + dpc_args, countof(dpc_args)); + } + xboxkrnl::xeKfLowerIrql(context, original_irql); + } else { + xeDispatcherSpinlockUnlock(context, kernel->GetDispatcherLock(context), + original_irql); + } +} +static void set_msr_interrupt_bits(PPCContext* context, uint32_t value) { + // todo: implement! + uint64_t old_msr = context->msr; + context->msr = (old_msr & ~0x8000ULL) | (value & 0x8000); +} + +void xeExecuteDPCList2( + PPCContext* context, uint32_t timer_unk, + util::X_TYPED_LIST& dpc_list, + uint32_t zero_register) { + auto irql = GetKPCR(context)->current_irql; + + xenia_assert(irql == IRQL_DISPATCH); + + do { + // they only check if this value is nonzero. 
they probably + // just use r1 because its a readily available nonzero register + GetKPCR(context)->prcb_data.dpc_active = + static_cast(context->r[1]); + uint32_t tmp_msr_mask = 0xFDFFD7FF; + GetKPCR(context)->msr_mask = tmp_msr_mask; + context->msr &= tmp_msr_mask; + + if (timer_unk) { + uint32_t v4 = GetKPCR(context)->timer_related; + GetKPCR(context)->timer_pending = zero_register; + set_msr_interrupt_bits(context, 0xFFFF8000); + xeHandleTimers(context, v4); + set_msr_interrupt_bits(context, zero_register); + } + X_KSPINLOCK* spin = &GetKPCR(context)->prcb_data.dpc_lock; + while (true) { + xboxkrnl::xeKeKfAcquireSpinLock(context, spin, false); + + if (dpc_list.empty(context)) { + xboxkrnl::xeKeKfReleaseSpinLock(context, spin, 0, false); + break; + } + auto dpc = dpc_list.HeadObject(context); + util::XeRemoveEntryList(&dpc->list_entry, context); + auto routine = dpc->routine; + auto ctx = dpc->context; + auto arg1 = dpc->arg1; + auto arg2 = dpc->arg2; + dpc->selected_cpu_number = zero_register; + + xboxkrnl::xeKeKfReleaseSpinLock(context, spin, 0, false); + + set_msr_interrupt_bits(context, 0xFFFF8000); + uint64_t dpc_args[] = {context->HostToGuestVirtual(dpc), ctx, arg1, arg2}; + context->processor->Execute(context->thread_state(), routine, dpc_args, + 4); + + // make sure the routine didnt change the irql + xenia_assert(GetKPCR(context)->current_irql == IRQL_DISPATCH); + + set_msr_interrupt_bits(context, zero_register); + + if (dpc_list.empty(context)) { + break; + } + } + GetKPCR(context)->prcb_data.dpc_active = zero_register; + GetKPCR(context)->prcb_data.dpc_related_40 = zero_register; + + // this doesnt make much sense to me, so its likely an issue with the + // decompilation + timer_unk = dpc_list.flink_ptr; + + } while (!dpc_list.empty(context)); +} + +void xeKeRetireDpcList(PPCContext* context) { + auto kpcr = GetKPCR(context); + xenia_assert(kpcr->current_irql == IRQL_DISPATCH); + unsigned int is_dpc_active = kpcr->prcb_data.dpc_active; + if 
(is_dpc_active) { + xenia_assert(kpcr->unk_0A == 0); + xenia_assert(kpcr->processtype_value_in_dpc == 0); + } + + context->DisableEI(); + unsigned int timer_ready = kpcr->timer_pending; + + if (!kpcr->prcb_data.queued_dpcs_list_head.empty(context) || timer_ready) { + xeExecuteDPCList2(context, timer_ready, + kpcr->prcb_data.queued_dpcs_list_head, 0); + kpcr->prcb_data.dpc_active = is_dpc_active; + } + context->EnableEI(); +} + +void KeRetireDpcList_entry(const ppc_context_t& context) { + xeKeRetireDpcList(context); +} +DECLARE_XBOXKRNL_EXPORT1(KeRetireDpcList, kThreading, kImplemented); + +void xeDispatchProcedureCallInterrupt(unsigned int new_irql, + unsigned int software_interrupt_mask, + cpu::ppc::PPCContext* context) { + xenia_assert(new_irql < IRQL_DISPATCH); + // need to save/restore registers, dpcs/apcs may clobber + // todo: might really only need to save lr + cpu::ppc::PPCGprSnapshot savegp; + context->TakeGPRSnapshot(&savegp); + if (new_irql < software_interrupt_mask) { + uint64_t saved_msr = context->msr; + + if (software_interrupt_mask >> 8) { + GetKPCR(context)->current_irql = + static_cast(software_interrupt_mask >> 8); + uint32_t sw_state; + do { + context->msr |= 0x8000ULL; + context->CheckInterrupt(); + xeHandleDPCsAndThreadSwapping(context); + context->msr &= ~(0x8000ULL); + sw_state = GetKPCR(context)->software_interrupt_state; + } while (sw_state >> 8); + GetKPCR(context)->current_irql = sw_state | new_irql; + if (sw_state <= new_irql) { + context->msr = saved_msr; + goto restgplr; + } + } else { + if (software_interrupt_mask <= new_irql) { + goto restgplr; + } + GetKPCR(context)->current_irql = software_interrupt_mask; + } + + do { + context->msr |= 0x8000ULL; + xenia_assert(offsetof(X_KPCR, apc_software_interrupt_state) == 0x9); + GetKPCR(context)->apc_software_interrupt_state = new_irql; + context->CheckInterrupt(); + xeProcessKernelApcs(context); + context->CheckInterrupt(); + context->msr &= ~0x8000ULL; + } while 
(GetKPCR(context)->apc_software_interrupt_state); + GetKPCR(context)->current_irql = new_irql; + + context->msr = saved_msr; + } +restgplr: + context->CheckInterrupt(); + context->RestoreGPRSnapshot(&savegp); +} + uint32_t xeNtQueueApcThread(uint32_t thread_handle, uint32_t apc_routine, uint32_t apc_routine_context, uint32_t arg1, uint32_t arg2, cpu::ppc::PPCContext* context) { @@ -1271,16 +1587,13 @@ uint32_t xeNtQueueApcThread(uint32_t thread_handle, uint32_t apc_routine, return X_STATUS_NO_MEMORY; } XAPC* apc = context->TranslateVirtual(apc_ptr); - xeKeInitializeApc(apc, thread->guest_object(), XAPC::kDummyKernelRoutine, 0, - apc_routine, 1 /*user apc mode*/, apc_routine_context); + xeKeInitializeApc(apc, thread->guest_object(), 0, 0, apc_routine, + 1 /*user apc mode*/, apc_routine_context); if (!xeKeInsertQueueApc(apc, arg1, arg2, 0, context)) { memory->SystemHeapFree(apc_ptr); return X_STATUS_UNSUCCESSFUL; } - // no-op, just meant to awaken a sleeping alertable thread to process real - // apcs - thread->thread()->QueueUserCallback([]() {}); return X_STATUS_SUCCESS; } dword_result_t NtQueueApcThread_entry(dword_t thread_handle, @@ -1291,30 +1604,43 @@ dword_result_t NtQueueApcThread_entry(dword_t thread_handle, return xeNtQueueApcThread(thread_handle, apc_routine, apc_routine_context, arg1, arg2, context); } - -X_STATUS xeProcessUserApcs(PPCContext* ctx) { +/* + todo: Kernel Apc queue logic is very different! this does not process things + in the proper order! 
it also does not set irql right, and does not set some + kthread/kpcr vars that need setting +*/ +template +X_STATUS xeProcessApcQueue(PPCContext* ctx) { if (!ctx) { ctx = cpu::ThreadState::Get()->context(); } X_STATUS alert_status = X_STATUS_SUCCESS; - auto kpcr = ctx->TranslateVirtualGPR(ctx->r[13]); + auto kpcr = GetKPCR(ctx); auto current_thread = ctx->TranslateVirtual(kpcr->prcb_data.current_thread); + cpu::ppc::PPCGprSnapshot savegplr; + ctx->TakeGPRSnapshot(&savegplr); uint32_t unlocked_irql = xeKeKfAcquireSpinLock(ctx, ¤t_thread->apc_lock); - auto& user_apc_queue = current_thread->apc_lists[1]; + auto& user_apc_queue = current_thread->apc_lists[which_queue]; // use guest stack for temporaries - uint32_t old_stack_pointer = static_cast(ctx->r[1]); + uint32_t old_stack_pointer = current_thread->kernel_aux_stack_current_; - uint32_t scratch_address = old_stack_pointer - 16; - ctx->r[1] = old_stack_pointer - 32; + uint32_t scratch_address = old_stack_pointer; + current_thread->kernel_aux_stack_current_ += 16; while (!user_apc_queue.empty(ctx)) { uint32_t apc_ptr = user_apc_queue.flink_ptr; + if (!apc_ptr) { + XELOGE("Null link in apc queue!!"); + user_apc_queue.Initialize(ctx); + break; + } + XAPC* apc = user_apc_queue.ListEntryObject( ctx->TranslateVirtual(apc_ptr)); @@ -1327,8 +1653,8 @@ X_STATUS xeProcessUserApcs(PPCContext* ctx) { apc->enqueued = 0; xeKeKfReleaseSpinLock(ctx, ¤t_thread->apc_lock, unlocked_irql); - alert_status = X_STATUS_USER_APC; - if (apc->kernel_routine != XAPC::kDummyKernelRoutine) { + alert_status = alert_status_res; + if (apc->kernel_routine != 0) { uint64_t kernel_args[] = { apc_ptr, scratch_address + 0, @@ -1336,7 +1662,7 @@ X_STATUS xeProcessUserApcs(PPCContext* ctx) { scratch_address + 8, scratch_address + 12, }; - ctx->processor->Execute(ctx->thread_state, apc->kernel_routine, + ctx->processor->Execute(ctx->thread_state(), apc->kernel_routine, kernel_args, xe::countof(kernel_args)); } else { 
ctx->kernel_state->memory()->SystemHeapFree(apc_ptr); @@ -1349,19 +1675,117 @@ X_STATUS xeProcessUserApcs(PPCContext* ctx) { if (normal_routine) { uint64_t normal_args[] = {normal_context, arg1, arg2}; - ctx->processor->Execute(ctx->thread_state, normal_routine, normal_args, + ctx->processor->Execute(ctx->thread_state(), normal_routine, normal_args, xe::countof(normal_args)); } unlocked_irql = xeKeKfAcquireSpinLock(ctx, ¤t_thread->apc_lock); } - - ctx->r[1] = old_stack_pointer; + current_thread->kernel_aux_stack_current_ = old_stack_pointer; xeKeKfReleaseSpinLock(ctx, ¤t_thread->apc_lock, unlocked_irql); + ctx->RestoreGPRSnapshot(&savegplr); return alert_status; } +X_STATUS xeProcessKernelApcQueue(PPCContext* ctx) { + xenia_assert(GetKPCR(ctx)->current_irql == IRQL_APC); + + if (!ctx) { + ctx = cpu::ThreadState::Get()->context(); + } + auto kthread = GetKThread(ctx); + + xeKeKfAcquireSpinLock(ctx, &kthread->apc_lock); + cpu::ppc::PPCGprSnapshot savegplr; + ctx->TakeGPRSnapshot(&savegplr); + + auto v2 = &kthread->apc_lists[0]; + kthread->deferred_apc_software_interrupt_state = 0; + + // use guest stack for temporaries + uint32_t old_stack_pointer = kthread->kernel_aux_stack_current_; + + uint32_t scratch_address = old_stack_pointer; + kthread->kernel_aux_stack_current_ += 32; + + while (v2->flink_ptr.xlat() != v2) { + auto v3 = v2->flink_ptr; + auto apc = kthread->apc_lists[0].ListEntryObject(v3.xlat()); + + uint8_t* scratch_ptr = ctx->TranslateVirtual(scratch_address); + xe::store_and_swap(scratch_ptr + 0, apc->normal_routine); + xe::store_and_swap(scratch_ptr + 4, apc->normal_context); + xe::store_and_swap(scratch_ptr + 8, apc->arg1); + xe::store_and_swap(scratch_ptr + 12, apc->arg2); + xe::store_and_swap(scratch_ptr + 16, apc->kernel_routine); + if (apc->normal_routine == 0) { + auto v7 = v3->flink_ptr; + auto v8 = v3->blink_ptr; + v8->flink_ptr = v7; + v7->blink_ptr = v8; + apc->enqueued = 0; + xeKeKfReleaseSpinLock(ctx, &kthread->apc_lock, IRQL_APC); + uint64_t 
kernel_args[] = { + ctx->HostToGuestVirtual(apc), scratch_address + 0, + scratch_address + 4, scratch_address + 8, + scratch_address + 12, + }; + + ctx->processor->Execute(ctx->thread_state(), + xe::load_and_swap(scratch_ptr + 16), + kernel_args, xe::countof(kernel_args)); + xeKeKfAcquireSpinLock(ctx, &kthread->apc_lock); + } else { + if (kthread->executing_kernel_apc || kthread->apc_disable_count) { + break; + } + auto v10 = v3->flink_ptr; + auto v11 = v3->blink_ptr; + v11->flink_ptr = v10; + v10->blink_ptr = v11; + apc->enqueued = 0; + xeKeKfReleaseSpinLock(ctx, &kthread->apc_lock, IRQL_APC); + uint64_t kernel_args[] = { + ctx->HostToGuestVirtual(apc), scratch_address + 0, + scratch_address + 4, scratch_address + 8, + scratch_address + 12, + }; + ctx->processor->Execute(ctx->thread_state(), + xe::load_and_swap(scratch_ptr + 16), + kernel_args, xe::countof(kernel_args)); + uint32_t normal_routine = xe::load_and_swap(scratch_ptr + 0); + uint32_t normal_context = xe::load_and_swap(scratch_ptr + 4); + uint32_t arg1 = xe::load_and_swap(scratch_ptr + 8); + uint32_t arg2 = xe::load_and_swap(scratch_ptr + 12); + if (normal_routine) { + kthread->executing_kernel_apc = 1; + xeKfLowerIrql(ctx, IRQL_PASSIVE); + uint64_t normal_args[] = {normal_context, arg1, arg2}; + ctx->processor->Execute(ctx->thread_state(), normal_routine, + normal_args, xe::countof(normal_args)); + xeKfRaiseIrql(ctx, IRQL_APC); + } + xeKeKfAcquireSpinLock(ctx, &kthread->apc_lock); + kthread->executing_kernel_apc = 0; + } + } + kthread->kernel_aux_stack_current_ = old_stack_pointer; + + xeKeKfReleaseSpinLock(ctx, &kthread->apc_lock, IRQL_APC); + ctx->RestoreGPRSnapshot(&savegplr); + return 0; +} + +X_STATUS xeProcessUserApcs(PPCContext* ctx) { + GetKThread(ctx)->user_apc_pending = 0; + return xeProcessApcQueue(ctx); +} + +X_STATUS xeProcessKernelApcs(PPCContext* ctx) { + return xeProcessKernelApcQueue(ctx); +} + static void YankApcList(PPCContext* ctx, X_KTHREAD* current_thread, unsigned apc_mode, bool 
rundown) { uint32_t unlocked_irql = @@ -1390,7 +1814,7 @@ static void YankApcList(PPCContext* ctx, X_KTHREAD* current_thread, if (this_entry->rundown_routine) { uint64_t args[] = {ctx->HostToGuestVirtual(this_entry)}; - kernel_state()->processor()->Execute(ctx->thread_state, + kernel_state()->processor()->Execute(ctx->thread_state(), this_entry->rundown_routine, args, xe::countof(args)); } else { @@ -1459,39 +1883,98 @@ uint32_t xeKeInsertQueueApc(XAPC* apc, uint32_t arg1, uint32_t arg2, } else { apc->arg1 = arg1; apc->arg2 = arg2; + apc->enqueued = 1; + xeKeInsertQueueApcHelper(context, apc, priority_increment); + + result = 1; + } + xeDispatcherSpinlockUnlock(context, &target_thread->apc_lock, old_irql); + return result; +} + +// i doubt this is correctly implemented +static void SendRunKernelApcIPI(void* ud) { + auto context = cpu::ThreadState::GetContext(); + auto kpcr = GetKPCR(context); + if (kpcr->prcb_data.current_thread.xlat() != (X_KTHREAD*)ud) { + XELOGE("Mismatched current thread in sendrunkernelapcipi"); + return; + } + kpcr->apc_software_interrupt_state = 1; + KernelState::HWThreadFor(context)->interrupt_controller()->SetEOI(1); +} - auto& which_list = target_thread->apc_lists[apc->apc_mode]; +void xeKeInsertQueueApcHelper(cpu::ppc::PPCContext* context, XAPC* apc, + int priority_increment) { + auto apc_thread = context->TranslateVirtual(apc->thread_ptr); + auto apc_mode = apc->apc_mode; + auto& which_list = apc_thread->apc_lists[apc->apc_mode]; - if (apc->normal_routine) { - which_list.InsertTail(apc, context); + if (apc->normal_routine) { + auto v6 = &which_list; + auto v7 = which_list.blink_ptr; + apc->list_entry.flink_ptr = v6; + apc->list_entry.blink_ptr = v7; + v7->flink_ptr = &apc->list_entry; + v6->blink_ptr = &apc->list_entry; + } else { + auto v8 = &which_list; + ShiftedPointer i = nullptr; + for (i = v8->flink_ptr.xlat(); i.m_base != v8 && !ADJ(i)->normal_routine; + i = ADJ(i)->list_entry.flink_ptr.xlat()) + ; + auto v10 = 
ADJ(i)->list_entry.blink_ptr; + auto v11 = v10->flink_ptr; + apc->list_entry.blink_ptr = v10; + apc->list_entry.flink_ptr = v11; + v11->blink_ptr = &apc->list_entry; + v10->flink_ptr = &apc->list_entry; + } + context->kernel_state->LockDispatcherAtIrql(context); + { + X_STATUS wait_status; + auto target_thread_state = apc_thread->thread_state; + if (apc_mode) { + if (target_thread_state == KTHREAD_STATE_WAITING && apc_thread->processor_mode == 1 && + apc_thread->alertable) { + wait_status = X_STATUS_USER_APC; + apc_thread->user_apc_pending = 1; + goto LABEL_25; + } } else { - XAPC* insertion_pos = nullptr; - for (auto&& sub_apc : which_list.IterateForward(context)) { - insertion_pos = &sub_apc; - if (sub_apc.normal_routine) { - break; + apc_thread->deferred_apc_software_interrupt_state = 1; + //if the thread is already running, we need to send it an IPI if it is on a different processor + if (target_thread_state == KTHREAD_STATE_RUNNING) { + auto thread_processor = apc_thread->current_cpu; + if (thread_processor == GetKPCR(context)->prcb_data.current_cpu) { + GetKPCR(context)->apc_software_interrupt_state = 1; + } else { + // THIS IS DEFINITELY BADLY IMPLEMENTED! 
+ cpu::SendInterruptArguments arguments; + arguments.ipi_func = SendRunKernelApcIPI; + arguments.ud = apc_thread; + arguments.wait_done = false; + arguments.irql_ = 0;//randomly picked this irql + context->processor->GetCPUThread(thread_processor) + ->SendGuestIPI(arguments); } + goto LABEL_26; } - if (!insertion_pos) { - which_list.InsertHead(apc, context); - } else { - util::XeInsertHeadList(insertion_pos->list_entry.blink_ptr, - &apc->list_entry, context); + if (target_thread_state == KTHREAD_STATE_WAITING && apc_thread->wait_irql == IRQL_PASSIVE && + (!apc->normal_routine || !apc_thread->apc_disable_count && + !apc_thread->executing_kernel_apc)) { + wait_status = X_STATUS_KERNEL_APC; + LABEL_25: + xeEnqueueThreadPostWait(context, apc_thread, wait_status, + priority_increment); + goto LABEL_26; } } - - apc->enqueued = 1; - - /* - todo: this is incomplete, a ton of other logic happens here, i believe - for waking the target thread if its alertable - */ - result = 1; + LABEL_26:; } - xeKeKfReleaseSpinLock(context, &target_thread->apc_lock, old_irql); - return result; -} + context->kernel_state->UnlockDispatcherAtIrql(context); +} dword_result_t KeInsertQueueApc_entry(pointer_t apc, lpvoid_t arg1, lpvoid_t arg2, dword_t priority_increment, const ppc_context_t& context) { @@ -1499,8 +1982,7 @@ dword_result_t KeInsertQueueApc_entry(pointer_t apc, lpvoid_t arg1, } DECLARE_XBOXKRNL_EXPORT1(KeInsertQueueApc, kThreading, kImplemented); -dword_result_t KeRemoveQueueApc_entry(pointer_t apc, - const ppc_context_t& context) { +uint32_t xeKeRemoveQueueApc(XAPC* apc, PPCContext* context) { bool result = false; uint32_t thread_guest_pointer = apc->thread_ptr; @@ -1520,6 +2002,11 @@ dword_result_t KeRemoveQueueApc_entry(pointer_t apc, return result ? 
1 : 0; } + +dword_result_t KeRemoveQueueApc_entry(pointer_t apc, + const ppc_context_t& context) { + return xeKeRemoveQueueApc(apc, context); +} DECLARE_XBOXKRNL_EXPORT1(KeRemoveQueueApc, kThreading, kImplemented); dword_result_t KiApcNormalRoutineNop_entry(dword_t unk0 /* output? */, @@ -1534,44 +2021,106 @@ void KeInitializeDpc_entry(pointer_t dpc, lpvoid_t routine, } DECLARE_XBOXKRNL_EXPORT2(KeInitializeDpc, kThreading, kImplemented, kSketchy); -dword_result_t KeInsertQueueDpc_entry(pointer_t dpc, dword_t arg1, - dword_t arg2) { - assert_always("DPC does not dispatch yet; going to hang!"); +static void DPCIPIFunction(void* ud) { + // maybe xeHandleDPCsAndThreadSwapping instead? + auto context = cpu::ThreadState::GetContext(); + + auto kpcr = GetKPCR(context); + KernelState::HWThreadFor(context)->interrupt_controller()->SetEOI(1); - uint32_t list_entry_ptr = dpc.guest_address() + 4; + GetKPCR(context)->generic_software_interrupt = 2; +} +uint32_t xeKeInsertQueueDpc(XDPC* dpc, uint32_t arg1, uint32_t arg2, + PPCContext* ctx) { + bool result = false; + auto old_irql = xeKfRaiseIrql(ctx, IRQL_HIGHEST); - // Lock dispatcher. - auto global_lock = xe::global_critical_region::AcquireDirect(); - auto dpc_list = kernel_state()->dpc_list(); + X_KPRCB* target_prcb; + auto inserted_cpunum = dpc->desired_cpu_number; - // If already in a queue, abort. 
- if (dpc_list->IsQueued(list_entry_ptr)) { - return 0; + if (dpc->desired_cpu_number) { + target_prcb = + &ctx->kernel_state->KPCRPageForCpuNumber(dpc->desired_cpu_number - 1) + ->pcr.prcb_data; + } else { + target_prcb = &GetKPCR(ctx)->prcb_data; + inserted_cpunum = target_prcb->current_cpu + 1; + } + + xboxkrnl::xeKeKfAcquireSpinLock(ctx, &target_prcb->dpc_lock, false); + bool send_interrupt = false; + if (dpc->selected_cpu_number == 0) { + result = true; + dpc->selected_cpu_number = inserted_cpunum; + dpc->arg1 = arg1; + dpc->arg2 = arg2; + util::XeInsertTailList(&target_prcb->queued_dpcs_list_head, + &dpc->list_entry, ctx); + if (!target_prcb->dpc_active && !target_prcb->dpc_related_40) { + send_interrupt = true; + target_prcb->dpc_related_40 = 1; + } } + xboxkrnl::xeKeKfReleaseSpinLock(ctx, &target_prcb->dpc_lock, 0, false); - // Prep DPC. - dpc->arg1 = (uint32_t)arg1; - dpc->arg2 = (uint32_t)arg2; + if (send_interrupt) { + if (target_prcb == &GetKPCR(ctx)->prcb_data) { + GetKPCR(ctx)->generic_software_interrupt = 2; + } else { + uint32_t cpunum = inserted_cpunum - 1; + + // ctx->kernel_state->SendIPI(1 << (inserted_cpunum - 1), 2); + // kernel sends an ipi here. 
i havent been able to figure out what the + // args to it mean, but presumably the IPI just triggers running the dpc + // list on the + cpu::SendInterruptArguments arguments{}; + arguments.ipi_func = DPCIPIFunction; + arguments.ud = nullptr; + arguments.irql_ = 0;//randomly chosen irql + arguments.wait_done = false; + ctx->processor->GetCPUThread(cpunum)->SendGuestIPI(arguments); + } + } - dpc_list->Insert(list_entry_ptr); + xboxkrnl::xeKfLowerIrql(ctx, old_irql); + return result; +} - return 1; +dword_result_t KeInsertQueueDpc_entry(pointer_t dpc, dword_t arg1, + dword_t arg2, const ppc_context_t& ctx) { + return xeKeInsertQueueDpc(dpc, arg1, arg2, ctx); } DECLARE_XBOXKRNL_EXPORT2(KeInsertQueueDpc, kThreading, kStub, kSketchy); -dword_result_t KeRemoveQueueDpc_entry(pointer_t dpc) { +uint32_t xeKeRemoveQueueDpc(XDPC* dpc, PPCContext* ctx) { bool result = false; - uint32_t list_entry_ptr = dpc.guest_address() + 4; - - auto global_lock = xe::global_critical_region::AcquireDirect(); - auto dpc_list = kernel_state()->dpc_list(); - if (dpc_list->IsQueued(list_entry_ptr)) { - dpc_list->Remove(list_entry_ptr); - result = true; + auto old_irql = xeKfRaiseIrql(ctx, IRQL_HIGHEST); + auto selected_cpu_number = dpc->selected_cpu_number; + if (selected_cpu_number) { + // need to hold the dpc lock, find the pcr it belongs to + auto targeted_pcr = + &ctx->kernel_state->KPCRPageForCpuNumber(selected_cpu_number - 1)->pcr; + + xboxkrnl::xeKeKfAcquireSpinLock(ctx, &targeted_pcr->prcb_data.dpc_lock, + false); + { + if (dpc->selected_cpu_number) { + util::XeRemoveEntryList(&dpc->list_entry, ctx); + dpc->selected_cpu_number = 0; + } + } + xboxkrnl::xeKeKfReleaseSpinLock(ctx, &targeted_pcr->prcb_data.dpc_lock, + false); } - return result ? 
1 : 0; + xeKfLowerIrql(ctx, old_irql); + return selected_cpu_number != 0; +} + +dword_result_t KeRemoveQueueDpc_entry(pointer_t dpc, + const ppc_context_t& ctx) { + return xeKeRemoveQueueDpc(dpc, ctx); } DECLARE_XBOXKRNL_EXPORT1(KeRemoveQueueDpc, kThreading, kImplemented); @@ -1587,14 +2136,15 @@ struct X_ERWLOCK { }; static_assert_size(X_ERWLOCK, 0x38); -void ExInitializeReadWriteLock_entry(pointer_t lock_ptr) { +void ExInitializeReadWriteLock_entry(pointer_t lock_ptr, + const ppc_context_t& context) { lock_ptr->lock_count = -1; lock_ptr->writers_waiting_count = 0; lock_ptr->readers_waiting_count = 0; lock_ptr->readers_entry_count = 0; - KeInitializeEvent_entry(&lock_ptr->writer_event, 1, 0); + KeInitializeEvent_entry(&lock_ptr->writer_event, 1, 0, context); KeInitializeSemaphore_entry(&lock_ptr->reader_semaphore, 0, 0x7FFFFFFF); - lock_ptr->spin_lock.prcb_of_owner = 0; + lock_ptr->spin_lock.pcr_of_owner = 0; } DECLARE_XBOXKRNL_EXPORT1(ExInitializeReadWriteLock, kThreading, kImplemented); @@ -1611,7 +2161,9 @@ void ExAcquireReadWriteLockExclusive_entry(pointer_t lock_ptr, lock_ptr->writers_waiting_count++; xeKeKfReleaseSpinLock(ppc_context, &lock_ptr->spin_lock, old_irql); - xeKeWaitForSingleObject(&lock_ptr->writer_event, 7, 0, 0, nullptr); + + xeKeWaitForSingleObject(ppc_context, &lock_ptr->writer_event.header, 7, 0, 0, + nullptr); } DECLARE_XBOXKRNL_EXPORT2(ExAcquireReadWriteLockExclusive, kThreading, kImplemented, kBlocking); @@ -1649,7 +2201,8 @@ void ExAcquireReadWriteLockShared_entry(pointer_t lock_ptr, lock_ptr->readers_waiting_count++; xeKeKfReleaseSpinLock(ppc_context, &lock_ptr->spin_lock, old_irql); - xeKeWaitForSingleObject(&lock_ptr->reader_semaphore, 7, 0, 0, nullptr); + xeKeWaitForSingleObject(ppc_context, &lock_ptr->reader_semaphore.header, 7, 0, + 0, nullptr); } DECLARE_XBOXKRNL_EXPORT2(ExAcquireReadWriteLockShared, kThreading, kImplemented, kBlocking); @@ -1692,7 +2245,7 @@ void ExReleaseReadWriteLock_entry(pointer_t lock_ptr, 
lock_ptr->readers_waiting_count = 0; lock_ptr->readers_entry_count = readers_waiting_count; xeKeKfReleaseSpinLock(ppc_context, &lock_ptr->spin_lock, old_irql); - xeKeReleaseSemaphore(&lock_ptr->reader_semaphore, 1, + xeKeReleaseSemaphore(ppc_context, &lock_ptr->reader_semaphore, 1, readers_waiting_count, 0); return; } @@ -1706,13 +2259,14 @@ void ExReleaseReadWriteLock_entry(pointer_t lock_ptr, lock_ptr->writers_waiting_count--; xeKeKfReleaseSpinLock(ppc_context, &lock_ptr->spin_lock, old_irql); - xeKeSetEvent(&lock_ptr->writer_event, 1, 0); + xeKeSetEvent(ppc_context, &lock_ptr->writer_event, 1, 0); } DECLARE_XBOXKRNL_EXPORT1(ExReleaseReadWriteLock, kThreading, kImplemented); // NOTE: This function is very commonly inlined, and probably won't be called! pointer_result_t InterlockedPushEntrySList_entry( - pointer_t plist_ptr, pointer_t entry) { + pointer_t plist_ptr, pointer_t entry, + const ppc_context_t& context) { assert_not_null(plist_ptr); assert_not_null(entry); @@ -1720,6 +2274,7 @@ pointer_result_t InterlockedPushEntrySList_entry( alignas(8) X_SLIST_HEADER new_hdr = {{0}, 0, 0}; uint32_t old_head = 0; do { + context->CheckInterrupt(); old_hdr = *plist_ptr; new_hdr.depth = old_hdr.depth + 1; new_hdr.sequence = old_hdr.sequence + 1; @@ -1737,13 +2292,14 @@ DECLARE_XBOXKRNL_EXPORT2(InterlockedPushEntrySList, kThreading, kImplemented, kHighFrequency); pointer_result_t InterlockedPopEntrySList_entry( - pointer_t plist_ptr) { + pointer_t plist_ptr, const ppc_context_t& context) { assert_not_null(plist_ptr); uint32_t popped = 0; alignas(8) X_SLIST_HEADER old_hdr = {{0}, 0, 0}; alignas(8) X_SLIST_HEADER new_hdr = {{0}, 0, 0}; do { + context->CheckInterrupt(); old_hdr = *plist_ptr; auto next = kernel_memory()->TranslateVirtual( old_hdr.next.next); @@ -1765,13 +2321,14 @@ DECLARE_XBOXKRNL_EXPORT2(InterlockedPopEntrySList, kThreading, kImplemented, kHighFrequency); pointer_result_t InterlockedFlushSList_entry( - pointer_t plist_ptr) { + pointer_t plist_ptr, const 
ppc_context_t& context) { assert_not_null(plist_ptr); alignas(8) X_SLIST_HEADER old_hdr = *plist_ptr; alignas(8) X_SLIST_HEADER new_hdr = {{0}, 0, 0}; uint32_t first = 0; do { + context->CheckInterrupt(); old_hdr = *plist_ptr; first = old_hdr.next.next; new_hdr.next.next = 0; @@ -1784,6 +2341,64 @@ pointer_result_t InterlockedFlushSList_entry( return first; } DECLARE_XBOXKRNL_EXPORT1(InterlockedFlushSList, kThreading, kImplemented); +// todo: does this belong here? its arguable whether this is a threading or +// object function +dword_result_t ObGetWaitableObject_entry(dword_t object, + const ppc_context_t& context) { + return context->HostToGuestVirtual( + xeObGetWaitableObject(context, context->TranslateVirtual(object))); +} +DECLARE_XBOXKRNL_EXPORT1(ObGetWaitableObject, kThreading, kImplemented); + +void KeInitializeQueue_entry(pointer_t queue, dword_t count, + const ppc_context_t& context) { + xeKeInitializeQueue(queue, count, context); +} +DECLARE_XBOXKRNL_EXPORT1(KeInitializeQueue, kThreading, kImplemented); + +dword_result_t KeInsertHeadQueue_entry(pointer_t queue, + pointer_t entry, + const ppc_context_t& context) { + return xeKeInsertHeadQueue(queue, entry, context); +} + +DECLARE_XBOXKRNL_EXPORT1(KeInsertHeadQueue, kThreading, kImplemented); + +dword_result_t KeInsertQueue_entry(pointer_t queue, + pointer_t entry, + const ppc_context_t& context) { + return xeKeInsertQueue(queue, entry, context); +} +DECLARE_XBOXKRNL_EXPORT1(KeInsertQueue, kThreading, kImplemented); + +dword_result_t KeRundownQueue_entry(pointer_t queue, + const ppc_context_t& context) { + return context->HostToGuestVirtual(xeKeRundownQueue(context, queue)); +} + +DECLARE_XBOXKRNL_EXPORT1(KeRundownQueue, kThreading, kImplemented); + +dword_result_t KeRemoveQueue_entry(pointer_t queue, dword_t wait_mode, + lpqword_t timeout, + const ppc_context_t& context) { + int64_t timeout_host = timeout.guest_address() ? 
*timeout : 0; + return xeKeRemoveQueue(context, queue, wait_mode & 0xff, + timeout.guest_address() ? &timeout_host : nullptr); +} + +DECLARE_XBOXKRNL_EXPORT1(KeRemoveQueue, kThreading, kImplemented); + +dword_result_t KeQueryBackgroundProcessors_entry(const ppc_context_t& context) { + return xeKeQueryBackgroundProcessors(context); +} + +DECLARE_XBOXKRNL_EXPORT1(KeQueryBackgroundProcessors, kThreading, kImplemented); + +void KeSetBackgroundProcessors_entry(dword_t value, + const ppc_context_t& context) { + xeKeSetBackgroundProcessors(context, value); +} +DECLARE_XBOXKRNL_EXPORT1(KeSetBackgroundProcessors, kThreading, kImplemented); } // namespace xboxkrnl } // namespace kernel diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.h b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.h index 6125f7e896..08a3512f64 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.h +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.h @@ -10,8 +10,9 @@ #ifndef XENIA_KERNEL_XBOXKRNL_XBOXKRNL_THREADING_H_ #define XENIA_KERNEL_XBOXKRNL_XBOXKRNL_THREADING_H_ +#include "xenia/kernel/kernel_guest_structures.h" #include "xenia/kernel/util/shim_utils.h" -#include "xenia/xbox.h" +#include "xenia/kernel/xmutant.h" namespace xe { namespace kernel { @@ -25,17 +26,24 @@ uint32_t xeNtClearEvent(uint32_t handle); uint32_t xeNtWaitForMultipleObjectsEx(uint32_t count, xe::be* handles, uint32_t wait_type, uint32_t wait_mode, - uint32_t alertable, - uint64_t* timeout_ptr); + uint32_t alertable, uint64_t* timeout_ptr, + cpu::ppc::PPCContext* context); -uint32_t xeKeWaitForSingleObject(void* object_ptr, uint32_t wait_reason, - uint32_t processor_mode, uint32_t alertable, - uint64_t* timeout_ptr); + +uint32_t xeKeWaitForSingleObjectEx( + PPCContext* context, + ShiftedPointer wait, + unsigned char waitmode, bool alertable, int64_t* timeout); uint32_t NtWaitForSingleObjectEx(uint32_t object_handle, uint32_t wait_mode, uint32_t alertable, uint64_t* timeout_ptr); -uint32_t xeKeSetEvent(X_KEVENT* 
event_ptr, uint32_t increment, uint32_t wait); +int32_t xeKeSetEvent(PPCContext* context, X_KEVENT* event, int increment, + unsigned char wait); +int32_t xeKePulseEvent(PPCContext* context, X_KEVENT* event, int increment, + unsigned char wait); +int32_t + xeKeResetEvent(PPCContext* context, X_KEVENT* event); uint32_t KeDelayExecutionThread(uint32_t processor_mode, uint32_t alertable, uint64_t* interval_ptr, @@ -46,6 +54,8 @@ uint32_t ExCreateThread(xe::be* handle_ptr, uint32_t stack_size, uint32_t xapi_thread_startup, uint32_t start_address, uint32_t start_context, uint32_t creation_flags); +void xeKeInitializeSemaphore(X_KSEMAPHORE* semaphore, int count, int limit); + uint32_t ExTerminateThread(uint32_t exit_code); uint32_t NtResumeThread(uint32_t handle, uint32_t* suspend_count_ptr); @@ -57,21 +67,154 @@ void xeKeInitializeApc(XAPC* apc, uint32_t thread_ptr, uint32_t kernel_routine, uint32_t xeKeInsertQueueApc(XAPC* apc, uint32_t arg1, uint32_t arg2, uint32_t priority_increment, cpu::ppc::PPCContext* context); -uint32_t xeNtQueueApcThread(uint32_t thread_handle, uint32_t apc_routine, + +void xeKeInsertQueueApcHelper(cpu::ppc::PPCContext* context,XAPC* apc, + int priority_increment); +uint32_t + xeNtQueueApcThread(uint32_t thread_handle, uint32_t apc_routine, uint32_t apc_routine_context, uint32_t arg1, uint32_t arg2, cpu::ppc::PPCContext* context); +XE_COMPARISON_NOINLINE void xeKfLowerIrql(PPCContext* ctx, unsigned char new_irql); +XE_COMPARISON_NOINLINE unsigned char xeKfRaiseIrql(PPCContext* ctx, unsigned char new_irql); - -void xeKeKfReleaseSpinLock(PPCContext* ctx, X_KSPINLOCK* lock, uint32_t old_irql, bool change_irql=true); +XE_COMPARISON_NOINLINE +void xeKeKfReleaseSpinLock(PPCContext* ctx, X_KSPINLOCK* lock, + uint32_t old_irql, bool change_irql = true); +XE_COMPARISON_NOINLINE uint32_t xeKeKfAcquireSpinLock(PPCContext* ctx, X_KSPINLOCK* lock, bool change_irql = true); X_STATUS xeProcessUserApcs(PPCContext* ctx); - +X_STATUS 
xeProcessKernelApcs(PPCContext* ctx); +void xeExecuteDPCList2( + PPCContext* context, uint32_t timer_unk, + util::X_TYPED_LIST& dpc_list, + uint32_t zero_register); + +void xeKeRetireDpcList(PPCContext* context); + +void xeHandleDPCsAndThreadSwapping(PPCContext* context, bool from_idle_loop=false); +void xeDispatchProcedureCallInterrupt(unsigned int new_irql, + unsigned int software_interrupt_mask, + cpu::ppc::PPCContext* context); void xeRundownApcs(PPCContext* ctx); uint32_t xeKeGetCurrentProcessType(PPCContext* context); void xeKeSetCurrentProcessType(uint32_t type, PPCContext* context); +void xeKeInitializeMutant(X_KMUTANT* mutant, bool initially_owned, + xe::cpu::ppc::PPCContext* context); +void xeKeEnterCriticalRegion(PPCContext* context); +void xeKeLeaveCriticalRegion(PPCContext* context); + +void xeKeInitializeTimerEx(X_KTIMER* timer, uint32_t type, uint32_t proctype, + PPCContext* context); +// dispatcher header helpers +void xeEnqueueThreadPostWait(PPCContext* context, X_KTHREAD* thread, + X_STATUS wait_result, int unknown); +void xeHandleWaitTypeAll(PPCContext* context, X_KWAIT_BLOCK* block); +void xeDispatchSignalStateChange(PPCContext* context, X_DISPATCH_HEADER* header, + int unk); +uint32_t xeKeInsertQueueDpc(XDPC* dpc, uint32_t arg1, uint32_t arg2, + PPCContext* ctx); +uint32_t xeKeRemoveQueueDpc(XDPC* dpc, PPCContext* ctx); +void xeReallyQueueThread(PPCContext* context, X_KTHREAD* kthread); +void xeHandleReadyThreadOnDifferentProcessor(PPCContext* context, + X_KTHREAD* kthread); +X_STATUS xeNtYieldExecution(PPCContext* context); +/* + a special spinlock-releasing function thats used in a lot of scheduler + related functions im not very confident in the correctness of this one. 
the + original jumps around a lot, directly into the bodies of other functions and + appears to have been written in asm +*/ +void xeDispatcherSpinlockUnlock(PPCContext* context, X_KSPINLOCK* lock, + uint32_t irql); +XE_COMPARISON_NOINLINE +void scheduler_80097F90(PPCContext* context, X_KTHREAD* thread); +XE_COMPARISON_NOINLINE +X_STATUS xeSchedulerSwitchThread(PPCContext* context); +XE_COMPARISON_NOINLINE +X_STATUS xeSchedulerSwitchThread2(PPCContext* context); + +int xeKeSuspendThread(PPCContext* context, X_KTHREAD* thread); +int xeKeResumeThread(PPCContext* context, X_KTHREAD* thread); + +void xeSuspendThreadApcRoutine(PPCContext* context); + +X_STATUS xeKeWaitForSingleObject(PPCContext* context, X_DISPATCH_HEADER* object, + unsigned reason, unsigned unk, bool alertable, + int64_t* timeout); +int32_t xeKeReleaseMutant(PPCContext* context, X_KMUTANT* mutant, int unk, + bool abandoned, unsigned char unk2); +int XeInsertGlobalTimer(PPCContext* context, X_KTIMER* timer, int64_t time); +int xeKeSetTimerEx(PPCContext* context, X_KTIMER* timer, int64_t duetime, + int period, XDPC* dpc); +int xeKeSetTimer(PPCContext* context, X_KTIMER* timer, int64_t duetime, + XDPC* dpc); +int xeKeCancelTimer(PPCContext* context, X_KTIMER* timer); +void xeEXTimerDPCRoutine(PPCContext* context); +//NtCreateTimer +void xeKeInitializeExTimer(PPCContext* context, X_EXTIMER* timer, + uint32_t type); +//NtSetTimer +int xeKeSetExTimer(PPCContext* context, X_EXTIMER* timer, int64_t due_timer, + uint32_t apc_routine, uint32_t apc_arg, int period, int apc_mode); +//NtCancelTimer +int xeKeCancelExTimer(PPCContext* context, X_EXTIMER* timer); + +uint32_t xeKeRemoveQueueApc(XAPC* apc, PPCContext* context); + +void xeEXTimerAPCKernelRoutine(PPCContext* context); + +void xeKeSetAffinityThread(PPCContext* context, X_KTHREAD* thread, + uint32_t affinity, uint32_t* prev_affinity); + +void xeKeSetPriorityClassThread(PPCContext* context, X_KTHREAD* thread, + bool a2); + +void 
xeKeChangeThreadPriority(PPCContext* context, X_KTHREAD* thread, + int priority); +int32_t xeKeReleaseSemaphore(PPCContext* context, X_KSEMAPHORE* semaphore, + int increment, int adjustment, unsigned char wait); +X_STATUS xeKeDelayExecutionThread(PPCContext* context, char mode, + bool alertable, int64_t* interval); + +int32_t xeKeSetBasePriorityThread(PPCContext* context, X_KTHREAD* thread, + int increment); +X_STATUS xeKeSignalAndWaitForSingleObjectEx( + PPCContext* context, + ShiftedPointer signal, + ShiftedPointer wait, + unsigned char mode, bool alertable, int64_t* timeout); + +int32_t xeKeQueryBasePriorityThread(PPCContext* context, X_KTHREAD* thread); +X_STATUS xeKeWaitForMultipleObjects( + PPCContext* context, unsigned int num_objects, X_DISPATCH_HEADER** objects, + unsigned wait_type, unsigned reason, unsigned char mode, int alertable, + int64_t* timeout, X_KWAIT_BLOCK* wait_blocks); +int32_t xeKeSetDisableBoostThread(PPCContext* context, X_KTHREAD* thread, + char a2); +int32_t xeKeSetPriorityThread(PPCContext* context, X_KTHREAD* thread, + int priority); + +X_DISPATCH_HEADER* xeObGetWaitableObject(PPCContext* context, void* object); + +void xeKeInitializeQueue(X_KQUEUE* queue, uint32_t count, PPCContext* context); +int32_t xeKeInsertQueue(X_KQUEUE* queue, X_LIST_ENTRY* entry, + PPCContext* context); +int32_t xeKeInsertHeadQueue(X_KQUEUE* queue, X_LIST_ENTRY* entry, + PPCContext* context); + +void xeKeSignalQueue(PPCContext* context, X_KQUEUE* queue); + +X_LIST_ENTRY* xeKeRundownQueue(PPCContext* context, X_KQUEUE* queue); + +uint32_t xeKeRemoveQueue(PPCContext* context, X_KQUEUE* queue, + unsigned char wait_mode, int64_t* timeout); + +uint32_t xeKeQueryBackgroundProcessors(PPCContext* context); +void xeKeSetBackgroundProcessors(PPCContext* context, uint32_t new_bgproc); +void xeKeEnterBackgroundMode(PPCContext* context); } // namespace xboxkrnl } // namespace kernel } // namespace xe diff --git 
a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading_dispatcher_objects.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading_dispatcher_objects.cc new file mode 100644 index 0000000000..f064dc35c2 --- /dev/null +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading_dispatcher_objects.cc @@ -0,0 +1,621 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2023 Xenia Canary. All rights reserved. * Released under the BSD + *license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include +#include +#include "xenia/base/atomic.h" +#include "xenia/base/clock.h" +#include "xenia/base/logging.h" +#include "xenia/base/mutex.h" +#include "xenia/cpu/processor.h" +#include "xenia/kernel/kernel_state.h" +#include "xenia/kernel/user_module.h" +#include "xenia/kernel/util/shim_utils.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_private.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h" +#include "xenia/kernel/xevent.h" +#include "xenia/kernel/xmutant.h" +#include "xenia/kernel/xsemaphore.h" +#include "xenia/kernel/xthread.h" +#include "xenia/kernel/xtimer.h" +#include "xenia/xbox.h" + +namespace xe { +namespace kernel { +namespace xboxkrnl { + +int32_t xeKeSetEvent(PPCContext* context, X_KEVENT* event, int increment, + unsigned char wait) { + xenia_assert(event && event->header.type < 2); + uint32_t old_irql = context->kernel_state->LockDispatcher(context); + + auto old_signalstate = event->header.signal_state; + auto wait_list = context->TranslateVirtual( + event->header.wait_list.flink_ptr); + + if (&wait_list->wait_list_entry == &event->header.wait_list) { + // no waiters, just set signalstate + event->header.signal_state = 1; + } else if (event->header.type != 0 && wait_list->wait_type == WAIT_ANY) { + 
xeEnqueueThreadPostWait(context, + context->TranslateVirtual(wait_list->thread), + wait_list->wait_result_xstatus, increment); + } else if (!old_signalstate) { + event->header.signal_state = 1; + xeDispatchSignalStateChange(context, &event->header, increment); + } + if (wait) { + auto current_thread = + context->TranslateVirtual(GetKPCR(context)->prcb_data.current_thread); + current_thread->wait_next = wait; + current_thread->wait_irql = old_irql; + } else { + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), old_irql); + } + return old_signalstate; +} + +int32_t xeKePulseEvent(PPCContext* context, X_KEVENT* event, int increment, + unsigned char wait) { + xenia_assert(event && event->header.type < 2); + uint32_t old_irql = context->kernel_state->LockDispatcher(context); + + auto old_signalstate = event->header.signal_state; + auto wait_list = context->TranslateVirtual( + event->header.wait_list.flink_ptr); + + if (!old_signalstate && + &wait_list->wait_list_entry != &event->header.wait_list) { + event->header.signal_state = 1; + xeDispatchSignalStateChange(context, &event->header, increment); + } + event->header.signal_state = 0; + if (wait) { + auto current_thread = + context->TranslateVirtual(GetKPCR(context)->prcb_data.current_thread); + current_thread->wait_next = wait; + current_thread->wait_irql = old_irql; + } else { + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), old_irql); + } + return old_signalstate; +} + +int32_t xeKeResetEvent(PPCContext* context, X_KEVENT* event) { + xenia_assert(event && event->header.type < 2); + auto old_irql = context->kernel_state->LockDispatcher(context); + int32_t old_signal_state = event->header.signal_state; + + event->header.signal_state = 0; + + // is this really necessary? 
i thought this function was only used when a + // thread may be unwaited, but is uses it in resetevent + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), old_irql); + return old_signal_state; +} + +int32_t xeKeReleaseMutant(PPCContext* context, X_KMUTANT* mutant, int increment, + bool abandoned, unsigned char wait) { + auto old_irql = context->kernel_state->LockDispatcher(context); + int32_t old_signal_state = mutant->header.signal_state; + int32_t new_signal_state; + auto current_thread = + context->TranslateVirtual(GetKPCR(context)->prcb_data.current_thread); + + if (!abandoned) { + if (context->TranslateVirtual(mutant->owner) != current_thread) { + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), old_irql); + // xe::FatalError("We don't own the mutant, but we're releasing it!"); + // return -1; + // xenia_assert(false); + X_STATUS stat = mutant->abandoned ? X_STATUS_ABANDONED_WAIT_0 + : X_STATUS_MUTANT_NOT_OWNED; + // should RtlRaiseStatus! 
NtReleaseMutant catches the status i think, ida + // indicates a try handler + + context->RaiseStatus(stat); + return 0; + } + new_signal_state = old_signal_state + 1; + } else { + new_signal_state = 1; + mutant->abandoned = 1; + } + + mutant->header.signal_state = new_signal_state; + if (new_signal_state == 1) { + if (old_signal_state <= 0) { + util::XeRemoveEntryList(&mutant->unk_list, context); + } + mutant->owner = 0U; + if (!util::XeIsListEmpty(&mutant->header.wait_list, context)) { + xeDispatchSignalStateChange(context, &mutant->header, increment); + } + } + + if (wait) { + current_thread->wait_next = wait; + current_thread->wait_irql = old_irql; + + } else { + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), old_irql); + } + return old_signal_state; +} + +int32_t xeKeReleaseSemaphore(PPCContext* context, X_KSEMAPHORE* semaphore, + int increment, int adjustment, + unsigned char wait) { + auto old_irql = context->kernel_state->LockDispatcher(context); + int32_t old_signal_state = semaphore->header.signal_state; + + int32_t new_signal_state = old_signal_state + adjustment; + + if (new_signal_state > semaphore->limit || + new_signal_state < old_signal_state) { + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), old_irql); + // should RtlRaiseStatus + // xenia_assert(false); + context->RaiseStatus(X_STATUS_SEMAPHORE_LIMIT_EXCEEDED); + return 0; + } + + semaphore->header.signal_state = new_signal_state; + + if (!old_signal_state && + !util::XeIsListEmpty(&semaphore->header.wait_list, context)) { + xeDispatchSignalStateChange(context, &semaphore->header, increment); + } + + if (wait) { + GetKThread(context)->wait_next = wait; + GetKThread(context)->wait_irql = old_irql; + + } else { + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), old_irql); + } + return old_signal_state; +} + +int xeKeSetTimerEx(PPCContext* context, X_KTIMER* timer, int64_t 
duetime, + int period, XDPC* dpc) { + auto old_irql = context->kernel_state->LockDispatcher(context); + auto was_inserted = timer->header.inserted; + + if (was_inserted) { + timer->header.inserted = 0; + util::XeRemoveEntryList(&timer->table_bucket_entry, context); + } + + timer->header.signal_state = 0; + timer->dpc = context->HostToGuestVirtual(dpc); + timer->period = period; + if (!XeInsertGlobalTimer(context, timer, duetime)) { + if (!util::XeIsListEmpty(&timer->header.wait_list, context)) { + xeDispatchSignalStateChange(context, &timer->header, 0); + } + if (dpc) { + auto systime = context->kernel_state->GetKernelSystemTime(); + xeKeInsertQueueDpc(dpc, static_cast(systime), + static_cast(systime >> 32), context); + } + if (period) { + while (!XeInsertGlobalTimer(context, timer, -10000LL * period)) { + //?? + xenia_assert(false); + } + } + } + + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), old_irql); + return was_inserted; +} + +int xeKeSetTimer(PPCContext* context, X_KTIMER* timer, int64_t duetime, + XDPC* dpc) { + return xeKeSetTimerEx(context, timer, duetime, 0, dpc); +} + +int xeKeCancelTimer(PPCContext* context, X_KTIMER* timer) { + auto old_irql = context->kernel_state->LockDispatcher(context); + auto was_inserted = timer->header.inserted; + if (was_inserted) { + timer->header.inserted = 0; + util::XeRemoveEntryList(&timer->table_bucket_entry, context); + } + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), old_irql); + return was_inserted; +} + +void xeEXTimerDPCRoutine(PPCContext* context) { + X_EXTIMER* timer = context->TranslateVirtualGPR(context->r[4]); + uint32_t apcarg1 = static_cast(context->r[5]); + uint32_t apcarg2 = static_cast(context->r[6]); + + auto old_irql = xeKeKfAcquireSpinLock(context, &timer->timer_lock); + + if (timer->has_apc) { + xeKeInsertQueueApc(&timer->apc, apcarg1, apcarg2, 0, context); + } + + xeKeKfReleaseSpinLock(context, 
&timer->timer_lock, old_irql); +} + +void xeKeInitializeExTimer(PPCContext* context, X_EXTIMER* timer, + uint32_t type) { + memset(timer, 0, sizeof(X_EXTIMER)); + timer->dpc.Initialize(context->kernel_state->GetKernelGuestGlobals(context) + ->extimer_dpc_routine, + context->HostToGuestVirtual(timer)); + xeKeInitializeTimerEx(&timer->ktimer, type, + xeKeGetCurrentProcessType(context), context); +} +void xeEXTimerAPCKernelRoutine(PPCContext* context) { + X_EXTIMER* timer = context->TranslateVirtualGPR( + context->r[3] - offsetof(X_EXTIMER, apc)); + + uint32_t old_irql = + xboxkrnl::xeKeKfAcquireSpinLock(context, &timer->timer_lock); + + auto current_thread = GetKThread(context); + xboxkrnl::xeKeKfAcquireSpinLock(context, ¤t_thread->timer_list_lock, + false); + bool v10 = false; + if (timer->has_apc && current_thread == timer->apc.thread_ptr.xlat()) { + if (!timer->period) { + v10 = true; + util::XeRemoveEntryList(&timer->thread_timer_list_entry, context); + timer->has_apc = false; + } + + } else { + *context->TranslateVirtualGPR(context->r[4]) = 0; + } + + xboxkrnl::xeKeKfReleaseSpinLock(context, ¤t_thread->timer_list_lock, 0, + false); + + xboxkrnl::xeKeKfReleaseSpinLock(context, &timer->timer_lock, old_irql); + + // if v10 is set, supposed to dereference here, but that must wait until we + // implement objects correctly +} +static bool HelperCancelTimer(PPCContext* context, X_EXTIMER* timer) { + if (timer->has_apc) { + xeKeKfAcquireSpinLock(context, &timer->apc.thread_ptr->timer_list_lock, + false); + + util::XeRemoveEntryList(&timer->thread_timer_list_entry, context); + timer->has_apc = false; + xeKeKfReleaseSpinLock(context, &timer->apc.thread_ptr->timer_list_lock, 0, + false); + xeKeCancelTimer(context, &timer->ktimer); + xeKeRemoveQueueDpc(&timer->dpc, context); + xeKeRemoveQueueApc(&timer->apc, context); + return true; + } else { + xeKeCancelTimer(context, &timer->ktimer); + return false; + } +} + +// todo: this is incomplete, theres a bunch of 
dereferenceobject calls missing +int xeKeSetExTimer(PPCContext* context, X_EXTIMER* timer, int64_t due_timer, + uint32_t apc_routine, uint32_t apc_arg, int period, + int apc_mode) { + uint32_t old_irql = xeKeKfAcquireSpinLock(context, &timer->timer_lock); + + bool v21 = HelperCancelTimer(context, timer); + + auto old_signalstate = timer->ktimer.header.signal_state; + + timer->period = period; + + if (apc_routine) { + auto current_thread = GetKThread(context); + xeKeInitializeApc(&timer->apc, + GetKPCR(context)->prcb_data.current_thread.m_ptr, + context->kernel_state->GetKernelGuestGlobals(context) + ->extimer_apc_kernel_routine, + 0, apc_routine, apc_mode, apc_arg); + xeKeKfAcquireSpinLock(context, ¤t_thread->timer_list_lock, false); + util::XeInsertTailList(¤t_thread->timer_list, + &timer->thread_timer_list_entry, context); + timer->has_apc = true; + xeKeKfReleaseSpinLock(context, ¤t_thread->timer_list_lock, 0, false); + xeKeSetTimerEx(context, &timer->ktimer, due_timer, period, &timer->dpc); + } else { + xeKeSetTimerEx(context, &timer->ktimer, due_timer, period, 0); + } + xeKeKfReleaseSpinLock(context, &timer->timer_lock, old_irql); + return old_signalstate; +} + +int xeKeCancelExTimer(PPCContext* context, X_EXTIMER* timer) { + uint32_t old_irql = xeKeKfAcquireSpinLock(context, &timer->timer_lock); + + bool v8 = HelperCancelTimer(context, timer); + xeKeKfReleaseSpinLock(context, &timer->timer_lock, old_irql); + int old_signalstate = timer->ktimer.header.signal_state; + + return old_signalstate; +} + +X_DISPATCH_HEADER* xeObGetWaitableObject(PPCContext* context, void* object) { + auto wait_object_type = context->TranslateVirtual( + reinterpret_cast(object)[-1].object_type_ptr); + + // either encodes an offset from the object base to the object to wait on, + // or a default object to wait on? 
+ uint32_t unk = wait_object_type->unknown_size_or_object_; + auto kernel_guest_globals = + context->kernel_state->GetKernelGuestGlobals(context); + if (wait_object_type == &kernel_guest_globals + ->IoFileObjectType) { + xenia_assert(false); + } + + if (wait_object_type == &kernel_guest_globals->IoCompletionObjectType) { + xenia_assert(false); + } + X_DISPATCH_HEADER* waiter = + context->TranslateVirtual(unk); + // if (unk) { + // __debugbreak(); + // } + if (!((unsigned int)unk >> 16)) { + waiter = reinterpret_cast( + reinterpret_cast(object) + unk); + } else { + __debugbreak(); + } + return waiter; +} + +void xeKeInitializeQueue(X_KQUEUE* queue, uint32_t count, PPCContext* context) { + queue->header.signal_state = 0; + queue->header.type = DISPATCHER_QUEUE; + util::XeInitializeListHead(&queue->header.wait_list, context); + util::XeInitializeListHead(&queue->entry_list_head, context); + util::XeInitializeListHead(&queue->thread_list_head, context); + queue->current_count = 0; + if (count) { + queue->maximum_count = count; + } else { + queue->maximum_count = 1; + } +} + +template +static int32_t InsertQueueUnderLock(PPCContext* context, X_KQUEUE* queue, + X_LIST_ENTRY* entry) { + auto old_irql = context->kernel_state->LockDispatcher(context); + auto first_waitblock = context->TranslateVirtual( + queue->header.wait_list.blink_ptr); + auto current_thread = GetKThread(context); + int32_t old_signalstate = queue->header.signal_state; + if (first_waitblock == (X_KWAIT_BLOCK*)&queue->header.wait_list || + queue->current_count >= queue->maximum_count || + current_thread->queue.xlat() == queue && + current_thread->wait_reason == 4) { + queue->header.signal_state = old_signalstate + 1; + if (to_head) { + util::XeInsertHeadList(&queue->entry_list_head, entry, context); + + } else { + util::XeInsertTailList(&queue->entry_list_head, entry, context); + } + } else { + util::XeRemoveEntryList(&first_waitblock->wait_list_entry, context); + + auto thread_for_waitblock = + 
context->TranslateVirtual(first_waitblock->thread); + thread_for_waitblock->wait_result = (int)context->HostToGuestVirtual(entry); + ++queue->current_count; + thread_for_waitblock->wait_reason = 0; + if (thread_for_waitblock->wait_timeout_timer.header.inserted) { + thread_for_waitblock->wait_timeout_timer.header.inserted = 0; + util::XeRemoveEntryList( + &thread_for_waitblock->wait_timeout_timer.table_bucket_entry, + context); + } + xeReallyQueueThread(context, thread_for_waitblock); + } + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), old_irql); + return old_signalstate; +} + +int32_t xeKeInsertQueue(X_KQUEUE* queue, X_LIST_ENTRY* entry, + PPCContext* context) { + return InsertQueueUnderLock(context, queue, entry); +} +int32_t xeKeInsertHeadQueue(X_KQUEUE* queue, X_LIST_ENTRY* entry, + PPCContext* context) { + return InsertQueueUnderLock(context, queue, entry); +} + +void xeKeSignalQueue(PPCContext* context, X_KQUEUE* queue) { + uint32_t new_currentcount = queue->current_count - 1U; + queue->current_count = new_currentcount; + if (new_currentcount >= queue->maximum_count) { + return; + } + + if (util::XeIsListEmpty(&queue->header.wait_list, context) || + util::XeIsListEmpty(&queue->entry_list_head, context)) { + return; + } + + X_KWAIT_BLOCK* block = context->TranslateVirtual( + queue->header.wait_list.blink_ptr); + uint32_t entry_guest = queue->entry_list_head.flink_ptr; + X_LIST_ENTRY* entry = context->TranslateVirtual(entry_guest); + + util::XeRemoveEntryList(entry, context); + entry->flink_ptr = 0u; + + queue->header.signal_state--; + // send the list entry to the waiter + xeEnqueueThreadPostWait(context, context->TranslateVirtual(block->thread), + static_cast(entry_guest), 0); +} + +X_LIST_ENTRY* xeKeRundownQueue(PPCContext* context, X_KQUEUE* queue) { + uint32_t old_irql = context->kernel_state->LockDispatcher(context); + auto v4 = context->TranslateVirtual(queue->entry_list_head.flink_ptr); + if (v4 == 
&queue->entry_list_head) { + v4 = 0; + } else { + util::XeRemoveEntryList(&queue->entry_list_head, context); + } + auto v5 = &queue->thread_list_head; + while (!v5->empty(context)) { + auto kthread = v5->HeadObject(context); + kthread->queue = 0U; + + util::XeRemoveEntryList(&kthread->queue_related, context); + } + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), old_irql); + return v4; +} + +uint32_t xeKeRemoveQueue(PPCContext* context, X_KQUEUE* queue, + unsigned char wait_mode, int64_t* timeout) { + auto this_thread = GetKThread(context); + if (this_thread->wait_next) + this_thread->wait_next = 0; + else + this_thread->wait_irql = context->kernel_state->LockDispatcher(context); + + auto v8 = context->TranslateVirtual(this_thread->queue); + this_thread->queue = context->HostToGuestVirtual(queue); + if (queue == v8) { + --queue->current_count; + } else { + auto v9 = &this_thread->queue_related; + if (v8) { + auto v10 = v9->flink_ptr; + auto v11 = this_thread->queue_related.blink_ptr; + v11->flink_ptr = v9->flink_ptr; + v10->blink_ptr = v11; + xeKeSignalQueue(context, v8); + } + auto v12 = queue->thread_list_head.blink_ptr; + v9->flink_ptr = &queue->thread_list_head; + this_thread->queue_related.blink_ptr = v12; + v12->flink_ptr = v9; + queue->thread_list_head.blink_ptr = v9; + } + auto v13 = timeout; + auto v14 = &queue->entry_list_head; + int64_t v20; + int64_t tmp_timeout; + uint32_t v15; + auto scratch_waitblock = &this_thread->scratch_waitblock_memory[0]; + while (1) { + v15 = v14->flink_ptr; + if (v15 != context->HostToGuestVirtual(v14) && + queue->current_count < queue->maximum_count) { + auto queue_newcount = queue->current_count + 1; + --queue->header.signal_state; + queue->current_count = queue_newcount; + util::XeRemoveEntryList(v15, context); + context->TranslateVirtual(v15)->flink_ptr = 0u; + goto LABEL_36; + } + if (this_thread->deferred_apc_software_interrupt_state && + !this_thread->wait_irql) { + 
++queue->current_count; + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), + this_thread->wait_irql); + goto LABEL_31; + } + if (wait_mode && this_thread->user_apc_pending) { + v15 = X_STATUS_USER_APC; + goto LABEL_35; + } + this_thread->wait_result = 0; + this_thread->wait_blocks = scratch_waitblock; + scratch_waitblock->object = &queue->header; + scratch_waitblock->wait_result_xstatus = 0; + scratch_waitblock->thread = this_thread; + scratch_waitblock->wait_type = 1; + if (!timeout) { + scratch_waitblock->next_wait_block = scratch_waitblock; + goto LABEL_26; + } + if (!*timeout) { + break; + } + scratch_waitblock->next_wait_block = &this_thread->wait_timeout_block; + this_thread->wait_timeout_timer.header.wait_list.flink_ptr = + &this_thread->wait_timeout_block.wait_list_entry; + this_thread->wait_timeout_timer.header.wait_list.blink_ptr = + &this_thread->wait_timeout_block.wait_list_entry; + this_thread->wait_timeout_block.next_wait_block = scratch_waitblock; + + if (!XeInsertGlobalTimer(context, &this_thread->wait_timeout_timer, + *timeout)) + break; + v20 = this_thread->wait_timeout_timer.due_time; + LABEL_26: + auto v16 = queue->header.wait_list.blink_ptr; + scratch_waitblock->wait_list_entry.flink_ptr = &queue->header.wait_list; + scratch_waitblock->wait_list_entry.blink_ptr = v16; + v16->flink_ptr = &scratch_waitblock->wait_list_entry; + queue->header.wait_list.blink_ptr = &scratch_waitblock->wait_list_entry; + this_thread->alertable = 0; + this_thread->processor_mode = wait_mode; + this_thread->wait_reason = 4; + this_thread->thread_state = KTHREAD_STATE_WAITING; + + auto result = xeSchedulerSwitchThread2(context); + this_thread->wait_reason = 0; + if (result != X_STATUS_KERNEL_APC) { + return result; + } + if (timeout) { + if (*timeout < 0) { + tmp_timeout = + context->kernel_state->GetKernelInterruptTime() - *timeout; + timeout = &tmp_timeout; + } else { + timeout = v13; + } + } + + LABEL_31: + 
this_thread->wait_irql = context->kernel_state->LockDispatcher(context); + --queue->current_count; + } + v15 = X_STATUS_TIMEOUT; +LABEL_35: + ++queue->current_count; +LABEL_36: + xeDispatcherSpinlockUnlock(context, + context->kernel_state->GetDispatcherLock(context), + this_thread->wait_irql); + return v15; +} + +} // namespace xboxkrnl +} // namespace kernel +} // namespace xe diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading_scheduler.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading_scheduler.cc new file mode 100644 index 0000000000..088078e224 --- /dev/null +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading_scheduler.cc @@ -0,0 +1,1910 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2023 Xenia Canary. All rights reserved. * Released under the BSD + *license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include +#include +#include "xenia/base/atomic.h" +#include "xenia/base/clock.h" +#include "xenia/base/logging.h" +#include "xenia/base/mutex.h" +#include "xenia/cpu/processor.h" +#include "xenia/kernel/kernel_state.h" +#include "xenia/kernel/user_module.h" +#include "xenia/kernel/util/shim_utils.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_private.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h" +#include "xenia/kernel/xevent.h" +#include "xenia/kernel/xmutant.h" +#include "xenia/kernel/xsemaphore.h" +#include "xenia/kernel/xthread.h" +#include "xenia/kernel/xtimer.h" +#include "xenia/xbox.h" + +namespace xe { +namespace kernel { +namespace xboxkrnl { + +template +static void SCHEDLOG(PPCContext* context, const char (&fmt)[fmt_len], + Ts... 
args) { +#if 0 +#define prefixfmt "(Context {}, Fiber {}, HW Thread {}, Guest Thread {}) " + + char tmpbuf[fmt_len + sizeof(prefixfmt)]; + + memcpy(tmpbuf, prefixfmt, sizeof(prefixfmt) - 1); + + memcpy(&tmpbuf[sizeof(prefixfmt) - 1], &fmt[0], fmt_len); + + XELOGE(&tmpbuf[0], (void*)context, (void*)threading::Fiber::GetCurrentFiber(), + context->kernel_state->GetPCRCpuNum(GetKPCR(context)), + (void*)GetKThread(context), args...); +#else + +#endif +} + +static void insert_8009CFE0(PPCContext* context, X_KTHREAD* thread, int unk); +static void insert_8009D048(PPCContext* context, X_KTHREAD* thread); +XE_COMPARISON_NOINLINE +static X_KTHREAD* xeScanForReadyThread(PPCContext* context, X_KPRCB* prcb, + int priority); +static void xeProcessQueuedThreads(PPCContext* context, + bool under_dispatcher_lock); +X_KTHREAD* xeSelectThreadDueToTimesliceExpiration(PPCContext* context); + +static void set_msr_interrupt_bits(PPCContext* context, uint32_t value) { + // todo: implement! + uint64_t old_msr = context->msr; + context->msr = (old_msr & ~0x8000ULL) | (value & 0x8000); +} + +using ready_thread_pointer_t = + ShiftedPointer; + +/* + a special spinlock-releasing function thats used in a lot of scheduler + related functions im not very confident in the correctness of this one. the + original jumps around a lot, directly into the bodies of other functions and + appears to have been written in asm +*/ +void xeDispatcherSpinlockUnlock(PPCContext* context, X_KSPINLOCK* lock, + uint32_t irql) { + // SCHEDLOG(context, "xeDispatcherSpinlockUnlock irql = {}", irql); + xenia_assert(lock->pcr_of_owner == static_cast(context->r[13])); + lock->pcr_of_owner = 0; +reenter: + auto kpcr = GetKPCR(context); + if (kpcr->prcb_data.enqueued_threads_list.next) { + xeProcessQueuedThreads(context, false); + // todo: theres a jump here!! 
+ // this doesnt feel right + goto reenter; + } else if (kpcr->prcb_data.next_thread.m_ptr != 0) { + if (irql > IRQL_APC) { + if (!kpcr->prcb_data.dpc_active) { + kpcr->generic_software_interrupt = irql; + } else { + __nop(); + } + } else { + xboxkrnl::xeKeKfAcquireSpinLock( + context, &kpcr->prcb_data.enqueued_processor_threads_lock, false); + auto next_thread = kpcr->prcb_data.next_thread; + auto v3 = kpcr->prcb_data.current_thread.xlat(); + v3->wait_irql = irql; + kpcr->prcb_data.next_thread = 0U; + kpcr->prcb_data.current_thread = next_thread; + insert_8009D048(context, v3); + // jump here! + // r31 = next_thread + // r30 = current thread + // definitely switching threads + + context->kernel_state->ContextSwitch(context, next_thread.xlat()); + +// this is all already done in ContextSwitch! +#if 0 + // at this point we're supposed to load a bunch of fields from r31 and do + // shit + + // im just assuming r31 is supposed to be this + X_KTHREAD* r31 = next_thread.xlat(); + + auto r3 = r31->unk_A4; + auto r29 = r31->wait_result; + GetKPCR(context)->current_irql = r3; + auto r4 = GetKPCR(context)->software_interrupt_state; + if (r3 < r4) { + xeDispatchProcedureCallInterrupt(r3, r4, context); + } +#endif + } + } else if (irql <= IRQL_APC) { + kpcr->current_irql = irql; + auto v4 = kpcr->software_interrupt_state; + if (irql < v4) { + xeDispatchProcedureCallInterrupt(irql, v4, context); + } + } +} + +void xeHandleReadyThreadOnDifferentProcessor(PPCContext* context, + X_KTHREAD* kthread) { + auto kpcr = GetKPCR(context); + auto v3 = &kpcr->prcb_data; + xboxkrnl::xeKeKfAcquireSpinLock( + context, &kpcr->prcb_data.enqueued_processor_threads_lock, false); + SCHEDLOG(context, + "xeHandleReadyThreadOnDifferentProcessor kthread = {}, " + "thread_state = {}", + (void*)kthread, kthread->thread_state); + if (kthread->thread_state != KTHREAD_STATE_RUNNING) { + // xe::FatalError("Doing some fpu/vmx shit here?"); + // it looks like its saving the fpu and vmx state + // we don't 
have to do this i think, because we already have different + // PPCContext per guest thread + } + // https://www.geoffchappell.com/studies/windows/km/ntoskrnl/inc/ntos/ke/kthread_state.htm + + xenia_assert(kthread->a_prcb_ptr.xlat() == v3); + switch (kthread->thread_state) { + case KTHREAD_STATE_READY: { + auto v23 = kthread->ready_prcb_entry.flink_ptr; + auto v24 = kthread->ready_prcb_entry.blink_ptr; + v24->flink_ptr = v23; + v23->blink_ptr = v24; + if (v24 == v23) { + v3->has_ready_thread_by_priority = + v3->has_ready_thread_by_priority & (~(1 << kthread->priority)); + } + break; + } + case KTHREAD_STATE_RUNNING: { // running + if (!v3->next_thread) { + auto v22 = xeScanForReadyThread(context, v3, 0); + if (!v22) { + v22 = v3->idle_thread.xlat(); + v3->running_idle_thread = v22; + } + v22->thread_state = KTHREAD_STATE_STANDBY; + v3->next_thread = v22; + GetKPCR(context)->generic_software_interrupt = 2; + } + xboxkrnl::xeKeKfReleaseSpinLock( + context, &kpcr->prcb_data.enqueued_processor_threads_lock, 0, false); + return; + } + case KTHREAD_STATE_STANDBY: { // standby + auto v21 = xeScanForReadyThread(context, v3, 0); + if (!v21) { + v21 = v3->idle_thread.xlat(); + v3->running_idle_thread = v21; + } + v21->thread_state = KTHREAD_STATE_STANDBY; + v3->next_thread = v21; + break; + } + default: { + auto v19 = kthread->another_prcb_ptr; + auto v20 = v19->current_cpu; + kthread->a_prcb_ptr = v19; + kthread->current_cpu = v20; + xboxkrnl::xeKeKfReleaseSpinLock( + context, &kpcr->prcb_data.enqueued_processor_threads_lock, 0, false); + return; + } + } + + auto v25 = kthread->another_prcb_ptr; + auto v26 = v25->current_cpu; + kthread->a_prcb_ptr = v25; + kthread->current_cpu = v26; + + xboxkrnl::xeKeKfReleaseSpinLock( + context, &kpcr->prcb_data.enqueued_processor_threads_lock, 0, false); + xeReallyQueueThread(context, kthread); +} +// if was_preempted, insert to front of ready list +static void insert_8009CFE0(PPCContext* context, X_KTHREAD* thread, + int was_preempted) 
{ + SCHEDLOG(context, "insert_8009D048 - thread = {}, unk = {}", (void*)thread, + was_preempted); + auto priority = thread->priority; + auto thread_prcb = context->TranslateVirtual(thread->a_prcb_ptr); + auto thread_ready_list_entry = &thread->ready_prcb_entry; + thread->thread_state = KTHREAD_STATE_READY; + auto& list_for_priority = thread_prcb->ready_threads_by_priority[priority]; + if (was_preempted) { + auto v6 = list_for_priority.flink_ptr; + thread->ready_prcb_entry.blink_ptr = &list_for_priority; + thread_ready_list_entry->flink_ptr = v6; + v6->blink_ptr = thread_ready_list_entry; + list_for_priority.flink_ptr = thread_ready_list_entry; + } else { + auto v7 = list_for_priority.blink_ptr; + thread_ready_list_entry->flink_ptr = &list_for_priority; + thread->ready_prcb_entry.blink_ptr = v7; + v7->flink_ptr = thread_ready_list_entry; + list_for_priority.blink_ptr = thread_ready_list_entry; + } + + thread_prcb->has_ready_thread_by_priority = + thread_prcb->has_ready_thread_by_priority | (1U << priority); +} + +static void insert_8009D048(PPCContext* context, X_KTHREAD* thread) { + SCHEDLOG(context, "insert_8009D048 - thread = {}", (void*)thread); + if (context->TranslateVirtual(thread->another_prcb_ptr) == + &GetKPCR(context)->prcb_data) { + unsigned char unk = thread->was_preempted; + thread->was_preempted = 0; + insert_8009CFE0(context, thread, unk); + } else { + thread->thread_state = KTHREAD_STATE_UNKNOWN; + auto kpcr = GetKPCR(context); + + thread->ready_prcb_entry.flink_ptr = + kpcr->prcb_data.enqueued_threads_list.next; + + kpcr->prcb_data.enqueued_threads_list.next = + context->HostToGuestVirtual(&thread->ready_prcb_entry); + kpcr->generic_software_interrupt = 2; + } +} +/* + performs bitscanning on the bitmask of available thread priorities to + select the first runnable one that is greater than or equal to the prio arg +*/ +XE_COMPARISON_NOINLINE +static X_KTHREAD* xeScanForReadyThread(PPCContext* context, X_KPRCB* prcb, + int priority) { + 
SCHEDLOG(context, "xeScanForReadyThread - prcb = {}, priority = {}", + (void*)prcb, priority); + unsigned v3 = prcb->has_ready_thread_by_priority; + if ((prcb->unk_mask_64 & ~((1 << priority) - 1) & v3) == 0) { + return nullptr; + } + unsigned int v4 = xe::lzcnt(prcb->unk_mask_64 & ~((1 << priority) - 1) & v3); + char v5 = 31 - v4; + + auto result = prcb->ready_threads_by_priority[31 - v4].HeadObject(context); + + uint32_t v7 = result->ready_prcb_entry.flink_ptr; + uint32_t v8 = result->ready_prcb_entry.blink_ptr; + context->TranslateVirtual(v8)->flink_ptr = v7; + context->TranslateVirtual(v7)->blink_ptr = v8; + if (v8 == v7) { + prcb->has_ready_thread_by_priority &= ~(1 << v5); + } + return result; +} + +void HandleCpuThreadDisownedIPI(void* ud) { + // xenia_assert(false); + // this is incorrect + // xeHandleDPCsAndThreadSwapping(cpu::ThreadState::GetContext(), false); + auto context = cpu::ThreadState::GetContext(); + // hack!!! don't know what the ipi that the kernel sends actually does + + auto kpcr = GetKPCR(context); + KernelState::HWThreadFor(context)->interrupt_controller()->SetEOI(1); + GetKPCR(context)->generic_software_interrupt = 2; +} + +void xeReallyQueueThread(PPCContext* context, X_KTHREAD* kthread) { + SCHEDLOG(context, "xeReallyQueueThread - kthread = {}", (void*)kthread); + auto prcb_for_thread = context->TranslateVirtual(kthread->a_prcb_ptr); + xboxkrnl::xeKeKfAcquireSpinLock( + context, &prcb_for_thread->enqueued_processor_threads_lock, false); + + auto thread_priority = kthread->priority; + auto was_preempted = kthread->was_preempted; + kthread->was_preempted = 0; + if ((prcb_for_thread->unk_mask_64 & (1 << thread_priority)) == 0) { + insert_8009CFE0(context, kthread, was_preempted); + xboxkrnl::xeKeKfReleaseSpinLock( + context, &prcb_for_thread->enqueued_processor_threads_lock, 0, false); + return; + } + + if (prcb_for_thread->running_idle_thread != 0) { + xenia_assert(prcb_for_thread->running_idle_thread.m_ptr == + 
prcb_for_thread->idle_thread.m_ptr); + + prcb_for_thread->running_idle_thread = 0U; + label_6: + kthread->thread_state = KTHREAD_STATE_STANDBY; + prcb_for_thread->next_thread = context->HostToGuestVirtual(kthread); + + xboxkrnl::xeKeKfReleaseSpinLock( + context, &prcb_for_thread->enqueued_processor_threads_lock, 0, false); + + uint32_t old_cpu_for_thread = kthread->current_cpu; + if (old_cpu_for_thread != GetKPCR(context)->prcb_data.current_cpu) { + /* + do a non-blocking host IPI here. we need to be sure the original cpu + this thread belonged to has given it up before we continue + */ + cpu::SendInterruptArguments interrupt_args; + interrupt_args.ipi_func = HandleCpuThreadDisownedIPI; + interrupt_args.ud = reinterpret_cast(kthread); + interrupt_args.irql_ = 0; //randomly chosen irql + interrupt_args.wait_done = false; + context->processor->GetCPUThread(old_cpu_for_thread) + ->SendGuestIPI(interrupt_args); + } + return; + } + + X_KTHREAD* next_thread = + context->TranslateVirtual(prcb_for_thread->next_thread); + + if (!prcb_for_thread->next_thread) { + if (thread_priority > + context->TranslateVirtual(prcb_for_thread->current_thread)->priority) { + context->TranslateVirtual(prcb_for_thread->current_thread) + ->was_preempted = 1; + goto label_6; + } + insert_8009CFE0(context, kthread, was_preempted); + xboxkrnl::xeKeKfReleaseSpinLock( + context, &prcb_for_thread->enqueued_processor_threads_lock, 0, false); + return; + } + + kthread->thread_state = KTHREAD_STATE_STANDBY; + + prcb_for_thread->next_thread = context->HostToGuestVirtual(kthread); + uint32_t v10 = next_thread->priority; + auto v11 = context->TranslateVirtual(next_thread->a_prcb_ptr); + + next_thread->thread_state = KTHREAD_STATE_READY; + v11->ready_threads_by_priority[v10].InsertHead(next_thread, context); + + v11->has_ready_thread_by_priority |= (1 << v10); + + xboxkrnl::xeKeKfReleaseSpinLock( + context, &prcb_for_thread->enqueued_processor_threads_lock, 0, false); +} + +static void 
xeProcessQueuedThreads(PPCContext* context, + bool under_dispatcher_lock) { + SCHEDLOG(context, "xeProcessQueuedThreads - under_dispatcher_lock {}", + under_dispatcher_lock); + auto kernel = context->kernel_state; + + if (under_dispatcher_lock) { + kernel->AssertDispatcherLocked(context); + } else { + kernel->LockDispatcherAtIrql(context); + } + + uint32_t first_ready_thread = + GetKPCR(context)->prcb_data.enqueued_threads_list.next; + + GetKPCR(context)->prcb_data.enqueued_threads_list.next = 0; + + while (first_ready_thread) { + ready_thread_pointer_t ready_thread = + context->TranslateVirtual(first_ready_thread); + first_ready_thread = ready_thread->flink_ptr; + // xeEnqueueThreadPostWait sets it to 6 + xenia_assert(ready_thread.GetAdjacent()->thread_state == KTHREAD_STATE_UNKNOWN); + + uint32_t prcb = + static_cast(context->r[13]) + offsetof(X_KPCR, prcb_data); + + auto adj = ready_thread.GetAdjacent(); + if (adj->a_prcb_ptr == prcb && adj->another_prcb_ptr != prcb) { + xeHandleReadyThreadOnDifferentProcessor(context, adj); + } + xeReallyQueueThread(context, adj); + } + + kernel->AssertDispatcherLocked(context); + if (!under_dispatcher_lock) { + kernel->UnlockDispatcherAtIrql(context); + } +} + +X_KTHREAD* xeSelectThreadDueToTimesliceExpiration(PPCContext* context) { + SCHEDLOG(context, "xeSelectThreadDueToTimesliceExpiration"); + auto pcr = GetKPCR(context); + auto prcb = &pcr->prcb_data; + + auto list_lock = &prcb->enqueued_processor_threads_lock; + + xboxkrnl::xeKeKfAcquireSpinLock(context, list_lock, false); + + auto current_thread = context->TranslateVirtual(prcb->current_thread); + + if (current_thread->quantum <= 0) { + auto current_process = current_thread->process; + if (current_process->unk_1B && current_thread->priority >= 0x12u) { + current_thread->quantum = 0x7FFFFFFF; + } else { + auto current_prio = current_thread->priority; + current_thread->quantum = current_process->quantum; + if ((unsigned int)current_prio < 0x12) { + current_prio = 
current_prio - current_thread->unk_BA - 1; + if (current_prio < current_thread->unk_B9) { + current_prio = current_thread->unk_B9; + } + current_thread->unk_BA = 0; + } + current_thread->priority = current_prio; + if (prcb->next_thread) { + current_thread->was_preempted = 0; + } else { + auto v7 = xeScanForReadyThread(context, prcb, current_prio); + if (v7) { + v7->thread_state = KTHREAD_STATE_STANDBY; + prcb->next_thread = v7; + } + } + } + } + uint32_t unk_mask; + + if (pcr->background_scheduling_1A) { + pcr->background_scheduling_1A = 0; + pcr->background_scheduling_1B = 0; + pcr->background_scheduling_active = true; + + uint32_t cpunum = context->kernel_state->GetPCRCpuNum(pcr); + auto hw_thread = context->processor->GetCPUThread(cpunum); + // todo: this is a variable that isnt in rdata, it might be modified by + // other things, so this timeout might not be 100% accurate + hw_thread->SetDecrementerTicks(50000); + + unk_mask = 0xEDB403FF; + } else { + if (!pcr->background_scheduling_1B) { + + auto result = context->TranslateVirtual(prcb->next_thread); + if (result) { + // not releasing the spinlock! 
this appears to be intentional + return result; + } + xboxkrnl::xeKeKfReleaseSpinLock(context, list_lock, 0, false); + return nullptr; + } + pcr->background_scheduling_1B = 0; + pcr->background_scheduling_active = false; + unk_mask = 0xF6DBFC03; + } + X_KTHREAD* v12; + if (prcb->unk_mask_64 != unk_mask) { + auto next_thread = prcb->next_thread.xlat(); + prcb->unk_mask_64 = unk_mask; + if (next_thread) { + prcb->next_thread = 0U; + insert_8009CFE0(context, next_thread, 1); + } + auto prcb_idle_thread = prcb->idle_thread.xlat(); + auto current_prio2 = current_thread->priority; + prcb->running_idle_thread = 0U; + if (current_thread == prcb_idle_thread || + ((1 << current_prio2) & unk_mask) == 0) { + v12 = xeScanForReadyThread(context, prcb, 0); + + if (!v12) { + v12 = prcb->idle_thread.xlat(); + prcb->running_idle_thread = v12; + } + if (v12 == current_thread) { + xboxkrnl::xeKeKfReleaseSpinLock(context, list_lock, 0, false); + return nullptr; + } + } else { + v12 = xeScanForReadyThread(context, prcb, current_prio2); + if (!v12) { + xboxkrnl::xeKeKfReleaseSpinLock(context, list_lock, 0, false); + + return nullptr; + } + } + v12->thread_state = KTHREAD_STATE_STANDBY; + prcb->next_thread = v12; + } + + auto v13 = prcb->next_thread; + if (v13) { + return v13.xlat(); + } + xboxkrnl::xeKeKfReleaseSpinLock(context, list_lock, 0, false); + + return nullptr; +} + +// handles DPCS, also switches threads? +// timer related? +void xeHandleDPCsAndThreadSwapping(PPCContext* context, bool from_idle_loop) { + SCHEDLOG(context, "xeHandleDPCsAndThreadSwapping"); + + X_KTHREAD* next_thread = nullptr; + while (true) { + set_msr_interrupt_bits(context, 0); + + GetKPCR(context)->generic_software_interrupt = 0; + if (!GetKPCR(context)->prcb_data.queued_dpcs_list_head.empty(context) || + GetKPCR(context)->timer_pending) { + // todo: incomplete! 
+ if (from_idle_loop) { + xeExecuteDPCList2(context, GetKPCR(context)->timer_pending, + GetKPCR(context)->prcb_data.queued_dpcs_list_head, 0); + } else { + uint32_t altstack = GetKPCR(context)->use_alternative_stack; + + xenia_assert(altstack == 0); + + uint32_t r4 = GetKPCR(context)->alt_stack_base_ptr; + GetKPCR(context)->use_alternative_stack = + static_cast(context->r[1]); + /* + addi r4, r4, -320 + subf r4, r1, r4 + addi r5, r1, 0xF0 + stwux r5, r1, r4 + */ + + SCHEDLOG(context, + "xeHandleDPCsAndThreadSwapping - entering xeExecuteDPCList2"); + xeExecuteDPCList2(context, GetKPCR(context)->timer_pending, + GetKPCR(context)->prcb_data.queued_dpcs_list_head, 0); + GetKPCR(context)->use_alternative_stack = 0; + } + } + set_msr_interrupt_bits(context, 0xFFFF8000); + + if (GetKPCR(context)->prcb_data.enqueued_threads_list.next) { + SCHEDLOG(context, + "xeHandleDPCsAndThreadSwapping - entering " + "xeProcessQueuedThreads"); + xeProcessQueuedThreads(context, false); + } + + if (GetKPCR(context)->timeslice_ended) { + GetKPCR(context)->timeslice_ended = 0; + next_thread = xeSelectThreadDueToTimesliceExpiration(context); + + if (!next_thread) { + return; + } + + break; + } + // failed to select a thread to switch to + if (!GetKPCR(context)->prcb_data.next_thread) { + return; + } + xboxkrnl::xeKeKfAcquireSpinLock( + context, &GetKPCR(context)->prcb_data.enqueued_processor_threads_lock, + false); + // some kind of lock acquire function here?? 
+ + uint32_t thrd_u = GetKPCR(context)->prcb_data.next_thread.m_ptr; + + if (from_idle_loop && thrd_u == GetKPCR(context)->prcb_data.idle_thread) { + GetKPCR(context)->prcb_data.next_thread = 0U; + xboxkrnl::xeKeKfReleaseSpinLock( + context, &GetKPCR(context)->prcb_data.enqueued_processor_threads_lock, + 0, false); + return; + } + + if (!thrd_u) { + next_thread = nullptr; + } else { + next_thread = context->TranslateVirtual(thrd_u); + break; + } + } + SCHEDLOG(context, "xeHandleDPCsAndThreadSwapping - Got a new next thread"); + // requeue ourselves + // GetKPCR(context)->prcb_data.current_thread + auto& prcb = GetKPCR(context)->prcb_data; + auto ble = context->TranslateVirtual(prcb.current_thread); + prcb.next_thread = 0U; + prcb.current_thread = context->HostToGuestVirtual(next_thread); + // idle loop does not reinsert itself! + if (!from_idle_loop) { + insert_8009D048(context, ble); + } + context->kernel_state->ContextSwitch(context, next_thread, from_idle_loop); +} + +void xeEnqueueThreadPostWait(PPCContext* context, X_KTHREAD* thread, + X_STATUS wait_result, int priority_increment) { + SCHEDLOG(context, + "xeEnqueueThreadPostWait - thread {}, wait_result {:04X}, " + "priority_increment {}", + (void*)thread, wait_result, priority_increment); + xenia_assert(thread->thread_state == KTHREAD_STATE_WAITING); + thread->wait_result = thread->wait_result | wait_result; + auto kpcr = GetKPCR(context); + xenia_assert(kpcr->current_irql == IRQL_DISPATCH); + + X_KWAIT_BLOCK* wait_blocks = context->TranslateVirtual(thread->wait_blocks); + do { + uint32_t v7 = wait_blocks->wait_list_entry.flink_ptr; + uint32_t v8 = wait_blocks->wait_list_entry.blink_ptr; + + context->TranslateVirtual(v8)->flink_ptr = + wait_blocks->wait_list_entry.flink_ptr; + context->TranslateVirtual(v7)->blink_ptr = v8; + wait_blocks = context->TranslateVirtual(wait_blocks->next_wait_block); + } while (wait_blocks != context->TranslateVirtual(thread->wait_blocks)); + + // wait is over, so cancel the 
timeout timer + if (thread->wait_timeout_timer.header.inserted) { + thread->wait_timeout_timer.header.inserted = 0; + util::XeRemoveEntryList(&thread->wait_timeout_timer.table_bucket_entry, + context); + thread->wait_timeout_timer.table_bucket_entry.flink_ptr = 0U; + thread->wait_timeout_timer.table_bucket_entry.blink_ptr = 0U; + } + auto unk_ptr = thread->queue; + if (unk_ptr) { + auto unk_counter = context->TranslateVirtualBE(unk_ptr + 0x18); + *unk_counter++; + } + + auto prcb = context->TranslateVirtual(thread->a_prcb_ptr); + + xboxkrnl::xeKeKfAcquireSpinLock( + context, &prcb->enqueued_processor_threads_lock, false); + + auto thread_priority = thread->priority; + auto thread_process = thread->process; + if (thread_priority >= 0x12) { + thread->quantum = thread_process->quantum; + + } else { + auto v15 = thread->unk_BA; + if (!v15 && !thread->boost_disabled) { + auto v16 = thread->unk_B9 + priority_increment; + if (v16 > (int)thread_priority) { + if (v16 < thread->unk_CA) + thread->priority = v16; + else + thread->priority = thread->unk_CA; + } + } + auto v17 = thread->unk_B9; + if (v17 >= (unsigned int)thread->unk_CA) { + thread->quantum = thread_process->quantum; + } else { + auto v18 = thread->quantum - 10; + thread->quantum = v18; + if (v18 <= 0) { + auto v19 = (unsigned char)(thread->priority - v15 - 1); + thread->quantum = thread_process->quantum; + + thread->priority = v19; + if (v19 < v17) { + thread->priority = v17; + } + thread->unk_BA = 0; + } + } + } + thread->thread_state = KTHREAD_STATE_UNKNOWN; + +#if 0 + thread->ready_prcb_entry.flink_ptr = prcb->enqueued_threads_list.next; + + prcb->enqueued_threads_list.next = + context->HostToGuestVirtual(&thread->ready_prcb_entry); +#else + thread->ready_prcb_entry.flink_ptr = + GetKPCR(context)->prcb_data.enqueued_threads_list.next; + GetKPCR(context)->prcb_data.enqueued_threads_list.next = + context->HostToGuestVirtual(&thread->ready_prcb_entry); +#endif + xboxkrnl::xeKeKfReleaseSpinLock( + context, 
&prcb->enqueued_processor_threads_lock, 0, false); +} + +static void xeSignalDispatcher(PPCContext* context, X_DISPATCH_HEADER* entry, + X_KTHREAD* thread_for_wait) { + auto current_wait_object = entry; + int current_object_type = current_wait_object->type; + if ((current_object_type & 7) == 1) { + current_wait_object->signal_state = 0; + } else if (current_object_type == 5) { // semaphore + --current_wait_object->signal_state; + } else if (current_object_type == 2) { // mutant + int new_signalstate = current_wait_object->signal_state - 1; + current_wait_object->signal_state = new_signalstate; + if (!new_signalstate) { + X_KMUTANT* mutant = reinterpret_cast(current_wait_object); + auto v6 = mutant->abandoned; + mutant->owner = context->HostToGuestVirtual(thread_for_wait); + if (v6 == 1) { + mutant->abandoned = 0; + thread_for_wait->wait_result = X_STATUS_ABANDONED_WAIT_0; + } + + // clearly inserthead or tail, determine which and clean this up + uint32_t v7 = thread_for_wait->mutants_list.blink_ptr; + auto v7ptr = context->TranslateVirtual(v7); + uint32_t v8 = v7ptr->flink_ptr; + + auto v8ptr = context->TranslateVirtual(v8); + mutant->unk_list.blink_ptr = v7; + mutant->unk_list.flink_ptr = v8; + auto unk_list = context->HostToGuestVirtual(&mutant->unk_list); + v8ptr->blink_ptr = unk_list; + v7ptr->flink_ptr = unk_list; + } + } +} + +void xeHandleWaitTypeAll(PPCContext* context, X_KWAIT_BLOCK* block) { + auto thread_for_wait = context->TranslateVirtual(block->thread); + auto current_waitblock = block; + do { + if (current_waitblock->wait_result_xstatus != X_STATUS_TIMEOUT) { + xeSignalDispatcher(context, + context->TranslateVirtual(current_waitblock->object), + thread_for_wait); + } + current_waitblock = + context->TranslateVirtual(current_waitblock->next_wait_block); + } while (current_waitblock != block); +} +void xeDispatchSignalStateChange(PPCContext* context, X_DISPATCH_HEADER* header, + int increment) { + SCHEDLOG(context, "xeDispatchSignalStateChange - header 
{}, increment = {}", + (void*)header, increment); + auto waitlist_head = &header->wait_list; + + // hack! + // happens in marathon durandal. todo: figure out why + if (waitlist_head->blink_ptr == 0 && waitlist_head->flink_ptr == 0) { + return; + } + + for (X_KWAIT_BLOCK* i = context->TranslateVirtual( + header->wait_list.flink_ptr); + static_cast(header->signal_state) > 0; + i = context->TranslateVirtual( + i->wait_list_entry.flink_ptr)) { + if ((X_LIST_ENTRY*)i == waitlist_head) { + break; + } + + auto v6 = i; + auto v7 = context->TranslateVirtual(i->thread); + if (i->wait_type == WAIT_ANY) { + xeSignalDispatcher(context, header, v7); + } else { + for (X_KWAIT_BLOCK* j = context->TranslateVirtual(i->next_wait_block); + j != i; j = context->TranslateVirtual(j->next_wait_block)) { + if (j->wait_result_xstatus != X_STATUS_TIMEOUT) { + auto v9 = context->TranslateVirtual(j->object); + + if ((v9->type != 2 || v9->signal_state > 0 || + v7 != context->TranslateVirtual( + reinterpret_cast(v9)->owner)) && + v9->signal_state <= 0) { + goto LABEL_23; + } + } + } + i = context->TranslateVirtual( + i->wait_list_entry.blink_ptr); + xeHandleWaitTypeAll(context, v6); + } + xeEnqueueThreadPostWait(context, v7, v6->wait_result_xstatus, increment); + LABEL_23:; + } +} + +X_STATUS xeNtYieldExecution(PPCContext* context) { + SCHEDLOG(context, "xeNtYieldExecution"); + X_STATUS result; + auto kpcr = GetKPCR(context); + auto v1 = context->TranslateVirtual(kpcr->prcb_data.current_thread); + auto old_irql = kpcr->current_irql; + kpcr->current_irql = IRQL_DISPATCH; + + v1->wait_irql = old_irql; + auto v2 = &kpcr->prcb_data; + xboxkrnl::xeKeKfAcquireSpinLock(context, &v2->enqueued_processor_threads_lock, + false); + X_KTHREAD* next_thread = context->TranslateVirtual(v2->next_thread); + if (!next_thread) { + next_thread = xeScanForReadyThread(context, v2, 1); + v2->next_thread = context->HostToGuestVirtual(next_thread); + } + if (next_thread) { + v1->quantum = v1->process->quantum; + int v4 = 
v1->priority; + if ((unsigned int)v4 < 0x12) { + v4 = v4 - v1->unk_BA - 1; + if (v4 < v1->unk_B9) { + v4 = v1->unk_B9; + } + v1->unk_BA = 0; + } + v1->priority = v4; + insert_8009D048(context, v1); + xeSchedulerSwitchThread(context); + + result = X_STATUS_SUCCESS; + } else { + xboxkrnl::xeKeKfReleaseSpinLock( + context, &v2->enqueued_processor_threads_lock, 0, false); + xboxkrnl::xeKfLowerIrql(context, v1->wait_irql); + result = X_STATUS_NO_YIELD_PERFORMED; + } + return result; +} +XE_COMPARISON_NOINLINE +void scheduler_80097F90(PPCContext* context, X_KTHREAD* thread) { + SCHEDLOG(context, "scheduler_80097F90 - thread {}", (void*)thread); + auto pcrb = &GetKPCR(context)->prcb_data; + + xboxkrnl::xeKeKfAcquireSpinLock( + context, &pcrb->enqueued_processor_threads_lock, false); + + unsigned int priority = thread->priority; + if (priority < 0x12) { + unsigned int v6 = thread->unk_B9; + if (v6 < thread->unk_CA) { + int v7 = thread->quantum - 10; + thread->quantum = v7; + if (v7 <= 0) { + thread->quantum = thread->process->quantum; + int v8 = priority - thread->unk_BA - 1; + if (v8 < (int)v6) { + v8 = v6; + } + thread->priority = v8; + thread->unk_BA = 0; + if (pcrb->next_thread) { + thread->was_preempted = 0; + } else { + X_KTHREAD* v9 = xeScanForReadyThread(context, pcrb, v8); + if (v9) { + v9->thread_state = KTHREAD_STATE_STANDBY; + pcrb->next_thread = v9; + } + } + } + } + } + xeDispatcherSpinlockUnlock(context, &pcrb->enqueued_processor_threads_lock, + thread->wait_irql); +} +XE_COMPARISON_NOINLINE +X_STATUS xeSchedulerSwitchThread(PPCContext* context) { + SCHEDLOG(context, "xeSchedulerSwitchThread"); + auto pcr = GetKPCR(context); + auto prcb = &pcr->prcb_data; + + auto current_thread = prcb->current_thread; + auto next_thread = prcb->next_thread.xlat(); + + if (next_thread) { + } else { + unsigned int ready_by_prio = prcb->has_ready_thread_by_priority; + int has_ready = ready_by_prio & prcb->unk_mask_64; + if (has_ready) { + unsigned int v5 = 31 - 
xe::lzcnt(has_ready); + auto v6 = &prcb->ready_threads_by_priority[v5]; + + // if the list has a bit set in the mask, it definitely should have an + // entry + xenia_assert(!v6->empty(context)); + + int v8 = ready_by_prio ^ (1 << v5); + next_thread = v6->UnlinkHeadObject(context); + + if (v6->empty(context)) { + // list is empty now, update mask + prcb->has_ready_thread_by_priority = v8; + } + } else { + unsigned i = 0; + for (auto&& thrdlist : prcb->ready_threads_by_priority) { + if (prcb->unk_mask_64 & (1U << i)) { + xenia_assert(thrdlist.empty(context)); + } + ++i; + } + } + } + + if (next_thread) { + prcb->next_thread = 0U; + } else { + // idle thread + auto idle_thread = + &reinterpret_cast(pcr)->idle_process_thread; + next_thread = idle_thread; + prcb->running_idle_thread = idle_thread; + } + + prcb->current_thread = next_thread; + auto result = context->kernel_state->ContextSwitch(context, next_thread); + pcr = GetKPCR(context); +#if 0 + auto v9 = next_thread->unk_A4; + auto result = next_thread->wait_result; + pcr->current_irql = v9; + auto v11 = pcr->software_interrupt_state; + + if (v9 < v11) { + xeDispatchProcedureCallInterrupt(v9, v11, context); + } +#endif + return result; +} + +/* + this function is quite confusing and likely wrong, probably was written in + asm + +*/ +XE_COMPARISON_NOINLINE +X_STATUS xeSchedulerSwitchThread2(PPCContext* context) { + SCHEDLOG(context, "xeSchedulerSwitchThread2"); +reenter: + auto pcr = GetKPCR(context); + auto prcb = &pcr->prcb_data; + if (prcb->enqueued_threads_list.next) { + xeProcessQueuedThreads(context, true); + goto reenter; + } + + // this is wrong! 
its doing something else here, + // some kind of "try lock" and then falling back to another function + // xboxkrnl::xeKeKfAcquireSpinLock( + // context, &prcb->enqueued_processor_threads_lock, false); + if (prcb->enqueued_processor_threads_lock.pcr_of_owner.value != 0) { + while (prcb->enqueued_processor_threads_lock.pcr_of_owner.value) { + // db16cyc + } + goto reenter; + } else { + xeKeKfAcquireSpinLock(context, &prcb->enqueued_processor_threads_lock, + false); + } + auto disp = context->kernel_state->GetDispatcherLock(context); + xenia_assert(disp->pcr_of_owner == static_cast(context->r[13])); + + disp->pcr_of_owner = 0; + return xeSchedulerSwitchThread(context); +} + +int xeKeSuspendThread(PPCContext* context, X_KTHREAD* thread) { + int result; + uint32_t old_irql = + xboxkrnl::xeKeKfAcquireSpinLock(context, &thread->apc_lock); + + result = thread->suspend_count; + if (result == 0x7F) { + xenia_assert(false); + // raise status here + } + + if (thread->may_queue_apcs) { + if (!thread->unk_CB) { + thread->suspend_count = result + 1; + if (!result) { + if (thread->on_suspend.enqueued) { + XELOGE("Just using suspend signal state decrement"); + context->kernel_state->LockDispatcherAtIrql(context); + thread->suspend_sema.header.signal_state--; + context->kernel_state->UnlockDispatcherAtIrql(context); + + } else { + XELOGE("Enqueuing suspendthread apc"); + thread->on_suspend.enqueued = 1; + xeKeInsertQueueApcHelper(context, &thread->on_suspend, 0); + } + } + } + } + + xeDispatcherSpinlockUnlock(context, &thread->apc_lock, old_irql); + return result; +} +int xeKeResumeThread(PPCContext* context, X_KTHREAD* thread) { + int result; + uint32_t old_irql = + xboxkrnl::xeKeKfAcquireSpinLock(context, &thread->apc_lock); + + char suspendcount = thread->suspend_count; + result = suspendcount; + if (suspendcount) { + thread->suspend_count = suspendcount - 1; + XELOGE("New suspendcount {}", (int)suspendcount - 1); + if (suspendcount == 1) { + XELOGE("Awaking for suspendcount"); 
+ context->kernel_state->LockDispatcherAtIrql(context); + thread->suspend_sema.header.signal_state++; + xeDispatchSignalStateChange(context, &thread->suspend_sema.header, 0); + context->kernel_state->UnlockDispatcherAtIrql(context); + } + } + + xeDispatcherSpinlockUnlock(context, &thread->apc_lock, old_irql); + return result; +} + +void xeSuspendThreadApcRoutine(PPCContext* context) { + XELOGE("xeSuspendThreadApcRoutine called"); + auto thrd = GetKThread(context); + xeKeWaitForSingleObject(context, &thrd->suspend_sema.header, 2, 0, 0, 0); + XELOGE("xeSuspendThreadApcRoutine awoken"); +} + +X_STATUS xeKeWaitForSingleObject(PPCContext* context, X_DISPATCH_HEADER* object, + unsigned reason, unsigned processor_mode, + bool alertable, int64_t* timeout) { + int64_t tmp_timeout; + auto this_thread = GetKThread(context); + + uint32_t guest_stash = + context->HostToGuestVirtual(&this_thread->scratch_waitblock_memory); + + X_KWAIT_BLOCK* stash = &this_thread->scratch_waitblock_memory[0]; + + auto reason2 = reason; + if (this_thread->wait_next) + this_thread->wait_next = 0; + else { + this_thread->wait_irql = context->kernel_state->LockDispatcher(context); + } + + X_STATUS v14; + uint64_t v11 = 0; + auto v12 = timeout; + while (1) { + if (this_thread->deferred_apc_software_interrupt_state && + !this_thread->wait_irql) { + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), 0); + goto LABEL_41; + } + this_thread->wait_result = 0; + + auto obj_signalstate = object->signal_state; + if (object->type == 2) { + X_KMUTANT* mutant = reinterpret_cast(object); + if (obj_signalstate > 0 || this_thread == mutant->owner.xlat()) { + if (obj_signalstate != 0x80000000) { + auto v20 = mutant->header.signal_state - 1; + mutant->header.signal_state = v20; + if (!v20) { + auto v21 = mutant->abandoned; + mutant->owner = this_thread; + if (v21 == 1) { + mutant->abandoned = 0; + this_thread->wait_result = 128; + } + auto v22 = 
this_thread->mutants_list.blink_ptr; + auto v23 = v22->flink_ptr; + mutant->unk_list.blink_ptr = v22; + mutant->unk_list.flink_ptr = v23; + v23->blink_ptr = &mutant->unk_list; + v22->flink_ptr = &mutant->unk_list; + } + v14 = this_thread->wait_result; + goto LABEL_57; + } + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), + this_thread->wait_irql); + + // X_STATUS_MUTANT_LIMIT_EXCEEDED + // should raise status + xenia_assert(false); + } + goto LABEL_16; + } + if (obj_signalstate > 0) { + break; + } + + LABEL_16: + this_thread->wait_blocks = guest_stash; + stash->object = object; + stash->wait_result_xstatus = 0; + stash->wait_type = WAIT_ANY; + stash->thread = this_thread; + if (alertable) { + if (this_thread->alerted[processor_mode]) { + v14 = X_STATUS_ALERTED; + this_thread->alerted[processor_mode] = 0; + goto LABEL_55; + } + if (processor_mode && + !util::XeIsListEmpty(&this_thread->apc_lists[1], context)) { + this_thread->user_apc_pending = 1; + LABEL_54: + v14 = X_STATUS_USER_APC; + LABEL_55: + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), + this_thread->wait_irql); + goto LABEL_58; + } + if (this_thread->alerted[0]) { + v14 = X_STATUS_ALERTED; + this_thread->alerted[0] = 0; + goto LABEL_55; + } + } else if (processor_mode && this_thread->user_apc_pending) { + goto LABEL_54; + } + if (timeout) { + if (!*timeout || + (stash->next_wait_block = &this_thread->wait_timeout_block, + this_thread->wait_timeout_timer.header.wait_list.flink_ptr = + &this_thread->wait_timeout_block.wait_list_entry, + this_thread->wait_timeout_timer.header.wait_list.blink_ptr = + &this_thread->wait_timeout_block.wait_list_entry, + this_thread->wait_timeout_block.next_wait_block = guest_stash, + !xboxkrnl::XeInsertGlobalTimer( + context, &this_thread->wait_timeout_timer, *timeout))) { + v14 = X_STATUS_TIMEOUT; + goto LABEL_57; + } + v11 = this_thread->wait_timeout_timer.due_time; + } else { + 
stash->next_wait_block = guest_stash; + } + auto v15 = object->wait_list.blink_ptr; + stash->wait_list_entry.flink_ptr = &object->wait_list; + stash->wait_list_entry.blink_ptr = v15; + v15->flink_ptr = guest_stash; + object->wait_list.blink_ptr = guest_stash; + + uint32_t v16 = this_thread->queue; + if (v16) { + xeKeSignalQueue(context, context->TranslateVirtual(v16)); + } + + auto v17 = (unsigned __int8)this_thread->wait_irql; + this_thread->alertable = alertable; + this_thread->processor_mode = processor_mode; + this_thread->wait_reason = reason2; + this_thread->thread_state = KTHREAD_STATE_WAITING; + v14 = xeSchedulerSwitchThread2(context); + + if (v14 == X_STATUS_USER_APC) { + xeProcessUserApcs(context); + } + if (v14 != X_STATUS_KERNEL_APC) { + return v14; + } + if (timeout) { + if (*timeout < 0) { + tmp_timeout = + context->kernel_state->GetKernelInterruptTime() - *timeout; + timeout = &tmp_timeout; + } else { + timeout = v12; + } + } + LABEL_41: + this_thread->wait_irql = context->kernel_state->LockDispatcher(context); + } + auto obj_type = object->type; + if ((obj_type & 7) == 1) { + object->signal_state = 0; + } else if (obj_type == 5) { + --object->signal_state; + } + v14 = X_STATUS_SUCCESS; +LABEL_57: + context->kernel_state->UnlockDispatcherAtIrql(context); + scheduler_80097F90(context, this_thread); +LABEL_58: + if (v14 == X_STATUS_USER_APC) { + xeProcessUserApcs(context); + } + return v14; +} + +void xeKeSetAffinityThread(PPCContext* context, X_KTHREAD* thread, + uint32_t affinity, uint32_t* prev_affinity) { + uint32_t irql = context->kernel_state->LockDispatcher(context); + auto old_cpu = thread->current_cpu; + uint32_t affinity_to_cpu = 31 - xe::lzcnt(affinity); + if (old_cpu != affinity_to_cpu) { + thread->another_prcb_ptr = + &context->kernel_state->KPCRPageForCpuNumber(affinity_to_cpu) + ->pcr.prcb_data; + + if (old_cpu == GetKPCR(context)->prcb_data.current_cpu) { + if (thread->thread_state != KTHREAD_STATE_UNKNOWN) { + 
xeHandleReadyThreadOnDifferentProcessor(context, thread); + } + } else { + // todo: args are undefined in ida! find out why + xeKeInsertQueueDpc(&thread->a_prcb_ptr->switch_thread_processor_dpc, 0, 0, + context); + } + } + + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), irql); + *prev_affinity = 1U << old_cpu; +} +void xeKeSetPriorityClassThread(PPCContext* context, X_KTHREAD* thread, + bool a2) { + SCHEDLOG(context, "xeKeSetPriorityClassThread - thread {}, a2 {}", + (void*)thread, a2); + uint32_t old_irql = context->kernel_state->LockDispatcher(context); + xboxkrnl::xeKeKfAcquireSpinLock( + context, &thread->a_prcb_ptr->enqueued_processor_threads_lock, false); + + auto v8 = thread->unk_C9; + auto v9 = thread->unk_C9; + auto v10 = a2 == 0 ? 5 : 13; + char v11 = v10 - v9; + if (v10 != v9) { + auto v12 = thread->priority; + thread->unk_C9 = v10; + auto v13 = thread->unk_C8 + v11; + auto v14 = thread->unk_B9 + v11; + auto v15 = thread->unk_CA + v11; + thread->unk_C8 = v13; + thread->unk_B9 = v14; + thread->unk_CA = v15; + if (v12 < 0x12) { + auto v16 = thread->process; + thread->unk_BA = 0; + thread->quantum = v16->quantum; + xeKeChangeThreadPriority(context, thread, v14); + } + } + + xboxkrnl::xeKeKfReleaseSpinLock( + context, &thread->a_prcb_ptr->enqueued_processor_threads_lock, 0, false); + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), old_irql); +} + +void xeKeChangeThreadPriority(PPCContext* context, X_KTHREAD* thread, + int priority) { + SCHEDLOG(context, "xeKeChangeThreadPriority - thread {}, a2 {}", + (void*)thread, priority); + int prio = thread->priority; + auto thread_prcb = thread->a_prcb_ptr; + + if (prio == priority) { + SCHEDLOG(context, "Skipping, priority is the same"); + return; + } + auto thread_state = thread->thread_state; + thread->priority = priority; + + // todo: lzcnt & 0x20 is just a zero test + bool v7 = (xe::lzcnt(thread_prcb->unk_mask_64 & (1 << 
priority)) & 0x20) == 0; + X_KTHREAD* new_next_thread; + switch (thread_state) { + case KTHREAD_STATE_READY: { + auto v17 = &thread->ready_prcb_entry; + auto v18 = thread->ready_prcb_entry.flink_ptr; + auto v19 = thread->ready_prcb_entry.blink_ptr; + v19->flink_ptr = v18; + v18->blink_ptr = v19; + if (v19 == v18) { + thread_prcb->has_ready_thread_by_priority = + thread_prcb->has_ready_thread_by_priority & (~(1 << prio)); + } + thread->thread_state = KTHREAD_STATE_UNKNOWN; + auto kpcr = GetKPCR(context); + v17->flink_ptr = kpcr->prcb_data.enqueued_threads_list.next; + kpcr->prcb_data.enqueued_threads_list.next = + context->HostToGuestVirtual(v17); + break; + } + case KTHREAD_STATE_RUNNING: { + if (thread_prcb->next_thread) { + return; + } + if (!v7) { + goto LABEL_9; + } + if (priority < prio) { + new_next_thread = + xeScanForReadyThread(context, thread_prcb.xlat(), priority); + if (new_next_thread) { + new_next_thread->thread_state = KTHREAD_STATE_STANDBY; + thread_prcb->next_thread = new_next_thread; + return; + } + } + break; + } + case KTHREAD_STATE_STANDBY: { + if (!v7) { + thread->thread_state = KTHREAD_STATE_READY; + auto v8 = &thread_prcb->ready_threads_by_priority[priority]; + auto v9 = v8->flink_ptr; + thread->ready_prcb_entry.blink_ptr = v8; + thread->ready_prcb_entry.flink_ptr = v9; + v9->blink_ptr = &thread->ready_prcb_entry; + v8->flink_ptr = &thread->ready_prcb_entry; + thread_prcb->has_ready_thread_by_priority = + (1 << priority) | thread_prcb->has_ready_thread_by_priority; + LABEL_9: + new_next_thread = xeScanForReadyThread(context, thread_prcb.xlat(), 0); + if (!new_next_thread) { + new_next_thread = thread_prcb->idle_thread.xlat(); + thread_prcb->running_idle_thread = new_next_thread; + } + new_next_thread->thread_state = KTHREAD_STATE_STANDBY; + thread_prcb->next_thread = new_next_thread; + return; + } + if (priority < prio) { + auto v11 = xeScanForReadyThread(context, thread_prcb.xlat(), priority); + if (v11) { + v11->thread_state = 
KTHREAD_STATE_STANDBY; + thread_prcb->next_thread = v11; + int v12 = thread->priority; + auto v13 = thread->a_prcb_ptr; + thread->thread_state = KTHREAD_STATE_READY; + int v14 = 1 << v12; + auto v15 = &v13->ready_threads_by_priority[v12]; + auto v16 = v15->flink_ptr; + thread->ready_prcb_entry.blink_ptr = v15; + thread->ready_prcb_entry.flink_ptr = v16; + v16->blink_ptr = &thread->ready_prcb_entry; + v15->flink_ptr = &thread->ready_prcb_entry; + v13->has_ready_thread_by_priority = + v13->has_ready_thread_by_priority | v14; + } + } + break; + } + default: + return; + } +} + +X_STATUS xeKeDelayExecutionThread(PPCContext* context, char mode, + bool alertable, int64_t* interval) { + auto thread = GetKThread(context); + + int64_t v6 = *interval; + if (thread->wait_next) + thread->wait_next = 0; + else + thread->wait_irql = context->kernel_state->LockDispatcher(context); + auto v7 = v6; + X_STATUS result; + while (1) { + if (thread->deferred_apc_software_interrupt_state && !thread->wait_irql) { + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), 0); + goto LABEL_28; + } + if (alertable) { + if (thread->alerted[mode]) { + result = X_STATUS_ALERTED; + thread->alerted[mode] = 0; + goto LABEL_32; + } + if (mode && !thread->apc_lists[1].empty(context)) { + thread->user_apc_pending = 1; + LABEL_31: + result = X_STATUS_USER_APC; + LABEL_32: + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), + thread->wait_irql); + if (result == X_STATUS_USER_APC) { + xboxkrnl::xeProcessUserApcs(context); + } + return result; + } + if (thread->alerted[0]) { + result = X_STATUS_ALERTED; + thread->alerted[0] = 0; + goto LABEL_32; + } + } else if (mode && thread->user_apc_pending) { + goto LABEL_31; + } + thread->wait_result = 0; + thread->wait_blocks = &thread->wait_timeout_block; + thread->wait_timeout_block.next_wait_block = &thread->wait_timeout_block; + + thread->wait_timeout_timer.header.wait_list.flink_ptr = + 
&thread->wait_timeout_block.wait_list_entry; + + thread->wait_timeout_timer.header.wait_list.blink_ptr = + &thread->wait_timeout_block.wait_list_entry; + + if (!XeInsertGlobalTimer(context, &thread->wait_timeout_timer, v6)) { + break; + } + uint32_t v9 = thread->queue; + v6 = thread->wait_timeout_timer.due_time; + if (v9) { + xeKeSignalQueue(context, context->TranslateVirtual(v9)); + } + thread->alertable = alertable; + thread->processor_mode = mode; + thread->wait_reason = 1; + thread->thread_state = KTHREAD_STATE_WAITING; + + result = xeSchedulerSwitchThread2(context); + if (result == X_STATUS_USER_APC) { + xeProcessUserApcs(context); + } + if (result != X_STATUS_KERNEL_APC) { + if (result == X_STATUS_TIMEOUT) { + result = X_STATUS_SUCCESS; + } + return result; + } + // this part is a bit fucked up, not sure this is right + if (v7 < 0) { + v6 = static_cast( + context->kernel_state->GetKernelInterruptTime()) - + v6; + + } else { + v6 = v7; + } + + LABEL_28: + thread->wait_irql = context->kernel_state->LockDispatcher(context); + } + if (v6) { + context->kernel_state->UnlockDispatcherAtIrql(context); + scheduler_80097F90(context, thread); + result = X_STATUS_SUCCESS; + } else { + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), + thread->wait_irql); + result = xeNtYieldExecution(context); + } + return result; +} + +int32_t xeKeSetBasePriorityThread(PPCContext* context, X_KTHREAD* thread, + int increment) { + uint32_t old_irql = context->kernel_state->LockDispatcher(context); + xboxkrnl::xeKeKfAcquireSpinLock( + context, &thread->a_prcb_ptr->enqueued_processor_threads_lock, false); + + int v8 = thread->unk_C9; + int v9 = thread->unk_B9; + int result = v9 - v8; + if (thread->unk_B8) { + result = 16 * thread->unk_B8; + } + thread->unk_B8 = 0; + + if (std::abs(increment) >= 16) { + char v11 = 1; + if (increment <= 0) { + v11 = -1; + } + thread->unk_B8 = v11; + } + + int v12 = thread->unk_CA; + + int v13 = v8 + increment; + if (v8 
+ increment <= v12) { + if (v13 < thread->unk_C8) { + v13 = thread->unk_C8; + } + } else { + v13 = thread->unk_CA; + } + int v14; + if (thread->unk_B8) { + v14 = v13; + } else { + v14 = thread->priority - thread->unk_BA - v9 + v13; + if (v14 > v12) { + v14 = thread->unk_CA; + } + } + int v15 = thread->priority; + thread->unk_B9 = v13; + thread->unk_BA = 0; + if (v14 != v15) { + thread->quantum = thread->process->quantum; + xeKeChangeThreadPriority(context, thread, v14); + } + + xboxkrnl::xeKeKfReleaseSpinLock( + context, &thread->a_prcb_ptr->enqueued_processor_threads_lock, 0, false); + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), old_irql); + return result; +} +uint32_t xeKeWaitForSingleObjectEx( + PPCContext* context, + ShiftedPointer wait, + unsigned char waitmode, bool alertable, int64_t* timeout) { + return xeKeWaitForSingleObject(context, + xeObGetWaitableObject(context, wait.m_base), 3, + waitmode, alertable, timeout); +} + +X_STATUS xeKeSignalAndWaitForSingleObjectEx( + PPCContext* context, + ShiftedPointer signal, + ShiftedPointer wait, + unsigned char mode, bool alertable, int64_t* timeout) { + X_DISPATCH_HEADER* waiter = xeObGetWaitableObject(context, wait.m_base); + + X_STATUS result = X_STATUS_SUCCESS; + auto signal_type_ptr = signal.GetAdjacent()->object_type_ptr; + auto globals = context->kernel_state->GetKernelGuestGlobals(); + + if (signal_type_ptr == + static_cast(globals + + offsetof(KernelGuestGlobals, ExEventObjectType))) { + xeKeSetEvent(context, reinterpret_cast(signal.m_base), 1, 1); + + } else if (signal_type_ptr == + static_cast( + globals + offsetof(KernelGuestGlobals, ExMutantObjectType))) { + xeKeReleaseMutant(context, reinterpret_cast(signal.m_base), 1, + 0, 1); + uint32_t cstatus = context->CatchStatus(); + if (cstatus) { + return cstatus; + } + + } else if (signal_type_ptr == + static_cast(globals + offsetof(KernelGuestGlobals, + ExSemaphoreObjectType))) { + xeKeReleaseSemaphore( + 
context, reinterpret_cast(signal.m_base), 1, 1, 1); + uint32_t cstatus = context->CatchStatus(); + if (cstatus) { + return cstatus; + } + } else { + result = X_STATUS_OBJECT_TYPE_MISMATCH; + } + if (result >= X_STATUS_SUCCESS) { + result = + xeKeWaitForSingleObject(context, waiter, 3, mode, alertable, timeout); + } + return result; +} + +int32_t xeKeQueryBasePriorityThread(PPCContext* context, X_KTHREAD* thread) { + uint32_t old_irql = context->kernel_state->LockDispatcher(context); + char v4 = thread->unk_B8; + int v5 = thread->unk_B9 - thread->unk_C9; + if (v4) { + v5 = 16 * v4; + } + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), old_irql); + return v5; +} + +X_STATUS xeKeWaitForMultipleObjects( + PPCContext* context, unsigned int num_objects, X_DISPATCH_HEADER** objects, + unsigned wait_type, unsigned reason, unsigned char mode, int alertable, + int64_t* timeout, X_KWAIT_BLOCK* wait_blocks) { + X_STATUS result; + auto thread = GetKThread(context); + if (thread->wait_next) + thread->wait_next = 0; + else { + thread->wait_irql = context->kernel_state->LockDispatcher(context); + } + unsigned int v21; + int64_t v43 = 0; + ShiftedPointer, X_KWAIT_BLOCK, 0x14> wait_blocks_shifted = + nullptr; + + int64_t v16 = 0; // v43; + auto v17 = context->TranslateVirtual(v43 >> 32); + auto timeout2 = timeout; + int v20; + int64_t other_timer_storage; + while (true) { + auto v19 = thread->deferred_apc_software_interrupt_state; + thread->wait_blocks = wait_blocks; + if (v19 && !thread->wait_irql) { + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), 0); + goto LABEL_60; + } + v20 = 1; + thread->wait_result = 0; + v21 = 0; // HIDWORD(v16); + if (num_objects) { + break; + } + LABEL_32: + if (wait_type == 0 && v20) { + v17->next_wait_block = wait_blocks; + xeHandleWaitTypeAll(context, v17); + result = thread->wait_result; + goto LABEL_75; + } + if (alertable) { + if (thread->alerted[mode]) { + result = 
X_STATUS_ALERTED; + thread->alerted[mode] = 0; + goto LABEL_73; + } + if (mode && !util::XeIsListEmpty(&thread->apc_lists[1], context)) { + thread->user_apc_pending = 1; + LABEL_72: + result = X_STATUS_USER_APC; + LABEL_73: + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), + thread->wait_irql); + goto deliver_apc_and_return; + } + if (thread->alerted[0]) { + result = X_STATUS_ALERTED; + thread->alerted[0] = 0; + goto LABEL_73; + } + } else if (mode && thread->user_apc_pending) { + goto LABEL_72; + } + if (timeout) { + if (!*timeout || (v17->next_wait_block = &thread->wait_timeout_block, + v17 = &thread->wait_timeout_block, + thread->wait_timeout_timer.header.wait_list.flink_ptr = + &thread->wait_timeout_timer.header.wait_list, + thread->wait_timeout_timer.header.wait_list.blink_ptr = + &thread->wait_timeout_timer.header.wait_list, + + !XeInsertGlobalTimer( + context, &thread->wait_timeout_timer, *timeout))) { + result = X_STATUS_TIMEOUT; + goto LABEL_75; + } + v16 = thread->wait_timeout_timer.due_time; + } + v17->next_wait_block = wait_blocks; + v17 = wait_blocks; + do { + auto v32 = &v17->object->wait_list; + auto v33 = v17->object->wait_list.blink_ptr; + v17->wait_list_entry.flink_ptr = v32; + v17->wait_list_entry.blink_ptr = v33; + v33->flink_ptr = &v17->wait_list_entry; + v32->blink_ptr = &v17->wait_list_entry; + v17 = v17->next_wait_block.xlat(); + } while (v17 != wait_blocks); + + uint32_t v34 = thread->queue; + if (v34) { + xeKeSignalQueue(context, context->TranslateVirtual(v34)); + } + thread->alertable = alertable; + thread->processor_mode = mode; + thread->wait_reason = reason; + auto v35 = (unsigned char)thread->wait_irql; + thread->thread_state = KTHREAD_STATE_WAITING; + result = xeSchedulerSwitchThread2(context); + if (result == X_STATUS_USER_APC) { + xeProcessUserApcs(context); + } + if (result != X_STATUS_KERNEL_APC) { + return result; + } + if (timeout) { + if (timeout2 < 0) { + other_timer_storage = + 
context->kernel_state->GetKernelInterruptTime() - *timeout; + timeout2 = &other_timer_storage; + timeout = &other_timer_storage; + } else { + timeout = timeout2; + } + } + LABEL_60: + thread->wait_irql = context->kernel_state->LockDispatcher(context); + } + auto v22 = objects; + wait_blocks_shifted = &wait_blocks->wait_result_xstatus; + int obj_type; + // not actually X_KMUTANT, but it covers all the fields we need here + X_KMUTANT* v24; + while (1) { + v24 = (X_KMUTANT*)*v22; + + obj_type = v24->header.type; + if (wait_type == 1) { + break; + } + if (obj_type == 2) { + auto v29 = v24->owner; + if (thread != v29.xlat() || v24->header.signal_state != 0x80000000) { + if (v24->header.signal_state > 0 || thread == v29.xlat()) { + goto LABEL_31; + } + LABEL_30: + // todo: fix! + v20 = 0; // HIDWORD(v16); + goto LABEL_31; + } + goto LABEL_19; + } + if (v24->header.signal_state <= 0) { + goto LABEL_30; + } + LABEL_31: + auto v30 = v21; + ADJ(wait_blocks_shifted)->object = &v24->header; + ADJ(wait_blocks_shifted)->wait_type = wait_type; + ++v21; + ADJ(wait_blocks_shifted)->thread = thread; + v17 = ADJ(wait_blocks_shifted); + ++v22; + ADJ(wait_blocks_shifted)->wait_result_xstatus = v30; + ADJ(wait_blocks_shifted)->next_wait_block = ADJ(wait_blocks_shifted) + 1; + wait_blocks_shifted.m_base += 12; + if (v21 >= num_objects) { + goto LABEL_32; + } + } + bool is_mutant = obj_type == DISPATCHER_MUTANT; + int saved_signalstate = v24->header.signal_state; + if (is_mutant) { + if (saved_signalstate <= 0 && thread != v24->owner.xlat()) { + goto LABEL_31; + } + + if (saved_signalstate != 0x80000000) { + auto v38 = v24->header.signal_state - 1; + v24->header.signal_state = v38; + if (!v38) { + auto v39 = v24->abandoned; + v24->owner = thread; + if (v39 == 1) { + v24->abandoned = 0; /// BYTE3(v16); + thread->wait_result = 128; + } + auto v40 = thread->mutants_list.blink_ptr; + auto v41 = v40->flink_ptr; + v24->unk_list.blink_ptr = v40; + v24->unk_list.flink_ptr = v41; + v41->blink_ptr 
= &v24->unk_list; + v40->flink_ptr = &v24->unk_list; + } + result = thread->wait_result | v21; + goto LABEL_75; + } + LABEL_19: + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), + thread->wait_irql); + // RtlRaiseStatus(X_STATUS_MUTANT_LIMIT_EXCEEDED); + xenia_assert(false); + goto LABEL_31; + } + if (saved_signalstate <= 0) { + goto LABEL_31; + } + auto object_type = v24->header.type; + if ((object_type & 7) == 1) { + v24->header.signal_state = 0; + // HIDWORD(v16); + } else if (object_type == 5) { + --v24->header.signal_state; + } + result = v21; +LABEL_75: + context->kernel_state->UnlockDispatcherAtIrql(context); + scheduler_80097F90(context, thread); +deliver_apc_and_return: + if (result == X_STATUS_USER_APC) { + xeProcessUserApcs(context); + } + return result; +} + +int32_t xeKeSetDisableBoostThread(PPCContext* context, X_KTHREAD* thread, + char a2) { + uint32_t old_irql = context->kernel_state->LockDispatcher(context); + xboxkrnl::xeKeKfAcquireSpinLock( + context, &thread->a_prcb_ptr->enqueued_processor_threads_lock, false); + + auto old_disable_boost = thread->boost_disabled; + + thread->boost_disabled = a2; + + xboxkrnl::xeKeKfReleaseSpinLock( + context, &thread->a_prcb_ptr->enqueued_processor_threads_lock, 0, false); + + xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), old_irql); + return old_disable_boost; +} + +int32_t xeKeSetPriorityThread(PPCContext* context, X_KTHREAD* thread, + int priority) { + uint32_t old_irql = context->kernel_state->LockDispatcher(context); + xboxkrnl::xeKeKfAcquireSpinLock( + context, &thread->a_prcb_ptr->enqueued_processor_threads_lock, false); + + auto old_priority = thread->priority; + auto v8 = thread->process->quantum; + thread->unk_BA = 0; + thread->quantum = v8; + xeKeChangeThreadPriority(context, thread, priority); + xboxkrnl::xeKeKfReleaseSpinLock( + context, &thread->a_prcb_ptr->enqueued_processor_threads_lock, 0, false); + + 
xeDispatcherSpinlockUnlock( + context, context->kernel_state->GetDispatcherLock(context), old_irql); + return old_priority; +} +static void BackgroundModeIPI(void* ud) { + auto context = cpu::ThreadState::GetContext(); + auto KPCR = GetKPCR(context); + KPCR->generic_software_interrupt = 2; + KPCR->background_scheduling_1A = 0x20; + KPCR->timeslice_ended = 0x20; + KernelState::HWThreadFor(context)->interrupt_controller()->SetEOI(1); +} +void xeKeEnterBackgroundMode(PPCContext* context) { + uint32_t BackgroundProcessors = + xboxkrnl::xeKeQueryBackgroundProcessors(context); + auto KPCR = GetKPCR(context); + uint32_t processor_mask = KPCR->prcb_data.processor_mask; + if ((BackgroundProcessors & processor_mask) != 0) { + BackgroundProcessors &= ~processor_mask; + KPCR->background_scheduling_1A = 1; + KPCR->timeslice_ended = 1; + KPCR->generic_software_interrupt = 2; + } + if (BackgroundProcessors) { + for (uint32_t i = 0; i < 6; ++i) { + if (((1 << i) & BackgroundProcessors) != 0) { + auto CPUThread = context->processor->GetCPUThread(i); + cpu::SendInterruptArguments interrupt_args; + interrupt_args.ipi_func = BackgroundModeIPI; + interrupt_args.ud = nullptr; + interrupt_args.wait_done = false; + interrupt_args.irql_ = 0;//randomly chosen irql + CPUThread->SendGuestIPI(interrupt_args); + } + } + } +} + +uint32_t xeKeQueryBackgroundProcessors(PPCContext* context) { + return context->kernel_state->GetKernelGuestGlobals(context) + ->background_processors; +} + +void xeKeSetBackgroundProcessors(PPCContext* context, unsigned int new_bgproc) { + context->kernel_state->GetKernelGuestGlobals(context)->background_processors = + new_bgproc; +} + +} // namespace xboxkrnl +} // namespace kernel +} // namespace xe diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc index d8e2259248..b64f3b9d4a 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc @@ -18,6 +18,7 @@ #include 
"xenia/kernel/util/shim_utils.h" #include "xenia/kernel/xboxkrnl/xboxkrnl_private.h" #include "xenia/kernel/xboxkrnl/xboxkrnl_rtl.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h" #include "xenia/xbox.h" DEFINE_int32(internal_display_resolution, 8, @@ -144,6 +145,7 @@ static_assert_size(X_DISPLAY_INFO, 0x58); void VdGetCurrentDisplayInformation_entry( pointer_t display_info) { + cpu::MFTBFence timing_fence{39}; X_VIDEO_MODE mode; VdQueryVideoMode(&mode); @@ -170,6 +172,7 @@ void VdGetCurrentDisplayInformation_entry( DECLARE_XBOXKRNL_EXPORT1(VdGetCurrentDisplayInformation, kVideo, kStub); void VdQueryVideoMode(X_VIDEO_MODE* video_mode) { + cpu::MFTBFence timing_fence{172}; // TODO(benvanik): get info from actual display. std::memset(video_mode, 0, sizeof(X_VIDEO_MODE)); @@ -188,11 +191,13 @@ void VdQueryVideoMode(X_VIDEO_MODE* video_mode) { } void VdQueryVideoMode_entry(pointer_t video_mode) { + cpu::MFTBFence timing_fence{481}; VdQueryVideoMode(video_mode); } DECLARE_XBOXKRNL_EXPORT1(VdQueryVideoMode, kVideo, kStub); dword_result_t VdQueryVideoFlags_entry() { + cpu::MFTBFence timing_fence{293}; X_VIDEO_MODE mode; VdQueryVideoMode(&mode); @@ -206,6 +211,8 @@ dword_result_t VdQueryVideoFlags_entry() { DECLARE_XBOXKRNL_EXPORT1(VdQueryVideoFlags, kVideo, kStub); dword_result_t VdSetDisplayMode_entry(dword_t flags) { + // todo: this may need to make context switches too + cpu::MFTBFence timing_fence{1490688}; // Often 0x40000000. // 0?ccf000 00000000 00000000 000000r0 @@ -234,7 +241,11 @@ DECLARE_XBOXKRNL_EXPORT1(VdSetDisplayModeOverride, kVideo, kStub); dword_result_t VdInitializeEngines_entry(unknown_t unk0, function_t callback, lpvoid_t arg, lpdword_t pfp_ptr, - lpdword_t me_ptr) { + lpdword_t me_ptr, + const ppc_context_t& context) { + cpu::MFTBFence timing_fence{4793932}; + xboxkrnl::xeKeEnterCriticalRegion(context); + xboxkrnl::xeKeLeaveCriticalRegion(context); // r3 = 0x4F810000 // r4 = function ptr (cleanup callback?) 
// r5 = function arg @@ -248,6 +259,7 @@ void VdShutdownEngines_entry() { // Ignored for now. // Games seem to call an Initialize/Shutdown pair to query info, then // re-initialize. + return; } DECLARE_XBOXKRNL_EXPORT1(VdShutdownEngines, kVideo, kStub); @@ -266,6 +278,7 @@ DECLARE_XBOXKRNL_EXPORT1(VdEnableDisableClockGating, kVideo, kStub); void VdSetGraphicsInterruptCallback_entry(function_t callback, lpvoid_t user_data) { + cpu::MFTBFence timing_fence{244}; // callback takes 2 params // r3 = bool 0/1 - 0 is normal interrupt, 1 is some acquire/lock mumble // r4 = user_data (r4 of VdSetGraphicsInterruptCallback) @@ -274,17 +287,22 @@ void VdSetGraphicsInterruptCallback_entry(function_t callback, } DECLARE_XBOXKRNL_EXPORT1(VdSetGraphicsInterruptCallback, kVideo, kImplemented); -void VdInitializeRingBuffer_entry(lpvoid_t ptr, int_t size_log2) { +void VdInitializeRingBuffer_entry(lpvoid_t ptr, int_t size_log2, + const ppc_context_t& context) { + cpu::MFTBFence timing_fence{100086}; + xboxkrnl::xeKeEnterCriticalRegion(context); // r3 = result of MmGetPhysicalAddress // r4 = log2(size) // Buffer pointers are from MmAllocatePhysicalMemory with WRITE_COMBINE. 
auto graphics_system = kernel_state()->emulator()->graphics_system(); graphics_system->InitializeRingBuffer(ptr, size_log2); + xboxkrnl::xeKeLeaveCriticalRegion(context); } DECLARE_XBOXKRNL_EXPORT1(VdInitializeRingBuffer, kVideo, kImplemented); void VdEnableRingBufferRPtrWriteBack_entry(lpvoid_t ptr, int_t block_size_log2) { + cpu::MFTBFence timing_fence{37}; // r4 = log2(block size), 6, usually --- <=19 auto graphics_system = kernel_state()->emulator()->graphics_system(); graphics_system->EnableReadPointerWriteBack(ptr, block_size_log2); @@ -292,6 +310,7 @@ void VdEnableRingBufferRPtrWriteBack_entry(lpvoid_t ptr, DECLARE_XBOXKRNL_EXPORT1(VdEnableRingBufferRPtrWriteBack, kVideo, kImplemented); void VdGetSystemCommandBuffer_entry(lpunknown_t p0_ptr, lpunknown_t p1_ptr) { + cpu::MFTBFence timing_fence{23}; p0_ptr.Zero(0x94); xe::store_and_swap(p0_ptr, 0xBEEF0000); xe::store_and_swap(p1_ptr, 0xBEEF0001); @@ -299,6 +318,7 @@ void VdGetSystemCommandBuffer_entry(lpunknown_t p0_ptr, lpunknown_t p1_ptr) { DECLARE_XBOXKRNL_EXPORT1(VdGetSystemCommandBuffer, kVideo, kStub); void VdSetSystemCommandBufferGpuIdentifierAddress_entry(lpunknown_t unk) { + cpu::MFTBFence timing_fence{35}; // r3 = 0x2B10(d3d?) + 8 } DECLARE_XBOXKRNL_EXPORT1(VdSetSystemCommandBufferGpuIdentifierAddress, kVideo, @@ -324,6 +344,7 @@ dword_result_t VdInitializeScalerCommandBuffer_entry( // sources from. dword_t dest_count // Count in words. ) { + cpu::MFTBFence timing_fence{284}; // We could fake the commands here, but I'm not sure the game checks for // anything but success (non-zero ret). // For now, we just fill it with NOPs. 
@@ -350,9 +371,11 @@ void AppendParam(StringBuffer* string_buffer, pointer_t param) { } dword_result_t VdCallGraphicsNotificationRoutines_entry( - unknown_t unk0, pointer_t args_ptr) { + unknown_t unk0, pointer_t args_ptr, + const ppc_context_t& context) { assert_true(unk0 == 1); - + xboxkrnl::xeKeEnterCriticalRegion(context); + xboxkrnl::xeKeLeaveCriticalRegion(context); // TODO(benvanik): what does this mean, I forget: // callbacks get 0, r3, r4 return 0; @@ -367,6 +390,7 @@ dword_result_t VdIsHSIOTrainingSucceeded_entry() { DECLARE_XBOXKRNL_EXPORT1(VdIsHSIOTrainingSucceeded, kVideo, kStub); dword_result_t VdPersistDisplay_entry(unknown_t unk0, lpdword_t unk1_ptr) { + cpu::MFTBFence timing_fence{810}; // unk1_ptr needs to be populated with a pointer passed to // MmFreePhysicalMemory(1, *unk1_ptr). if (unk1_ptr) { @@ -381,12 +405,16 @@ dword_result_t VdPersistDisplay_entry(unknown_t unk0, lpdword_t unk1_ptr) { } DECLARE_XBOXKRNL_EXPORT2(VdPersistDisplay, kVideo, kImplemented, kSketchy); -dword_result_t VdRetrainEDRAMWorker_entry(unknown_t unk0) { return 0; } +dword_result_t VdRetrainEDRAMWorker_entry(unknown_t unk0) { + cpu::MFTBFence timing_fence{0x1b}; + return 0; +} DECLARE_XBOXKRNL_EXPORT1(VdRetrainEDRAMWorker, kVideo, kStub); dword_result_t VdRetrainEDRAM_entry(unknown_t unk0, unknown_t unk1, unknown_t unk2, unknown_t unk3, unknown_t unk4, unknown_t unk5) { + cpu::MFTBFence timing_fence{9}; return 0; } DECLARE_XBOXKRNL_EXPORT1(VdRetrainEDRAM, kVideo, kStub); @@ -400,6 +428,7 @@ void VdSwap_entry( lpdword_t frontbuffer_ptr, // ptr to frontbuffer address lpdword_t texture_format_ptr, lpdword_t color_space_ptr, lpdword_t width, lpdword_t height) { + cpu::MFTBFence timing_fence{84}; // All of these parameters are REQUIRED. 
assert(buffer_ptr); assert(fetch_ptr); @@ -479,45 +508,7 @@ DECLARE_XBOXKRNL_EXPORT3(VdSwap, kVideo, kImplemented, kHighFrequency, kImportant); void RegisterVideoExports(xe::cpu::ExportResolver* export_resolver, - KernelState* kernel_state) { - auto memory = kernel_state->memory(); - - // VdGlobalDevice (4b) - // Pointer to a global D3D device. Games only seem to set this, so we don't - // have to do anything. We may want to read it back later, though. - uint32_t pVdGlobalDevice = - memory->SystemHeapAlloc(4, 32, kSystemHeapPhysical); - export_resolver->SetVariableMapping("xboxkrnl.exe", ordinals::VdGlobalDevice, - pVdGlobalDevice); - xe::store_and_swap(memory->TranslateVirtual(pVdGlobalDevice), 0); - - // VdGlobalXamDevice (4b) - // Pointer to the XAM D3D device, which we don't have. - uint32_t pVdGlobalXamDevice = - memory->SystemHeapAlloc(4, 32, kSystemHeapPhysical); - export_resolver->SetVariableMapping( - "xboxkrnl.exe", ordinals::VdGlobalXamDevice, pVdGlobalXamDevice); - xe::store_and_swap(memory->TranslateVirtual(pVdGlobalXamDevice), 0); - - // VdGpuClockInMHz (4b) - // GPU clock. Xenos is 500MHz. Hope nothing is relying on this timing... - uint32_t pVdGpuClockInMHz = - memory->SystemHeapAlloc(4, 32, kSystemHeapPhysical); - export_resolver->SetVariableMapping("xboxkrnl.exe", ordinals::VdGpuClockInMHz, - pVdGpuClockInMHz); - xe::store_and_swap(memory->TranslateVirtual(pVdGpuClockInMHz), 500); - - // VdHSIOCalibrationLock (28b) - // CriticalSection. 
- uint32_t pVdHSIOCalibrationLock = - memory->SystemHeapAlloc(28, 32, kSystemHeapPhysical); - export_resolver->SetVariableMapping( - "xboxkrnl.exe", ordinals::VdHSIOCalibrationLock, pVdHSIOCalibrationLock); - auto hsio_lock = - memory->TranslateVirtual(pVdHSIOCalibrationLock); - xeRtlInitializeCriticalSectionAndSpinCount(hsio_lock, pVdHSIOCalibrationLock, - 10000); -} + KernelState* kernel_state) {} } // namespace xboxkrnl } // namespace kernel diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_video.h b/src/xenia/kernel/xboxkrnl/xboxkrnl_video.h index 4cb3861f42..f7fff883c9 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_video.h +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_video.h @@ -11,7 +11,7 @@ #define XENIA_KERNEL_XBOXKRNL_XBOXKRNL_VIDEO_H_ #include "xenia/kernel/util/shim_utils.h" -#include "xenia/xbox.h" +#include "xenia/kernel/kernel_guest_structures.h" namespace xe { namespace kernel { diff --git a/src/xenia/kernel/xenumerator.cc b/src/xenia/kernel/xenumerator.cc index dcaa6a07a4..c8c6597143 100644 --- a/src/xenia/kernel/xenumerator.cc +++ b/src/xenia/kernel/xenumerator.cc @@ -8,7 +8,9 @@ */ #include "xenia/kernel/xenumerator.h" - +#include "xenia/kernel/kernel_state.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_ob.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h" namespace xe { namespace kernel { @@ -24,11 +26,19 @@ X_STATUS XEnumerator::Initialize(uint32_t user_index, uint32_t app_id, uint32_t open_message, uint32_t close_message, uint32_t flags, uint32_t extra_size, void** extra_buffer) { - auto native_object = CreateNative(sizeof(X_KENUMERATOR) + extra_size); - if (!native_object) { - return X_STATUS_NO_MEMORY; - } - auto guest_object = reinterpret_cast(native_object); + auto context = cpu::ThreadState::Get()->context(); + uint32_t guest_objptr = 0; + auto guest_globals = context->TranslateVirtual( + kernel_state()->GetKernelGuestGlobals()); + X_STATUS create_status = xboxkrnl::xeObCreateObject( + &guest_globals->XamEnumeratorObjectType, nullptr, + 
sizeof(X_KENUMERATOR) + extra_size, &guest_objptr, context); + + xenia_assert(create_status == X_STATUS_SUCCESS); + xenia_assert(guest_objptr != 0); + SetNativePointer(guest_objptr); + + auto guest_object = context->TranslateVirtual(guest_objptr); guest_object->app_id = app_id; guest_object->open_message = open_message; guest_object->close_message = close_message; @@ -38,7 +48,7 @@ X_STATUS XEnumerator::Initialize(uint32_t user_index, uint32_t app_id, guest_object->flags = flags; if (extra_buffer) { *extra_buffer = - !extra_buffer ? nullptr : &native_object[sizeof(X_KENUMERATOR)]; + !extra_buffer ? nullptr : &guest_object[sizeof(X_KENUMERATOR)]; } return X_STATUS_SUCCESS; } diff --git a/src/xenia/kernel/xenumerator.h b/src/xenia/kernel/xenumerator.h index 7b2218e978..e381747cc6 100644 --- a/src/xenia/kernel/xenumerator.h +++ b/src/xenia/kernel/xenumerator.h @@ -15,7 +15,7 @@ #include #include "xenia/kernel/xobject.h" -#include "xenia/xbox.h" +#include "xenia/kernel/kernel_guest_structures.h" namespace xe { namespace kernel { diff --git a/src/xenia/kernel/xevent.cc b/src/xenia/kernel/xevent.cc index bf1176af82..e59fab25d3 100644 --- a/src/xenia/kernel/xevent.cc +++ b/src/xenia/kernel/xevent.cc @@ -11,6 +11,9 @@ #include "xenia/base/byte_stream.h" #include "xenia/base/logging.h" +#include "xenia/kernel/kernel_state.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_ob.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h" namespace xe { namespace kernel { @@ -18,114 +21,63 @@ namespace kernel { XEvent::XEvent(KernelState* kernel_state) : XObject(kernel_state, kObjectType) {} -XEvent::~XEvent() = default; +XEvent::~XEvent() {} void XEvent::Initialize(bool manual_reset, bool initial_state) { - assert_false(event_); - - this->CreateNative(); - - if (manual_reset) { - event_ = xe::threading::Event::CreateManualResetEvent(initial_state); - } else { - event_ = xe::threading::Event::CreateAutoResetEvent(initial_state); - } - assert_not_null(event_); + auto context = 
cpu::ThreadState::Get()->context(); + + auto guest_globals = context->TranslateVirtual( + kernel_state()->GetKernelGuestGlobals()); + uint32_t guest_objptr = 0; + // todo: attributes + X_STATUS create_status = + xboxkrnl::xeObCreateObject(&guest_globals->ExEventObjectType, nullptr, + sizeof(X_KEVENT), &guest_objptr, context); + xenia_assert(create_status == X_STATUS_SUCCESS); + xenia_assert(guest_objptr != 0); + + auto guest_object = context->TranslateVirtual(guest_objptr); + + guest_object->header.type = !manual_reset; + guest_object->header.signal_state = initial_state; + util::XeInitializeListHead(&guest_object->header.wait_list, context); + SetNativePointer(guest_objptr); } void XEvent::InitializeNative(void* native_ptr, X_DISPATCH_HEADER* header) { - assert_false(event_); - - switch (header->type) { - case 0x00: // EventNotificationObject (manual reset) - manual_reset_ = true; - break; - case 0x01: // EventSynchronizationObject (auto reset) - manual_reset_ = false; - break; - default: - assert_always(); - return; - } - - bool initial_state = header->signal_state ? 
true : false; - if (manual_reset_) { - event_ = xe::threading::Event::CreateManualResetEvent(initial_state); - } else { - event_ = xe::threading::Event::CreateAutoResetEvent(initial_state); - } - assert_not_null(event_); -} -int32_t XEvent::Set(uint32_t priority_increment, bool wait) { - event_->Set(); - return 1; + + SetNativePointer(cpu::ThreadState::GetContext()->HostToGuestVirtual(header)); } -int32_t XEvent::Pulse(uint32_t priority_increment, bool wait) { - event_->Pulse(); +int32_t XEvent::Set(uint32_t priority_increment, bool wait) { + xenia_assert(!wait); + xboxkrnl::xeKeSetEvent(cpu::ThreadState::GetContext(), + guest_object(), 0, wait); return 1; } int32_t XEvent::Reset() { - event_->Reset(); + xboxkrnl::xeKeResetEvent(cpu::ThreadState::GetContext(), + guest_object()); return 1; } void XEvent::Query(uint32_t* out_type, uint32_t* out_state) { - auto [type, state] = event_->Query(); - - *out_type = type; - *out_state = state; + xenia_assert(false); } -void XEvent::Clear() { event_->Reset(); } +void XEvent::Clear() { Reset(); } bool XEvent::Save(ByteStream* stream) { - XELOGD("XEvent {:08X} ({})", handle(), manual_reset_ ? "manual" : "auto"); - SaveObject(stream); - - bool signaled = true; - auto result = - xe::threading::Wait(event_.get(), false, std::chrono::milliseconds(0)); - if (result == xe::threading::WaitResult::kSuccess) { - signaled = true; - } else if (result == xe::threading::WaitResult::kTimeout) { - signaled = false; - } else { - assert_always(); - } - - if (signaled) { - // Reset the event in-case it's an auto-reset. 
- event_->Set(); - } - - stream->Write(signaled); - stream->Write(manual_reset_); + xenia_assert(false); return true; } object_ref XEvent::Restore(KernelState* kernel_state, ByteStream* stream) { - auto evt = new XEvent(nullptr); - evt->kernel_state_ = kernel_state; - - evt->RestoreObject(stream); - bool signaled = stream->Read(); - evt->manual_reset_ = stream->Read(); - - if (evt->manual_reset_) { - evt->event_ = xe::threading::Event::CreateManualResetEvent(false); - } else { - evt->event_ = xe::threading::Event::CreateAutoResetEvent(false); - } - assert_not_null(evt->event_); - - if (signaled) { - evt->event_->Set(); - } + xenia_assert(false); - return object_ref(evt); + return object_ref(nullptr); } } // namespace kernel diff --git a/src/xenia/kernel/xevent.h b/src/xenia/kernel/xevent.h index 4fd174cd04..ee467218e5 100644 --- a/src/xenia/kernel/xevent.h +++ b/src/xenia/kernel/xevent.h @@ -12,17 +12,10 @@ #include "xenia/base/threading.h" #include "xenia/kernel/xobject.h" -#include "xenia/xbox.h" - +#include "xenia/kernel/kernel_guest_structures.h" namespace xe { namespace kernel { -// https://www.nirsoft.net/kernel_struct/vista/KEVENT.html -struct X_KEVENT { - X_DISPATCH_HEADER header; -}; -static_assert_size(X_KEVENT, 0x10); - class XEvent : public XObject { public: static const XObject::Type kObjectType = XObject::Type::Event; @@ -34,7 +27,6 @@ class XEvent : public XObject { void InitializeNative(void* native_ptr, X_DISPATCH_HEADER* header); int32_t Set(uint32_t priority_increment, bool wait); - int32_t Pulse(uint32_t priority_increment, bool wait); int32_t Reset(); void Query(uint32_t* out_type, uint32_t* out_state); void Clear(); @@ -44,11 +36,7 @@ class XEvent : public XObject { ByteStream* stream); protected: - xe::threading::WaitHandle* GetWaitHandle() override { return event_.get(); } - - private: - bool manual_reset_ = false; - std::unique_ptr event_; + xe::threading::WaitHandle* GetWaitHandle() override { return nullptr; } }; } // namespace kernel 
diff --git a/src/xenia/kernel/xfile.cc b/src/xenia/kernel/xfile.cc index f46a9e1370..9bc96c3042 100644 --- a/src/xenia/kernel/xfile.cc +++ b/src/xenia/kernel/xfile.cc @@ -17,7 +17,7 @@ #include "xenia/kernel/kernel_state.h" #include "xenia/kernel/xevent.h" #include "xenia/memory.h" - +#include "xenia/kernel/xboxkrnl/xboxkrnl_ob.h" namespace xe { namespace kernel { @@ -27,6 +27,18 @@ XFile::XFile(KernelState* kernel_state, vfs::File* file, bool synchronous) is_synchronous_(synchronous) { async_event_ = threading::Event::CreateAutoResetEvent(false); assert_not_null(async_event_); + uint32_t guest_objptr = 0; + // todo: attributes + auto context = cpu::ThreadState::GetContext(); + auto guest_globals = kernel_state->GetKernelGuestGlobals(context); + X_STATUS create_status = + xboxkrnl::xeObCreateObject(&guest_globals->IoFileObjectType, nullptr, + sizeof(X_KFILE_OBJECT), &guest_objptr, context); + xenia_assert(create_status == X_STATUS_SUCCESS); + xenia_assert(guest_objptr != 0); + + auto guest_object = context->TranslateVirtual(guest_objptr); + SetNativePointer(guest_objptr); } XFile::XFile() : XObject(kObjectType) { @@ -37,12 +49,14 @@ XFile::XFile() : XObject(kObjectType) { XFile::~XFile() { // TODO(benvanik): signal that the file is closing? async_event_->Set(); + file_->Destroy(); } X_STATUS XFile::QueryDirectory(X_FILE_DIRECTORY_INFORMATION* out_info, size_t length, const std::string_view file_name, bool restart) { + //std::unique_lock lock{file_lock_}; assert_not_null(out_info); vfs::Entry* entry = nullptr; @@ -96,6 +110,8 @@ X_STATUS XFile::QueryDirectory(X_FILE_DIRECTORY_INFORMATION* out_info, X_STATUS XFile::Read(uint32_t buffer_guest_address, uint32_t buffer_length, uint64_t byte_offset, uint32_t* out_bytes_read, uint32_t apc_context, bool notify_completion) { + std::lock_guard lock(completion_port_lock_); + // std::unique_lock lock{file_lock_}; if (byte_offset == uint64_t(-1)) { // Read from current position. 
byte_offset = position_; @@ -162,6 +178,8 @@ X_STATUS XFile::Read(uint32_t buffer_guest_address, uint32_t buffer_length, } } + //lock.unlock(); + if (out_bytes_read) { *out_bytes_read = uint32_t(bytes_read); } @@ -183,6 +201,8 @@ X_STATUS XFile::Read(uint32_t buffer_guest_address, uint32_t buffer_length, X_STATUS XFile::ReadScatter(uint32_t segments_guest_address, uint32_t length, uint64_t byte_offset, uint32_t* out_bytes_read, uint32_t apc_context) { + std::lock_guard lock(completion_port_lock_); + //std::unique_lock lock{file_lock_}; X_STATUS result = X_STATUS_SUCCESS; // segments points to an array of buffer pointers of type @@ -219,7 +239,7 @@ X_STATUS XFile::ReadScatter(uint32_t segments_guest_address, uint32_t length, read_total += bytes_read; read_remain -= read_length; } - + //lock.unlock(); if (out_bytes_read) { *out_bytes_read = uint32_t(read_total); } @@ -239,6 +259,8 @@ X_STATUS XFile::ReadScatter(uint32_t segments_guest_address, uint32_t length, X_STATUS XFile::Write(uint32_t buffer_guest_address, uint32_t buffer_length, uint64_t byte_offset, uint32_t* out_bytes_written, uint32_t apc_context) { + std::lock_guard lock(completion_port_lock_); + //std::unique_lock lock{file_lock_}; if (byte_offset == uint64_t(-1)) { // Write from current position. 
byte_offset = position_; @@ -248,9 +270,11 @@ X_STATUS XFile::Write(uint32_t buffer_guest_address, uint32_t buffer_length, X_STATUS result = file_->WriteSync(memory()->TranslateVirtual(buffer_guest_address), buffer_length, size_t(byte_offset), &bytes_written); + if (XSUCCEEDED(result)) { position_ += bytes_written; } + //lock.unlock(); XIOCompletion::IONotification notify; notify.apc_context = apc_context; @@ -267,7 +291,10 @@ X_STATUS XFile::Write(uint32_t buffer_guest_address, uint32_t buffer_length, return result; } -X_STATUS XFile::SetLength(size_t length) { return file_->SetLength(length); } +X_STATUS XFile::SetLength(size_t length) { + //std::unique_lock lock{file_lock_}; + return file_->SetLength(length); +} void XFile::RegisterIOCompletionPort(uint32_t key, object_ref port) { @@ -342,7 +369,7 @@ object_ref XFile::Restore(KernelState* kernel_state, void XFile::NotifyIOCompletionPorts( XIOCompletion::IONotification& notification) { - std::lock_guard lock(completion_port_lock_); + //std::lock_guard lock(completion_port_lock_); for (auto port : completion_ports_) { notification.key_context = port.first; diff --git a/src/xenia/kernel/xfile.h b/src/xenia/kernel/xfile.h index a5130da8d9..7d2813168b 100644 --- a/src/xenia/kernel/xfile.h +++ b/src/xenia/kernel/xfile.h @@ -18,7 +18,7 @@ #include "xenia/vfs/device.h" #include "xenia/vfs/entry.h" #include "xenia/vfs/file.h" -#include "xenia/xbox.h" +#include "xenia/kernel/kernel_guest_structures.h" namespace xe { namespace kernel { @@ -135,6 +135,7 @@ class XFile : public XObject { std::mutex completion_port_lock_; std::vector>> completion_ports_; + //std::mutex file_lock_; // TODO(benvanik): create flags, open state, etc. 
diff --git a/src/xenia/kernel/xiocompletion.h b/src/xenia/kernel/xiocompletion.h index a7d34889d6..3791febbfc 100644 --- a/src/xenia/kernel/xiocompletion.h +++ b/src/xenia/kernel/xiocompletion.h @@ -14,7 +14,7 @@ #include "xenia/base/threading.h" #include "xenia/kernel/xobject.h" -#include "xenia/xbox.h" +#include "xenia/kernel/kernel_guest_structures.h" namespace xe { namespace kernel { diff --git a/src/xenia/kernel/xmodule.h b/src/xenia/kernel/xmodule.h index 28e860da35..9e0accf868 100644 --- a/src/xenia/kernel/xmodule.h +++ b/src/xenia/kernel/xmodule.h @@ -14,7 +14,7 @@ #include "xenia/cpu/module.h" #include "xenia/kernel/xobject.h" -#include "xenia/xbox.h" +#include "xenia/kernel/kernel_guest_structures.h" namespace xe { namespace kernel { diff --git a/src/xenia/kernel/xmutant.cc b/src/xenia/kernel/xmutant.cc index 0a67f21831..fef0a56783 100644 --- a/src/xenia/kernel/xmutant.cc +++ b/src/xenia/kernel/xmutant.cc @@ -12,8 +12,11 @@ #include "xenia/base/byte_stream.h" #include "xenia/base/logging.h" #include "xenia/kernel/kernel_state.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h" #include "xenia/kernel/xthread.h" +#include "xenia/cpu/processor.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_ob.h" namespace xe { namespace kernel { @@ -22,74 +25,55 @@ XMutant::XMutant(KernelState* kernel_state) XMutant::XMutant() : XObject(kObjectType) {} -XMutant::~XMutant() = default; - -void XMutant::Initialize(bool initial_owner) { - assert_false(mutant_); - - mutant_ = xe::threading::Mutant::Create(initial_owner); - assert_not_null(mutant_); +XMutant::~XMutant() { + // mutant object type delete proc. + auto context = cpu::ThreadState::GetContext(); + xboxkrnl::xeKeReleaseMutant(cpu::ThreadState::GetContext(), + guest_object(), 1, 1, 0); + //discard any status raised + context->CatchStatus(); } -void XMutant::InitializeNative(void* native_ptr, X_DISPATCH_HEADER* header) { - assert_false(mutant_); - - // Haven't seen this yet, but it's possible. 
- assert_always(); +void XMutant::Initialize(bool initial_owner, X_OBJECT_ATTRIBUTES* attributes) { + auto context = cpu::ThreadState::Get()->context(); + uint32_t guest_objptr = 0; + auto guest_globals = context->TranslateVirtual( + kernel_state()->GetKernelGuestGlobals()); + X_STATUS create_status = + xboxkrnl::xeObCreateObject(&guest_globals->ExMutantObjectType, nullptr, + sizeof(X_KMUTANT), &guest_objptr, context); + xenia_assert(create_status == X_STATUS_SUCCESS); + xenia_assert(guest_objptr != 0); + + auto guest_object = context->TranslateVirtual(guest_objptr); + xboxkrnl::xeKeInitializeMutant(guest_object, initial_owner, context); + + SetNativePointer(guest_objptr); } -X_STATUS XMutant::ReleaseMutant(uint32_t priority_increment, bool abandon, - bool wait) { - // Call should succeed if we own the mutant, so go ahead and do this. - if (owning_thread_ == XThread::GetCurrentThread()) { - owning_thread_ = nullptr; - } - - // TODO(benvanik): abandoning. - assert_false(abandon); - if (mutant_->Release()) { - return X_STATUS_SUCCESS; - } else { - return X_STATUS_MUTANT_NOT_OWNED; - } +void XMutant::InitializeNative(void* native_ptr, X_DISPATCH_HEADER* header) { + xe::FatalError("Unimplemented XMutant::InitializeNative"); } -bool XMutant::Save(ByteStream* stream) { - if (!SaveObject(stream)) { - return false; +X_STATUS XMutant::GetSignaledStatus(X_STATUS success_in) { + if (success_in <= 63U) { + auto km = guest_object(); + if (km->abandoned) { + return X_STATUS_ABANDONED_WAIT_0 + success_in; + } } - - uint32_t owning_thread_handle = owning_thread_ ? 
owning_thread_->handle() : 0; - stream->Write(owning_thread_handle); - XELOGD("XMutant {:08X} (owner: {:08X})", handle(), owning_thread_handle); - - return true; + return success_in; } +bool XMutant::Save(ByteStream* stream) { return true; } object_ref XMutant::Restore(KernelState* kernel_state, ByteStream* stream) { auto mutant = new XMutant(); - mutant->kernel_state_ = kernel_state; - - if (!mutant->RestoreObject(stream)) { - delete mutant; - return nullptr; - } - - mutant->Initialize(false); - - auto owning_thread_handle = stream->Read(); - if (owning_thread_handle) { - mutant->owning_thread_ = kernel_state->object_table() - ->LookupObject(owning_thread_handle) - .get(); - mutant->owning_thread_->AcquireMutantOnStartup(retain_object(mutant)); - } return object_ref(mutant); } - -void XMutant::WaitCallback() { owning_thread_ = XThread::GetCurrentThread(); } +xe::threading::WaitHandle* XMutant::GetWaitHandle() { return nullptr; } +void XMutant::WaitCallback() {} } // namespace kernel } // namespace xe diff --git a/src/xenia/kernel/xmutant.h b/src/xenia/kernel/xmutant.h index 5dddf905b0..7df527fb68 100644 --- a/src/xenia/kernel/xmutant.h +++ b/src/xenia/kernel/xmutant.h @@ -12,10 +12,11 @@ #include "xenia/base/threading.h" #include "xenia/kernel/xobject.h" -#include "xenia/xbox.h" +#include "xenia/kernel/kernel_guest_structures.h" namespace xe { namespace kernel { + class XThread; class XMutant : public XObject { @@ -25,24 +26,19 @@ class XMutant : public XObject { explicit XMutant(KernelState* kernel_state); ~XMutant() override; - void Initialize(bool initial_owner); + void Initialize(bool initial_owner, X_OBJECT_ATTRIBUTES* attributes); void InitializeNative(void* native_ptr, X_DISPATCH_HEADER* header); - X_STATUS ReleaseMutant(uint32_t priority_increment, bool abandon, bool wait); - bool Save(ByteStream* stream) override; static object_ref Restore(KernelState* kernel_state, ByteStream* stream); protected: - xe::threading::WaitHandle* GetWaitHandle() override { 
return mutant_.get(); } + xe::threading::WaitHandle* GetWaitHandle() override; void WaitCallback() override; - + virtual X_STATUS GetSignaledStatus(X_STATUS success_in) override; private: XMutant(); - - std::unique_ptr mutant_; - XThread* owning_thread_ = nullptr; }; } // namespace kernel diff --git a/src/xenia/kernel/xnotifylistener.cc b/src/xenia/kernel/xnotifylistener.cc index fc2d24c98c..c2cb713b85 100644 --- a/src/xenia/kernel/xnotifylistener.cc +++ b/src/xenia/kernel/xnotifylistener.cc @@ -6,14 +6,15 @@ * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ - #include "xenia/kernel/xnotifylistener.h" +#include "xenia/kernel/xam/xam_guest_structures.h" #include "xenia/base/assert.h" #include "xenia/base/byte_stream.h" #include "xenia/base/logging.h" #include "xenia/kernel/kernel_state.h" - +#include "xenia/kernel/xboxkrnl/xboxkrnl_ob.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h" namespace xe { namespace kernel { @@ -21,12 +22,29 @@ XNotifyListener::XNotifyListener(KernelState* kernel_state) : XObject(kernel_state, kObjectType) {} XNotifyListener::~XNotifyListener() {} - +X_XAMNOTIFY* XNotifyListener::Get() { return guest_object(); } void XNotifyListener::Initialize(uint64_t mask, uint32_t max_version) { - assert_false(wait_handle_); - - wait_handle_ = xe::threading::Event::CreateManualResetEvent(false); - assert_not_null(wait_handle_); + auto context = cpu::ThreadState::Get()->context(); + uint32_t guest_objptr = 0; + auto guest_globals = context->TranslateVirtual( + kernel_state()->GetKernelGuestGlobals()); + X_STATUS create_status = xboxkrnl::xeObCreateObject( + &guest_globals->XamNotifyListenerObjectType, nullptr, sizeof(X_XAMNOTIFY), + &guest_objptr, context); + + xenia_assert(create_status == X_STATUS_SUCCESS); + xenia_assert(guest_objptr != 0); + + auto ksem = context->TranslateVirtual(guest_objptr); + + ksem->event.header.type = 1; + 
ksem->event.header.signal_state = 0; + util::XeInitializeListHead(&ksem->event.header.wait_list, context); + ksem->process_type_related = + -2 - ((xboxkrnl::xeKeGetCurrentProcessType(context) == 2) - 3); + ksem->spinlock.pcr_of_owner = 0; + + SetNativePointer(guest_objptr); mask_ = mask; max_version_ = max_version; @@ -43,14 +61,23 @@ void XNotifyListener::EnqueueNotification(XNotificationID id, uint32_t data) { if (key.version > max_version_) { return; } - auto global_lock = global_critical_region_.Acquire(); + auto thiz = Get(); + auto context = cpu::ThreadState::GetContext(); + + thiz->an_irql = xboxkrnl::xeKeKfAcquireSpinLock(context, &thiz->spinlock); + notifications_.push_back(std::pair(id, data)); - wait_handle_->Set(); + + xboxkrnl::xeKeSetEvent(context, &thiz->event, 1, 0); + xboxkrnl::xeKeKfReleaseSpinLock(context, &thiz->spinlock, thiz->an_irql); } bool XNotifyListener::DequeueNotification(XNotificationID* out_id, uint32_t* out_data) { - auto global_lock = global_critical_region_.Acquire(); + auto thiz = Get(); + auto context = cpu::ThreadState::GetContext(); + thiz->an_irql = xboxkrnl::xeKeKfAcquireSpinLock(context, &thiz->spinlock); + bool dequeued = false; if (notifications_.size()) { dequeued = true; @@ -59,16 +86,22 @@ bool XNotifyListener::DequeueNotification(XNotificationID* out_id, *out_data = it->second; notifications_.erase(it); if (!notifications_.size()) { - wait_handle_->Reset(); + // inlined clearevent? 
original XNotifyGetNext does this + thiz->event.header.signal_state = 0; } } + + xboxkrnl::xeKeKfReleaseSpinLock(context, &thiz->spinlock, thiz->an_irql); return dequeued; } bool XNotifyListener::DequeueNotification(XNotificationID id, uint32_t* out_data) { - auto global_lock = global_critical_region_.Acquire(); + auto thiz = Get(); + auto context = cpu::ThreadState::GetContext(); + thiz->an_irql = xboxkrnl::xeKeKfAcquireSpinLock(context, &thiz->spinlock); if (!notifications_.size()) { + xboxkrnl::xeKeKfReleaseSpinLock(context, &thiz->spinlock, thiz->an_irql); return false; } bool dequeued = false; @@ -80,10 +113,11 @@ bool XNotifyListener::DequeueNotification(XNotificationID id, *out_data = it->second; notifications_.erase(it); if (!notifications_.size()) { - wait_handle_->Reset(); + thiz->event.header.signal_state = 0; } break; } + xboxkrnl::xeKeKfReleaseSpinLock(context, &thiz->spinlock, thiz->an_irql); return dequeued; } diff --git a/src/xenia/kernel/xnotifylistener.h b/src/xenia/kernel/xnotifylistener.h index 7ec5be2c9a..e58f3d73cb 100644 --- a/src/xenia/kernel/xnotifylistener.h +++ b/src/xenia/kernel/xnotifylistener.h @@ -21,7 +21,7 @@ namespace xe { namespace kernel { - +struct X_XAMNOTIFY; union XNotificationKey { XNotificationID id; struct { @@ -61,16 +61,16 @@ class XNotifyListener : public XObject { ByteStream* stream); protected: - xe::threading::WaitHandle* GetWaitHandle() override { - return wait_handle_.get(); + xe::threading::WaitHandle* GetWaitHandle() override { return nullptr; } + + private: - std::unique_ptr wait_handle_; - xe::global_critical_region global_critical_region_; std::vector> notifications_; uint64_t mask_ = 0; uint32_t max_version_ = 0; + X_XAMNOTIFY* Get(); }; } // namespace kernel diff --git a/src/xenia/kernel/xobject.cc b/src/xenia/kernel/xobject.cc index 01a144a6c4..6dd1231c5f 100644 --- a/src/xenia/kernel/xobject.cc +++ b/src/xenia/kernel/xobject.cc @@ -13,9 +13,11 @@ #include "xenia/base/byte_stream.h" #include 
"xenia/base/clock.h" +#include "xenia/cpu/processor.h" #include "xenia/kernel/kernel_state.h" #include "xenia/kernel/util/shim_utils.h" #include "xenia/kernel/xboxkrnl/xboxkrnl_private.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h" #include "xenia/kernel/xenumerator.h" #include "xenia/kernel/xevent.h" #include "xenia/kernel/xfile.h" @@ -26,7 +28,6 @@ #include "xenia/kernel/xsymboliclink.h" #include "xenia/kernel/xthread.h" #include "xenia/xbox.h" - namespace xe { namespace kernel { @@ -59,7 +60,29 @@ XObject::~XObject() { auto header = memory()->TranslateVirtual(ptr); kernel_state()->object_table()->UnmapGuestObjectHostHandle(ptr); - memory()->SystemHeapFree(ptr); + + auto context = cpu::ThreadState::GetContext(); + auto type = + context->TranslateVirtual(header->object_type_ptr); + //from ObDereferenceObject + + if (type->delete_proc) { + uint64_t args[] = {guest_object_ptr_}; + context->processor->Execute(context->thread_state(), type->delete_proc, + args, 1); + } + void* object_base = header; + if (header->flags & OBJECT_HEADER_FLAG_NAMED_OBJECT) { + object_base = &reinterpret_cast(header)[-1]; + } + + { + uint64_t free_args[] = {context->HostToGuestVirtual(object_base)}; + context->processor->Execute(context->thread_state(), type->free_proc, + free_args, 1); + } + guest_object_ptr_ = 0; + allocated_guest_object_ = false; } } @@ -191,127 +214,11 @@ uint32_t XObject::TimeoutTicksToMs(int64_t timeout_ticks) { X_STATUS XObject::Wait(uint32_t wait_reason, uint32_t processor_mode, uint32_t alertable, uint64_t* opt_timeout) { - auto wait_handle = GetWaitHandle(); - if (!wait_handle) { - // Object doesn't support waiting. - return X_STATUS_SUCCESS; - } - - auto timeout_ms = - opt_timeout ? std::chrono::milliseconds(Clock::ScaleGuestDurationMillis( - TimeoutTicksToMs(*opt_timeout))) - : std::chrono::milliseconds::max(); - - auto result = - xe::threading::Wait(wait_handle, alertable ? 
true : false, timeout_ms); - switch (result) { - case xe::threading::WaitResult::kSuccess: - WaitCallback(); - return X_STATUS_SUCCESS; - case xe::threading::WaitResult::kUserCallback: - // Or X_STATUS_ALERTED? - return X_STATUS_USER_APC; - case xe::threading::WaitResult::kTimeout: - xe::threading::MaybeYield(); - return X_STATUS_TIMEOUT; - default: - case xe::threading::WaitResult::kAbandoned: - case xe::threading::WaitResult::kFailed: - return X_STATUS_ABANDONED_WAIT_0; - } + return xboxkrnl::xeKeWaitForSingleObject( + cpu::ThreadState::GetContext(), guest_object(), + wait_reason, processor_mode, alertable, (int64_t*)opt_timeout); } - -X_STATUS XObject::SignalAndWait(XObject* signal_object, XObject* wait_object, - uint32_t wait_reason, uint32_t processor_mode, - uint32_t alertable, uint64_t* opt_timeout) { - auto timeout_ms = - opt_timeout ? std::chrono::milliseconds(Clock::ScaleGuestDurationMillis( - TimeoutTicksToMs(*opt_timeout))) - : std::chrono::milliseconds::max(); - - auto result = xe::threading::SignalAndWait( - signal_object->GetWaitHandle(), wait_object->GetWaitHandle(), - alertable ? true : false, timeout_ms); - switch (result) { - case xe::threading::WaitResult::kSuccess: - wait_object->WaitCallback(); - return X_STATUS_SUCCESS; - case xe::threading::WaitResult::kUserCallback: - // Or X_STATUS_ALERTED? 
- return X_STATUS_USER_APC; - case xe::threading::WaitResult::kTimeout: - xe::threading::MaybeYield(); - return X_STATUS_TIMEOUT; - default: - case xe::threading::WaitResult::kAbandoned: - case xe::threading::WaitResult::kFailed: - return X_STATUS_ABANDONED_WAIT_0; - } -} - -X_STATUS XObject::WaitMultiple(uint32_t count, XObject** objects, - uint32_t wait_type, uint32_t wait_reason, - uint32_t processor_mode, uint32_t alertable, - uint64_t* opt_timeout) { - xe::threading::WaitHandle* wait_handles[64]; - - for (size_t i = 0; i < count; ++i) { - wait_handles[i] = objects[i]->GetWaitHandle(); - assert_not_null(wait_handles[i]); - } - - auto timeout_ms = - opt_timeout ? std::chrono::milliseconds(Clock::ScaleGuestDurationMillis( - TimeoutTicksToMs(*opt_timeout))) - : std::chrono::milliseconds::max(); - - if (wait_type) { - auto result = xe::threading::WaitAny(wait_handles, count, - alertable ? true : false, timeout_ms); - switch (result.first) { - case xe::threading::WaitResult::kSuccess: - objects[result.second]->WaitCallback(); - - return X_STATUS(result.second); - case xe::threading::WaitResult::kUserCallback: - // Or X_STATUS_ALERTED? - return X_STATUS_USER_APC; - case xe::threading::WaitResult::kTimeout: - xe::threading::MaybeYield(); - return X_STATUS_TIMEOUT; - default: - case xe::threading::WaitResult::kAbandoned: - return X_STATUS(X_STATUS_ABANDONED_WAIT_0 + result.second); - case xe::threading::WaitResult::kFailed: - return X_STATUS_UNSUCCESSFUL; - } - } else { - auto result = xe::threading::WaitAll(wait_handles, count, - alertable ? true : false, timeout_ms); - switch (result) { - case xe::threading::WaitResult::kSuccess: - for (uint32_t i = 0; i < count; i++) { - objects[i]->WaitCallback(); - } - - return X_STATUS_SUCCESS; - case xe::threading::WaitResult::kUserCallback: - // Or X_STATUS_ALERTED? 
- return X_STATUS_USER_APC; - case xe::threading::WaitResult::kTimeout: - xe::threading::MaybeYield(); - return X_STATUS_TIMEOUT; - default: - case xe::threading::WaitResult::kAbandoned: - case xe::threading::WaitResult::kFailed: - return X_STATUS_ABANDONED_WAIT_0; - } - } -} - uint8_t* XObject::CreateNative(uint32_t size) { - auto global_lock = xe::global_critical_region::AcquireDirect(); - uint32_t total_size = size + sizeof(X_OBJECT_HEADER); auto mem = memory()->SystemHeapAlloc(total_size); @@ -335,18 +242,16 @@ uint8_t* XObject::CreateNative(uint32_t size) { } void XObject::SetNativePointer(uint32_t native_ptr, bool uninitialized) { - auto global_lock = xe::global_critical_region::AcquireDirect(); - // If hit: We've already setup the native ptr with CreateNative! assert_zero(guest_object_ptr_); // Stash pointer in struct. // FIXME: This assumes the object has a dispatch header (some don't!) - //StashHandle(header, handle()); + // StashHandle(header, handle()); kernel_state()->object_table()->MapGuestObjectToHostHandle(native_ptr, handle()); - guest_object_ptr_ = native_ptr; + allocated_guest_object_ = true; } object_ref XObject::GetNativeObject(KernelState* kernel_state, @@ -375,23 +280,40 @@ object_ref XObject::GetNativeObject(KernelState* kernel_state, as_type = header->type; } auto true_object_header = - kernel_state->memory()->TranslateVirtual(guest_ptr-sizeof(X_OBJECT_HEADER)); + kernel_state->memory()->TranslateVirtual( + guest_ptr - sizeof(X_OBJECT_HEADER)); X_HANDLE host_handle; - - if (kernel_state->object_table()->HostHandleForGuestObject(guest_ptr, host_handle)) { + bool successfully_mapped_to_host = + kernel_state->object_table()->HostHandleForGuestObject(guest_ptr, + host_handle); + if (successfully_mapped_to_host) { + if (result = kernel_state->object_table() + ->LookupObject(host_handle, true) + .release()) { + } + } + + if (successfully_mapped_to_host) { // Already initialized. 
// TODO: assert if the type of the object != as_type - - + result = kernel_state->object_table() ->LookupObject(host_handle, true) .release(); + + if (HasDispatcherHeader(result->type())) { + if (MapGuestTypeToHost(header->type) != result->type()) { + goto create_new; + } + } + goto return_result; // TODO(benvanik): assert nothing has been changed in the struct. // return object; } else { + create_new: // First use, create new. // https://www.nirsoft.net/kernel_struct/vista/KOBJECTS.html XObject* object = nullptr; @@ -431,16 +353,13 @@ object_ref XObject::GetNativeObject(KernelState* kernel_state, case 23: // ProfileObject case 24: // ThreadedDpcObject default: - assert_always(); + // assert_always(); result = nullptr; goto return_result; // return NULL; } - // Stash pointer in struct. - // FIXME: This assumes the object contains a dispatch header (some don't!) - // StashHandle(header, object->handle()); kernel_state->object_table()->MapGuestObjectToHostHandle(guest_ptr, object->handle()); result = object; diff --git a/src/xenia/kernel/xobject.h b/src/xenia/kernel/xobject.h index e44dc1ea2d..24c7f5f25c 100644 --- a/src/xenia/kernel/xobject.h +++ b/src/xenia/kernel/xobject.h @@ -14,10 +14,10 @@ #include #include #include - +#include "xenia/cpu/ppc/ppc_context.h" #include "xenia/base/threading.h" #include "xenia/memory.h" -#include "xenia/xbox.h" +#include "xenia/kernel/kernel_guest_structures.h" namespace xe { class ByteStream; @@ -34,33 +34,7 @@ class KernelState; template class object_ref; -// https://www.nirsoft.net/kernel_struct/vista/DISPATCHER_HEADER.html -typedef struct { - struct { - uint8_t type; - - union { - uint8_t abandoned; - uint8_t absolute; - uint8_t npx_irql; - uint8_t signalling; - }; - union { - uint8_t size; - uint8_t hand; - }; - union { - uint8_t inserted; - uint8_t debug_active; - uint8_t dpc_active; - }; - }; - xe::be signal_state; - xe::be wait_list_flink; - xe::be wait_list_blink; -} X_DISPATCH_HEADER; 
-static_assert_size(X_DISPATCH_HEADER, 0x10); // https://www.nirsoft.net/kernel_struct/vista/OBJECT_CREATE_INFORMATION.html struct X_OBJECT_CREATE_INFORMATION { @@ -102,9 +76,39 @@ class XObject { SymbolicLink, Thread, Timer, - Device + Device }; + static bool HasDispatcherHeader(Type type) { + switch (type) { + case Type::Event: + case Type::Mutant: + case Type::Semaphore: + case Type::Thread: + case Type::Timer: + return true; + } + return false; + } + + static Type MapGuestTypeToHost(uint16_t type) { + // todo: this is not fully filled in + switch (type) { + case 0: + case 1: + return Type::Event; + case 2: + return Type::Mutant; + case 5: + return Type::Semaphore; + case 6: + return Type::Thread; + case 8: + case 9: + return Type::Timer; + } + return Type::Undefined; + } XObject(Type type); XObject(KernelState* kernel_state, Type type, bool host_object = false); virtual ~XObject(); @@ -123,7 +127,10 @@ class XObject { std::vector& handles() { return handles_; } const std::string& name() const { return name_; } - uint32_t guest_object() const { return guest_object_ptr_; } + uint32_t guest_object() const { + xenia_assert(guest_object_ptr_); + return guest_object_ptr_; + } // Has this object been created for use by the host? // Host objects are persisted through reloads/etc. 
@@ -132,9 +139,12 @@ class XObject { template T* guest_object() { - return memory()->TranslateVirtual(guest_object_ptr_); + return memory()->TranslateVirtual(guest_object()); + } + template + const T* guest_object() const { + return memory()->TranslateVirtual(guest_object()); } - void RetainHandle(); bool ReleaseHandle(); void Retain(); @@ -155,13 +165,6 @@ class XObject { X_STATUS Wait(uint32_t wait_reason, uint32_t processor_mode, uint32_t alertable, uint64_t* opt_timeout); - static X_STATUS SignalAndWait(XObject* signal_object, XObject* wait_object, - uint32_t wait_reason, uint32_t processor_mode, - uint32_t alertable, uint64_t* opt_timeout); - static X_STATUS WaitMultiple(uint32_t count, XObject** objects, - uint32_t wait_type, uint32_t wait_reason, - uint32_t processor_mode, uint32_t alertable, - uint64_t* opt_timeout); static object_ref GetNativeObject(KernelState* kernel_state, void* native_ptr, @@ -179,6 +182,7 @@ class XObject { // Called on successful wait. virtual void WaitCallback() {} virtual xe::threading::WaitHandle* GetWaitHandle() { return nullptr; } + virtual X_STATUS GetSignaledStatus(X_STATUS success_in) { return success_in; } // Creates the kernel object for guest code to use. Typically not needed. uint8_t* CreateNative(uint32_t size); @@ -189,15 +193,15 @@ class XObject { return reinterpret_cast(CreateNative(sizeof(T))); } - - static uint32_t TimeoutTicksToMs(int64_t timeout_ticks); KernelState* kernel_state_; // Host objects are persisted through resets/etc. bool host_object_ = false; - + // Guest pointer for kernel object. Remember: X_OBJECT_HEADER precedes this + // if we allocated it! + uint32_t guest_object_ptr_ = 0; private: std::atomic pointer_ref_count_; @@ -205,9 +209,7 @@ class XObject { std::vector handles_; std::string name_; // May be zero length. - // Guest pointer for kernel object. Remember: X_OBJECT_HEADER precedes this - // if we allocated it! 
- uint32_t guest_object_ptr_ = 0; + bool allocated_guest_object_ = false; }; diff --git a/src/xenia/kernel/xsemaphore.cc b/src/xenia/kernel/xsemaphore.cc index 34b960b252..f1aa6768a1 100644 --- a/src/xenia/kernel/xsemaphore.cc +++ b/src/xenia/kernel/xsemaphore.cc @@ -11,6 +11,8 @@ #include "xenia/base/byte_stream.h" #include "xenia/base/logging.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_ob.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h" namespace xe { namespace kernel { @@ -21,75 +23,36 @@ XSemaphore::XSemaphore(KernelState* kernel_state) XSemaphore::~XSemaphore() = default; bool XSemaphore::Initialize(int32_t initial_count, int32_t maximum_count) { - assert_false(semaphore_); - - CreateNative(sizeof(X_KSEMAPHORE)); - - maximum_count_ = maximum_count; - semaphore_ = xe::threading::Semaphore::Create(initial_count, maximum_count); - return !!semaphore_; + auto context = cpu::ThreadState::Get()->context(); + uint32_t guest_objptr = 0; + auto guest_globals = context->TranslateVirtual( + kernel_state()->GetKernelGuestGlobals()); + X_STATUS create_status = + xboxkrnl::xeObCreateObject(&guest_globals->ExSemaphoreObjectType, nullptr, + sizeof(X_KSEMAPHORE), &guest_objptr, context); + + xenia_assert(create_status == X_STATUS_SUCCESS); + xenia_assert(guest_objptr != 0); + + auto ksem = context->TranslateVirtual(guest_objptr); + xboxkrnl::xeKeInitializeSemaphore(ksem, initial_count, maximum_count); + SetNativePointer(guest_objptr); + return true; } bool XSemaphore::InitializeNative(void* native_ptr, X_DISPATCH_HEADER* header) { - assert_false(semaphore_); - - auto semaphore = reinterpret_cast(native_ptr); - maximum_count_ = semaphore->limit; - semaphore_ = xe::threading::Semaphore::Create(semaphore->header.signal_state, - semaphore->limit); - return !!semaphore_; -} - -int32_t XSemaphore::ReleaseSemaphore(int32_t release_count) { - int32_t previous_count = 0; - semaphore_->Release(release_count, &previous_count); - return previous_count; + return true; } bool 
XSemaphore::Save(ByteStream* stream) { - if (!SaveObject(stream)) { - return false; - } - - // Get the free number of slots from the semaphore. - uint32_t free_count = 0; - while ( - threading::Wait(semaphore_.get(), false, std::chrono::milliseconds(0)) == - threading::WaitResult::kSuccess) { - free_count++; - } - - XELOGD("XSemaphore {:08X} (count {}/{})", handle(), free_count, - maximum_count_); - - // Restore the semaphore back to its previous count. - semaphore_->Release(free_count, nullptr); - - stream->Write(maximum_count_); - stream->Write(free_count); return true; } object_ref XSemaphore::Restore(KernelState* kernel_state, ByteStream* stream) { - auto sem = new XSemaphore(nullptr); - sem->kernel_state_ = kernel_state; - - if (!sem->RestoreObject(stream)) { - return nullptr; - } - - sem->maximum_count_ = stream->Read(); - auto free_count = stream->Read(); - XELOGD("XSemaphore {:08X} (count {}/{})", sem->handle(), free_count, - sem->maximum_count_); - - sem->semaphore_ = - threading::Semaphore::Create(free_count, sem->maximum_count_); - assert_not_null(sem->semaphore_); - return object_ref(sem); + return object_ref(nullptr); } } // namespace kernel diff --git a/src/xenia/kernel/xsemaphore.h b/src/xenia/kernel/xsemaphore.h index db06f9ee2d..3b45abd782 100644 --- a/src/xenia/kernel/xsemaphore.h +++ b/src/xenia/kernel/xsemaphore.h @@ -12,7 +12,7 @@ #include "xenia/base/threading.h" #include "xenia/kernel/xobject.h" -#include "xenia/xbox.h" +#include "xenia/kernel/kernel_guest_structures.h" #include "xenia/kernel/xthread.h" namespace xe { namespace kernel { @@ -28,20 +28,16 @@ class XSemaphore : public XObject { [[nodiscard]] bool InitializeNative(void* native_ptr, X_DISPATCH_HEADER* header); - int32_t ReleaseSemaphore(int32_t release_count); - bool Save(ByteStream* stream) override; static object_ref Restore(KernelState* kernel_state, ByteStream* stream); protected: xe::threading::WaitHandle* GetWaitHandle() override { - return semaphore_.get(); + return 
nullptr; } private: - std::unique_ptr semaphore_; - uint32_t maximum_count_ = 0; }; } // namespace kernel diff --git a/src/xenia/kernel/xsymboliclink.h b/src/xenia/kernel/xsymboliclink.h index 12e339250d..ac2c083a3a 100644 --- a/src/xenia/kernel/xsymboliclink.h +++ b/src/xenia/kernel/xsymboliclink.h @@ -16,7 +16,7 @@ #include "xenia/base/mutex.h" #include "xenia/base/threading.h" #include "xenia/kernel/xobject.h" -#include "xenia/xbox.h" +#include "xenia/kernel/kernel_guest_structures.h" namespace xe { namespace kernel { diff --git a/src/xenia/kernel/xthread.cc b/src/xenia/kernel/xthread.cc index 130f4f06aa..8bfa7bcfc0 100644 --- a/src/xenia/kernel/xthread.cc +++ b/src/xenia/kernel/xthread.cc @@ -23,28 +23,39 @@ #include "xenia/cpu/ppc/ppc_decode_data.h" #include "xenia/cpu/processor.h" #include "xenia/emulator.h" +#include "xenia/kernel/kernel_guest_structures.h" #include "xenia/kernel/kernel_state.h" #include "xenia/kernel/user_module.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_memory.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_ob.h" #include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h" + #include "xenia/kernel/xevent.h" #include "xenia/kernel/xmutant.h" -DEFINE_bool(ignore_thread_priorities, true, - "Ignores game-specified thread priorities.", "Kernel"); -DEFINE_bool(ignore_thread_affinities, true, - "Ignores game-specified thread affinities.", "Kernel"); - -#if 0 -DEFINE_int64(stack_size_multiplier_hack, 1, - "A hack for games with setjmp/longjmp issues.", "Kernel"); -DEFINE_int64(main_xthread_stack_size_multiplier_hack, 1, - "A hack for games with setjmp/longjmp issues.", "Kernel"); -#endif + +#define LOOKUP_XTHREAD_FROM_KTHREAD 1 namespace xe { namespace kernel { +X_KPCR* GetKPCR() { return GetKPCR(cpu::ThreadState::Get()->context()); } +X_KPCR* GetKPCR(PPCContext* context) { +#if XE_COMPARISON_BUILD + return reinterpret_cast(context->kpcr); +#else + return context->TranslateVirtualGPR(context->r[13]); +#endif +} + +X_KTHREAD* GetKThread() { + return 
GetKThread(cpu::ThreadState::Get()->context()); +} +X_KTHREAD* GetKThread(PPCContext* context) { + return context->TranslateVirtual(GetKPCR(context)->prcb_data.current_thread); +} +X_KTHREAD* GetKThread(cpu::ppc::PPCContext* context, X_KPCR* pcr) { + return context->TranslateVirtual(pcr->prcb_data.current_thread); +} const uint32_t XAPC::kSize; -const uint32_t XAPC::kDummyKernelRoutine; -const uint32_t XAPC::kDummyRundownRoutine; using xe::cpu::ppc::PPCOpcode; @@ -60,14 +71,17 @@ XThread::XThread(KernelState* kernel_state, uint32_t stack_size, uint32_t start_context, uint32_t creation_flags, bool guest_thread, bool main_thread, uint32_t guest_process) : XObject(kernel_state, kObjectType, !guest_thread), - thread_id_(++next_xthread_id_), guest_thread_(guest_thread), main_thread_(main_thread) { creation_params_.stack_size = stack_size; creation_params_.xapi_thread_startup = xapi_thread_startup; creation_params_.start_address = start_address; creation_params_.start_context = start_context; + X_HANDLE threadid_handle; + kernel_state->object_table()->DuplicateHandle(this->handle(), + &threadid_handle); + thread_id_ = threadid_handle; // top 8 bits = processor ID (or 0 for default) // bit 0 = 1 to create suspended creation_params_.creation_flags = creation_flags; @@ -79,6 +93,36 @@ XThread::XThread(KernelState* kernel_state, uint32_t stack_size, creation_params_.guest_process = guest_process; // The kernel does not take a reference. We must unregister in the dtor. kernel_state_->RegisterThread(this); + + // Allocate thread state block from heap. + // https://web.archive.org/web/20170704035330/https://www.microsoft.com/msj/archive/S2CE.aspx + // This is set as r13 for user code and some special inlined Win32 calls + // (like GetLastError/etc) will poke it directly. + // We try to use it as our primary store of data just to keep things all + // consistent. + // 0x000: pointer to tls data + // 0x100: pointer to TEB(?) + // 0x10C: Current CPU(?) 
+ // 0x150: if >0 then error states don't get set (DPC active bool?) + // TEB: + // 0x14C: thread id + // 0x160: last error + // So, at offset 0x100 we have a 4b pointer to offset 200, then have the + // structure. + // pcr_address_ = memory()->SystemHeapAlloc(0x2D8); + // if (!pcr_address_) { + + //} + + // Allocate processor thread state. + // This is thread safe. + thread_state_ = cpu::ThreadState::Create(kernel_state->processor(), + this->handle(), stack_base_, 0); + XELOGI("XThread{:08X} ({:X}) Stack: {:08X}-{:08X}", handle(), handle(), + stack_limit_, stack_base_); + + // Exports use this to get the kernel. + thread_state_->context()->kernel_state = kernel_state_; } XThread::~XThread() { @@ -86,57 +130,99 @@ XThread::~XThread() { kernel_state_->UnregisterThread(this); // Notify processor of our impending destruction. - emulator()->processor()->OnThreadDestroyed(thread_id_); + emulator()->processor()->OnThreadDestroyed(thread_id()); - thread_.reset(); + fiber_.reset(); if (thread_state_) { delete thread_state_; } kernel_state()->memory()->SystemHeapFree(tls_static_address_); - kernel_state()->memory()->SystemHeapFree(pcr_address_); - FreeStack(); +} + +bool XThread::IsInThread() { + // return Thread::IsInThread(); + xenia_assert(false); + return false; +} + +XThread* XThread::FromGuest(X_KTHREAD* guest_thread) { + return reinterpret_cast(guest_thread->host_xthread_stash); +} - if (thread_) { - // TODO(benvanik): platform kill - XELOGE("Thread disposed without exiting"); +#if !LOOKUP_XTHREAD_FROM_KTHREAD +static threading::TlsHandle g_current_xthread_fls = + threading::kInvalidTlsHandle; + +struct handle_initializer_t { + handle_initializer_t() { + g_current_xthread_fls = threading::AllocateFlsHandle(); } + ~handle_initializer_t() { threading::FreeFlsHandle(g_current_xthread_fls); } +} handle_initializer; + +static XThread* GetFlsXThread() { + return reinterpret_cast( + threading::GetFlsValue(g_current_xthread_fls)); +} + +bool XThread::IsInThread(XThread* 
other) { return GetFlsXThread() == other; } + +XThread* XThread::GetCurrentThread() { + XThread* thread = GetFlsXThread(); + if (!thread) { + // assert_always("Attempting to use guest stuff from a non-guest thread."); + } else { + thread->assert_valid(); + } + return thread; +} + +void XThread::SetCurrentThread(XThread* thrd) { + threading::SetFlsValue(g_current_xthread_fls, (uintptr_t)thrd); } -thread_local XThread* current_xthread_tls_ = nullptr; +#else +static XThread* GetFlsXThread() { + auto context = cpu::ThreadState::GetContext(); -bool XThread::IsInThread() { return Thread::IsInThread(); } + auto kthread = GetKThread(context); -bool XThread::IsInThread(XThread* other) { - return current_xthread_tls_ == other; + return XThread::FromGuest(kthread); } +bool XThread::IsInThread(XThread* other) { return GetFlsXThread() == other; } + XThread* XThread::GetCurrentThread() { - XThread* thread = reinterpret_cast(current_xthread_tls_); + XThread* thread = GetFlsXThread(); if (!thread) { - assert_always("Attempting to use guest stuff from a non-guest thread."); + // assert_always("Attempting to use guest stuff from a non-guest thread."); + } else { + thread->assert_valid(); } return thread; } +void XThread::SetCurrentThread(XThread* thrd) { + // threading::SetFlsValue(g_current_xthread_fls, (uintptr_t)thrd); +} +#endif + +void XThread::SetCurrentThread() { SetCurrentThread(this); } + uint32_t XThread::GetCurrentThreadHandle() { XThread* thread = XThread::GetCurrentThread(); return thread->handle(); } -uint32_t XThread::GetCurrentThreadId() { - XThread* thread = XThread::GetCurrentThread(); - return thread->guest_object()->thread_id; -} +uint32_t XThread::GetCurrentThreadId() { return GetKThread()->thread_id; } -uint32_t XThread::GetLastError() { - XThread* thread = XThread::GetCurrentThread(); - return thread->last_error(); -} +uint32_t XThread::GetLastError() { return GetKThread()->last_error; } void XThread::SetLastError(uint32_t error_code) { - XThread* thread = 
XThread::GetCurrentThread(); - thread->set_last_error(error_code); + // XThread* thread = XThread::GetCurrentThread(); + // thread->set_last_error(error_code); + GetKThread()->last_error = error_code; } uint32_t XThread::last_error() { return guest_object()->last_error; } @@ -146,134 +232,129 @@ void XThread::set_last_error(uint32_t error_code) { } void XThread::set_name(const std::string_view name) { - thread_name_ = fmt::format("{} ({:08X})", name, handle()); - - if (thread_) { - // May be getting set before the thread is created. - // One the thread is ready it will handle it. - thread_->set_name(thread_name_); - } -} - -static uint8_t next_cpu = 0; -static uint8_t GetFakeCpuNumber(uint8_t proc_mask) { - // NOTE: proc_mask is logical processors, not physical processors or cores. - if (!proc_mask) { - next_cpu = (next_cpu + 1) % 6; - return next_cpu; // is this reasonable? - // TODO(Triang3l): Does the following apply here? - // https://docs.microsoft.com/en-us/windows/win32/dxtecharts/coding-for-multiple-cores - // "On Xbox 360, you must explicitly assign software threads to a particular - // hardware thread by using XSetThreadProcessor. Otherwise, all child - // threads will stay on the same hardware thread as the parent." - } - assert_false(proc_mask & 0xC0); - - uint8_t cpu_number = 7 - xe::lzcnt(proc_mask); - assert_true(1 << cpu_number == proc_mask); - assert_true(cpu_number < 6); - return cpu_number; + fiber()->set_name(std::string{name}); } void XThread::InitializeGuestObject() { + /* + * not doing this right at all! we're not using our threads context, because + * we may be on the host and have no underlying context. in reality we should + * have a context and acquire any locks using that context! + */ + auto context_here = cpu::ThreadState::GetContext(); auto guest_thread = guest_object(); auto thread_guest_ptr = guest_object(); guest_thread->header.type = 6; - guest_thread->suspend_count = - (creation_params_.creation_flags & X_CREATE_SUSPENDED) ? 
1 : 0; - - guest_thread->unk_10 = (thread_guest_ptr + 0x10); - guest_thread->unk_14 = (thread_guest_ptr + 0x10); - guest_thread->unk_40 = (thread_guest_ptr + 0x20); - guest_thread->unk_44 = (thread_guest_ptr + 0x20); - guest_thread->unk_48 = (thread_guest_ptr); - uint32_t v6 = thread_guest_ptr + 0x18; - *(uint32_t*)&guest_thread->unk_54 = 16777729; - guest_thread->unk_4C = (v6); - guest_thread->stack_base = (this->stack_base_); - guest_thread->stack_limit = (this->stack_limit_); - guest_thread->stack_kernel = (this->stack_base_ - 240); - guest_thread->tls_address = (this->tls_static_address_); - guest_thread->thread_state = 0; + util::XeInitializeListHead(&guest_thread->header.wait_list, context_here); + auto guest_globals = kernel_state()->GetKernelGuestGlobals(context_here); + util::XeInitializeListHead(&guest_thread->mutants_list, memory()); uint32_t process_info_block_address = creation_params_.guest_process ? creation_params_.guest_process : this->kernel_state_->GetTitleProcess(); X_KPROCESS* process = memory()->TranslateVirtual(process_info_block_address); - uint32_t kpcrb = pcr_address_ + offsetof(X_KPCR, prcb_data); - auto process_type = process->process_type; + + xboxkrnl::xeKeInitializeTimerEx(&guest_thread->wait_timeout_timer, 0, + process_type, context_here); + + xboxkrnl::xeKeInitializeApc(&guest_thread->on_suspend, thread_guest_ptr, + guest_globals->guest_nullsub, 0, + guest_globals->suspendthread_apc_routine, 0, 0); + + xboxkrnl::xeKeInitializeSemaphore(&guest_thread->suspend_sema, 0, 2); + + guest_thread->wait_timeout_block.object = + memory()->HostToGuestVirtual(&guest_thread->wait_timeout_timer); + guest_thread->wait_timeout_block.wait_type = 1; + guest_thread->wait_timeout_block.thread = thread_guest_ptr; + + auto timer_wait_header_list_entry = memory()->HostToGuestVirtual( + &guest_thread->wait_timeout_timer.header.wait_list); + + guest_thread->wait_timeout_block.wait_list_entry.blink_ptr = + timer_wait_header_list_entry; + 
guest_thread->wait_timeout_block.wait_list_entry.flink_ptr = + timer_wait_header_list_entry; + guest_thread->wait_timeout_block.wait_result_xstatus = X_STATUS_TIMEOUT; + guest_thread->stack_base = (this->stack_base_); + guest_thread->stack_limit = (this->stack_limit_); + guest_thread->stack_kernel = (this->stack_base_ - 240); + guest_thread->tls_address = this->tls_dynamic_address_; + guest_thread->thread_state = KTHREAD_STATE_INITIALIZED; + guest_thread->process_type_dup = process_type; guest_thread->process_type = process_type; guest_thread->apc_lists[0].Initialize(memory()); guest_thread->apc_lists[1].Initialize(memory()); - guest_thread->a_prcb_ptr = kpcrb; - guest_thread->another_prcb_ptr = kpcrb; + auto current_pcr = GetKPCR(context_here); + guest_thread->a_prcb_ptr = ¤t_pcr->prcb_data; + guest_thread->another_prcb_ptr = ¤t_pcr->prcb_data; + guest_thread->current_cpu = current_pcr->prcb_data.current_cpu; guest_thread->may_queue_apcs = 1; guest_thread->msr_mask = 0xFDFFD7FF; guest_thread->process = process_info_block_address; guest_thread->stack_alloc_base = this->stack_base_; - guest_thread->create_time = Clock::QueryGuestSystemTime(); - guest_thread->unk_144 = thread_guest_ptr + 324; - guest_thread->unk_148 = thread_guest_ptr + 324; - guest_thread->thread_id = this->thread_id_; + guest_thread->create_time = context_here->kernel_state->GetKernelSystemTime(); + util::XeInitializeListHead(&guest_thread->timer_list, memory()); + + guest_thread->thread_id = thread_id_; guest_thread->start_address = this->creation_params_.start_address; - guest_thread->unk_154 = thread_guest_ptr + 340; + util::XeInitializeListHead(&guest_thread->unk_154, context_here); uint32_t v9 = thread_guest_ptr; guest_thread->last_error = 0; - guest_thread->unk_158 = v9 + 340; guest_thread->creation_flags = this->creation_params_.creation_flags; - guest_thread->unk_17C = 1; - /* - * not doing this right at all! 
we're not using our threads context, because - * we may be on the host and have no underlying context. in reality we should - * have a context and acquire any locks using that context! - */ - auto context_here = thread_state_->context(); + guest_thread->host_xthread_stash = reinterpret_cast(this); + + guest_thread->thread_state = KTHREAD_STATE_INITIALIZED; + + // priority related values + guest_thread->unk_C8 = process->unk_18; + auto v19 = process->unk_19; + guest_thread->unk_C9 = v19; + auto v20 = process->unk_1A; + guest_thread->unk_B9 = v19; + guest_thread->priority = v19; + guest_thread->unk_CA = v20; + // timeslice related + guest_thread->quantum = process->quantum; + + guest_thread->tls_address = tls_static_address_; + + guest_thread->stack_base = stack_base_; + guest_thread->stack_limit = stack_limit_; + kernel_state()->InitKernelAuxstack(guest_thread); auto old_irql = xboxkrnl::xeKeKfAcquireSpinLock( context_here, &process->thread_list_spinlock); + context_here->kernel_state->LockDispatcherAtIrql(context_here); // todo: acquire dispatcher lock here? util::XeInsertTailList(&process->thread_list, &guest_thread->process_threads, context_here); process->thread_count += 1; + context_here->kernel_state->UnlockDispatcherAtIrql(context_here); // todo: release dispatcher lock here? 
xboxkrnl::xeKeKfReleaseSpinLock(context_here, &process->thread_list_spinlock, old_irql); } -bool XThread::AllocateStack(uint32_t size) { - auto heap = memory()->LookupHeap(kStackAddressRangeBegin); - - auto alignment = heap->page_size(); - auto padding = heap->page_size() * 2; // Guard page size * 2 - size = xe::round_up(size, alignment); - auto actual_size = size + padding; - - uint32_t address = 0; - if (!heap->AllocRange( - kStackAddressRangeBegin, kStackAddressRangeEnd, actual_size, - alignment, kMemoryAllocationReserve | kMemoryAllocationCommit, - kMemoryProtectRead | kMemoryProtectWrite, false, &address)) { - return false; - } - - stack_alloc_base_ = address; - stack_alloc_size_ = actual_size; - stack_limit_ = address + (padding / 2); - stack_base_ = stack_limit_ + size; +uint32_t XThread::thread_id() const { return thread_id_; } - // Initialize the stack with junk - memory()->Fill(stack_alloc_base_, actual_size, 0xBE); +bool XThread::AllocateStack(uint32_t size) { + uint32_t kstack = xboxkrnl::xeMmCreateKernelStack( + size, 1); // if 0, allocates for current process type. 
if 1, allocates + // title memory, if 2, allocates system memory - // Setup the guard pages - heap->Protect(stack_alloc_base_, padding / 2, kMemoryProtectNoAccess); - heap->Protect(stack_base_, padding / 2, kMemoryProtectNoAccess); + stack_alloc_base_ = kstack; + stack_alloc_size_ = size; + stack_limit_ = kstack - size; + stack_base_ = kstack; + thread_state_->context()->r[1] = kstack; return true; } @@ -281,7 +362,11 @@ bool XThread::AllocateStack(uint32_t size) { void XThread::FreeStack() { if (stack_alloc_base_) { auto heap = memory()->LookupHeap(kStackAddressRangeBegin); - heap->Release(stack_alloc_base_); + uint32_t region_size = 0; + heap->Release(stack_alloc_base_, ®ion_size); + xenia_assert(region_size); + kernel_state()->object_table()->FlushGuestToHostMapping(stack_alloc_base_, + region_size); stack_alloc_base_ = 0; stack_alloc_size_ = 0; @@ -291,12 +376,20 @@ void XThread::FreeStack() { } X_STATUS XThread::Create() { - // Thread kernel object. - if (!CreateNative()) { - XELOGW("Unable to allocate thread object"); - return X_STATUS_NO_MEMORY; - } + auto context = cpu::ThreadState::GetContext(); + auto guest_globals = context->TranslateVirtual( + kernel_state()->GetKernelGuestGlobals()); + uint32_t created_object = 0; + X_STATUS create_status = + xboxkrnl::xeObCreateObject(&guest_globals->ExThreadObjectType, nullptr, + sizeof(X_KTHREAD), &created_object, context); + + Retain(); + if (create_status != X_STATUS_SUCCESS) { + return create_status; + } + SetNativePointer(created_object); // Allocate a stack. if (!AllocateStack(creation_params_.stack_size)) { return X_STATUS_NO_MEMORY; @@ -339,82 +432,52 @@ X_STATUS XThread::Create() { tls_header->raw_data_size); } - // Allocate thread state block from heap. - // https://web.archive.org/web/20170704035330/https://www.microsoft.com/msj/archive/S2CE.aspx - // This is set as r13 for user code and some special inlined Win32 calls - // (like GetLastError/etc) will poke it directly. 
- // We try to use it as our primary store of data just to keep things all - // consistent. - // 0x000: pointer to tls data - // 0x100: pointer to TEB(?) - // 0x10C: Current CPU(?) - // 0x150: if >0 then error states don't get set (DPC active bool?) - // TEB: - // 0x14C: thread id - // 0x160: last error - // So, at offset 0x100 we have a 4b pointer to offset 200, then have the - // structure. - pcr_address_ = memory()->SystemHeapAlloc(0x2D8); - if (!pcr_address_) { - XELOGW("Unable to allocate thread state block"); - return X_STATUS_NO_MEMORY; - } - - // Allocate processor thread state. - // This is thread safe. - thread_state_ = new cpu::ThreadState(kernel_state()->processor(), thread_id_, - stack_base_, pcr_address_); - XELOGI("XThread{:08X} ({:X}) Stack: {:08X}-{:08X}", handle(), thread_id_, - stack_limit_, stack_base_); - - // Exports use this to get the kernel. - thread_state_->context()->kernel_state = kernel_state_; - - uint8_t cpu_index = GetFakeCpuNumber( - static_cast(creation_params_.creation_flags >> 24)); + // Assign the newly created thread to the logical processor, and also set up + // the current CPU in KPCR and KTHREAD + // SetActiveCpu(cpu_index, true); // Initialize the KTHREAD object. InitializeGuestObject(); - X_KPCR* pcr = memory()->TranslateVirtual(pcr_address_); - - pcr->tls_ptr = tls_static_address_; - pcr->pcr_ptr = pcr_address_; - pcr->prcb_data.current_thread = guest_object(); - pcr->prcb = pcr_address_ + offsetof(X_KPCR, prcb_data); - pcr->host_stash = reinterpret_cast(thread_state_->context()); - pcr->stack_base_ptr = stack_base_; - pcr->stack_end_ptr = stack_limit_; - - pcr->prcb_data.dpc_active = 0; // DPC active bool? - - // Always retain when starting - the thread owns itself until exited. - RetainHandle(); - - xe::threading::Thread::CreationParameters params; - - params.create_suspended = true; + xe::threading::Fiber::CreationParameters params; params.stack_size = 16_MiB; // Allocate a big host stack. 
- thread_ = xe::threading::Thread::Create(params, [this]() { - // Set thread ID override. This is used by logging. - xe::threading::set_current_thread_id(handle()); - // Set name immediately, if we have one. - thread_->set_name(thread_name_); - - // Profiler needs to know about the thread. - xe::Profiler::ThreadEnter(thread_name_.c_str()); + if ((creation_params_.creation_flags & XE_FLAG_THREAD_INITIALLY_SUSPENDED) != + 0) { + xboxkrnl::xeKeSuspendThread(cpu::ThreadState::GetContext(), + guest_object()); + } + uint32_t affinity_by = + static_cast(creation_params_.creation_flags >> 24); + if (affinity_by) { + SetAffinity(affinity_by); + } + // todo: not sure about this! + if (creation_params()->creation_flags & XE_FLAG_PRIORITY_CLASS2) { + xboxkrnl::xeKeSetPriorityClassThread(cpu::ThreadState::GetContext(), + guest_object(), false); + + } else if ((creation_params()->creation_flags & XE_FLAG_PRIORITY_CLASS1) != + 0) { + xboxkrnl::xeKeSetPriorityClassThread(cpu::ThreadState::GetContext(), + guest_object(), true); + } + fiber_ = xe::threading::Fiber::Create(params, [this]() { + // Execute user code. +#if !LOOKUP_XTHREAD_FROM_KTHREAD + threading::SetFlsValue(g_current_xthread_fls, (uintptr_t)this); +#endif + cpu::ThreadState::Bind(thread_state_); + xenia_assert(GetKThread() == this->guest_object()); - // Execute user code. - current_xthread_tls_ = this; - current_thread_ = this; - cpu::ThreadState::Bind(this->thread_state()); + xenia_assert(static_cast(thread_state_->context()->r[13]) != + thread_state_->context() + ->kernel_state->GetDispatcherLock(thread_state_->context()) + ->pcr_of_owner); running_ = true; Execute(); running_ = false; - current_thread_ = nullptr; - current_xthread_tls_ = nullptr; xe::Profiler::ThreadExit(); @@ -422,80 +485,92 @@ X_STATUS XThread::Create() { ReleaseHandle(); }); - if (!thread_) { + if (!fiber_) { // TODO(benvanik): translate error? 
XELOGE("CreateThread failed"); return X_STATUS_NO_MEMORY; } - - // Set the thread name based on host ID (for easier debugging). - if (thread_name_.empty()) { - set_name(fmt::format("XThread{:04X}", thread_->system_id())); - } - - if (creation_params_.creation_flags & 0x60) { - thread_->set_priority(creation_params_.creation_flags & 0x20 ? 1 : 0); - } - - // Assign the newly created thread to the logical processor, and also set up - // the current CPU in KPCR and KTHREAD. - SetActiveCpu(cpu_index); - - // Notify processor of our creation. - emulator()->processor()->OnThreadCreated(handle(), thread_state_, this); - - if ((creation_params_.creation_flags & X_CREATE_SUSPENDED) == 0) { - // Start the thread now that we're all setup. - thread_->Resume(); - } - + Schedule(); return X_STATUS_SUCCESS; } X_STATUS XThread::Exit(int exit_code) { // This may only be called on the thread itself. assert_true(XThread::GetCurrentThread() == this); + auto cpu_context = thread_state_->context(); + xboxkrnl::xeKfLowerIrql(cpu_context, IRQL_PASSIVE); + + kernel_state()->OnThreadExit(this); + + // Notify processor of our exit. + emulator()->processor()->OnThreadExit(thread_id()); + running_ = false; + // TODO(chrispy): not sure if this order is correct, should it come after // apcs? auto kthread = guest_object(); - auto cpu_context = thread_state_->context(); + kthread->terminated = 1; + xenia_assert(util::XeIsListEmpty(&kthread->timer_list, cpu_context)); + + xboxkrnl::xeKeEnterCriticalRegion(cpu_context); + uint32_t old_irql2 = + xboxkrnl::xeKeKfAcquireSpinLock(cpu_context, &kthread->apc_lock); + + kthread->may_queue_apcs = 0; + // also does some stuff with the suspendsemaphore here, which doesnt make + // sense to me the thread is already running + + xboxkrnl::xeKeKfReleaseSpinLock(cpu_context, &kthread->apc_lock, old_irql2); + xboxkrnl::xeKeLeaveCriticalRegion(cpu_context); + // TODO(benvanik): dispatch events? waiters? etc? RundownAPCs(); - // Set exit code. 
- kthread->header.signal_state = 1; - kthread->exit_status = exit_code; - auto kprocess = cpu_context->TranslateVirtual(kthread->process); uint32_t old_irql = xboxkrnl::xeKeKfAcquireSpinLock( cpu_context, &kprocess->thread_list_spinlock); - util::XeRemoveEntryList(&kthread->process_threads, cpu_context); + // xe::FatalError("Brokey!"); + // NOTE: this does not return! + // xe::threading::Thread::Exit(exit_code); + // return X_STATUS_SUCCESS; + kernel_state()->LockDispatcherAtIrql(cpu_context); - kprocess->thread_count = kprocess->thread_count - 1; + uint32_t queue_guest = kthread->queue; - xboxkrnl::xeKeKfReleaseSpinLock(cpu_context, &kprocess->thread_list_spinlock, - old_irql); + if (queue_guest) { + util::XeRemoveEntryList(&kthread->queue_related, cpu_context); + xboxkrnl::xeKeSignalQueue( + cpu_context, cpu_context->TranslateVirtual(queue_guest)); + } - kernel_state()->OnThreadExit(this); + // Set exit code. + kthread->header.signal_state = 1; + kthread->exit_status = exit_code; - // Notify processor of our exit. - emulator()->processor()->OnThreadExit(thread_id_); + if (!util::XeIsListEmpty(&kthread->header.wait_list, cpu_context)) { + xboxkrnl::xeDispatchSignalStateChange(cpu_context, &kthread->header, 0); + } + util::XeRemoveEntryList(&kthread->process_threads, cpu_context); - // NOTE: unless PlatformExit fails, expect it to never return! - current_xthread_tls_ = nullptr; - current_thread_ = nullptr; - xe::Profiler::ThreadExit(); + xboxkrnl::xeKeKfReleaseSpinLock(cpu_context, &kprocess->thread_list_spinlock, + 0, false); + kthread->thread_state = KTHREAD_STATE_TERMINATED; - running_ = false; - ReleaseHandle(); + util::XeInsertHeadList( + &GetKPCR(cpu_context)->prcb_data.terminating_threads_list, + &kthread->ready_prcb_entry, cpu_context); - // NOTE: this does not return! 
- xe::threading::Thread::Exit(exit_code); - return X_STATUS_SUCCESS; + // unsure about these args + xboxkrnl::xeKeInsertQueueDpc(&GetKPCR(cpu_context)->prcb_data.thread_exit_dpc, + 0, 0, cpu_context); + + xenia_assert(kthread->mutants_list.empty(cpu_context)); + fiber_->SetTerminated(); + return xboxkrnl::xeSchedulerSwitchThread2(cpu_context); } X_STATUS XThread::Terminate(int exit_code) { @@ -507,14 +582,16 @@ X_STATUS XThread::Terminate(int exit_code) { thread->exit_status = exit_code; // Notify processor of our exit. - emulator()->processor()->OnThreadExit(thread_id_); + emulator()->processor()->OnThreadExit(thread_id()); running_ = false; + + xe::FatalError("XThread::Terminate brokey"); if (XThread::IsInThread(this)) { ReleaseHandle(); xe::threading::Thread::Exit(exit_code); } else { - thread_->Terminate(exit_code); + // thread_->Terminate(exit_code); ReleaseHandle(); } @@ -533,18 +610,27 @@ class reenter_exception { void XThread::Execute() { XELOGKERNEL("XThread::Execute thid {} (handle={:08X}, '{}', native={:08X})", - thread_id_, handle(), thread_name_, thread_->system_id()); - // Let the kernel know we are starting. - kernel_state()->OnThreadExecute(this); + thread_id(), handle(), "", 69420); - // All threads get a mandatory sleep. This is to deal with some buggy - // games that are assuming the 360 is so slow to create threads that they - // have time to initialize shared structures AFTER CreateThread (RR). - xe::threading::Sleep(std::chrono::milliseconds(10)); + auto context = thread_state_->context(); + auto kthrd = guest_object(); + // hack!!! not sure atm why the priority is wrong + if (main_thread_) { + xboxkrnl::xeKeSetPriorityThread(context, kthrd, 0xE); + } + // i believe this is possible. 
it just means the thread switching dpc hasnt + // run yet + // xenia_assert(context->TranslateVirtual(kthrd->another_prcb_ptr) == + // &GetKPCR(context)->prcb_data); + cpu::ppc::PPCGprSnapshot snapshot{}; + context->TakeGPRSnapshot(&snapshot); + xboxkrnl::xeKfLowerIrql(thread_state_->context(), IRQL_PASSIVE); - // Dispatch any APCs that were queued before the thread was created first. - DeliverAPCs(); + assert_valid(); + // Let the kernel know we are starting. + kernel_state()->OnThreadExecute(this); + context->RestoreGPRSnapshot(&snapshot); uint32_t address; std::vector args; bool want_exit_code; @@ -592,6 +678,7 @@ void XThread::Execute() { } void XThread::Reenter(uint32_t address) { + assert_valid(); // TODO(gibbed): Maybe use setjmp/longjmp on Windows? // https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/longjmp#remarks // On Windows with /EH, setjmp/longjmp do stack unwinding. @@ -599,16 +686,6 @@ void XThread::Reenter(uint32_t address) { throw reenter_exception(address); } -void XThread::EnterCriticalRegion() { - guest_object()->apc_disable_count--; -} - -void XThread::LeaveCriticalRegion() { - auto kthread = guest_object(); - // this has nothing to do with user mode apcs! - auto apc_disable_count = ++kthread->apc_disable_count; -} - void XThread::EnqueueApc(uint32_t normal_routine, uint32_t normal_context, uint32_t arg1, uint32_t arg2) { // don't use thread_state_ -> context() ! 
we're not running on the thread @@ -619,382 +696,107 @@ void XThread::EnqueueApc(uint32_t normal_routine, uint32_t normal_context, xenia_assert(success == X_STATUS_SUCCESS); } - -void XThread::SetCurrentThread() { current_xthread_tls_ = this; } - void XThread::DeliverAPCs() { + xenia_assert(GetKThread() == guest_object()); // https://www.drdobbs.com/inside-nts-asynchronous-procedure-call/184416590?pgno=1 // https://www.drdobbs.com/inside-nts-asynchronous-procedure-call/184416590?pgno=7 xboxkrnl::xeProcessUserApcs(thread_state_->context()); } void XThread::RundownAPCs() { + xenia_assert(GetKThread() == guest_object()); xboxkrnl::xeRundownApcs(thread_state_->context()); } -int32_t XThread::QueryPriority() { return thread_->priority(); } - -void XThread::SetPriority(int32_t increment) { - if (is_guest_thread()) { - guest_object()->priority = static_cast(increment); - } - priority_ = increment; - int32_t target_priority = 0; - if (increment > 0x22) { - target_priority = xe::threading::ThreadPriority::kHighest; - } else if (increment > 0x11) { - target_priority = xe::threading::ThreadPriority::kAboveNormal; - } else if (increment < -0x22) { - target_priority = xe::threading::ThreadPriority::kLowest; - } else if (increment < -0x11) { - target_priority = xe::threading::ThreadPriority::kBelowNormal; - } else { - target_priority = xe::threading::ThreadPriority::kNormal; - } - if (!cvars::ignore_thread_priorities) { - thread_->set_priority(target_priority); - } -} - void XThread::SetAffinity(uint32_t affinity) { - SetActiveCpu(GetFakeCpuNumber(affinity)); + auto context = cpu::ThreadState::GetContext(); + + uint32_t prev_affinity = 0; + xboxkrnl::xeKeSetAffinityThread(context, guest_object(), affinity, + &prev_affinity); } uint8_t XThread::active_cpu() const { - const X_KPCR& pcr = *memory()->TranslateVirtual(pcr_address_); - return pcr.prcb_data.current_cpu; + return guest_object()->current_cpu; } -void XThread::SetActiveCpu(uint8_t cpu_index) { - // May be called during 
thread creation - don't skip if current == new. - - assert_true(cpu_index < 6); +cpu::HWThread* XThread::HWThread() { + uint32_t cpunum = active_cpu(); - X_KPCR& pcr = *memory()->TranslateVirtual(pcr_address_); - pcr.prcb_data.current_cpu = cpu_index; - - if (is_guest_thread()) { - X_KTHREAD& thread_object = - *memory()->TranslateVirtual(guest_object()); - thread_object.current_cpu = cpu_index; - } + return kernel_state()->processor()->GetCPUThread(cpunum); +} +void XThread::Schedule() { + auto context = + cpu::ThreadState::Get()->context(); // thread_state()->context(); + uint32_t old_irql = kernel_state()->LockDispatcher(context); + xboxkrnl::xeReallyQueueThread(context, guest_object()); + xboxkrnl::xeDispatcherSpinlockUnlock( + context, kernel_state()->GetDispatcherLock(context), old_irql); +} - if (xe::threading::logical_processor_count() >= 6) { - if (!cvars::ignore_thread_affinities) { - thread_->set_affinity_mask(uint64_t(1) << cpu_index); - } - } else { - // there no good reason why we need to log this... 
we don't perfectly - // emulate the 360's scheduler in any way - // XELOGW("Too few processor cores - scheduling will be wonky"); - } +void XThread::SwitchToDirect() { + xenia_assert(cpu::ThreadState::Get() != thread_state()); + xenia_assert(fiber() != threading::Fiber::GetCurrentFiber()); + GetKPCR()->prcb_data.current_thread = guest_object(); + fiber()->SwitchTo(); } -bool XThread::GetTLSValue(uint32_t slot, uint32_t* value_out) { - if (slot * 4 > tls_total_size_) { - return false; - } +void XThread::assert_valid() { + auto current_threadstate = cpu::ThreadState::Get(); + auto expected_threadstate = thread_state(); - auto mem = memory()->TranslateVirtual(tls_dynamic_address_ + slot * 4); - *value_out = xe::load_and_swap(mem); - return true; -} + xenia_assert(current_threadstate == expected_threadstate); -bool XThread::SetTLSValue(uint32_t slot, uint32_t value) { - if (slot * 4 >= tls_total_size_) { - return false; - } + auto context = current_threadstate->context(); - auto mem = memory()->TranslateVirtual(tls_dynamic_address_ + slot * 4); - xe::store_and_swap(mem, value); - return true; + xenia_assert(GetKThread(context) == guest_object()); + + xenia_assert(GetFlsXThread() == this); } uint32_t XThread::suspend_count() { return guest_object()->suspend_count; } -X_STATUS XThread::Resume(uint32_t* out_suspend_count) { - auto guest_thread = guest_object(); - - uint8_t previous_suspend_count = - reinterpret_cast(&guest_thread->suspend_count) - ->fetch_sub(1); - if (out_suspend_count) { - *out_suspend_count = previous_suspend_count; - } - uint32_t unused_host_suspend_count = 0; - if (thread_->Resume(&unused_host_suspend_count)) { - return X_STATUS_SUCCESS; - } else { - return X_STATUS_UNSUCCESSFUL; - } -} - X_STATUS XThread::Suspend(uint32_t* out_suspend_count) { // this normally holds the apc lock for the thread, because it queues a kernel // mode apc that does the actual suspension X_KTHREAD* guest_thread = guest_object(); - uint8_t previous_suspend_count = - 
reinterpret_cast(&guest_thread->suspend_count) - ->fetch_add(1); + int count = + xboxkrnl::xeKeSuspendThread(cpu::ThreadState::GetContext(), guest_thread); + if (out_suspend_count) { - *out_suspend_count = previous_suspend_count; - } - // If we are suspending ourselves, we can't hold the lock. - uint32_t unused_host_suspend_count = 0; - if (thread_->Suspend(&unused_host_suspend_count)) { - return X_STATUS_SUCCESS; - } else { - return X_STATUS_UNSUCCESSFUL; + *out_suspend_count = count; } + + return 0; } X_STATUS XThread::Delay(uint32_t processor_mode, uint32_t alertable, uint64_t interval) { - int64_t timeout_ticks = interval; - uint32_t timeout_ms; - if (timeout_ticks > 0) { - // Absolute time, based on January 1, 1601. - // TODO(benvanik): convert time to relative time. - assert_always(); - timeout_ms = 0; - } else if (timeout_ticks < 0) { - // Relative time. - timeout_ms = uint32_t(-timeout_ticks / 10000); // Ticks -> MS - } else { - timeout_ms = 0; - } - timeout_ms = Clock::ScaleGuestDurationMillis(timeout_ms); - if (alertable) { - auto result = - xe::threading::AlertableSleep(std::chrono::milliseconds(timeout_ms)); - switch (result) { - default: - case xe::threading::SleepResult::kSuccess: - return X_STATUS_SUCCESS; - case xe::threading::SleepResult::kAlerted: - return X_STATUS_USER_APC; - } - } else { - xe::threading::Sleep(std::chrono::milliseconds(timeout_ms)); - return X_STATUS_SUCCESS; - } + xenia_assert(GetKThread() == guest_object()); + return xboxkrnl::xeKeDelayExecutionThread(cpu::ThreadState::GetContext(), + processor_mode, alertable, + (int64_t*)&interval); } -struct ThreadSavedState { - uint32_t thread_id; - bool is_main_thread; // Is this the main thread? 
- bool is_running; - - uint32_t apc_head; - uint32_t tls_static_address; - uint32_t tls_dynamic_address; - uint32_t tls_total_size; - uint32_t pcr_address; - uint32_t stack_base; // High address - uint32_t stack_limit; // Low address - uint32_t stack_alloc_base; // Allocation address - uint32_t stack_alloc_size; // Allocation size - - // Context (invalid if not running) - struct { - uint64_t lr; - uint64_t ctr; - uint64_t r[32]; - double f[32]; - vec128_t v[128]; - uint32_t cr[8]; - uint32_t fpscr; - uint8_t xer_ca; - uint8_t xer_ov; - uint8_t xer_so; - uint8_t vscr_sat; - uint32_t pc; - } context; -}; - bool XThread::Save(ByteStream* stream) { - if (!guest_thread_) { - // Host XThreads are expected to be recreated on their own. - return false; - } - - XELOGD("XThread {:08X} serializing...", handle()); - - uint32_t pc = 0; - if (running_) { - pc = emulator()->processor()->StepToGuestSafePoint(thread_id_); - if (!pc) { - XELOGE("XThread {:08X} failed to save: could not step to a safe point!", - handle()); - assert_always(); - return false; - } - } - - if (!SaveObject(stream)) { - return false; - } - - stream->Write(kThreadSaveSignature); - stream->Write(thread_name_); - - ThreadSavedState state; - state.thread_id = thread_id_; - state.is_main_thread = main_thread_; - state.is_running = running_; - state.tls_static_address = tls_static_address_; - state.tls_dynamic_address = tls_dynamic_address_; - state.tls_total_size = tls_total_size_; - state.pcr_address = pcr_address_; - state.stack_base = stack_base_; - state.stack_limit = stack_limit_; - state.stack_alloc_base = stack_alloc_base_; - state.stack_alloc_size = stack_alloc_size_; - - if (running_) { - // Context information - auto context = thread_state_->context(); - state.context.lr = context->lr; - state.context.ctr = context->ctr; - std::memcpy(state.context.r, context->r, 32 * 8); - std::memcpy(state.context.f, context->f, 32 * 8); - std::memcpy(state.context.v, context->v, 128 * 16); - state.context.cr[0] = 
context->cr0.value; - state.context.cr[1] = context->cr1.value; - state.context.cr[2] = context->cr2.value; - state.context.cr[3] = context->cr3.value; - state.context.cr[4] = context->cr4.value; - state.context.cr[5] = context->cr5.value; - state.context.cr[6] = context->cr6.value; - state.context.cr[7] = context->cr7.value; - state.context.fpscr = context->fpscr.value; - state.context.xer_ca = context->xer_ca; - state.context.xer_ov = context->xer_ov; - state.context.xer_so = context->xer_so; - state.context.vscr_sat = context->vscr_sat; - state.context.pc = pc; - } - - stream->Write(&state, sizeof(ThreadSavedState)); - return true; + xe::FatalError("XThread::Save unimplemented"); + return false; } object_ref XThread::Restore(KernelState* kernel_state, ByteStream* stream) { - // Kind-of a hack, but we need to set the kernel state outside of the object - // constructor so it doesn't register a handle with the object table. - auto thread = new XThread(nullptr); - thread->kernel_state_ = kernel_state; - - if (!thread->RestoreObject(stream)) { - return nullptr; - } - - if (stream->Read() != kThreadSaveSignature) { - XELOGE("Could not restore XThread - invalid magic!"); - return nullptr; - } - - XELOGD("XThread {:08X}", thread->handle()); - - thread->thread_name_ = stream->Read(); - - ThreadSavedState state; - stream->Read(&state, sizeof(ThreadSavedState)); - thread->thread_id_ = state.thread_id; - thread->main_thread_ = state.is_main_thread; - thread->running_ = state.is_running; - thread->tls_static_address_ = state.tls_static_address; - thread->tls_dynamic_address_ = state.tls_dynamic_address; - thread->tls_total_size_ = state.tls_total_size; - thread->pcr_address_ = state.pcr_address; - thread->stack_base_ = state.stack_base; - thread->stack_limit_ = state.stack_limit; - thread->stack_alloc_base_ = state.stack_alloc_base; - thread->stack_alloc_size_ = state.stack_alloc_size; - - // Register now that we know our thread ID. 
- kernel_state->RegisterThread(thread); - - thread->thread_state_ = - new cpu::ThreadState(kernel_state->processor(), thread->thread_id_, - thread->stack_base_, thread->pcr_address_); - - if (state.is_running) { - auto context = thread->thread_state_->context(); - context->kernel_state = kernel_state; - context->lr = state.context.lr; - context->ctr = state.context.ctr; - std::memcpy(context->r, state.context.r, 32 * 8); - std::memcpy(context->f, state.context.f, 32 * 8); - std::memcpy(context->v, state.context.v, 128 * 16); - context->cr0.value = state.context.cr[0]; - context->cr1.value = state.context.cr[1]; - context->cr2.value = state.context.cr[2]; - context->cr3.value = state.context.cr[3]; - context->cr4.value = state.context.cr[4]; - context->cr5.value = state.context.cr[5]; - context->cr6.value = state.context.cr[6]; - context->cr7.value = state.context.cr[7]; - context->fpscr.value = state.context.fpscr; - context->xer_ca = state.context.xer_ca; - context->xer_ov = state.context.xer_ov; - context->xer_so = state.context.xer_so; - context->vscr_sat = state.context.vscr_sat; - - // Always retain when starting - the thread owns itself until exited. - thread->RetainHandle(); - - xe::threading::Thread::CreationParameters params; - params.create_suspended = true; // Not done restoring yet. - params.stack_size = 16_MiB; - thread->thread_ = xe::threading::Thread::Create(params, [thread, state]() { - // Set thread ID override. This is used by logging. - xe::threading::set_current_thread_id(thread->handle()); - - // Set name immediately, if we have one. - thread->thread_->set_name(thread->name()); - - // Profiler needs to know about the thread. 
- xe::Profiler::ThreadEnter(thread->name().c_str()); - - current_xthread_tls_ = thread; - current_thread_ = thread; - - // Acquire any mutants - for (auto mutant : thread->pending_mutant_acquires_) { - uint64_t timeout = 0; - auto status = mutant->Wait(0, 0, 0, &timeout); - assert_true(status == X_STATUS_SUCCESS); - } - thread->pending_mutant_acquires_.clear(); - - // Execute user code. - thread->running_ = true; - - uint32_t pc = state.context.pc; - thread->kernel_state_->processor()->ExecuteRaw(thread->thread_state_, pc); - - current_thread_ = nullptr; - current_xthread_tls_ = nullptr; - - xe::Profiler::ThreadExit(); - - // Release the self-reference to the thread. - thread->ReleaseHandle(); - }); - assert_not_null(thread->thread_); + xe::FatalError("XThread::Restore unimplemented"); - // Notify processor we were recreated. - thread->emulator()->processor()->OnThreadCreated( - thread->handle(), thread->thread_state(), thread); - } - - return object_ref(thread); + return object_ref(nullptr); +} +void XHostThread::XHostThreadForwarder(cpu::ppc::PPCContext* context, void* ud1, + void* ud2) { + auto host_thrd = reinterpret_cast(ud1); + context->r[3] = host_thrd->host_fn_(); } XHostThread::XHostThread(KernelState* kernel_state, uint32_t stack_size, @@ -1003,21 +805,16 @@ XHostThread::XHostThread(KernelState* kernel_state, uint32_t stack_size, : XThread(kernel_state, stack_size, 0, 0, 0, creation_flags, false, false, guest_process), host_fn_(host_fn) { - // By default host threads are not debugger suspendable. If the thread runs - // any guest code this must be overridden. 
- can_debugger_suspend_ = false; + host_trampoline = kernel_state->processor()->backend()->CreateGuestTrampoline( + &XHostThread::XHostThreadForwarder, this, nullptr, false); + creation_params_.start_address = host_trampoline; } - -void XHostThread::Execute() { - XELOGKERNEL( - "XThread::Execute thid {} (handle={:08X}, '{}', native={:08X}, )", - thread_id_, handle(), thread_name_, thread_->system_id()); - // Let the kernel know we are starting. - kernel_state()->OnThreadExecute(this); - int ret = host_fn_(); - - // Exit. - Exit(ret); +XHostThread::~XHostThread() { + if (host_trampoline) { + kernel_state()->processor()->backend()->FreeGuestTrampoline( + host_trampoline); + host_trampoline = 0U; + } } } // namespace kernel diff --git a/src/xenia/kernel/xthread.h b/src/xenia/kernel/xthread.h index 5dd562afe4..235f29b3c4 100644 --- a/src/xenia/kernel/xthread.h +++ b/src/xenia/kernel/xthread.h @@ -20,8 +20,7 @@ #include "xenia/kernel/util/native_list.h" #include "xenia/kernel/xmutant.h" #include "xenia/kernel/xobject.h" -#include "xenia/xbox.h" - +#include "xenia/kernel/kernel_guest_structures.h" namespace xe { namespace kernel { @@ -29,248 +28,21 @@ constexpr fourcc_t kThreadSaveSignature = make_fourcc("THRD"); class XEvent; -constexpr uint32_t X_CREATE_SUSPENDED = 0x00000001; - constexpr uint32_t X_TLS_OUT_OF_INDEXES = UINT32_MAX; -struct XDPC { - xe::be type; - uint8_t selected_cpu_number; - uint8_t desired_cpu_number; - X_LIST_ENTRY list_entry; - xe::be routine; - xe::be context; - xe::be arg1; - xe::be arg2; - - void Initialize(uint32_t guest_func, uint32_t guest_context) { - type = 19; - selected_cpu_number = 0; - desired_cpu_number = 0; - routine = guest_func; - context = guest_context; - } -}; - -struct XAPC { - static const uint32_t kSize = 40; - static const uint32_t kDummyKernelRoutine = 0xF00DFF00; - static const uint32_t kDummyRundownRoutine = 0xF00DFF01; - - // KAPC is 0x28(40) bytes? 
(what's passed to ExAllocatePoolWithTag) - // This is 4b shorter than NT - looks like the reserved dword at +4 is gone. - // NOTE: stored in guest memory. - uint16_t type; // +0 - uint8_t apc_mode; // +2 - uint8_t enqueued; // +3 - xe::be thread_ptr; // +4 - X_LIST_ENTRY list_entry; // +8 - xe::be kernel_routine; // +16 - xe::be rundown_routine; // +20 - xe::be normal_routine; // +24 - xe::be normal_context; // +28 - xe::be arg1; // +32 - xe::be arg2; // +36 -}; - -struct X_KSEMAPHORE { - X_DISPATCH_HEADER header; - xe::be limit; -}; -static_assert_size(X_KSEMAPHORE, 0x14); - -struct X_KTHREAD; -struct X_KPROCESS; -struct X_KPRCB { - TypedGuestPointer current_thread; // 0x0 - TypedGuestPointer unk_4; // 0x4 - TypedGuestPointer idle_thread; // 0x8 - uint8_t current_cpu; // 0xC - uint8_t unk_D[3]; // 0xD - // should only have 1 bit set, used for ipis - xe::be processor_mask; // 0x10 - // incremented in clock interrupt - xe::be dpc_clock; // 0x14 - xe::be interrupt_clock; // 0x18 - xe::be unk_1C; // 0x1C - xe::be unk_20; // 0x20 - // various fields used by KeIpiGenericCall - xe::be ipi_args[3]; // 0x24 - // looks like the target cpus clear their corresponding bit - // in this mask to signal completion to the initiator - xe::be targeted_ipi_cpus_mask; // 0x30 - xe::be ipi_function; // 0x34 - // used to synchronize? 
- TypedGuestPointer ipi_initiator_prcb; // 0x38 - xe::be unk_3C; // 0x3C - xe::be dpc_related_40; // 0x40 - // must be held to modify any dpc-related fields in the kprcb - xe::be dpc_lock; // 0x44 - X_LIST_ENTRY queued_dpcs_list_head; // 0x48 - xe::be dpc_active; // 0x50 - xe::be unk_54; // 0x54 - xe::be unk_58; // 0x58 - // definitely scheduler related - X_SINGLE_LIST_ENTRY unk_5C; // 0x5C - xe::be unk_60; // 0x60 - // i think the following mask has something to do with the array that comes - // after - xe::be unk_mask_64; // 0x64 - - X_LIST_ENTRY unk_68[32]; // 0x68 - // ExTerminateThread tail calls a function that does KeInsertQueueDpc of this - // dpc - XDPC thread_exit_dpc; // 0x168 - // thread_exit_dpc's routine drains this list and frees each threads threadid, - // kernel stack and dereferences the thread - X_LIST_ENTRY terminating_threads_list; // 0x184 - XDPC unk_18C; // 0x18C -}; -// Processor Control Region -struct X_KPCR { - xe::be tls_ptr; // 0x0 - xe::be msr_mask; // 0x4 - union { - xe::be software_interrupt_state; // 0x8 - struct { - uint8_t unknown_8; // 0x8 - uint8_t apc_software_interrupt_state; // 0x9 - }; - }; - uint8_t unk_0A[2]; // 0xA - uint8_t processtype_value_in_dpc; // 0xC - uint8_t unk_0D[3]; // 0xD - // used in KeSaveFloatingPointState / its vmx counterpart - xe::be thread_fpu_related; // 0x10 - xe::be thread_vmx_related; // 0x14 - uint8_t current_irql; // 0x18 - uint8_t unk_19[0x17]; // 0x19 - xe::be pcr_ptr; // 0x30 - - // this seems to be just garbage data? we can stash a pointer to context here - // as a hack for now - union { - uint8_t unk_38[8]; // 0x38 - uint64_t host_stash; // 0x38 - }; - uint8_t unk_40[28]; // 0x40 - xe::be unk_stack_5c; // 0x5C - uint8_t unk_60[12]; // 0x60 - xe::be use_alternative_stack; // 0x6C - xe::be stack_base_ptr; // 0x70 Stack base address (high addr) - xe::be stack_end_ptr; // 0x74 Stack end (low addr) - - // maybe these are the stacks used in apcs? 
- // i know they're stacks, RtlGetStackLimits returns them if another var here - // is set - - xe::be alt_stack_base_ptr; // 0x78 - xe::be alt_stack_end_ptr; // 0x7C - // if bit 1 is set in a handler pointer, it actually points to a KINTERRUPT - // otherwise, it points to a function to execute - xe::be interrupt_handlers[32]; // 0x80 - X_KPRCB prcb_data; // 0x100 - // pointer to KPCRB? - TypedGuestPointer prcb; // 0x2A8 - uint8_t unk_2AC[0x2C]; // 0x2AC -}; -struct X_KTHREAD { - X_DISPATCH_HEADER header; // 0x0 - xe::be unk_10; // 0x10 - xe::be unk_14; // 0x14 - uint8_t unk_18[0x28]; // 0x10 - xe::be unk_40; // 0x40 - xe::be unk_44; // 0x44 - xe::be unk_48; // 0x48 - xe::be unk_4C; // 0x4C - uint8_t unk_50[0x4]; // 0x50 - xe::be unk_54; // 0x54 - xe::be unk_56; // 0x56 - uint8_t unk_58[0x4]; // 0x58 - xe::be stack_base; // 0x5C - xe::be stack_limit; // 0x60 - xe::be stack_kernel; // 0x64 - xe::be tls_address; // 0x68 - // state = is thread running, suspended, etc - uint8_t thread_state; // 0x6C - // 0x70 = priority? 
- uint8_t unk_6D[0x3]; // 0x6D - uint8_t priority; // 0x70 - uint8_t fpu_exceptions_on; // 0x71 - // these two process types both get set to the same thing, process_type is - // referenced most frequently, however process_type_dup gets referenced a few - // times while the process is being created - uint8_t process_type_dup; - uint8_t process_type; - //apc_mode determines which list an apc goes into - util::X_TYPED_LIST apc_lists[2]; - TypedGuestPointer process; // 0x84 - uint8_t unk_88[0x3]; // 0x88 - uint8_t may_queue_apcs; // 0x8B - X_KSPINLOCK apc_lock; // 0x8C - uint8_t unk_90[0xC]; // 0x90 - xe::be msr_mask; // 0x9C - uint8_t unk_A0[4]; // 0xA0 - uint8_t unk_A4; // 0xA4 - uint8_t unk_A5[0xB]; // 0xA5 - int32_t apc_disable_count; // 0xB0 - uint8_t unk_B4[4]; // 0xB4 - uint8_t unk_B8; // 0xB8 - uint8_t unk_B9; // 0xB9 - uint8_t unk_BA; // 0xBA - uint8_t boost_disabled; // 0xBB - uint8_t suspend_count; // 0xBC - uint8_t unk_BD; // 0xBD - uint8_t terminated; // 0xBE - uint8_t current_cpu; // 0xBF - // these two pointers point to KPRCBs, but seem to be rarely referenced, if at - // all - TypedGuestPointer a_prcb_ptr; // 0xC0 - TypedGuestPointer another_prcb_ptr; // 0xC4 - uint8_t unk_C8[8]; // 0xC8 - xe::be stack_alloc_base; // 0xD0 - // uint8_t unk_D4[0x5C]; // 0xD4 - XAPC on_suspend; // 0xD4 - X_KSEMAPHORE unk_FC; // 0xFC - // this is an entry in - X_LIST_ENTRY process_threads; // 0x110 - xe::be unk_118; // 0x118 - xe::be unk_11C; // 0x11C - xe::be unk_120; // 0x120 - xe::be unk_124; // 0x124 - xe::be unk_128; // 0x128 - xe::be unk_12C; // 0x12C - xe::be create_time; // 0x130 - xe::be exit_time; // 0x138 - xe::be exit_status; // 0x140 - xe::be unk_144; // 0x144 - xe::be unk_148; // 0x148 - xe::be thread_id; // 0x14C - xe::be start_address; // 0x150 - xe::be unk_154; // 0x154 - xe::be unk_158; // 0x158 - uint8_t unk_15C[0x4]; // 0x15C - xe::be last_error; // 0x160 - xe::be fiber_ptr; // 0x164 - uint8_t unk_168[0x4]; // 0x168 - xe::be creation_flags; // 0x16C - 
uint8_t unk_170[0xC]; // 0x170 - xe::be unk_17C; // 0x17C - uint8_t unk_180[0x930]; // 0x180 - - // This struct is actually quite long... so uh, not filling this out! -}; -static_assert_size(X_KTHREAD, 0xAB0); +X_KPCR* GetKPCR(); +X_KPCR* GetKPCR(cpu::ppc::PPCContext* context); +X_KTHREAD* GetKThread(); +X_KTHREAD* GetKThread(cpu::ppc::PPCContext* context); +X_KTHREAD* GetKThread(cpu::ppc::PPCContext* context, X_KPCR* pcr); -class XThread : public XObject, public cpu::Thread { +class XThread : public XObject { public: static const XObject::Type kObjectType = XObject::Type::Thread; static constexpr uint32_t kStackAddressRangeBegin = 0x70000000; static constexpr uint32_t kStackAddressRangeEnd = 0x7F000000; - static constexpr uint32_t kThreadKernelStackSize = 0xF0; - struct CreationParams { uint32_t stack_size; uint32_t xapi_thread_startup; @@ -293,18 +65,18 @@ class XThread : public XObject, public cpu::Thread { static uint32_t GetCurrentThreadHandle(); static uint32_t GetCurrentThreadId(); + static XThread* FromGuest(X_KTHREAD* guest_thread); static uint32_t GetLastError(); static void SetLastError(uint32_t error_code); const CreationParams* creation_params() const { return &creation_params_; } uint32_t tls_ptr() const { return tls_static_address_; } - uint32_t pcr_ptr() const { return pcr_address_; } // True if the thread is created by the guest app. 
bool is_guest_thread() const { return guest_thread_; } bool main_thread() const { return main_thread_; } bool is_running() const { return running_; } - uint32_t thread_id() const { return thread_id_; } + uint32_t thread_id() const; uint32_t last_error(); void set_last_error(uint32_t error_code); void set_name(const std::string_view name); @@ -317,16 +89,9 @@ class XThread : public XObject, public cpu::Thread { virtual void Reenter(uint32_t address); - void EnterCriticalRegion(); - void LeaveCriticalRegion(); - void EnqueueApc(uint32_t normal_routine, uint32_t normal_context, uint32_t arg1, uint32_t arg2); - int32_t priority() const { return priority_; } - int32_t QueryPriority(); - void SetPriority(int32_t increment); - // Xbox thread IDs: // 0 - core 0, thread 0 - user // 1 - core 0, thread 1 - user @@ -336,28 +101,31 @@ class XThread : public XObject, public cpu::Thread { // 5 - core 2, thread 1 - user void SetAffinity(uint32_t affinity); uint8_t active_cpu() const; - void SetActiveCpu(uint8_t cpu_index); - bool GetTLSValue(uint32_t slot, uint32_t* value_out); - bool SetTLSValue(uint32_t slot, uint32_t value); + void assert_valid(); uint32_t suspend_count(); - X_STATUS Resume(uint32_t* out_suspend_count = nullptr); X_STATUS Suspend(uint32_t* out_suspend_count = nullptr); X_STATUS Delay(uint32_t processor_mode, uint32_t alertable, uint64_t interval); - xe::threading::Thread* thread() { return thread_.get(); } + xe::threading::Thread* thread() { return nullptr; } virtual bool Save(ByteStream* stream) override; static object_ref Restore(KernelState* kernel_state, ByteStream* stream); - - // Internal - do not use. 
- void AcquireMutantOnStartup(object_ref mutant) { - pending_mutant_acquires_.push_back(mutant); - } + static void SetCurrentThread(XThread* thrd); void SetCurrentThread(); + void Schedule(); + + void SwitchToDirect(); + + + cpu::HWThread* HWThread(); + cpu::ThreadState* thread_state() { return thread_state_; } + bool can_debugger_suspend() { return false; } + threading::Fiber* fiber() { return fiber_.get(); } + protected: bool AllocateStack(uint32_t size); void FreeStack(); @@ -366,17 +134,17 @@ class XThread : public XObject, public cpu::Thread { void DeliverAPCs(); void RundownAPCs(); - xe::threading::WaitHandle* GetWaitHandle() override { return thread_.get(); } + xe::threading::WaitHandle* GetWaitHandle() override { return fiber_.get(); } CreationParams creation_params_ = {0}; - std::vector> pending_mutant_acquires_; + std::unique_ptr fiber_; + cpu::ThreadState* thread_state_; - uint32_t thread_id_ = 0; + // uint32_t thread_id_ = 0; uint32_t tls_static_address_ = 0; uint32_t tls_dynamic_address_ = 0; uint32_t tls_total_size_ = 0; - uint32_t pcr_address_ = 0; uint32_t stack_alloc_base_ = 0; // Stack alloc base uint32_t stack_alloc_size_ = 0; // Stack alloc size uint32_t stack_base_ = 0; // High address @@ -384,19 +152,21 @@ class XThread : public XObject, public cpu::Thread { bool guest_thread_ = false; bool main_thread_ = false; // Entry-point thread bool running_ = false; - - int32_t priority_ = 0; + uint32_t thread_id_; }; class XHostThread : public XThread { public: XHostThread(KernelState* kernel_state, uint32_t stack_size, - uint32_t creation_flags, std::function host_fn, uint32_t guest_process=0); - - virtual void Execute(); + uint32_t creation_flags, std::function host_fn, + uint32_t guest_process = 0); + ~XHostThread(); private: + static void XHostThreadForwarder(cpu::ppc::PPCContext* context, void* ud1, void* ud2); std::function host_fn_; + uint32_t host_trampoline; + }; } // namespace kernel diff --git a/src/xenia/kernel/xtimer.cc 
b/src/xenia/kernel/xtimer.cc index b5b9530faf..3d54d14ac3 100644 --- a/src/xenia/kernel/xtimer.cc +++ b/src/xenia/kernel/xtimer.cc @@ -12,6 +12,8 @@ #include "xenia/base/chrono.h" #include "xenia/base/logging.h" #include "xenia/cpu/processor.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_ob.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h" #include "xenia/kernel/xthread.h" namespace xe { @@ -20,80 +22,28 @@ namespace kernel { XTimer::XTimer(KernelState* kernel_state) : XObject(kernel_state, kObjectType) {} -XTimer::~XTimer() = default; - -void XTimer::Initialize(uint32_t timer_type) { - assert_false(timer_); - switch (timer_type) { - case 0: // NotificationTimer - timer_ = xe::threading::Timer::CreateManualResetTimer(); - break; - case 1: // SynchronizationTimer - timer_ = xe::threading::Timer::CreateSynchronizationTimer(); - break; - default: - assert_always(); - break; - } - assert_not_null(timer_); +XTimer::~XTimer() { + //delete_proc for X_KTIMER object type + xboxkrnl::xeKeCancelTimer(cpu::ThreadState::GetContext(), + guest_object()); } -X_STATUS XTimer::SetTimer(int64_t due_time, uint32_t period_ms, - uint32_t routine, uint32_t routine_arg, bool resume) { - using xe::chrono::WinSystemClock; - using xe::chrono::XSystemClock; - // Caller is checking for STATUS_TIMER_RESUME_IGNORED. - if (resume) { - return X_STATUS_TIMER_RESUME_IGNORED; - } - - period_ms = Clock::ScaleGuestDurationMillis(period_ms); - WinSystemClock::time_point due_tp; - if (due_time < 0) { - // Any timer implementation uses absolute times eventually, convert as early - // as possible for increased accuracy - auto after = xe::chrono::hundrednanoseconds(-due_time); - due_tp = date::clock_cast(XSystemClock::now() + after); - } else { - due_tp = date::clock_cast( - XSystemClock::from_file_time(due_time)); - } - - // Stash routine for callback. 
- callback_thread_ = XThread::GetCurrentThread(); - callback_routine_ = routine; - callback_routine_arg_ = routine_arg; - - // This callback will only be issued when the timer is fired. - std::function callback = nullptr; - if (callback_routine_) { - callback = [this]() { - // Queue APC to call back routine with (arg, low, high). - // It'll be executed on the thread that requested the timer. - uint64_t time = xe::Clock::QueryGuestSystemTime(); - uint32_t time_low = static_cast(time); - uint32_t time_high = static_cast(time >> 32); - XELOGI( - "XTimer enqueuing timer callback to {:08X}({:08X}, {:08X}, {:08X})", - callback_routine_, callback_routine_arg_, time_low, time_high); - callback_thread_->EnqueueApc(callback_routine_, callback_routine_arg_, - time_low, time_high); - }; - } +void XTimer::Initialize(uint32_t timer_type) { + auto context = cpu::ThreadState::Get()->context(); + uint32_t guest_objptr = 0; + auto guest_globals = context->TranslateVirtual( + kernel_state()->GetKernelGuestGlobals()); - bool result; - if (!period_ms) { - result = timer_->SetOnceAt(due_tp, std::move(callback)); - } else { - result = timer_->SetRepeatingAt( - due_tp, std::chrono::milliseconds(period_ms), std::move(callback)); - } + X_STATUS create_status = + xboxkrnl::xeObCreateObject(&guest_globals->ExTimerObjectType, nullptr, + sizeof(X_EXTIMER), &guest_objptr, context); + xenia_assert(create_status == X_STATUS_SUCCESS); + xenia_assert(guest_objptr != 0); - return result ? X_STATUS_SUCCESS : X_STATUS_UNSUCCESSFUL; -} + auto guest_object = context->TranslateVirtual(guest_objptr); + xboxkrnl::xeKeInitializeExTimer(context, guest_object, timer_type); -X_STATUS XTimer::Cancel() { - return timer_->Cancel() ? 
X_STATUS_SUCCESS : X_STATUS_UNSUCCESSFUL; + SetNativePointer(guest_objptr); } } // namespace kernel diff --git a/src/xenia/kernel/xtimer.h b/src/xenia/kernel/xtimer.h index 0a3dee6180..1ee033c9d4 100644 --- a/src/xenia/kernel/xtimer.h +++ b/src/xenia/kernel/xtimer.h @@ -12,7 +12,7 @@ #include "xenia/base/threading.h" #include "xenia/kernel/xobject.h" -#include "xenia/xbox.h" +#include "xenia/kernel/kernel_guest_structures.h" namespace xe { namespace kernel { @@ -28,19 +28,11 @@ class XTimer : public XObject { void Initialize(uint32_t timer_type); - X_STATUS SetTimer(int64_t due_time, uint32_t period_ms, uint32_t routine, - uint32_t routine_arg, bool resume); - X_STATUS Cancel(); - protected: - xe::threading::WaitHandle* GetWaitHandle() override { return timer_.get(); } + xe::threading::WaitHandle* GetWaitHandle() override { return nullptr; } private: - std::unique_ptr timer_; - XThread* callback_thread_ = nullptr; - uint32_t callback_routine_ = 0; - uint32_t callback_routine_arg_ = 0; }; } // namespace kernel diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc index 6384f49960..851f4ee377 100644 --- a/src/xenia/memory.cc +++ b/src/xenia/memory.cc @@ -146,6 +146,7 @@ bool Memory::Initialize() { auto mapping_base = reinterpret_cast(1ull << n); if (!MapViews(mapping_base)) { mapping_base_ = mapping_base; + mapping_bit_ = static_cast(n); break; } } @@ -422,6 +423,9 @@ void Memory::GetHeapsPageStatsSummary(const BaseHeap* const* provided_heaps, } uint32_t Memory::HostToGuestVirtual(const void* host_address) const { +#if XE_COMPARISON_BUILD + return static_cast(reinterpret_cast(host_address)); +#else size_t virtual_address = reinterpret_cast(host_address) - reinterpret_cast(virtual_membase_); uint32_t vE0000000_host_offset = heaps_.vE0000000.host_address_offset(); @@ -433,6 +437,7 @@ uint32_t Memory::HostToGuestVirtual(const void* host_address) const { virtual_address -= vE0000000_host_offset; } return uint32_t(virtual_address); +#endif } uint32_t 
Memory::HostToGuestVirtualThunk(const void* context, @@ -608,13 +613,13 @@ uint32_t Memory::SystemHeapAlloc(uint32_t size, uint32_t alignment, return address; } -void Memory::SystemHeapFree(uint32_t address) { +void Memory::SystemHeapFree(uint32_t address, uint32_t* out_region_size) { if (!address) { return; } // TODO(benvanik): lightweight pool. auto heap = LookupHeap(address); - heap->Release(address); + heap->Release(address, out_region_size); } void Memory::DumpMap() { @@ -963,7 +968,7 @@ bool BaseHeap::AllocFixed(uint32_t base_address, uint32_t size, return true; } -template +template static inline T QuickMod(T value, uint32_t modv) { if (xe::is_pow2(modv)) { return value & (modv - 1); diff --git a/src/xenia/memory.h b/src/xenia/memory.h index 185b46cd73..5de5cf104b 100644 --- a/src/xenia/memory.h +++ b/src/xenia/memory.h @@ -347,12 +347,16 @@ class Memory { // Base address of virtual memory in the host address space. // This is often something like 0x100000000. inline uint8_t* virtual_membase() const { return virtual_membase_; } + inline unsigned char membase_bit() const { return mapping_bit_; } // Translates a guest virtual address to a host address that can be accessed // as a normal pointer. // Note that the contents at the specified host address are big-endian. template inline T TranslateVirtual(uint32_t guest_address) const { +#if XE_COMPARISON_BUILD == 1 + return reinterpret_cast(static_cast(guest_address)); +#else #if XE_PLATFORM_WIN32 == 1 uint8_t* host_address = virtual_membase_ + guest_address; if (guest_address >= 0xE0000000) { @@ -368,6 +372,7 @@ class Memory { return reinterpret_cast(host_address); #endif + #endif } template inline T* TranslateVirtual(TypedGuestPointer guest_address) { @@ -511,7 +516,7 @@ class Memory { uint32_t system_heap_flags = kSystemHeapDefault); // Frees memory allocated with SystemHeapAlloc. 
- void SystemHeapFree(uint32_t address); + void SystemHeapFree(uint32_t address, uint32_t* out_region_size = nullptr); // Gets the heap for the address space containing the given address. XE_NOALIAS @@ -563,6 +568,7 @@ class Memory { xe::memory::FileMappingHandle mapping_ = xe::memory::kFileMappingHandleInvalid; + unsigned char mapping_bit_ = 0; uint8_t* mapping_base_ = nullptr; union { struct { diff --git a/src/xenia/ui/windowed_app_main_win.cc b/src/xenia/ui/windowed_app_main_win.cc index 3f25da10a8..c4cc5d7ef5 100644 --- a/src/xenia/ui/windowed_app_main_win.cc +++ b/src/xenia/ui/windowed_app_main_win.cc @@ -13,6 +13,7 @@ #include "xenia/base/console.h" #include "xenia/base/cvar.h" #include "xenia/base/main_win.h" +#include "xenia/base/clock.h" #include "xenia/base/platform_win.h" #include "xenia/ui/windowed_app.h" #include "xenia/ui/windowed_app_context_win.h" @@ -354,6 +355,7 @@ int WINAPI wWinMain(HINSTANCE hinstance, HINSTANCE hinstance_prev, int result; SetUnhandledExceptionFilter(_UnhandledExceptionFilter); + xe::Clock::Initialize(); { xe::ui::Win32WindowedAppContext app_context(hinstance, show_cmd); // TODO(Triang3l): Initialize creates a window. Set DPI awareness via the diff --git a/src/xenia/xbox.h b/src/xenia/xbox.h index ccf88faff7..52be804a6b 100644 --- a/src/xenia/xbox.h +++ b/src/xenia/xbox.h @@ -13,12 +13,50 @@ #include #include "xenia/base/memory.h" - +#include "xenia/cpu/thread_state.h" +#include "xenia/guest_pointers.h" // TODO(benvanik): split this header, cleanup, etc. 
// clang-format off namespace xe { +template +struct EZPointer : public TypedGuestPointer { + using TypedGuestPointer::operator=; + T* xlat() { + #if XE_COMPARISON_BUILD + return reinterpret_cast(static_cast(this->m_ptr)); + #else + if(m_ptr){ + return cpu::ThreadState::GetContext()->TranslateVirtual(m_ptr); + } + else { + return nullptr; + } + #endif + } + inline T* operator->() { + return xlat(); + } + inline T& operator*() { + return *xlat(); + } + + inline EZPointer& operator = (T* ptr) { + #if XE_COMPARISON_BUILD + this->m_ptr = static_cast(reinterpret_cast(ptr)); + #else + this->m_ptr = cpu::ThreadState::GetContext()->HostToGuestVirtual(ptr); + #endif + return *this; + } + +}; +#if XE_COMPARISON_BUILD +#define XE_COMPARISON_NOINLINE XE_NOINLINE +#else +#define XE_COMPARISON_NOINLINE +#endif #pragma pack(push, 4) typedef uint32_t X_HANDLE; @@ -35,10 +73,12 @@ typedef uint32_t X_STATUS; #define X_STATUS_SUCCESS ((X_STATUS)0x00000000L) #define X_STATUS_ABANDONED_WAIT_0 ((X_STATUS)0x00000080L) #define X_STATUS_USER_APC ((X_STATUS)0x000000C0L) +#define X_STATUS_KERNEL_APC ((X_STATUS)0x00000100L) #define X_STATUS_ALERTED ((X_STATUS)0x00000101L) #define X_STATUS_TIMEOUT ((X_STATUS)0x00000102L) #define X_STATUS_PENDING ((X_STATUS)0x00000103L) #define X_STATUS_OBJECT_NAME_EXISTS ((X_STATUS)0x40000000L) +#define X_STATUS_NO_YIELD_PERFORMED ((X_STATUS)0x40000024L) #define X_STATUS_TIMER_RESUME_IGNORED ((X_STATUS)0x40000025L) #define X_STATUS_BUFFER_OVERFLOW ((X_STATUS)0x80000005L) #define X_STATUS_NO_MORE_FILES ((X_STATUS)0x80000006L) @@ -61,6 +101,7 @@ typedef uint32_t X_STATUS; #define X_STATUS_OBJECT_NAME_COLLISION ((X_STATUS)0xC0000035L) #define X_STATUS_INVALID_PAGE_PROTECTION ((X_STATUS)0xC0000045L) #define X_STATUS_MUTANT_NOT_OWNED ((X_STATUS)0xC0000046L) +#define X_STATUS_SEMAPHORE_LIMIT_EXCEEDED ((X_STATUS)0xC0000047L) #define X_STATUS_THREAD_IS_TERMINATING ((X_STATUS)0xC000004BL) #define X_STATUS_PROCEDURE_NOT_FOUND ((X_STATUS)0xC000007AL) #define 
X_STATUS_INVALID_IMAGE_FORMAT ((X_STATUS)0xC000007BL) @@ -277,9 +318,15 @@ struct X_VIDEO_MODE { static_assert_size(X_VIDEO_MODE, 48); // https://docs.microsoft.com/en-us/windows/win32/api/ntdef/ns-ntdef-list_entry +struct X_LIST_ENTRY; struct X_LIST_ENTRY { - be flink_ptr; // next entry / head - be blink_ptr; // previous entry / head + EZPointer flink_ptr; // next entry / head + EZPointer blink_ptr; // previous entry / head + + void Zero() { + flink_ptr = 0U; + blink_ptr = 0U; + } }; static_assert_size(X_LIST_ENTRY, 8); @@ -296,15 +343,6 @@ struct X_SLIST_HEADER { }; static_assert_size(X_SLIST_HEADER, 8); -// https://msdn.microsoft.com/en-us/library/windows/hardware/ff550671(v=vs.85).aspx -struct X_IO_STATUS_BLOCK { - union { - xe::be status; - xe::be pointer; - }; - xe::be information; -}; - struct X_EX_TITLE_TERMINATE_REGISTRATION { xe::be notification_routine; // 0x0 xe::be priority; // 0x4 @@ -312,84 +350,6 @@ struct X_EX_TITLE_TERMINATE_REGISTRATION { }; static_assert_size(X_EX_TITLE_TERMINATE_REGISTRATION, 16); - -enum X_OBJECT_HEADER_FLAGS : uint16_t { - OBJECT_HEADER_FLAG_NAMED_OBJECT = - 1, // if set, has X_OBJECT_HEADER_NAME_INFO prior to X_OBJECT_HEADER - OBJECT_HEADER_FLAG_IS_PERMANENT = 2, - OBJECT_HEADER_FLAG_CONTAINED_IN_DIRECTORY = - 4, // this object resides in an X_OBJECT_DIRECTORY - OBJECT_HEADER_IS_TITLE_OBJECT = 0x10, // used in obcreateobject - -}; - -// https://www.nirsoft.net/kernel_struct/vista/OBJECT_HEADER.html -struct X_OBJECT_HEADER { - xe::be pointer_count; - xe::be handle_count; - xe::be object_type_ptr; // -0x8 POBJECT_TYPE - xe::be flags; - uint8_t unknownE; - uint8_t unknownF; - // Object lives after this header. 
- // (There's actually a body field here which is the object itself) -}; -static_assert_size(X_OBJECT_HEADER, 0x10); - -struct X_OBJECT_DIRECTORY { - // each is a pointer to X_OBJECT_HEADER_NAME_INFO - // i believe offset 0 = pointer to next in bucket - xe::be name_buckets[13]; -}; -static_assert_size(X_OBJECT_DIRECTORY, 0x34); - -// https://www.geoffchappell.com/studies/windows/km/ntoskrnl/inc/ntos/ob/object_header_name_info.htm -// quite different, though -struct X_OBJECT_HEADER_NAME_INFO { - // i think that this is the next link in an X_OBJECT_DIRECTORY's buckets - xe::be next_in_directory; - xe::be object_directory; // pointer to X_OBJECT_DIRECTORY - X_ANSI_STRING name; -}; -struct X_OBJECT_ATTRIBUTES { - xe::be root_directory; // 0x0 - xe::be name_ptr; // 0x4 PANSI_STRING - xe::be attributes; // 0xC -}; -struct X_OBJECT_TYPE { - xe::be allocate_proc; // 0x0 - xe::be free_proc; // 0x4 - xe::be close_proc; // 0x8 - xe::be delete_proc; // 0xC - xe::be unknown_proc; // 0x10 - xe::be - unknown_size_or_object_; // this seems to be a union, it can be a pointer - // or it can be the size of the object - xe::be pool_tag; // 0x18 -}; -static_assert_size(X_OBJECT_TYPE, 0x1C); - -struct X_KSYMLINK { - xe::be refed_object_maybe; - X_ANSI_STRING refed_object_name_maybe; -}; -static_assert_size(X_KSYMLINK, 0xC); -// https://msdn.microsoft.com/en-us/library/windows/desktop/aa363082.aspx -typedef struct { - // Renamed due to a collision with exception_code from Windows excpt.h. - xe::be code; - xe::be exception_flags; - xe::be exception_record; - xe::be exception_address; - xe::be number_parameters; - xe::be exception_information[15]; -} X_EXCEPTION_RECORD; -static_assert_size(X_EXCEPTION_RECORD, 0x50); - -struct X_KSPINLOCK { - xe::be prcb_of_owner; -}; -static_assert_size(X_KSPINLOCK, 4); #pragma pack(pop) // Found by dumping the kSectionStringTable sections of various games: