Skip to content

Commit

Permalink
Merge branch 'xenia-canary:canary_experimental' into Custom
Browse files Browse the repository at this point in the history
  • Loading branch information
backgamon authored May 31, 2024
2 parents 9743a0c + b3f2ab0 commit f0705f7
Show file tree
Hide file tree
Showing 38 changed files with 3,072 additions and 1,907 deletions.
2 changes: 1 addition & 1 deletion src/xenia/apu/xma_context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ void XmaContext::ConvertFrame(const uint8_t** samples, bool is_two_channel,
auto in = reinterpret_cast<const float*>(samples[j]);

// Raw samples sometimes aren't within [-1, 1]
float scaled_sample = xe::saturate_signed(in[i]) * scale;
float scaled_sample = xe::clamp_float(in[i], -1.0f, 1.0f) * scale;

// Convert the sample and output it in big endian.
auto sample = static_cast<int16_t>(scaled_sample);
Expand Down
28 changes: 12 additions & 16 deletions src/xenia/base/math.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,20 +72,22 @@ constexpr T round_up(T value, V multiple, bool force_non_zero = true) {
return (value + multiple - 1) / multiple * multiple;
}

// Clamps to [0, 1] using the same conventions as in shading languages,
// returning 0 for NaN.
// std::max is `a < b ? b : a`, thus in case of NaN, the first argument is
// always returned. Also -0 is not < +0, so +0 is also chosen for it.
template <typename T>
constexpr T saturate_unsigned(T value) {
  return std::min(static_cast<T>(1.0f), std::max(static_cast<T>(0.0f), value));
}

// Clamps value to [min_value, max_value] with std::isgreater / std::isless,
// both of which evaluate to false when either operand is NaN.
// For NaN, returns min_value (or, if it's NaN too, max_value).
// If either of the boundaries is zero, and if the value is at that boundary or
// exceeds it, the result will have the sign of that boundary. If both
// boundaries are zero, which sign is selected among the argument signs is not
// explicitly defined.
template <typename T>
T clamp_float(T value, T min_value, T max_value) {
  // The intermediate is kept as T rather than float so that double (or long
  // double) arguments are not rounded to single precision on the way through.
  const T clamped_to_min =
      std::isgreater(value, min_value) ? value : min_value;
  return std::isless(clamped_to_min, max_value) ? clamped_to_min : max_value;
}

// Clamps to [-1, 1].
// This diverges from the GPU NaN rules for signed normalized formats (NaN
// should be converted to 0, not to -1), but this expectation is not needed most
// of time, and cannot be met for free (unlike for 0...1 clamping).
template <typename T>
constexpr T saturate_signed(T value) {
  return std::min(static_cast<T>(1.0f), std::max(static_cast<T>(-1.0f), value));
}

// Clamps to [0, 1] by forwarding to clamp_float.
// Using the same conventions as in shading languages, returning 0 for NaN.
// 0 is always returned as positive.
template <typename T>
T saturate(T value) {
  return clamp_float(value, static_cast<T>(0.0f), static_cast<T>(1.0f));
}

// Gets the next power of two value that is greater than or equal to the given
Expand Down Expand Up @@ -365,12 +367,6 @@ inline uint64_t rotate_right(uint64_t v, uint8_t sh) {
}
#endif // XE_PLATFORM_WIN32

// Restricts value to the [min_value, max_value] range: the lower bound is
// applied first, then the upper bound, so with inverted bounds the upper bound
// wins.
template <typename T>
T clamp(T value, T min_value, T max_value) {
  const T lower_bounded = (value < min_value) ? min_value : value;
  if (lower_bounded > max_value) {
    return max_value;
  }
  return lower_bounded;
}

#if XE_ARCH_AMD64
// Utilities for SSE values.
template <int N>
Expand Down
25 changes: 25 additions & 0 deletions src/xenia/base/memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,37 @@
#include <functional>
#include <string>
#include <string_view>
#include <type_traits>

#include "xenia/base/byte_order.h"

namespace xe {
namespace memory {

// Restrict qualifier for variable declarations only (not for return values or
// the `this` pointer).
// Not propagated.
#define XE_RESTRICT_VAR __restrict

// Aliasing-safe bit reinterpretation.
// For more complex cases such as non-trivially-copyable types, write copying
// code respecting the requirements for them externally instead of using these
// functions.

// Copies the object representation of src into dst. Both types must be
// trivially copyable and have the same size (checked at compile time).
template <typename Dst, typename Src>
void Reinterpret(Dst& XE_RESTRICT_VAR dst, const Src& XE_RESTRICT_VAR src) {
  static_assert(std::is_trivially_copyable_v<Src>);
  static_assert(std::is_trivially_copyable_v<Dst>);
  static_assert(sizeof(Src) == sizeof(Dst));
  std::memcpy(&dst, &src, sizeof(dst));
}

// Returns a Dst whose bytes are those of src. Dst is default-initialized and
// then filled through the two-argument overload, which performs the checks.
template <typename Dst, typename Src>
Dst Reinterpret(const Src& XE_RESTRICT_VAR src) {
  Dst result;
  Reinterpret(result, src);
  return result;
}

#if XE_PLATFORM_ANDROID
void AndroidInitialize();
void AndroidShutdown();
Expand Down
9 changes: 5 additions & 4 deletions src/xenia/base/testing/chrono_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,11 @@ TEST_CASE("WinSystemClock <-> XSystemClock", "[clock_cast]") {
auto error2 = xsys.time_since_epoch() - wxsys.time_since_epoch();
auto error3 = wsys - wxsys;

REQUIRE(error1 < 10ms);
REQUIRE(error1 > -10ms);
REQUIRE(error2 < 10ms);
REQUIRE(error2 > -10ms);
// In AppVeyor, the difference often can be as large as roughly 16ms.
REQUIRE(error1 < 20ms);
REQUIRE(error1 > -20ms);
REQUIRE(error2 < 20ms);
REQUIRE(error2 > -20ms);
REQUIRE(error3 < duration);
REQUIRE(error3 > -duration);
}
Expand Down
12 changes: 7 additions & 5 deletions src/xenia/debug/ui/debug_window.cc
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ void DebugWindow::DrawFrame(ImGuiIO& io) {
ImVec2(kSplitterWidth, top_panes_height));
if (ImGui::IsItemActive()) {
function_pane_width += io.MouseDelta.x;
function_pane_width = xe::clamp(function_pane_width, 30.0f, FLT_MAX);
function_pane_width = xe::clamp_float(function_pane_width, 30.0f, FLT_MAX);
}
ImGui::SameLine();
ImGui::BeginChild("##source_pane",
Expand All @@ -194,7 +194,7 @@ void DebugWindow::DrawFrame(ImGuiIO& io) {
ImVec2(kSplitterWidth, top_panes_height));
if (ImGui::IsItemActive()) {
source_pane_width += io.MouseDelta.x;
source_pane_width = xe::clamp(source_pane_width, 30.0f, FLT_MAX);
source_pane_width = xe::clamp_float(source_pane_width, 30.0f, FLT_MAX);
}
ImGui::SameLine();
ImGui::BeginChild("##registers_pane",
Expand All @@ -206,7 +206,8 @@ void DebugWindow::DrawFrame(ImGuiIO& io) {
ImVec2(kSplitterWidth, top_panes_height));
if (ImGui::IsItemActive()) {
registers_pane_width += io.MouseDelta.x;
registers_pane_width = xe::clamp(registers_pane_width, 30.0f, FLT_MAX);
registers_pane_width =
xe::clamp_float(registers_pane_width, 30.0f, FLT_MAX);
}
ImGui::SameLine();
ImGui::BeginChild("##right_pane", ImVec2(0, top_panes_height), true);
Expand Down Expand Up @@ -234,7 +235,7 @@ void DebugWindow::DrawFrame(ImGuiIO& io) {
ImGui::InvisibleButton("##hsplitter0", ImVec2(-1, kSplitterWidth));
if (ImGui::IsItemActive()) {
bottom_panes_height -= io.MouseDelta.y;
bottom_panes_height = xe::clamp(bottom_panes_height, 30.0f, FLT_MAX);
bottom_panes_height = xe::clamp_float(bottom_panes_height, 30.0f, FLT_MAX);
}
ImGui::BeginChild("##log_pane", ImVec2(log_pane_width, bottom_panes_height),
true);
Expand All @@ -245,7 +246,8 @@ void DebugWindow::DrawFrame(ImGuiIO& io) {
ImVec2(kSplitterWidth, bottom_panes_height));
if (ImGui::IsItemActive()) {
breakpoints_pane_width -= io.MouseDelta.x;
breakpoints_pane_width = xe::clamp(breakpoints_pane_width, 30.0f, FLT_MAX);
breakpoints_pane_width =
xe::clamp_float(breakpoints_pane_width, 30.0f, FLT_MAX);
}
ImGui::SameLine();
ImGui::BeginChild("##breakpoints_pane", ImVec2(0, 0), true);
Expand Down
60 changes: 37 additions & 23 deletions src/xenia/gpu/command_processor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -455,9 +455,9 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index,
// Scratch register writeback.
if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) {
uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0;
if ((1 << scratch_reg) & regs.values[XE_GPU_REG_SCRATCH_UMSK].u32) {
if ((1 << scratch_reg) & regs.values[XE_GPU_REG_SCRATCH_UMSK]) {
// Enabled - write to address.
uint32_t scratch_addr = regs.values[XE_GPU_REG_SCRATCH_ADDR].u32;
uint32_t scratch_addr = regs.values[XE_GPU_REG_SCRATCH_ADDR];
uint32_t mem_addr = scratch_addr + (scratch_reg * 4);
xe::store_and_swap<uint32_t>(memory_->TranslatePhysical(mem_addr), value);
}
Expand All @@ -467,7 +467,7 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index,
// This will block the command processor the next time it WAIT_MEM_REGs
// and allow us to synchronize the memory.
case XE_GPU_REG_COHER_STATUS_HOST: {
regs.values[index].u32 |= UINT32_C(0x80000000);
regs.values[index] |= UINT32_C(0x80000000);
} break;

case XE_GPU_REG_DC_LUT_RW_INDEX: {
Expand All @@ -478,12 +478,12 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index,

case XE_GPU_REG_DC_LUT_SEQ_COLOR: {
// Should be in the 256-entry table writing mode.
assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1);
auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1);
auto gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
// DC_LUT_SEQ_COLOR is in the red, green, blue order, but the write
// enable mask is blue, green, red.
bool write_gamma_ramp_component =
(regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 &
(regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] &
(UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0;
if (write_gamma_ramp_component) {
reg::DC_LUT_30_COLOR& gamma_ramp_entry =
Expand All @@ -505,7 +505,11 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index,
}
if (++gamma_ramp_rw_component_ >= 3) {
gamma_ramp_rw_component_ = 0;
++gamma_ramp_rw_index.rw_index;
reg::DC_LUT_RW_INDEX new_gamma_ramp_rw_index = gamma_ramp_rw_index;
++new_gamma_ramp_rw_index.rw_index;
WriteRegister(
XE_GPU_REG_DC_LUT_RW_INDEX,
xe::memory::Reinterpret<uint32_t>(new_gamma_ramp_rw_index));
}
if (write_gamma_ramp_component) {
OnGammaRamp256EntryTableValueWritten();
Expand All @@ -514,14 +518,14 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index,

case XE_GPU_REG_DC_LUT_PWL_DATA: {
// Should be in the PWL writing mode.
assert_not_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1);
auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
assert_not_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1);
auto gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
// Bit 7 of the index is ignored for PWL.
uint32_t gamma_ramp_rw_index_pwl = gamma_ramp_rw_index.rw_index & 0x7F;
// DC_LUT_PWL_DATA is likely in the red, green, blue order because
// DC_LUT_SEQ_COLOR is, but the write enable mask is blue, green, red.
bool write_gamma_ramp_component =
(regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 &
(regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] &
(UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0;
if (write_gamma_ramp_component) {
reg::DC_LUT_PWL_DATA& gamma_ramp_entry =
Expand All @@ -534,13 +538,17 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index,
}
if (++gamma_ramp_rw_component_ >= 3) {
gamma_ramp_rw_component_ = 0;
reg::DC_LUT_RW_INDEX new_gamma_ramp_rw_index = gamma_ramp_rw_index;
// TODO(Triang3l): Should this increase beyond 7 bits for PWL?
// Direct3D 9 explicitly sets rw_index to 0x80 after writing the last
// PWL entry. However, the DC_LUT_RW_INDEX documentation says that for
// PWL, the bit 7 is ignored.
gamma_ramp_rw_index.rw_index =
new_gamma_ramp_rw_index.rw_index =
(gamma_ramp_rw_index.rw_index & ~UINT32_C(0x7F)) |
((gamma_ramp_rw_index_pwl + 1) & 0x7F);
WriteRegister(
XE_GPU_REG_DC_LUT_RW_INDEX,
xe::memory::Reinterpret<uint32_t>(new_gamma_ramp_rw_index));
}
if (write_gamma_ramp_component) {
OnGammaRampPWLValueWritten();
Expand All @@ -549,10 +557,10 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index,

case XE_GPU_REG_DC_LUT_30_COLOR: {
// Should be in the 256-entry table writing mode.
assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1);
auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1);
auto gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
uint32_t gamma_ramp_write_enable_mask =
regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 & 0b111;
regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] & 0b111;
if (gamma_ramp_write_enable_mask) {
reg::DC_LUT_30_COLOR& gamma_ramp_entry =
gamma_ramp_256_entry_table_[gamma_ramp_rw_index.rw_index];
Expand All @@ -567,11 +575,16 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index,
gamma_ramp_entry.color_10_red = gamma_ramp_value.color_10_red;
}
}
++gamma_ramp_rw_index.rw_index;
// TODO(Triang3l): Should this reset the component write index? If this
// increase is assumed to behave like a full DC_LUT_RW_INDEX write, it
// probably should.
// probably should. Currently this also calls WriteRegister for
// DC_LUT_RW_INDEX, which resets gamma_ramp_rw_component_ as well.
gamma_ramp_rw_component_ = 0;
reg::DC_LUT_RW_INDEX new_gamma_ramp_rw_index = gamma_ramp_rw_index;
++new_gamma_ramp_rw_index.rw_index;
WriteRegister(
XE_GPU_REG_DC_LUT_RW_INDEX,
xe::memory::Reinterpret<uint32_t>(new_gamma_ramp_rw_index));
if (gamma_ramp_write_enable_mask) {
OnGammaRamp256EntryTableValueWritten();
}
Expand All @@ -583,7 +596,7 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
// chrispy: rearrange check order, place set after checks

if (XE_LIKELY(index < RegisterFile::kRegisterCount)) {
register_file_->values[index].u32 = value;
register_file_->values[index] = value;

// quick pre-test
// todo: figure out just how unlikely this is. if very (it ought to be,
Expand Down Expand Up @@ -708,10 +721,11 @@ void CommandProcessor::MakeCoherent() {
// https://web.archive.org/web/20160711162346/https://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/10/R6xx_R7xx_3D.pdf
// https://cgit.freedesktop.org/xorg/driver/xf86-video-radeonhd/tree/src/r6xx_accel.c?id=3f8b6eccd9dba116cc4801e7f80ce21a879c67d2#n454

RegisterFile* regs = register_file_;
auto& status_host = regs->Get<reg::COHER_STATUS_HOST>();
auto base_host = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32;
auto size_host = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32;
volatile uint32_t* regs_volatile = register_file_->values;
auto status_host = xe::memory::Reinterpret<reg::COHER_STATUS_HOST>(
uint32_t(regs_volatile[XE_GPU_REG_COHER_STATUS_HOST]));
uint32_t base_host = regs_volatile[XE_GPU_REG_COHER_BASE_HOST];
uint32_t size_host = regs_volatile[XE_GPU_REG_COHER_SIZE_HOST];

if (!status_host.status) {
return;
Expand All @@ -731,7 +745,7 @@ void CommandProcessor::MakeCoherent() {
base_host + size_host, size_host, action);

// Mark coherent.
status_host.status = 0;
regs_volatile[XE_GPU_REG_COHER_STATUS_HOST] = 0;
}

void CommandProcessor::PrepareForWait() { trace_writer_.Flush(); }
Expand All @@ -752,4 +766,4 @@ void CommandProcessor::InitializeTrace() {
#define COMMAND_PROCESSOR CommandProcessor
#include "pm4_command_processor_implement.h"
} // namespace gpu
} // namespace xe
} // namespace xe
Loading

0 comments on commit f0705f7

Please sign in to comment.