Skip to content

Commit

Permalink
sh4: split xffr array. Fix FPCB_PAD on windows
Browse files Browse the repository at this point in the history
Split xffr[32] into xf[16] and fr[16].
Set FPCB_PAD size to 64_KB. Issue #1736
Get rid of shil_param::reg_aofs
  • Loading branch information
flyinghead committed Nov 13, 2024
1 parent 129673a commit 76638df
Show file tree
Hide file tree
Showing 9 changed files with 111 additions and 112 deletions.
4 changes: 2 additions & 2 deletions core/hw/sh4/dyna/shil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ u32 getRegOffset(Sh4RegType reg)
}
if (reg >= reg_fr_0 && reg <= reg_fr_15) {
const size_t regofs = (reg - reg_fr_0) * sizeof(float);
return offsetof(Sh4Context, xffr[16]) + regofs;
return offsetof(Sh4Context, fr[0]) + regofs;
}
if (reg >= reg_xf_0 && reg <= reg_xf_15) {
const size_t regofs = (reg - reg_xf_0) * sizeof(float);
return offsetof(Sh4Context, xffr[0]) + regofs;
return offsetof(Sh4Context, xf[0]) + regofs;
}
switch (reg)
{
Expand Down
1 change: 0 additions & 1 deletion core/hw/sh4/dyna/shil.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,6 @@ struct shil_param
u32* reg_ptr(Sh4Context& ctx) const { verify(is_reg()); return GetRegPtr(ctx, _reg); }
u32 reg_offset() const { verify(is_reg()); return getRegOffset(_reg); }
s32 reg_nofs() const { verify(is_reg()); return (int)getRegOffset(_reg) - sizeof(Sh4Context); }
u32 reg_aofs() const { return -reg_nofs(); }

u32 imm_value() const { verify(is_imm()); return _imm; }

Expand Down
116 changes: 58 additions & 58 deletions core/hw/sh4/interpr/sh4_fpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ sh4op(i1111_nnnn_mmmm_0000)
{
u32 n = GetN(op);
u32 m = GetM(op);
ctx->fr(n) += ctx->fr(m);
CHECK_FPU_32(ctx->fr(n));
ctx->fr[n] += ctx->fr[m];
CHECK_FPU_32(ctx->fr[n]);
}
else
{
Expand All @@ -53,8 +53,8 @@ sh4op(i1111_nnnn_mmmm_0001)
u32 n = GetN(op);
u32 m = GetM(op);

ctx->fr(n) -= ctx->fr(m);
CHECK_FPU_32(ctx->fr(n));
ctx->fr[n] -= ctx->fr[m];
CHECK_FPU_32(ctx->fr[n]);
}
else
{
Expand All @@ -70,8 +70,8 @@ sh4op(i1111_nnnn_mmmm_0010)
{
u32 n = GetN(op);
u32 m = GetM(op);
ctx->fr(n) *= ctx->fr(m);
CHECK_FPU_32(ctx->fr(n));
ctx->fr[n] *= ctx->fr[m];
CHECK_FPU_32(ctx->fr[n]);
}
else
{
Expand All @@ -88,9 +88,9 @@ sh4op(i1111_nnnn_mmmm_0011)
u32 n = GetN(op);
u32 m = GetM(op);

ctx->fr(n) /= ctx->fr(m);
ctx->fr[n] /= ctx->fr[m];

CHECK_FPU_32(ctx->fr(n));
CHECK_FPU_32(ctx->fr[n]);
}
else
{
Expand All @@ -107,7 +107,7 @@ sh4op(i1111_nnnn_mmmm_0100)
u32 n = GetN(op);
u32 m = GetM(op);

ctx->sr.T = ctx->fr(m) == ctx->fr(n);
ctx->sr.T = ctx->fr[m] == ctx->fr[n];
}
else
{
Expand All @@ -122,7 +122,7 @@ sh4op(i1111_nnnn_mmmm_0101)
u32 n = GetN(op);
u32 m = GetM(op);

if (ctx->fr(n) > ctx->fr(m))
if (ctx->fr[n] > ctx->fr[m])
ctx->sr.T = 1;
else
ctx->sr.T = 0;
Expand Down Expand Up @@ -281,7 +281,7 @@ sh4op(i1111_nnnn_mmmm_1100)
{
u32 n = GetN(op);
u32 m = GetM(op);
ctx->fr(n) = ctx->fr(m);
ctx->fr[n] = ctx->fr[m];
}
else
{
Expand Down Expand Up @@ -339,14 +339,14 @@ sh4op(i1111_nnn0_1111_1101)
#ifdef NATIVE_FSCA
float rads = pi_index / (65536.0f / 2) * float(M_PI);

ctx->fr(n + 0) = sinf(rads);
ctx->fr(n + 1) = cosf(rads);
ctx->fr[n + 0] = sinf(rads);
ctx->fr[n + 1] = cosf(rads);

CHECK_FPU_32(ctx->fr(n));
CHECK_FPU_32(ctx->fr(n + 1));
CHECK_FPU_32(ctx->fr[n]);
CHECK_FPU_32(ctx->fr[n + 1]);
#else
ctx->fr(n + 0) = sin_table[pi_index].u[0];
ctx->fr(n + 1) = sin_table[pi_index].u[1];
ctx->fr[n + 0] = sin_table[pi_index].u[0];
ctx->fr[n + 1] = sin_table[pi_index].u[1];
#endif

}
Expand All @@ -360,8 +360,8 @@ sh4op(i1111_nnnn_0111_1101)
u32 n = GetN(op);
if (ctx->fpscr.PR==0)
{
ctx->fr(n) = 1.f / sqrtf(ctx->fr(n));
CHECK_FPU_32(ctx->fr(n));
ctx->fr[n] = 1.f / sqrtf(ctx->fr[n]);
CHECK_FPU_32(ctx->fr[n]);
}
else
iNimp("FSRRA : Double precision mode");
Expand Down Expand Up @@ -404,12 +404,12 @@ sh4op(i1111_nnmm_1110_1101)
int m=(GetN(op)&0x3)<<2;
if (ctx->fpscr.PR == 0)
{
double idp = (double)ctx->fr(n + 0) * ctx->fr(m + 0);
idp += (double)ctx->fr(n + 1) * ctx->fr(m + 1);
idp += (double)ctx->fr(n + 2) * ctx->fr(m + 2);
idp += (double)ctx->fr(n + 3) * ctx->fr(m + 3);
double idp = (double)ctx->fr[n + 0] * ctx->fr[m + 0];
idp += (double)ctx->fr[n + 1] * ctx->fr[m + 1];
idp += (double)ctx->fr[n + 2] * ctx->fr[m + 2];
idp += (double)ctx->fr[n + 3] * ctx->fr[m + 3];

ctx->fr(n + 3) = fixNaN((float)idp);
ctx->fr[n + 3] = fixNaN((float)idp);
}
else
{
Expand All @@ -425,7 +425,7 @@ sh4op(i1111_nnnn_1000_1101)

u32 n = GetN(op);

ctx->fr(n) = 0.0f;
ctx->fr[n] = 0.0f;

}

Expand All @@ -437,7 +437,7 @@ sh4op(i1111_nnnn_1001_1101)

u32 n = GetN(op);

ctx->fr(n) = 1.0f;
ctx->fr[n] = 1.0f;
}

//flds <FREG_N>,FPUL
Expand All @@ -461,7 +461,7 @@ sh4op(i1111_nnnn_0010_1101)
if (ctx->fpscr.PR == 0)
{
u32 n = GetN(op);
ctx->fr(n) = (float)(int)ctx->fpul;
ctx->fr[n] = (float)(int)ctx->fpul;
}
else
{
Expand Down Expand Up @@ -503,8 +503,8 @@ sh4op(i1111_nnnn_0110_1101)
{
u32 n = GetN(op);

ctx->fr(n) = sqrtf(ctx->fr(n));
CHECK_FPU_32(ctx->fr(n));
ctx->fr[n] = sqrtf(ctx->fr[n]);
CHECK_FPU_32(ctx->fr[n]);
}
else
{
Expand All @@ -519,14 +519,14 @@ sh4op(i1111_nnnn_0011_1101)
if (ctx->fpscr.PR == 0)
{
u32 n = GetN(op);
ctx->fpul = (u32)(s32)ctx->fr(n);
ctx->fpul = (u32)(s32)ctx->fr[n];

if ((s32)ctx->fpul > 0x7fffff80)
ctx->fpul = 0x7fffffff;
// Intel CPUs convert out of range float numbers to 0x80000000. Manually set the correct sign
else if (ctx->fpul == 0x80000000 && ctx->fr(n) == ctx->fr(n))
else if (ctx->fpul == 0x80000000 && ctx->fr[n] == ctx->fr[n])
{
if (*(int *)&ctx->fr(n) > 0) // Using integer math to avoid issues with Inf and NaN
if (*(int *)&ctx->fr[n] > 0) // Using integer math to avoid issues with Inf and NaN
ctx->fpul--;
}
}
Expand Down Expand Up @@ -554,8 +554,8 @@ sh4op(i1111_nnnn_mmmm_1110)
u32 n = GetN(op);
u32 m = GetM(op);

ctx->fr(n) = std::fma(ctx->fr(0), ctx->fr(m), ctx->fr(n));
CHECK_FPU_32(ctx->fr(n));
ctx->fr[n] = std::fma(ctx->fr[0], ctx->fr[m], ctx->fr[n]);
CHECK_FPU_32(ctx->fr[n]);
}
else
{
Expand All @@ -578,30 +578,30 @@ sh4op(i1111_nn01_1111_1101)

if (ctx->fpscr.PR==0)
{
double v1 = (double)ctx->xf(0) * ctx->fr(n + 0) +
(double)ctx->xf(4) * ctx->fr(n + 1) +
(double)ctx->xf(8) * ctx->fr(n + 2) +
(double)ctx->xf(12) * ctx->fr(n + 3);

double v2 = (double)ctx->xf(1) * ctx->fr(n + 0) +
(double)ctx->xf(5) * ctx->fr(n + 1) +
(double)ctx->xf(9) * ctx->fr(n + 2) +
(double)ctx->xf(13) * ctx->fr(n + 3);

double v3 = (double)ctx->xf(2) * ctx->fr(n + 0) +
(double)ctx->xf(6) * ctx->fr(n + 1) +
(double)ctx->xf(10) * ctx->fr(n + 2) +
(double)ctx->xf(14) * ctx->fr(n + 3);

double v4 = (double)ctx->xf(3) * ctx->fr(n + 0) +
(double)ctx->xf(7) * ctx->fr(n + 1) +
(double)ctx->xf(11) * ctx->fr(n + 2) +
(double)ctx->xf(15) * ctx->fr(n + 3);

ctx->fr(n + 0) = fixNaN((float)v1);
ctx->fr(n + 1) = fixNaN((float)v2);
ctx->fr(n + 2) = fixNaN((float)v3);
ctx->fr(n + 3) = fixNaN((float)v4);
double v1 = (double)ctx->xf[0] * ctx->fr[n + 0] +
(double)ctx->xf[4] * ctx->fr[n + 1] +
(double)ctx->xf[8] * ctx->fr[n + 2] +
(double)ctx->xf[12] * ctx->fr[n + 3];

double v2 = (double)ctx->xf[1] * ctx->fr[n + 0] +
(double)ctx->xf[5] * ctx->fr[n + 1] +
(double)ctx->xf[9] * ctx->fr[n + 2] +
(double)ctx->xf[13] * ctx->fr[n + 3];

double v3 = (double)ctx->xf[2] * ctx->fr[n + 0] +
(double)ctx->xf[6] * ctx->fr[n + 1] +
(double)ctx->xf[10] * ctx->fr[n + 2] +
(double)ctx->xf[14] * ctx->fr[n + 3];

double v4 = (double)ctx->xf[3] * ctx->fr[n + 0] +
(double)ctx->xf[7] * ctx->fr[n + 1] +
(double)ctx->xf[11] * ctx->fr[n + 2] +
(double)ctx->xf[15] * ctx->fr[n + 3];

ctx->fr[n + 0] = fixNaN((float)v1);
ctx->fr[n + 1] = fixNaN((float)v2);
ctx->fr[n + 2] = fixNaN((float)v3);
ctx->fr[n + 3] = fixNaN((float)v4);
}
else
{
Expand Down
2 changes: 1 addition & 1 deletion core/hw/sh4/sh4_core_regs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ void DYNACALL Sh4Context::UpdateFPSCR(Sh4Context *ctx)
{
if (ctx->fpscr.FR != ctx->old_fpscr.FR)
// FPU bank change
std::swap((f32 (&)[16])ctx->xffr, *(f32 (*)[16])&ctx->xffr[16]);
std::swap(ctx->xf, ctx->fr);

ctx->old_fpscr = ctx->fpscr;
setHostRoundingMode(ctx->fpscr.RM, ctx->fpscr.DN);
Expand Down
38 changes: 16 additions & 22 deletions core/hw/sh4/sh4_if.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ struct fpscr_t
class Sh4Executor
{
public:
virtual ~Sh4Executor() {}
virtual ~Sh4Executor() = default;
virtual void Run() = 0;
virtual void Start() = 0;
virtual void Stop() = 0;
Expand Down Expand Up @@ -131,7 +131,8 @@ struct alignas(64) Sh4Context
{
SQBuffer sq_buffer[2];

f32 xffr[32];
float xf[16];
float fr[16];
u32 r[16];

union
Expand Down Expand Up @@ -170,44 +171,36 @@ struct alignas(64) Sh4Context
u64 raw[64];
};

f32& fr(int idx) {
assert(idx >= 0 && idx <= 15);
return xffr[idx + 16];
}
f32& xf(int idx) {
assert(idx >= 0 && idx <= 15);
return xffr[idx];
}
u32& fr_hex(int idx) {
assert(idx >= 0 && idx <= 15);
return reinterpret_cast<u32&>(fr(idx));
return reinterpret_cast<u32&>(fr[idx]);
}
u64& dr_hex(int idx) {
assert(idx >= 0 && idx <= 7);
return *reinterpret_cast<u64 *>(&fr(idx * 2));
return *reinterpret_cast<u64 *>(&fr[idx * 2]);
}
u64& xd_hex(int idx) {
assert(idx >= 0 && idx <= 7);
return *reinterpret_cast<u64 *>(&xf(idx * 2));
return *reinterpret_cast<u64 *>(&xf[idx * 2]);
}

f64 getDR(u32 n)
double getDR(u32 n)
{
assert(n <= 7);
DoubleReg t;
t.sgl[1] = fr(n * 2);
t.sgl[0] = fr(n * 2 + 1);
t.sgl[1] = fr[n * 2];
t.sgl[0] = fr[n * 2 + 1];

return t.dbl;
}

void setDR(u32 n, f64 val)
void setDR(u32 n, double val)
{
assert(n <= 7);
DoubleReg t;
t.dbl = val;
fr(n * 2) = t.sgl[1];
fr(n * 2 + 1) = t.sgl[0];
fr[n * 2] = t.sgl[1];
fr[n * 2 + 1] = t.sgl[0];
}

static void DYNACALL UpdateFPSCR(Sh4Context *ctx);
Expand All @@ -216,8 +209,8 @@ struct alignas(64) Sh4Context
private:
union DoubleReg
{
f64 dbl;
f32 sgl[2];
double dbl;
float sgl[2];
};
};
static_assert(sizeof(Sh4Context) == 512, "Invalid Sh4Context size");
Expand All @@ -230,7 +223,8 @@ static_assert(sizeof(Sh4Context) == 512, "Invalid Sh4Context size");
// want to be an i8r4 value that can be subtracted in one op (such as 0x4100000)
#define FPCB_PAD 0x100000
#else
#define FPCB_PAD PAGE_SIZE
// On other systems PAGE_SIZE would be enough, except on Windows, where memory mappings have a 64 KB granularity
#define FPCB_PAD 64_KB
#endif
struct alignas(PAGE_SIZE) Sh4RCB
{
Expand Down
8 changes: 4 additions & 4 deletions core/hw/sh4/sh4_opcode_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -461,25 +461,25 @@ std::string disassemble_op(const char* tx1, u32 pc, u16 opcode)
}
else if (strcmp2(tx1,"FREG_N>"))
{
sprintf(buf,"FR%d=%f ", GetN(opcode), p_sh4rcb->cntx.xffr[16 + GetN(opcode)]);
sprintf(buf,"FR%d=%f ", GetN(opcode), p_sh4rcb->cntx.fr[GetN(opcode)]);
regs += buf;
sprintf(buf,"FR%d",GetN(opcode));
}
else if (strcmp2(tx1,"FREG_M>"))
{
sprintf(buf,"FR%d=%f ", GetM(opcode), p_sh4rcb->cntx.xffr[16 + GetM(opcode)]);
sprintf(buf,"FR%d=%f ", GetM(opcode), p_sh4rcb->cntx.fr[GetM(opcode)]);
regs += buf;
sprintf(buf,"FR%d",GetM(opcode));
}
else if (strcmp2(tx1, "FREG_M_SD_F>"))
{
sprintf(buf,"FR%d=%f ", GetM(opcode), p_sh4rcb->cntx.xffr[16 + GetM(opcode)]);
sprintf(buf,"FR%d=%f ", GetM(opcode), p_sh4rcb->cntx.fr[GetM(opcode)]);
regs += buf;
sprintf(buf,"FR%d", GetM(opcode));
}
else if (strcmp2(tx1,"FREG_0>"))
{
sprintf(buf,"FR0=%f ", p_sh4rcb->cntx.xffr[16]);
sprintf(buf,"FR0=%f ", p_sh4rcb->cntx.fr[0]);
regs += buf;
sprintf(buf,"FR0");
}
Expand Down
Loading

0 comments on commit 76638df

Please sign in to comment.