sh4: split xffr array. Fix FPCB_PAD on windows

Split xffr[32] into xf[16] and fr[16]. Set FPCB_PAD size to 64_KB (see issue #1736). Get rid of shil_param::reg_aofs.
flyinghead · Nov 13, 2024 · 76638df · 76638df
1 parent 129673a
commit 76638df
Show file tree

Hide file tree

Showing 9 changed files with 111 additions and 112 deletions.
diff --git a/core/hw/sh4/dyna/shil.cpp b/core/hw/sh4/dyna/shil.cpp
@@ -25,11 +25,11 @@ u32 getRegOffset(Sh4RegType reg)
 	}
 	if (reg >= reg_fr_0 && reg <= reg_fr_15) {
 		const size_t regofs = (reg - reg_fr_0) * sizeof(float);
-		return offsetof(Sh4Context, xffr[16]) + regofs;
+		return offsetof(Sh4Context, fr[0]) + regofs;
 	}
 	if (reg >= reg_xf_0 && reg <= reg_xf_15) {
 		const size_t regofs = (reg - reg_xf_0) * sizeof(float);
-		return offsetof(Sh4Context, xffr[0]) + regofs;
+		return offsetof(Sh4Context, xf[0]) + regofs;
 	}
 	switch (reg)
 	{

diff --git a/core/hw/sh4/dyna/shil.h b/core/hw/sh4/dyna/shil.h
@@ -234,7 +234,6 @@ struct shil_param
 	u32* reg_ptr(Sh4Context& ctx) const { verify(is_reg()); return GetRegPtr(ctx, _reg); }
 	u32  reg_offset() const { verify(is_reg()); return getRegOffset(_reg); }
 	s32  reg_nofs() const { verify(is_reg()); return (int)getRegOffset(_reg) - sizeof(Sh4Context); }
-	u32  reg_aofs() const { return -reg_nofs(); }
 
 	u32 imm_value() const { verify(is_imm()); return _imm; }
 

diff --git a/core/hw/sh4/interpr/sh4_fpu.cpp b/core/hw/sh4/interpr/sh4_fpu.cpp
@@ -34,8 +34,8 @@ sh4op(i1111_nnnn_mmmm_0000)
 	{
 		u32 n = GetN(op);
 		u32 m = GetM(op);
-		ctx->fr(n) += ctx->fr(m);
-		CHECK_FPU_32(ctx->fr(n));
+		ctx->fr[n] += ctx->fr[m];
+		CHECK_FPU_32(ctx->fr[n]);
 	}
 	else
 	{
@@ -53,8 +53,8 @@ sh4op(i1111_nnnn_mmmm_0001)
 		u32 n = GetN(op);
 		u32 m = GetM(op);
 
-		ctx->fr(n) -= ctx->fr(m);
-		CHECK_FPU_32(ctx->fr(n));
+		ctx->fr[n] -= ctx->fr[m];
+		CHECK_FPU_32(ctx->fr[n]);
 	}
 	else
 	{
@@ -70,8 +70,8 @@ sh4op(i1111_nnnn_mmmm_0010)
 	{
 		u32 n = GetN(op);
 		u32 m = GetM(op);
-		ctx->fr(n) *= ctx->fr(m);
-		CHECK_FPU_32(ctx->fr(n));
+		ctx->fr[n] *= ctx->fr[m];
+		CHECK_FPU_32(ctx->fr[n]);
 	}
 	else
 	{
@@ -88,9 +88,9 @@ sh4op(i1111_nnnn_mmmm_0011)
 		u32 n = GetN(op);
 		u32 m = GetM(op);
 
-		ctx->fr(n) /= ctx->fr(m);
+		ctx->fr[n] /= ctx->fr[m];
 
-		CHECK_FPU_32(ctx->fr(n));
+		CHECK_FPU_32(ctx->fr[n]);
 	}
 	else
 	{
@@ -107,7 +107,7 @@ sh4op(i1111_nnnn_mmmm_0100)
 		u32 n = GetN(op);
 		u32 m = GetM(op);
 
-		ctx->sr.T = ctx->fr(m) == ctx->fr(n);
+		ctx->sr.T = ctx->fr[m] == ctx->fr[n];
 	}
 	else
 	{
@@ -122,7 +122,7 @@ sh4op(i1111_nnnn_mmmm_0101)
 		u32 n = GetN(op);
 		u32 m = GetM(op);
 
-		if (ctx->fr(n) > ctx->fr(m))
+		if (ctx->fr[n] > ctx->fr[m])
 			ctx->sr.T = 1;
 		else
 			ctx->sr.T = 0;
@@ -281,7 +281,7 @@ sh4op(i1111_nnnn_mmmm_1100)
 	{
 		u32 n = GetN(op);
 		u32 m = GetM(op);
-		ctx->fr(n) = ctx->fr(m);
+		ctx->fr[n] = ctx->fr[m];
 	}
 	else
 	{
@@ -339,14 +339,14 @@ sh4op(i1111_nnn0_1111_1101)
 	#ifdef NATIVE_FSCA
 			float rads = pi_index / (65536.0f / 2) * float(M_PI);
 
-			ctx->fr(n + 0) = sinf(rads);
-			ctx->fr(n + 1) = cosf(rads);
+			ctx->fr[n + 0] = sinf(rads);
+			ctx->fr[n + 1] = cosf(rads);
 
-			CHECK_FPU_32(ctx->fr(n));
-			CHECK_FPU_32(ctx->fr(n + 1));
+			CHECK_FPU_32(ctx->fr[n]);
+			CHECK_FPU_32(ctx->fr[n + 1]);
 	#else
-			ctx->fr(n + 0) = sin_table[pi_index].u[0];
-			ctx->fr(n + 1) = sin_table[pi_index].u[1];
+			ctx->fr[n + 0] = sin_table[pi_index].u[0];
+			ctx->fr[n + 1] = sin_table[pi_index].u[1];
 	#endif
 
 	}
@@ -360,8 +360,8 @@ sh4op(i1111_nnnn_0111_1101)
 	u32 n = GetN(op);
 	if (ctx->fpscr.PR==0)
 	{
-		ctx->fr(n) = 1.f / sqrtf(ctx->fr(n));
-		CHECK_FPU_32(ctx->fr(n));
+		ctx->fr[n] = 1.f / sqrtf(ctx->fr[n]);
+		CHECK_FPU_32(ctx->fr[n]);
 	}
 	else
 		iNimp("FSRRA : Double precision mode");
@@ -404,12 +404,12 @@ sh4op(i1111_nnmm_1110_1101)
 	int m=(GetN(op)&0x3)<<2;
 	if (ctx->fpscr.PR == 0)
 	{
-		double idp = (double)ctx->fr(n + 0) * ctx->fr(m + 0);
-		idp += (double)ctx->fr(n + 1) * ctx->fr(m + 1);
-		idp += (double)ctx->fr(n + 2) * ctx->fr(m + 2);
-		idp += (double)ctx->fr(n + 3) * ctx->fr(m + 3);
+		double idp = (double)ctx->fr[n + 0] * ctx->fr[m + 0];
+		idp += (double)ctx->fr[n + 1] * ctx->fr[m + 1];
+		idp += (double)ctx->fr[n + 2] * ctx->fr[m + 2];
+		idp += (double)ctx->fr[n + 3] * ctx->fr[m + 3];
 
-		ctx->fr(n + 3) = fixNaN((float)idp);
+		ctx->fr[n + 3] = fixNaN((float)idp);
 	}
 	else
 	{
@@ -425,7 +425,7 @@ sh4op(i1111_nnnn_1000_1101)
 
 	u32 n = GetN(op);
 
-	ctx->fr(n) = 0.0f;
+	ctx->fr[n] = 0.0f;
 
 }
 
@@ -437,7 +437,7 @@ sh4op(i1111_nnnn_1001_1101)
 
 	u32 n = GetN(op);
 
-	ctx->fr(n) = 1.0f;
+	ctx->fr[n] = 1.0f;
 }
 
 //flds <FREG_N>,FPUL
@@ -461,7 +461,7 @@ sh4op(i1111_nnnn_0010_1101)
 	if (ctx->fpscr.PR == 0)
 	{
 		u32 n = GetN(op);
-		ctx->fr(n) = (float)(int)ctx->fpul;
+		ctx->fr[n] = (float)(int)ctx->fpul;
 	}
 	else
 	{
@@ -503,8 +503,8 @@ sh4op(i1111_nnnn_0110_1101)
 	{
 		u32 n = GetN(op);
 
-		ctx->fr(n) = sqrtf(ctx->fr(n));
-		CHECK_FPU_32(ctx->fr(n));
+		ctx->fr[n] = sqrtf(ctx->fr[n]);
+		CHECK_FPU_32(ctx->fr[n]);
 	}
 	else
 	{
@@ -519,14 +519,14 @@ sh4op(i1111_nnnn_0011_1101)
 	if (ctx->fpscr.PR == 0)
 	{
 		u32 n = GetN(op);
-		ctx->fpul = (u32)(s32)ctx->fr(n);
+		ctx->fpul = (u32)(s32)ctx->fr[n];
 
 		if ((s32)ctx->fpul > 0x7fffff80)
 			ctx->fpul = 0x7fffffff;
 		// Intel CPUs convert out of range float numbers to 0x80000000. Manually set the correct sign
-		else if (ctx->fpul == 0x80000000 && ctx->fr(n) == ctx->fr(n))
+		else if (ctx->fpul == 0x80000000 && ctx->fr[n] == ctx->fr[n])
 		{
-			if (*(int *)&ctx->fr(n) > 0) // Using integer math to avoid issues with Inf and NaN
+			if (*(int *)&ctx->fr[n] > 0) // Using integer math to avoid issues with Inf and NaN
 				ctx->fpul--;
 		}
 	}
@@ -554,8 +554,8 @@ sh4op(i1111_nnnn_mmmm_1110)
 		u32 n = GetN(op);
 		u32 m = GetM(op);
 
-		ctx->fr(n) = std::fma(ctx->fr(0), ctx->fr(m), ctx->fr(n));
-		CHECK_FPU_32(ctx->fr(n));
+		ctx->fr[n] = std::fma(ctx->fr[0], ctx->fr[m], ctx->fr[n]);
+		CHECK_FPU_32(ctx->fr[n]);
 	}
 	else
 	{
@@ -578,30 +578,30 @@ sh4op(i1111_nn01_1111_1101)
 
 	if (ctx->fpscr.PR==0)
 	{
-		double v1 = (double)ctx->xf(0)  * ctx->fr(n + 0) +
-					(double)ctx->xf(4)  * ctx->fr(n + 1) +
-					(double)ctx->xf(8)  * ctx->fr(n + 2) +
-					(double)ctx->xf(12) * ctx->fr(n + 3);
-
-		double v2 = (double)ctx->xf(1)  * ctx->fr(n + 0) +
-					(double)ctx->xf(5)  * ctx->fr(n + 1) +
-					(double)ctx->xf(9)  * ctx->fr(n + 2) +
-					(double)ctx->xf(13) * ctx->fr(n + 3);
-
-		double v3 = (double)ctx->xf(2)  * ctx->fr(n + 0) +
-					(double)ctx->xf(6)  * ctx->fr(n + 1) +
-					(double)ctx->xf(10) * ctx->fr(n + 2) +
-					(double)ctx->xf(14) * ctx->fr(n + 3);
-
-		double v4 = (double)ctx->xf(3)  * ctx->fr(n + 0) +
-					(double)ctx->xf(7)  * ctx->fr(n + 1) +
-					(double)ctx->xf(11) * ctx->fr(n + 2) +
-					(double)ctx->xf(15) * ctx->fr(n + 3);
-
-		ctx->fr(n + 0) = fixNaN((float)v1);
-		ctx->fr(n + 1) = fixNaN((float)v2);
-		ctx->fr(n + 2) = fixNaN((float)v3);
-		ctx->fr(n + 3) = fixNaN((float)v4);
+		double v1 = (double)ctx->xf[0]  * ctx->fr[n + 0] +
+					(double)ctx->xf[4]  * ctx->fr[n + 1] +
+					(double)ctx->xf[8]  * ctx->fr[n + 2] +
+					(double)ctx->xf[12] * ctx->fr[n + 3];
+
+		double v2 = (double)ctx->xf[1]  * ctx->fr[n + 0] +
+					(double)ctx->xf[5]  * ctx->fr[n + 1] +
+					(double)ctx->xf[9]  * ctx->fr[n + 2] +
+					(double)ctx->xf[13] * ctx->fr[n + 3];
+
+		double v3 = (double)ctx->xf[2]  * ctx->fr[n + 0] +
+					(double)ctx->xf[6]  * ctx->fr[n + 1] +
+					(double)ctx->xf[10] * ctx->fr[n + 2] +
+					(double)ctx->xf[14] * ctx->fr[n + 3];
+
+		double v4 = (double)ctx->xf[3]  * ctx->fr[n + 0] +
+					(double)ctx->xf[7]  * ctx->fr[n + 1] +
+					(double)ctx->xf[11] * ctx->fr[n + 2] +
+					(double)ctx->xf[15] * ctx->fr[n + 3];
+
+		ctx->fr[n + 0] = fixNaN((float)v1);
+		ctx->fr[n + 1] = fixNaN((float)v2);
+		ctx->fr[n + 2] = fixNaN((float)v3);
+		ctx->fr[n + 3] = fixNaN((float)v4);
 	}
 	else
 	{

diff --git a/core/hw/sh4/sh4_core_regs.cpp b/core/hw/sh4/sh4_core_regs.cpp
@@ -133,7 +133,7 @@ void DYNACALL Sh4Context::UpdateFPSCR(Sh4Context *ctx)
 {
 	if (ctx->fpscr.FR != ctx->old_fpscr.FR)
 		// FPU bank change
-		std::swap((f32 (&)[16])ctx->xffr, *(f32 (*)[16])&ctx->xffr[16]);
+		std::swap(ctx->xf, ctx->fr);
 
 	ctx->old_fpscr = ctx->fpscr;
 	setHostRoundingMode(ctx->fpscr.RM, ctx->fpscr.DN);

diff --git a/core/hw/sh4/sh4_if.h b/core/hw/sh4/sh4_if.h
@@ -103,7 +103,7 @@ struct fpscr_t
 class Sh4Executor
 {
 public:
-	virtual ~Sh4Executor() {}
+	virtual ~Sh4Executor() = default;
 	virtual void Run() = 0;
 	virtual void Start() = 0;
 	virtual void Stop() = 0;
@@ -131,7 +131,8 @@ struct alignas(64) Sh4Context
 		{
 			SQBuffer sq_buffer[2];
 
-			f32 xffr[32];
+			float xf[16];
+			float fr[16];
 			u32 r[16];
 
 			union
@@ -170,44 +171,36 @@ struct alignas(64) Sh4Context
 		u64 raw[64];
 	};
 
-	f32& fr(int idx) {
-		assert(idx >= 0 && idx <= 15);
-		return xffr[idx + 16];
-	}
-	f32& xf(int idx) {
-		assert(idx >= 0 && idx <= 15);
-		return xffr[idx];
-	}
 	u32& fr_hex(int idx) {
 		assert(idx >= 0 && idx <= 15);
-		return reinterpret_cast<u32&>(fr(idx));
+		return reinterpret_cast<u32&>(fr[idx]);
 	}
 	u64& dr_hex(int idx) {
 		assert(idx >= 0 && idx <= 7);
-		return *reinterpret_cast<u64 *>(&fr(idx * 2));
+		return *reinterpret_cast<u64 *>(&fr[idx * 2]);
 	}
 	u64& xd_hex(int idx) {
 		assert(idx >= 0 && idx <= 7);
-		return *reinterpret_cast<u64 *>(&xf(idx * 2));
+		return *reinterpret_cast<u64 *>(&xf[idx * 2]);
 	}
 
-	f64 getDR(u32 n)
+	double getDR(u32 n)
 	{
 		assert(n <= 7);
 		DoubleReg t;
-		t.sgl[1] = fr(n * 2);
-		t.sgl[0] = fr(n * 2 + 1);
+		t.sgl[1] = fr[n * 2];
+		t.sgl[0] = fr[n * 2 + 1];
 
 		return t.dbl;
 	}
 
-	void setDR(u32 n, f64 val)
+	void setDR(u32 n, double val)
 	{
 		assert(n <= 7);
 		DoubleReg t;
 		t.dbl = val;
-		fr(n * 2) = t.sgl[1];
-		fr(n * 2 + 1) = t.sgl[0];
+		fr[n * 2] = t.sgl[1];
+		fr[n * 2 + 1] = t.sgl[0];
 	}
 
 	static void DYNACALL UpdateFPSCR(Sh4Context *ctx);
@@ -216,8 +209,8 @@ struct alignas(64) Sh4Context
 private:
 	union DoubleReg
 	{
-		f64 dbl;
-		f32 sgl[2];
+		double dbl;
+		float sgl[2];
 	};
 };
 static_assert(sizeof(Sh4Context) == 512, "Invalid Sh4Context size");
@@ -230,7 +223,8 @@ static_assert(sizeof(Sh4Context) == 512, "Invalid Sh4Context size");
 // want to be an i8r4 value that can be substracted in one op (such as 0x4100000)
 #define FPCB_PAD 0x100000
 #else
-#define FPCB_PAD PAGE_SIZE
+// For other systems we could use PAGE_SIZE, except on windows that has a 64 KB granularity for memory mapping
+#define FPCB_PAD 64_KB
 #endif
 struct alignas(PAGE_SIZE) Sh4RCB
 {

diff --git a/core/hw/sh4/sh4_opcode_list.cpp b/core/hw/sh4/sh4_opcode_list.cpp
@@ -461,25 +461,25 @@ std::string disassemble_op(const char* tx1, u32 pc, u16 opcode)
 			}
 			else if (strcmp2(tx1,"FREG_N>"))
 			{
-				sprintf(buf,"FR%d=%f ", GetN(opcode), p_sh4rcb->cntx.xffr[16 + GetN(opcode)]);
+				sprintf(buf,"FR%d=%f ", GetN(opcode), p_sh4rcb->cntx.fr[GetN(opcode)]);
 				regs += buf;
 				sprintf(buf,"FR%d",GetN(opcode));
 			}
 			else if (strcmp2(tx1,"FREG_M>"))
 			{
-				sprintf(buf,"FR%d=%f ", GetM(opcode), p_sh4rcb->cntx.xffr[16 + GetM(opcode)]);
+				sprintf(buf,"FR%d=%f ", GetM(opcode), p_sh4rcb->cntx.fr[GetM(opcode)]);
 				regs += buf;
 				sprintf(buf,"FR%d",GetM(opcode));
 			}
 			else if (strcmp2(tx1, "FREG_M_SD_F>"))
 			{
-				sprintf(buf,"FR%d=%f ", GetM(opcode), p_sh4rcb->cntx.xffr[16 + GetM(opcode)]);
+				sprintf(buf,"FR%d=%f ", GetM(opcode), p_sh4rcb->cntx.fr[GetM(opcode)]);
 				regs += buf;
 				sprintf(buf,"FR%d", GetM(opcode));
 			}
 			else if (strcmp2(tx1,"FREG_0>"))
 			{
-				sprintf(buf,"FR0=%f ", p_sh4rcb->cntx.xffr[16]);
+				sprintf(buf,"FR0=%f ", p_sh4rcb->cntx.fr[0]);
 				regs += buf;
 				sprintf(buf,"FR0");
 			}