diff --git a/core/debug/debug_agent.h b/core/debug/debug_agent.h index 51b0ccf74..d2e03310c 100644 --- a/core/debug/debug_agent.h +++ b/core/debug/debug_agent.h @@ -122,9 +122,9 @@ class DebugAgent for (u32 i = 0; i < Sh4RegList.size(); i++) { if (Sh4RegList[i] == reg_sr_status) - allregs[i] = p_sh4rcb->cntx.sr.getFull(); + allregs[i] = Sh4cntx.sr.getFull(); else if (Sh4RegList[i] != NoReg) - allregs[i] = *GetRegPtr(Sh4RegList[i]); + allregs[i] = *GetRegPtr(Sh4cntx, Sh4RegList[i]); } *regs = &allregs[0]; return allregs.size(); @@ -134,7 +134,7 @@ class DebugAgent { for (u32 i = 0; i < Sh4RegList.size(); i++) if (Sh4RegList[i] != NoReg) - *GetRegPtr(Sh4RegList[i]) = regs[i]; + *GetRegPtr(Sh4cntx, Sh4RegList[i]) = regs[i]; } u32 readReg(u32 regNum) @@ -143,9 +143,9 @@ class DebugAgent return 0; Sh4RegType reg = Sh4RegList[regNum]; if (reg == reg_sr_status) - return p_sh4rcb->cntx.sr.getFull(); + return Sh4cntx.sr.getFull(); if (reg != NoReg) - return *GetRegPtr(reg); + return *GetRegPtr(Sh4cntx, reg); return 0; } void writeReg(u32 regNum, u32 value) @@ -154,9 +154,9 @@ class DebugAgent return; Sh4RegType reg = Sh4RegList[regNum]; if (reg == reg_sr_status) - p_sh4rcb->cntx.sr.setFull(value); + Sh4cntx.sr.setFull(value); else if (reg != NoReg) - *GetRegPtr(reg) = value; + *GetRegPtr(Sh4cntx, reg) = value; } const u8 *readMem(u32 addr, u32 len) diff --git a/core/hw/pvr/Renderer_if.cpp b/core/hw/pvr/Renderer_if.cpp index 55289b3ac..4e987f8e5 100644 --- a/core/hw/pvr/Renderer_if.cpp +++ b/core/hw/pvr/Renderer_if.cpp @@ -7,6 +7,7 @@ #include "serialize.h" #include "hw/holly/holly_intc.h" #include "hw/sh4/sh4_if.h" +#include "hw/sh4/sh4_core.h" #include "profiler/fc_profiler.h" #include "network/ggpo.h" @@ -90,15 +91,12 @@ class PvrMessageQueue } else { - void setDefaultRoundingMode(); - void RestoreHostRoundingMode(); - setDefaultRoundingMode(); // drain the queue after switching to !threaded rendering while (!queue.empty()) waitAndExecute(); execute(msg); - 
RestoreHostRoundingMode(); + Sh4cntx.restoreHostRoundingMode(); } } diff --git a/core/hw/sh4/dyna/blockmanager.cpp b/core/hw/sh4/dyna/blockmanager.cpp index d53700a49..ea8cce072 100644 --- a/core/hw/sh4/dyna/blockmanager.cpp +++ b/core/hw/sh4/dyna/blockmanager.cpp @@ -40,7 +40,7 @@ static bm_Map blkmap; u32 protected_blocks; u32 unprotected_blocks; -#define FPCA(x) ((DynarecCodeEntryPtr&)sh4rcb.fpcb[(x>>1)&FPCB_MASK]) +#define FPCA(x) ((DynarecCodeEntryPtr&)p_sh4rcb->fpcb[(x>>1)&FPCB_MASK]) // addr must be a physical address // This returns an executable address diff --git a/core/hw/sh4/dyna/driver.cpp b/core/hw/sh4/dyna/driver.cpp index 7981513ce..78009e77b 100644 --- a/core/hw/sh4/dyna/driver.cpp +++ b/core/hw/sh4/dyna/driver.cpp @@ -101,10 +101,10 @@ void Sh4Recompiler::ResetCache() void Sh4Recompiler::Run() { - RestoreHostRoundingMode(); + getContext()->restoreHostRoundingMode(); - u8 *sh4_dyna_rcb = (u8 *)getContext() + sizeof(Sh4cntx); - INFO_LOG(DYNAREC, "cntx // fpcb offset: %td // pc offset: %td // pc %08X", (u8*)&sh4rcb.fpcb - sh4_dyna_rcb, + u8 *sh4_dyna_rcb = (u8 *)getContext() + sizeof(Sh4Context); + INFO_LOG(DYNAREC, "cntx // fpcb offset: %td // pc offset: %td // pc %08X", (u8*)p_sh4rcb->fpcb - sh4_dyna_rcb, (u8*)&getContext()->pc - sh4_dyna_rcb, getContext()->pc); sh4Dynarec->mainloop(sh4_dyna_rcb); diff --git a/core/hw/sh4/dyna/shil.cpp b/core/hw/sh4/dyna/shil.cpp index c044173c0..3e3131c99 100644 --- a/core/hw/sh4/dyna/shil.cpp +++ b/core/hw/sh4/dyna/shil.cpp @@ -58,9 +58,9 @@ u32 getRegOffset(Sh4RegType reg) } } -u32* GetRegPtr(u32 reg) +u32* GetRegPtr(Sh4Context& ctx, u32 reg) { - return (u32 *)((u8 *)&p_sh4rcb->cntx + getRegOffset((Sh4RegType)reg)); + return (u32 *)((u8 *)&ctx + getRegOffset((Sh4RegType)reg)); } std::string name_reg(Sh4RegType reg) diff --git a/core/hw/sh4/dyna/shil.h b/core/hw/sh4/dyna/shil.h index cec8498c6..a53cf0adb 100644 --- a/core/hw/sh4/dyna/shil.h +++ b/core/hw/sh4/dyna/shil.h @@ -128,7 +128,7 @@ enum Sh4RegType }; u32 
getRegOffset(Sh4RegType reg); -u32* GetRegPtr(u32 reg); +u32* GetRegPtr(Sh4Context& ctx, u32 reg); enum shil_param_type { @@ -231,7 +231,8 @@ struct shil_param bool is_imm_s8() const { return is_imm() && (int8_t)_imm == (int32_t)_imm; } - u32* reg_ptr() const { verify(is_reg()); return GetRegPtr(_reg); } + u32* reg_ptr(Sh4Context& ctx) const { verify(is_reg()); return GetRegPtr(ctx, _reg); } + u32 reg_offset() const { verify(is_reg()); return getRegOffset(_reg); } s32 reg_nofs() const { verify(is_reg()); return (int)getRegOffset(_reg) - sizeof(Sh4Context); } u32 reg_aofs() const { return -reg_nofs(); } diff --git a/core/hw/sh4/interpr/sh4_interpreter.cpp b/core/hw/sh4/interpr/sh4_interpreter.cpp index 3e45e9d88..6b826463e 100644 --- a/core/hw/sh4/interpr/sh4_interpreter.cpp +++ b/core/hw/sh4/interpr/sh4_interpreter.cpp @@ -41,7 +41,7 @@ u16 Sh4Interpreter::ReadNexOp() void Sh4Interpreter::Run() { Instance = this; - RestoreHostRoundingMode(); + ctx->restoreHostRoundingMode(); try { do @@ -83,7 +83,7 @@ void Sh4Interpreter::Step() verify(!ctx->CpuRunning); Instance = this; - RestoreHostRoundingMode(); + ctx->restoreHostRoundingMode(); try { u32 op = ReadNexOp(); ExecuteOpcode(op); diff --git a/core/hw/sh4/sh4_core.h b/core/hw/sh4/sh4_core.h index 4e9902c39..868616dc3 100644 --- a/core/hw/sh4/sh4_core.h +++ b/core/hw/sh4/sh4_core.h @@ -5,7 +5,6 @@ int UpdateSystem_INTC(); bool UpdateSR(); -void RestoreHostRoundingMode(); void setDefaultRoundingMode(); struct SH4ThrownException diff --git a/core/hw/sh4/sh4_core_regs.cpp b/core/hw/sh4/sh4_core_regs.cpp index 365ddbeaf..0ff120f1c 100644 --- a/core/hw/sh4/sh4_core_regs.cpp +++ b/core/hw/sh4/sh4_core_regs.cpp @@ -139,11 +139,11 @@ void DYNACALL Sh4Context::UpdateFPSCR(Sh4Context *ctx) setHostRoundingMode(ctx->fpscr.RM, ctx->fpscr.DN); } -void RestoreHostRoundingMode() +void Sh4Context::restoreHostRoundingMode() { old_rm = 0xFF; old_dn = 0xFF; - setHostRoundingMode(p_sh4rcb->cntx.fpscr.RM, p_sh4rcb->cntx.fpscr.DN); + 
setHostRoundingMode(fpscr.RM, fpscr.DN); } void setDefaultRoundingMode() diff --git a/core/hw/sh4/sh4_if.h b/core/hw/sh4/sh4_if.h index a06a37a90..32b5d682c 100644 --- a/core/hw/sh4/sh4_if.h +++ b/core/hw/sh4/sh4_if.h @@ -211,6 +211,7 @@ struct alignas(64) Sh4Context } static void DYNACALL UpdateFPSCR(Sh4Context *ctx); + void restoreHostRoundingMode(); private: union DoubleReg @@ -240,8 +241,7 @@ struct alignas(PAGE_SIZE) Sh4RCB static_assert((sizeof(Sh4RCB) % PAGE_SIZE) == 0, "sizeof(Sh4RCB) not multiple of PAGE_SIZE"); extern Sh4RCB* p_sh4rcb; -#define sh4rcb (*p_sh4rcb) -#define Sh4cntx (sh4rcb.cntx) +#define Sh4cntx (p_sh4rcb->cntx) //Get an interface to sh4 interpreter Sh4Executor *Get_Sh4Interpreter(); diff --git a/core/rec-ARM/rec_arm.cpp b/core/rec-ARM/rec_arm.cpp index 9203f110e..727d0e5fd 100644 --- a/core/rec-ARM/rec_arm.cpp +++ b/core/rec-ARM/rec_arm.cpp @@ -613,7 +613,7 @@ void Arm32Assembler::canonCall(const shil_opcode *op, void *function) CC_PS& param = CC_pars[i]; if (param.type == CPT_ptr) { - Mov(rd, (u32)param.par->reg_ptr()); + Mov(rd, (u32)param.par->reg_ptr(sh4ctx)); } else if (param.type == CPT_sh4ctx) { diff --git a/core/rec-ARM64/rec_arm64.cpp b/core/rec-ARM64/rec_arm64.cpp index 8bf27b8e8..5d5ac174e 100644 --- a/core/rec-ARM64/rec_arm64.cpp +++ b/core/rec-ARM64/rec_arm64.cpp @@ -212,7 +212,7 @@ class Arm64Assembler : public MacroAssembler Add(*ret_reg, regalloc.MapRegister(op.rs1), op.rs3._imm); else { - Ldr(*ret_reg, sh4_context_mem_operand(op.rs1.reg_ptr())); + Ldr(*ret_reg, sh4_context_mem_operand(op.rs1._reg)); Add(*ret_reg, *ret_reg, op.rs3._imm); } } @@ -222,8 +222,8 @@ class Arm64Assembler : public MacroAssembler Add(*ret_reg, regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs3)); else { - Ldr(*ret_reg, sh4_context_mem_operand(op.rs1.reg_ptr())); - Ldr(w8, sh4_context_mem_operand(op.rs3.reg_ptr())); + Ldr(*ret_reg, sh4_context_mem_operand(op.rs1._reg)); + Ldr(w8, sh4_context_mem_operand(op.rs3._reg)); Add(*ret_reg, *ret_reg, 
w8); } } @@ -242,7 +242,7 @@ class Arm64Assembler : public MacroAssembler } else { - Ldr(*ret_reg, sh4_context_mem_operand(op.rs1.reg_ptr())); + Ldr(*ret_reg, sh4_context_mem_operand(op.rs1._reg)); } } else @@ -782,7 +782,7 @@ class Arm64Assembler : public MacroAssembler Lsr(w1, regalloc.MapRegister(op.rs1), 26); else { - Ldr(w0, sh4_context_mem_operand(op.rs1.reg_ptr())); + Ldr(w0, sh4_context_mem_operand(op.rs1._reg)); Lsr(w1, w0, 26); } Cmp(w1, 0x38); @@ -905,17 +905,17 @@ class Arm64Assembler : public MacroAssembler else { Ldr(x2, MemOperand(x1)); - Str(x2, sh4_context_mem_operand(op.rd.reg_ptr())); + Str(x2, sh4_context_mem_operand(op.rd._reg)); } break; /* fall back to the canonical implementations for better precision case shop_fipr: - Add(x9, x28, sh4_context_mem_operand(op.rs1.reg_ptr()).GetOffset()); + Add(x9, x28, op.rs1.reg_offset()); Ld1(v0.V4S(), MemOperand(x9)); if (op.rs1._reg != op.rs2._reg) { - Add(x9, x28, sh4_context_mem_operand(op.rs2.reg_ptr()).GetOffset()); + Add(x9, x28, op.rs2.reg_offset()); Ld1(v1.V4S(), MemOperand(x9)); Fmul(v0.V4S(), v0.V4S(), v1.V4S()); } @@ -926,9 +926,9 @@ class Arm64Assembler : public MacroAssembler break; case shop_ftrv: - Add(x9, x28, sh4_context_mem_operand(op.rs1.reg_ptr()).GetOffset()); + Add(x9, x28, op.rs1.reg_offset()); Ld1(v0.V4S(), MemOperand(x9)); - Add(x9, x28, sh4_context_mem_operand(op.rs2.reg_ptr()).GetOffset()); + Add(x9, x28, op.rs2.reg_offset()); Ld1(v1.V4S(), MemOperand(x9, 16, PostIndex)); Ld1(v2.V4S(), MemOperand(x9, 16, PostIndex)); Ld1(v3.V4S(), MemOperand(x9, 16, PostIndex)); @@ -937,14 +937,14 @@ class Arm64Assembler : public MacroAssembler Fmla(v5.V4S(), v2.V4S(), s0, 1); Fmla(v5.V4S(), v3.V4S(), s0, 2); Fmla(v5.V4S(), v4.V4S(), s0, 3); - Add(x9, x28, sh4_context_mem_operand(op.rd.reg_ptr()).GetOffset()); + Add(x9, x28, op.rd.reg_offset()); St1(v5.V4S(), MemOperand(x9)); break; */ case shop_frswap: - Add(x9, x28, sh4_context_mem_operand(op.rs1.reg_ptr()).GetOffset()); - Add(x10, x28, 
sh4_context_mem_operand(op.rd.reg_ptr()).GetOffset()); + Add(x9, x28, op.rs1.reg_offset()); + Add(x10, x28, op.rd.reg_offset()); Ld4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x9)); Ld4(v4.V2D(), v5.V2D(), v6.V2D(), v7.V2D(), MemOperand(x10)); St4(v4.V2D(), v5.V2D(), v6.V2D(), v7.V2D(), MemOperand(x9)); @@ -1042,7 +1042,7 @@ class Arm64Assembler : public MacroAssembler case CPT_ptr: verify(prm.is_reg()); // push the ptr itself - Mov(*call_regs64[regused++], reinterpret_cast<uintptr_t>(prm.reg_ptr())); + Mov(*call_regs64[regused++], reinterpret_cast<uintptr_t>(prm.reg_ptr(sh4ctx))); break; case CPT_sh4ctx: @@ -1064,8 +1064,8 @@ if (ccParam.type == CPT_ptr && prm.count() == 2 && regalloc.IsAllocf(prm) && (op->rd._reg == prm._reg || op->rd2._reg == prm._reg)) { // fsca rd param is a pointer to a 64-bit reg so reload the regs if allocated - Ldr(regalloc.MapVRegister(prm, 0), sh4_context_mem_operand(GetRegPtr(prm._reg))); - Ldr(regalloc.MapVRegister(prm, 1), sh4_context_mem_operand(GetRegPtr(prm._reg + 1))); + Ldr(regalloc.MapVRegister(prm, 0), sh4_context_mem_operand(prm._reg)); + Ldr(regalloc.MapVRegister(prm, 1), sh4_context_mem_operand((Sh4RegType)(prm._reg + 1))); } } } @@ -1076,6 +1076,12 @@ verify((offset & 3) == 0 && offset <= 16380); // FIXME 64-bit regs need multiple of 8 up to 32760 return MemOperand(x28, offset); } + MemOperand sh4_context_mem_operand(Sh4RegType reg) + { + u32 offset = getRegOffset(reg); + verify((offset & 3) == 0 && offset <= 16380); // FIXME 64-bit regs need multiple of 8 up to 32760 + return MemOperand(x28, offset); + } void GenReadMemorySlow(u32 size) { @@ -1785,9 +1791,9 @@ class Arm64Assembler : public MacroAssembler break; } if (op.size == 8) - Str(x1, sh4_context_mem_operand(op.rd.reg_ptr())); + Str(x1, sh4_context_mem_operand(op.rd._reg)); else - Str(w1, sh4_context_mem_operand(op.rd.reg_ptr())); + Str(w1, sh4_context_mem_operand(op.rd._reg)); } } else @@ 
-1801,14 +1807,14 @@ class Arm64Assembler : public MacroAssembler if (regalloc.IsAllocf(op.rd)) Fmov(regalloc.MapVRegister(op.rd, 0), w0); else - Str(w0, sh4_context_mem_operand(op.rd.reg_ptr())); + Str(w0, sh4_context_mem_operand(op.rd._reg)); Mov(w0, addr + 4); GenCallRuntime((void (*)())ptr); if (regalloc.IsAllocf(op.rd)) Fmov(regalloc.MapVRegister(op.rd, 1), w0); else - Str(w0, sh4_context_mem_operand((u8*)op.rd.reg_ptr() + 4)); + Str(w0, sh4_context_mem_operand((Sh4RegType)(op.rd._reg + 1))); } else { @@ -2100,14 +2106,14 @@ class Arm64Assembler : public MacroAssembler { if (param.is_r64f() && !regalloc.IsAllocf(param)) { - Ldr(reg, sh4_context_mem_operand(param.reg_ptr())); + Ldr(reg, sh4_context_mem_operand(param._reg)); } else if (param.is_r32f() || param.is_r64f()) { if (regalloc.IsAllocf(param)) Fmov(reg.W(), regalloc.MapVRegister(param, 0)); else - Ldr(reg.W(), sh4_context_mem_operand(param.reg_ptr())); + Ldr(reg.W(), sh4_context_mem_operand(param._reg)); if (param.is_r64f()) { Fmov(w15, regalloc.MapVRegister(param, 1)); @@ -2119,7 +2125,7 @@ class Arm64Assembler : public MacroAssembler if (regalloc.IsAllocg(param)) Mov(reg.W(), regalloc.MapRegister(param)); else - Ldr(reg.W(), sh4_context_mem_operand(param.reg_ptr())); + Ldr(reg.W(), sh4_context_mem_operand(param._reg)); } } else @@ -2141,7 +2147,7 @@ class Arm64Assembler : public MacroAssembler } else { - Str((const Register&)reg, sh4_context_mem_operand(param.reg_ptr())); + Str((const Register&)reg, sh4_context_mem_operand(param._reg)); } } else if (regalloc.IsAllocg(param)) @@ -2160,7 +2166,7 @@ class Arm64Assembler : public MacroAssembler } else { - Str(reg, sh4_context_mem_operand(param.reg_ptr())); + Str(reg, sh4_context_mem_operand(param._reg)); } } @@ -2389,18 +2395,18 @@ u32 DynaRBI::Relink() void Arm64RegAlloc::Preload(u32 reg, eReg nreg) { - assembler->Ldr(Register(nreg, 32), assembler->sh4_context_mem_operand(GetRegPtr(reg))); + assembler->Ldr(Register(nreg, 32), 
assembler->sh4_context_mem_operand((Sh4RegType)reg)); } void Arm64RegAlloc::Writeback(u32 reg, eReg nreg) { - assembler->Str(Register(nreg, 32), assembler->sh4_context_mem_operand(GetRegPtr(reg))); + assembler->Str(Register(nreg, 32), assembler->sh4_context_mem_operand((Sh4RegType)reg)); } void Arm64RegAlloc::Preload_FPU(u32 reg, eFReg nreg) { - assembler->Ldr(VRegister(nreg, 32), assembler->sh4_context_mem_operand(GetRegPtr(reg))); + assembler->Ldr(VRegister(nreg, 32), assembler->sh4_context_mem_operand((Sh4RegType)reg)); } void Arm64RegAlloc::Writeback_FPU(u32 reg, eFReg nreg) { - assembler->Str(VRegister(nreg, 32), assembler->sh4_context_mem_operand(GetRegPtr(reg))); + assembler->Str(VRegister(nreg, 32), assembler->sh4_context_mem_operand((Sh4RegType)reg)); } #endif // FEAT_SHREC == DYNAREC_JIT diff --git a/core/rec-x64/rec_x64.cpp b/core/rec-x64/rec_x64.cpp index 2941f1982..08e5df24e 100644 --- a/core/rec-x64/rec_x64.cpp +++ b/core/rec-x64/rec_x64.cpp @@ -64,7 +64,7 @@ static void ngen_blockcheckfail(u32 pc) { rdv_BlockCheckFail(pc); } -static void handle_sh4_exception(SH4ThrownException& ex, u32 pc) +static void handle_sh4_exception(Sh4Context *ctx, SH4ThrownException& ex, u32 pc) { if (pc & 1) { @@ -73,7 +73,7 @@ static void handle_sh4_exception(SH4ThrownException& ex, u32 pc) pc--; } Do_Exception(pc, ex.expEvn); - p_sh4rcb->cntx.cycle_counter += 4; // probably more is needed + ctx->cycle_counter += 4; // probably more is needed handleException(); } @@ -82,7 +82,7 @@ static void interpreter_fallback(Sh4Context *ctx, u16 op, OpCallFP *oph, u32 pc) try { oph(ctx, op); } catch (SH4ThrownException& ex) { - handle_sh4_exception(ex, pc); + handle_sh4_exception(ctx, ex, pc); } } @@ -91,7 +91,7 @@ static void do_sqw_mmu_no_ex(u32 addr, Sh4Context *ctx, u32 pc) try { ctx->doSqWrite(addr, ctx); } catch (SH4ThrownException& ex) { - handle_sh4_exception(ex, pc); + handle_sh4_exception(ctx, ex, pc); } } @@ -187,9 +187,9 @@ class BlockCompiler : public BaseXbyakRec 
verify(op.rs1.is_r64f()); #if ALLOC_F64 == false - mov(rax, (uintptr_t)op.rs1.reg_ptr()); + mov(rax, (uintptr_t)op.rs1.reg_ptr(sh4ctx)); mov(rax, qword[rax]); - mov(rcx, (uintptr_t)op.rd.reg_ptr()); + mov(rcx, (uintptr_t)op.rd.reg_ptr(sh4ctx)); mov(qword[rcx], rax); #else Xbyak::Xmm rd0 = regalloc.MapXRegister(op.rd, 0); @@ -226,7 +226,7 @@ class BlockCompiler : public BaseXbyakRec add(call_regs[0], regalloc.MapRegister(op.rs3)); else { - mov(rax, (uintptr_t)op.rs3.reg_ptr()); + mov(rax, (uintptr_t)op.rs3.reg_ptr(sh4ctx)); add(call_regs[0], dword[rax]); } } @@ -238,7 +238,7 @@ class BlockCompiler : public BaseXbyakRec #if ALLOC_F64 == false if (size == MemSize::S64) { - mov(rcx, (uintptr_t)op.rd.reg_ptr()); + mov(rcx, (uintptr_t)op.rd.reg_ptr(sh4ctx)); mov(qword[rcx], rax); } else @@ -263,7 +263,7 @@ class BlockCompiler : public BaseXbyakRec add(call_regs[0], regalloc.MapRegister(op.rs3)); else { - mov(rax, (uintptr_t)op.rs3.reg_ptr()); + mov(rax, (uintptr_t)op.rs3.reg_ptr(sh4ctx)); add(call_regs[0], dword[rax]); } } @@ -272,7 +272,7 @@ class BlockCompiler : public BaseXbyakRec #if ALLOC_F64 == false if (op.size == 8) { - mov(rax, (uintptr_t)op.rs2.reg_ptr()); + mov(rax, (uintptr_t)op.rs2.reg_ptr(sh4ctx)); mov(call_regs64[1], qword[rax]); } else @@ -361,7 +361,7 @@ class BlockCompiler : public BaseXbyakRec } else { - mov(rax, (uintptr_t)op.rs1.reg_ptr()); + mov(rax, (uintptr_t)op.rs1.reg_ptr(sh4ctx)); mov(eax, dword[rax]); rn = eax; } @@ -390,8 +390,8 @@ class BlockCompiler : public BaseXbyakRec break; case shop_frswap: - mov(rax, (uintptr_t)op.rs1.reg_ptr()); - mov(rcx, (uintptr_t)op.rd.reg_ptr()); + mov(rax, (uintptr_t)op.rs1.reg_ptr(sh4ctx)); + mov(rcx, (uintptr_t)op.rd.reg_ptr(sh4ctx)); if (cpu.has(Cpu::tAVX512F)) { vmovaps(zmm0, zword[rax]); @@ -608,7 +608,7 @@ class BlockCompiler : public BaseXbyakRec //push the ptr itself case CPT_ptr: verify(prm.is_reg()); - mov(call_regs64[regused++], (size_t)prm.reg_ptr()); + mov(call_regs64[regused++], 
(size_t)prm.reg_ptr(sh4ctx)); break; case CPT_sh4ctx: @@ -627,9 +627,9 @@ class BlockCompiler : public BaseXbyakRec const shil_param& prm = *ccParam.prm; if (ccParam.type == CPT_ptr && prm.count() == 2 && regalloc.IsAllocf(prm) && (op.rd._reg == prm._reg || op.rd2._reg == prm._reg)) { // fsca rd param is a pointer to a 64-bit reg so reload the regs if allocated - mov(rax, (size_t)GetRegPtr(prm._reg)); + mov(rax, (size_t)GetRegPtr(sh4ctx, prm._reg)); movss(regalloc.MapXRegister(prm, 0), dword[rax]); - mov(rax, (size_t)GetRegPtr(prm._reg + 1)); + mov(rax, (size_t)GetRegPtr(sh4ctx, prm._reg + 1)); movss(regalloc.MapXRegister(prm, 1), dword[rax]); } } @@ -638,22 +638,22 @@ class BlockCompiler : public BaseXbyakRec void RegPreload(u32 reg, Xbyak::Operand::Code nreg) { - mov(rax, (size_t)GetRegPtr(reg)); + mov(rax, (size_t)GetRegPtr(sh4ctx, reg)); mov(Xbyak::Reg32(nreg), dword[rax]); } void RegWriteback(u32 reg, Xbyak::Operand::Code nreg) { - mov(rax, (size_t)GetRegPtr(reg)); + mov(rax, (size_t)GetRegPtr(sh4ctx, reg)); mov(dword[rax], Xbyak::Reg32(nreg)); } void RegPreload_FPU(u32 reg, s8 nreg) { - mov(rax, (size_t)GetRegPtr(reg)); + mov(rax, (size_t)GetRegPtr(sh4ctx, reg)); movss(Xbyak::Xmm(nreg), dword[rax]); } void RegWriteback_FPU(u32 reg, s8 nreg) { - mov(rax, (size_t)GetRegPtr(reg)); + mov(rax, (size_t)GetRegPtr(sh4ctx, reg)); movss(dword[rax], Xbyak::Xmm(nreg)); } @@ -867,7 +867,7 @@ class BlockCompiler : public BaseXbyakRec else { movsx(eax, byte[rax]); - mov(rcx, (uintptr_t)op.rd.reg_ptr()); + mov(rcx, (uintptr_t)op.rd.reg_ptr(sh4ctx)); mov(dword[rcx], eax); } break; @@ -878,7 +878,7 @@ class BlockCompiler : public BaseXbyakRec else { movsx(eax, word[rax]); - mov(rcx, (uintptr_t)op.rd.reg_ptr()); + mov(rcx, (uintptr_t)op.rd.reg_ptr(sh4ctx)); mov(dword[rcx], eax); } break; @@ -891,7 +891,7 @@ class BlockCompiler : public BaseXbyakRec else { mov(eax, dword[rax]); - mov(rcx, (uintptr_t)op.rd.reg_ptr()); + mov(rcx, (uintptr_t)op.rd.reg_ptr(sh4ctx)); mov(dword[rcx], 
eax); } break; @@ -899,7 +899,7 @@ class BlockCompiler : public BaseXbyakRec case 8: #if ALLOC_F64 == false mov(rcx, qword[rax]); - mov(rax, (uintptr_t)op.rd.reg_ptr()); + mov(rax, (uintptr_t)op.rd.reg_ptr(sh4ctx)); mov(qword[rax], rcx); #else movd(regalloc.MapXRegister(op.rd, 0), dword[rax]); @@ -921,7 +921,7 @@ class BlockCompiler : public BaseXbyakRec mov(call_regs[0], addr); GenCall((void (*)())ptr); #if ALLOC_F64 == false - mov(rcx, (size_t)op.rd.reg_ptr()); + mov(rcx, (size_t)op.rd.reg_ptr(sh4ctx)); mov(dword[rcx], eax); #else movd(regalloc.MapXRegister(op.rd, 0), eax); @@ -930,7 +930,7 @@ class BlockCompiler : public BaseXbyakRec mov(call_regs[0], addr + 4); GenCall((void (*)())ptr); #if ALLOC_F64 == false - mov(rcx, (size_t)op.rd.reg_ptr() + 4); + mov(rcx, (size_t)op.rd.reg_ptr(sh4ctx) + 4); mov(dword[rcx], eax); #else movd(regalloc.MapXRegister(op.rd, 1), eax); @@ -990,7 +990,7 @@ class BlockCompiler : public BaseXbyakRec mov(byte[rax], (u8)op.rs2._imm); else { - mov(rcx, (uintptr_t)op.rs2.reg_ptr()); + mov(rcx, (uintptr_t)op.rs2.reg_ptr(sh4ctx)); mov(cl, byte[rcx]); mov(byte[rax], cl); } @@ -1003,7 +1003,7 @@ class BlockCompiler : public BaseXbyakRec mov(word[rax], (u16)op.rs2._imm); else { - mov(rcx, (uintptr_t)op.rs2.reg_ptr()); + mov(rcx, (uintptr_t)op.rs2.reg_ptr(sh4ctx)); mov(cx, word[rcx]); mov(word[rax], cx); } @@ -1018,7 +1018,7 @@ class BlockCompiler : public BaseXbyakRec mov(dword[rax], op.rs2._imm); else { - mov(rcx, (uintptr_t)op.rs2.reg_ptr()); + mov(rcx, (uintptr_t)op.rs2.reg_ptr(sh4ctx)); mov(ecx, dword[rcx]); mov(dword[rax], ecx); } @@ -1026,7 +1026,7 @@ class BlockCompiler : public BaseXbyakRec case 8: #if ALLOC_F64 == false - mov(rcx, (uintptr_t)op.rs2.reg_ptr()); + mov(rcx, (uintptr_t)op.rs2.reg_ptr(sh4ctx)); mov(rcx, qword[rcx]); mov(qword[rax], rcx); #else diff --git a/core/rec-x64/xbyak_base.h b/core/rec-x64/xbyak_base.h index 1fd69bf4a..cdbc01ad7 100644 --- a/core/rec-x64/xbyak_base.h +++ b/core/rec-x64/xbyak_base.h @@ -583,7 +583,7 
@@ class BaseXbyakRec : public Xbyak::CodeGenerator mov(rcx, (uintptr_t)&sin_table); mov(rcx, qword[rcx + rax * 8]); #if ALLOC_F64 == false - mov(rdx, (uintptr_t)op.rd.reg_ptr()); + mov(rdx, (uintptr_t)op.rd.reg_ptr(sh4ctx)); mov(qword[rdx], rcx); #else movd(mapXRegister(op.rd, 0), ecx); @@ -601,8 +601,8 @@ class BaseXbyakRec : public Xbyak::CodeGenerator verify(!isAllocAny(op.rd)); mov(ecx, dword[(size_t)&sin_table + eax * 8]); mov(edx, dword[(size_t)&sin_table[0].u[1] + eax * 8]); - mov(dword[op.rd.reg_ptr()], ecx); - mov(dword[op.rd.reg_ptr() + 1], edx); + mov(dword[op.rd.reg_ptr(sh4ctx)], ecx); + mov(dword[op.rd.reg_ptr(sh4ctx) + 1], edx); #endif } break; @@ -680,13 +680,13 @@ class BaseXbyakRec : public Xbyak::CodeGenerator if (ArchX64) { #ifndef XBYAK32 - mov(rax, (size_t)param.reg_ptr()); + mov(rax, (size_t)param.reg_ptr(sh4ctx)); mov(reg.cvt32(), dword[rax]); #endif } else { - mov(reg.cvt32(), dword[param.reg_ptr()]); + mov(reg.cvt32(), dword[param.reg_ptr(sh4ctx)]); } } } @@ -703,7 +703,7 @@ class BaseXbyakRec : public Xbyak::CodeGenerator if (ArchX64) { #ifndef XBYAK32 - mov(rax, (size_t)param.reg_ptr()); + mov(rax, (size_t)param.reg_ptr(sh4ctx)); if (!reg.isXMM()) mov(reg.cvt32(), dword[rax]); else @@ -713,9 +713,9 @@ class BaseXbyakRec : public Xbyak::CodeGenerator else { if (!reg.isXMM()) - mov(reg.cvt32(), dword[param.reg_ptr()]); + mov(reg.cvt32(), dword[param.reg_ptr(sh4ctx)]); else - movss((const Xbyak::Xmm &)reg, dword[param.reg_ptr()]); + movss((const Xbyak::Xmm &)reg, dword[param.reg_ptr(sh4ctx)]); } } } @@ -757,7 +757,7 @@ class BaseXbyakRec : public Xbyak::CodeGenerator if (ArchX64) { #ifndef XBYAK32 - mov(rax, (size_t)param.reg_ptr()); + mov(rax, (size_t)param.reg_ptr(sh4ctx)); if (!reg.isXMM()) mov(dword[rax], reg.cvt32()); else @@ -767,9 +767,9 @@ class BaseXbyakRec : public Xbyak::CodeGenerator else { if (!reg.isXMM()) - mov(dword[param.reg_ptr()], reg.cvt32()); + mov(dword[param.reg_ptr(sh4ctx)], reg.cvt32()); else - 
movss(dword[param.reg_ptr()], (const Xbyak::Xmm &)reg); + movss(dword[param.reg_ptr(sh4ctx)], (const Xbyak::Xmm &)reg); } } } diff --git a/core/rec-x86/rec_x86.cpp b/core/rec-x86/rec_x86.cpp index bb23a11c2..6d422fc89 100644 --- a/core/rec-x86/rec_x86.cpp +++ b/core/rec-x86/rec_x86.cpp @@ -182,7 +182,7 @@ u32 X86Compiler::relinkBlock(RuntimeBlockInfo* block) mov(ecx, block->NextBlock); - cmp(dword[GetRegPtr(block->has_jcond ? reg_pc_dyn : reg_sr_T)], (u32)block->BlockType & 1); + cmp(dword[block->has_jcond ? &sh4ctx.jdyn : &sh4ctx.sr.T], (u32)block->BlockType & 1); Xbyak::Label branch_not_taken; jne(branch_not_taken, T_SHORT); @@ -195,7 +195,7 @@ u32 X86Compiler::relinkBlock(RuntimeBlockInfo* block) case BET_DynamicCall: case BET_DynamicRet: //next_pc = *jdyn; - mov(ecx, dword[GetRegPtr(reg_pc_dyn)]); + mov(ecx, dword[&sh4ctx.jdyn]); break; case BET_DynamicIntr: @@ -203,16 +203,16 @@ u32 X86Compiler::relinkBlock(RuntimeBlockInfo* block) if (block->BlockType == BET_DynamicIntr) { //next_pc = *jdyn; - mov(ecx, dword[GetRegPtr(reg_pc_dyn)]); - mov(dword[&next_pc], ecx); + mov(ecx, dword[&sh4ctx.jdyn]); + mov(dword[&sh4ctx.pc], ecx); } else { //next_pc = next_pc_value; - mov(dword[&next_pc], block->NextBlock); + mov(dword[&sh4ctx.pc], block->NextBlock); } call(UpdateINTC); - mov(ecx, dword[&next_pc]); + mov(ecx, dword[&sh4ctx.pc]); break; default: @@ -227,7 +227,7 @@ u32 X86Compiler::relinkBlock(RuntimeBlockInfo* block) { case BET_Cond_0: case BET_Cond_1: - cmp(dword[GetRegPtr(block->has_jcond ? reg_pc_dyn : reg_sr_T)], (u32)block->BlockType & 1); + cmp(dword[block->has_jcond ? 
&sh4ctx.jdyn : &sh4ctx.sr.T], (u32)block->BlockType & 1); if (mmu_enabled()) { @@ -269,7 +269,7 @@ u32 X86Compiler::relinkBlock(RuntimeBlockInfo* block) case BET_DynamicRet: case BET_DynamicCall: case BET_DynamicJump: - mov(ecx, dword[GetRegPtr(reg_pc_dyn)]); + mov(ecx, dword[&sh4ctx.jdyn]); jmp((const void *)no_update); break; @@ -303,7 +303,7 @@ u32 X86Compiler::relinkBlock(RuntimeBlockInfo* block) } else { - mov(eax, dword[GetRegPtr(reg_pc_dyn)]); + mov(eax, dword[&sh4ctx.jdyn]); mov(dword[&sh4ctx.pc], eax); } call(UpdateINTC); @@ -360,7 +360,7 @@ void X86Compiler::ngen_CC_param(const shil_opcode& op, const shil_param& param, //push the ptr itself case CPT_ptr: verify(param.is_reg()); - push((uintptr_t)param.reg_ptr()); + push((uintptr_t)param.reg_ptr(sh4ctx)); CC_stackSize += 4; unwinder.allocStackPtr(getCurr(), 4); break; @@ -384,8 +384,8 @@ void X86Compiler::ngen_CC_param(const shil_opcode& op, const shil_param& param, // store from ST(0) case CPT_f32rv: - fstp(dword[param.reg_ptr()]); - movss(regalloc.MapXRegister(param), dword[param.reg_ptr()]); + fstp(dword[param.reg_ptr(sh4ctx)]); + movss(regalloc.MapXRegister(param), dword[param.reg_ptr(sh4ctx)]); break; } } @@ -460,7 +460,7 @@ void X86Compiler::genMainloop() if (!mmu_enabled()) { mov(esi, ecx); // save sh4 pc in ESI, used below if FPCB is still empty for this address - mov(eax, (size_t)&p_sh4rcb->fpcb[0]); + mov(eax, (uintptr_t)&sh4ctx + sizeof(Sh4Context) - sizeof(Sh4RCB) + offsetof(Sh4RCB, fpcb)); // address of fpcb[0] and_(ecx, RAM_SIZE_MAX - 2); jmp(dword[eax + ecx * 2]); } @@ -627,7 +627,7 @@ bool X86Compiler::genReadMemImmediate(const shil_opcode& op, RuntimeBlockInfo* b else { movsx(eax, byte[ptr]); - mov(dword[op.rd.reg_ptr()], eax); + mov(dword[op.rd.reg_ptr(sh4ctx)], eax); } break; @@ -637,7 +637,7 @@ bool X86Compiler::genReadMemImmediate(const shil_opcode& op, RuntimeBlockInfo* b else { movsx(eax, word[ptr]); - mov(dword[op.rd.reg_ptr()], eax); + mov(dword[op.rd.reg_ptr(sh4ctx)], eax); } 
break; @@ -649,7 +649,7 @@ bool X86Compiler::genReadMemImmediate(const shil_opcode& op, RuntimeBlockInfo* b else { mov(eax, dword[ptr]); - mov(dword[op.rd.reg_ptr()], eax); + mov(dword[op.rd.reg_ptr(sh4ctx)], eax); } break; @@ -662,7 +662,7 @@ bool X86Compiler::genReadMemImmediate(const shil_opcode& op, RuntimeBlockInfo* b else { movq(xmm0, qword[ptr]); - movq(qword[op.rd.reg_ptr()], xmm0); + movq(qword[op.rd.reg_ptr(sh4ctx)], xmm0); } break; @@ -681,11 +681,11 @@ bool X86Compiler::genReadMemImmediate(const shil_opcode& op, RuntimeBlockInfo* b // Need to call the handler twice mov(ecx, addr); genCall((void (DYNACALL *)())ptr); - mov(dword[op.rd.reg_ptr()], eax); + mov(dword[op.rd.reg_ptr(sh4ctx)], eax); mov(ecx, addr + 4); genCall((void (DYNACALL *)())ptr); - mov(dword[op.rd.reg_ptr() + 1], eax); + mov(dword[op.rd.reg_ptr(sh4ctx) + 1], eax); } else { @@ -749,7 +749,7 @@ bool X86Compiler::genWriteMemImmediate(const shil_opcode& op, RuntimeBlockInfo* mov(byte[ptr], (u8)op.rs2.imm_value()); else { - mov(al, byte[op.rs2.reg_ptr()]); + mov(al, byte[op.rs2.reg_ptr(sh4ctx)]); mov(byte[ptr], al); } break; @@ -761,7 +761,7 @@ bool X86Compiler::genWriteMemImmediate(const shil_opcode& op, RuntimeBlockInfo* mov(word[ptr], (u16)op.rs2.imm_value()); else { - mov(cx, word[op.rs2.reg_ptr()]); + mov(cx, word[op.rs2.reg_ptr(sh4ctx)]); mov(word[ptr], cx); } break; @@ -775,7 +775,7 @@ bool X86Compiler::genWriteMemImmediate(const shil_opcode& op, RuntimeBlockInfo* mov(dword[ptr], op.rs2.imm_value()); else { - mov(ecx, dword[op.rs2.reg_ptr()]); + mov(ecx, dword[op.rs2.reg_ptr(sh4ctx)]); mov(dword[ptr], ecx); } break; @@ -788,7 +788,7 @@ bool X86Compiler::genWriteMemImmediate(const shil_opcode& op, RuntimeBlockInfo* } else { - movq(xmm0, qword[op.rs2.reg_ptr()]); + movq(xmm0, qword[op.rs2.reg_ptr(sh4ctx)]); movq(qword[ptr], xmm0); } break; diff --git a/core/rec-x86/rec_x86.h b/core/rec-x86/rec_x86.h index ff9133f99..cfd41096b 100644 --- a/core/rec-x86/rec_x86.h +++ 
b/core/rec-x86/rec_x86.h @@ -50,22 +50,22 @@ class X86Compiler : public BaseXbyakRec void regPreload(u32 reg, Xbyak::Operand::Code nreg) { DEBUG_LOG(DYNAREC, "RegPreload reg %d -> %s", reg, Xbyak::Reg32(nreg).toString()); - mov(Xbyak::Reg32(nreg), dword[GetRegPtr(reg)]); + mov(Xbyak::Reg32(nreg), dword[GetRegPtr(sh4ctx, reg)]); } void regWriteback(u32 reg, Xbyak::Operand::Code nreg) { DEBUG_LOG(DYNAREC, "RegWriteback reg %d <- %s", reg, Xbyak::Reg32(nreg).toString()); - mov(dword[GetRegPtr(reg)], Xbyak::Reg32(nreg)); + mov(dword[GetRegPtr(sh4ctx, reg)], Xbyak::Reg32(nreg)); } void regPreload_FPU(u32 reg, s8 nreg) { DEBUG_LOG(DYNAREC, "RegPreload_FPU reg %d -> xmm%d", reg, nreg); - movss(Xbyak::Xmm(nreg), dword[GetRegPtr(reg)]); + movss(Xbyak::Xmm(nreg), dword[GetRegPtr(sh4ctx, reg)]); } void regWriteback_FPU(u32 reg, s8 nreg) { DEBUG_LOG(DYNAREC, "RegWriteback_FPU reg %d <- xmm%d", reg, nreg); - movss(dword[GetRegPtr(reg)], Xbyak::Xmm(nreg)); + movss(dword[GetRegPtr(sh4ctx, reg)], Xbyak::Xmm(nreg)); } void genMainloop(); diff --git a/core/rec-x86/x86_ops.cpp b/core/rec-x86/x86_ops.cpp index c7c14c8ae..e539389fd 100644 --- a/core/rec-x86/x86_ops.cpp +++ b/core/rec-x86/x86_ops.cpp @@ -283,7 +283,7 @@ void X86Compiler::genMmuLookup(RuntimeBlockInfo* block, const shil_opcode& op, u } [[noreturn]] -static void DYNACALL handle_sh4_exception(SH4ThrownException& ex, u32 pc) +static void DYNACALL handle_sh4_exception(Sh4Context *ctx, SH4ThrownException& ex, u32 pc) { if (pc & 1) { @@ -292,7 +292,7 @@ static void DYNACALL handle_sh4_exception(SH4ThrownException& ex, u32 pc) pc--; } Do_Exception(pc, ex.expEvn); - p_sh4rcb->cntx.cycle_counter += 4; // probably more is needed + ctx->cycle_counter += 4; // probably more is needed X86Compiler::handleException(); // not reached std::abort(); @@ -303,7 +303,7 @@ static void DYNACALL interpreter_fallback(Sh4Context *ctx, u16 op, OpCallFP *oph try { oph(ctx, op); } catch (SH4ThrownException& ex) { - handle_sh4_exception(ex, pc); + 
handle_sh4_exception(ctx, ex, pc); } } @@ -312,7 +312,7 @@ static void DYNACALL do_sqw_mmu_no_ex(u32 addr, Sh4Context *ctx, u32 pc) try { ctx->doSqWrite(addr, ctx); } catch (SH4ThrownException& ex) { - handle_sh4_exception(ex, pc); + handle_sh4_exception(ctx, ex, pc); } } @@ -346,8 +346,8 @@ void X86Compiler::genOpcode(RuntimeBlockInfo* block, bool optimise, shil_opcode& movss(regalloc.MapXRegister(op.rd, 1), regalloc.MapXRegister(op.rs1, 1)); #else verify(!regalloc.IsAllocAny(op.rd)); - movq(xmm0, qword[op.rs1.reg_ptr()]); - movq(qword[op.rd.reg_ptr()], xmm0); + movq(xmm0, qword[op.rs1.reg_ptr(sh4ctx)]); + movq(qword[op.rd.reg_ptr(sh4ctx)], xmm0); #endif break; @@ -363,7 +363,7 @@ void X86Compiler::genOpcode(RuntimeBlockInfo* block, bool optimise, shil_opcode& else if (regalloc.IsAllocg(op.rs3)) add(ecx, regalloc.MapRegister(op.rs3)); else - add(ecx, dword[op.rs3.reg_ptr()]); + add(ecx, dword[op.rs3.reg_ptr(sh4ctx)]); } int memOpSize; @@ -410,8 +410,8 @@ void X86Compiler::genOpcode(RuntimeBlockInfo* block, bool optimise, shil_opcode& else { verify(!regalloc.IsAllocAny(op.rd)); - movss(dword[op.rd.reg_ptr()], xmm0); - movss(dword[op.rd.reg_ptr() + 1], xmm1); + movss(dword[op.rd.reg_ptr(sh4ctx)], xmm0); + movss(dword[op.rd.reg_ptr(sh4ctx) + 1], xmm1); } } } @@ -428,7 +428,7 @@ void X86Compiler::genOpcode(RuntimeBlockInfo* block, bool optimise, shil_opcode& else if (regalloc.IsAllocg(op.rs3)) add(ecx, regalloc.MapRegister(op.rs3)); else - add(ecx, dword[op.rs3.reg_ptr()]); + add(ecx, dword[op.rs3.reg_ptr(sh4ctx)]); } int memOpSize; @@ -464,8 +464,8 @@ void X86Compiler::genOpcode(RuntimeBlockInfo* block, bool optimise, shil_opcode& } else { - movd(xmm0, dword[op.rs2.reg_ptr()]); - movd(xmm1, dword[op.rs2.reg_ptr() + 1]); + movd(xmm0, dword[op.rs2.reg_ptr(sh4ctx)]); + movd(xmm1, dword[op.rs2.reg_ptr(sh4ctx) + 1]); } } const u8 *start = getCurr(); @@ -511,7 +511,7 @@ void X86Compiler::genOpcode(RuntimeBlockInfo* block, bool optimise, shil_opcode& } else { - mov(eax, 
dword[op.rs1.reg_ptr()]); + mov(eax, dword[op.rs1.reg_ptr(sh4ctx)]); rn = eax; } mov(ecx, rn); @@ -521,7 +521,7 @@ void X86Compiler::genOpcode(RuntimeBlockInfo* block, bool optimise, shil_opcode& mov(ecx, rn); } - mov(edx, (size_t)&sh4rcb.cntx); + mov(edx, (uintptr_t)&sh4ctx); if (mmu_enabled()) { push(block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc @@ -549,8 +549,8 @@ void X86Compiler::genOpcode(RuntimeBlockInfo* block, bool optimise, shil_opcode& break; case shop_frswap: - mov(eax, (uintptr_t)op.rs1.reg_ptr()); - mov(ecx, (uintptr_t)op.rd.reg_ptr()); + mov(eax, (uintptr_t)op.rs1.reg_ptr(sh4ctx)); + mov(ecx, (uintptr_t)op.rd.reg_ptr(sh4ctx)); for (int i = 0; i < 4; i++) { movaps(xmm0, xword[eax + (i * 16)]);