NOTE(review): rebuilt from a copy whose line breaks and <...> template arguments had been stripped.
Template arguments on added lines follow from the code itself; the ones on context lines
(std::variant<Stack, Register>, std::vector<PushParameter>, holds_alternative<Stack>, from<void*>)
and the tab indentation are best guesses - confirm against the base files before applying.

diff --git a/src/assembler/X86Assembler.cpp b/src/assembler/X86Assembler.cpp
index 1132acf..de66596 100644
--- a/src/assembler/X86Assembler.cpp
+++ b/src/assembler/X86Assembler.cpp
@@ -194,3 +194,24 @@ void X86Assembler::mov(X86Register reg, std::string const& label) {
 	this->write8(0x05 | regIdx(reg) << 3);
 	this->label32(label);
 }
+
+void X86Assembler::fstps(X86Pointer ptr) {
+	// FSTP m32fp (D9 /3): pops ST(0) and stores it at ptr as a 32-bit float
+	this->write8(0xD9);
+	this->encodeModRM(ptr, 3);
+}
+void X86Assembler::flds(X86Pointer ptr) {
+	// FLD m32fp (D9 /0): pushes the 32-bit float at ptr onto the x87 stack
+	this->write8(0xD9);
+	this->encodeModRM(ptr, 0);
+}
+void X86Assembler::fstpd(X86Pointer ptr) {
+	// FSTP m64fp (DD /3): pops ST(0) and stores it at ptr as a 64-bit double
+	this->write8(0xDD);
+	this->encodeModRM(ptr, 3);
+}
+void X86Assembler::fldd(X86Pointer ptr) {
+	// FLD m64fp (DD /0): pushes the 64-bit double at ptr onto the x87 stack
+	this->write8(0xDD);
+	this->encodeModRM(ptr, 0);
+}
diff --git a/src/assembler/X86Assembler.hpp b/src/assembler/X86Assembler.hpp
index 3dd74af..a9bd77a 100644
--- a/src/assembler/X86Assembler.hpp
+++ b/src/assembler/X86Assembler.hpp
@@ -111,5 +111,10 @@ namespace tulip::hook {
 		void mov(X86Pointer ptr, X86Register reg);
 		void mov(X86Register reg, X86Register reg2);
 		void mov(X86Register reg, std::string const& label);
+
+		void fstps(X86Pointer ptr);
+		void flds(X86Pointer ptr);
+		void fstpd(X86Pointer ptr);
+		void fldd(X86Pointer ptr);
 	};
 }
\ No newline at end of file
diff --git a/src/convention/WindowsConvention.cpp b/src/convention/WindowsConvention.cpp
index 8b03d58..4a5c999 100644
--- a/src/convention/WindowsConvention.cpp
+++ b/src/convention/WindowsConvention.cpp
@@ -18,6 +18,7 @@ enum class Register {
 	XMM1,
 	XMM2,
 	XMM3,
+	ST0,
 };
 using Stack = size_t;
 using Location = std::variant<Stack, Register>;
@@ -37,6 +38,16 @@ class PushParameter final {
 };
 
 static Location returnLocation(AbstractFunction const& function) {
+	// non-optimized conventions: primitives in EAX, floats in x87 ST0, anything else via stack slot 0x4
+	switch (function.m_return.m_kind) {
+		default:
+		case AbstractTypeKind::Primitive: return Register::EAX;
+		case AbstractTypeKind::FloatingPoint: return Register::ST0;
+		case AbstractTypeKind::Other: return Stack(0x4);
+	}
+}
+
+static Location optimizedReturnLocation(AbstractFunction const& function) {
 	// other
 	switch (function.m_return.m_kind) {
 		default:
@@ -50,6 +61,8 @@ class PushParameters final {
 private:
 	std::vector<PushParameter> m_params;
 	Location m_returnValueLocation;
+	AbstractType m_returnType;
+
 	// size of the original function's stack
 	size_t m_originalStackSize = 0x0;
 	// size of our converted function's stack
@@ -93,6 +106,7 @@ class PushParameters final {
 public:
 	static PushParameters fromCdecl(X86Assembler& a, AbstractFunction const& function) {
 		auto res = PushParameters(a);
+		res.m_returnType = function.m_return;
 
 		// structs are returned as pointer through first parameter
 		res.m_returnValueLocation = returnLocation(function);
@@ -120,6 +134,7 @@ class PushParameters final {
 
 	static PushParameters fromThiscall(X86Assembler& a, AbstractFunction const& function) {
 		auto res = PushParameters(a);
+		res.m_returnType = function.m_return;
 
 		// structs are returned as pointer through first parameter
 		res.m_returnValueLocation = returnLocation(function);
@@ -150,6 +165,7 @@ class PushParameters final {
 
 	static PushParameters fromFastcall(X86Assembler& a, AbstractFunction const& function) {
 		auto res = PushParameters(a);
+		res.m_returnType = function.m_return;
 		size_t registersUsed = 0;
 
 		// structs are returned as pointer through first parameter
@@ -184,7 +200,8 @@ class PushParameters final {
 		size_t registersUsed = 0;
 
+		res.m_returnType = function.m_return;
 		// structs are returned as pointer through first parameter
-		res.m_returnValueLocation = returnLocation(function);
+		res.m_returnValueLocation = optimizedReturnLocation(function);
 		if (std::holds_alternative<Stack>(res.m_returnValueLocation)) {
 			res.push(AbstractType::from<void*>(), Register::ECX);
 			registersUsed = 1;
@@ -242,7 +259,8 @@ class PushParameters final {
 		size_t registersUsed = 0;
 
+		res.m_returnType = function.m_return;
 		// structs are returned as pointer through first parameter
-		res.m_returnValueLocation = returnLocation(function);
+		res.m_returnValueLocation = optimizedReturnLocation(function);
 		if (std::holds_alternative<Stack>(res.m_returnValueLocation)) {
 			res.push(AbstractType::from<void*>());
 		}
@@ -386,6 +404,22 @@ class PushParameters final {
 		// clean up stack from the ones we added
 		a.add(ESP, m_resultStackSize);
 
+		// in the original(gd) function, the return for floats is in xmm0
+		if (std::holds_alternative<Register>(m_returnValueLocation) && std::get<Register>(m_returnValueLocation) == Register::XMM0) {
+			// move st0 into xmm0 through a temporary stack slot of the return type's size
+			auto size = m_returnType.m_size;
+			a.sub(ESP, size);
+			if (size == 4) {
+				a.fstps(m[ESP]);
+				a.movss(XMM0, m[ESP]);
+			}
+			else {
+				a.fstpd(m[ESP]);
+				a.movsd(XMM0, m[ESP]);
+			}
+			a.add(ESP, size);
+		}
+
 		// if the function is caller cleaned, then generateOriginalCleanup
 		// or the original GD function cleans it up
 		if (m_isCallerCleanup) {
@@ -435,6 +469,22 @@ class PushParameters final {
 	}
 
 	void generateOriginalCleanup() {
+		// in the default(geode) function, the return for floats is in st0
+		if (std::holds_alternative<Register>(m_returnValueLocation) && std::get<Register>(m_returnValueLocation) == Register::XMM0) {
+			// move xmm0 into st0 through a temporary stack slot of the return type's size
+			auto size = m_returnType.m_size;
+			a.sub(ESP, size);
+			if (size == 4) {
+				a.movss(m[ESP], XMM0);
+				a.flds(m[ESP]);
+			}
+			else {
+				a.movsd(m[ESP], XMM0);
+				a.fldd(m[ESP]);
+			}
+			a.add(ESP, size);
+		}
+
 		if (m_isCallerCleanup) {
 			// for mat: comment this to make your tests work
 			a.add(ESP, m_originalStackSize);