From 12ecd97e10191093be3c36cfd46e0f05ca1afbed Mon Sep 17 00:00:00 2001 From: altalk23 <45172705+altalk23@users.noreply.github.com> Date: Mon, 17 Jun 2024 23:27:07 +0300 Subject: [PATCH 1/7] initial windows stack frame impl --- include/tulip/FunctionData.hpp | 10 ++ include/tulip/TulipHook.hpp | 1 + src/Handler.cpp | 6 +- src/Handler.hpp | 4 - src/Wrapper.cpp | 4 +- src/Wrapper.hpp | 5 +- src/assembler/BaseAssembler.cpp | 7 + src/assembler/BaseAssembler.hpp | 2 + src/convention/Windows64Convention.cpp | 1 + src/generator/ArmV7Generator.cpp | 4 +- src/generator/ArmV7Generator.hpp | 2 +- src/generator/ArmV8Generator.cpp | 4 +- src/generator/ArmV8Generator.hpp | 2 +- src/generator/Generator.cpp | 16 +- src/generator/Generator.hpp | 9 +- src/generator/X64Generator.cpp | 231 +++++++++++++++++++++++-- src/generator/X64Generator.hpp | 12 +- src/generator/X86Generator.cpp | 8 +- src/generator/X86Generator.hpp | 4 +- src/target/Windows64Target.cpp | 54 ++++++ src/target/Windows64Target.hpp | 1 + 21 files changed, 338 insertions(+), 49 deletions(-) create mode 100644 include/tulip/FunctionData.hpp diff --git a/include/tulip/FunctionData.hpp b/include/tulip/FunctionData.hpp new file mode 100644 index 0000000..cd09e7d --- /dev/null +++ b/include/tulip/FunctionData.hpp @@ -0,0 +1,10 @@ +#pragma once + +#include + +namespace tulip::hook { + struct FunctionData { + void* m_address; + size_t m_size; + }; +} diff --git a/include/tulip/TulipHook.hpp b/include/tulip/TulipHook.hpp index 8a04a62..06540c5 100644 --- a/include/tulip/TulipHook.hpp +++ b/include/tulip/TulipHook.hpp @@ -3,6 +3,7 @@ #include "AbstractFunction.hpp" #include "AbstractType.hpp" #include "CallingConvention.hpp" +#include "FunctionData.hpp" #include "HandlerData.hpp" #include "HookData.hpp" #include "Platform.hpp" diff --git a/src/Handler.cpp b/src/Handler.cpp index ed59413..02ade6a 100644 --- a/src/Handler.cpp +++ b/src/Handler.cpp @@ -36,7 +36,8 @@ Result<> Handler::init() { auto generator = Target::get().getHandlerGenerator(m_address, m_trampoline, m_handler, m_content, m_metadata); - TULIP_HOOK_UNWRAP(generator->generateHandler()); + TULIP_HOOK_UNWRAP_INTO(auto handler, generator->generateHandler()); + m_handlerSize = handler.m_size; TULIP_HOOK_UNWRAP_INTO(m_modifiedBytes, generator->generateIntervener()); @@ -45,7 +46,8 @@ Result<> Handler::init() { auto address = reinterpret_cast(Target::get().getRealPtr(m_address)); m_originalBytes.insert(m_originalBytes.begin(), address, address + target); - TULIP_HOOK_UNWRAP(generator->generateTrampoline(target)); + TULIP_HOOK_UNWRAP_INTO(auto trampoline, generator->generateTrampoline(target)); + m_trampolineSize = trampoline.m_size; this->addOriginal(); diff --git a/src/Handler.hpp b/src/Handler.hpp index 21ba61a..2a297d9 100644 --- a/src/Handler.hpp +++ b/src/Handler.hpp @@ -64,9 +64,5 @@ namespace tulip::hook { Result<> interveneFunction(); Result<> restoreFunction(); - - void generateHandler(); - void generateIntervener(); - void generateTrampoline(); }; } \ No newline at end of file diff --git a/src/Wrapper.cpp b/src/Wrapper.cpp index 4c7e9fe..5d6b506 100644 --- a/src/Wrapper.cpp +++ b/src/Wrapper.cpp @@ -16,7 +16,7 @@ Result Wrapper::createWrapper(void* address, WrapperMetadata const& metad m_wrappers[address] = wrapped; } - return Ok(m_wrappers[address]); + return Ok(m_wrappers[address].m_address); } Result Wrapper::createReverseWrapper(void* address, WrapperMetadata const& metadata) { @@ -26,5 +26,5 @@ Result Wrapper::createReverseWrapper(void* address, WrapperMetadata const m_reverseWrappers[address] = wrapped; } - return Ok(m_reverseWrappers[address]); + return Ok(m_reverseWrappers[address].m_address); } diff --git a/src/Wrapper.hpp b/src/Wrapper.hpp index 1ac22da..7573b1a 100644 --- a/src/Wrapper.hpp +++ b/src/Wrapper.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -10,8 +11,8 @@ namespace tulip::hook { class Wrapper { public: - std::unordered_map m_wrappers; - std::unordered_map m_reverseWrappers; + std::unordered_map m_wrappers; + std::unordered_map m_reverseWrappers; static Wrapper& get(); diff --git a/src/assembler/BaseAssembler.cpp b/src/assembler/BaseAssembler.cpp index e2aff83..46d7343 100644 --- a/src/assembler/BaseAssembler.cpp +++ b/src/assembler/BaseAssembler.cpp @@ -79,4 +79,11 @@ void BaseAssembler::label(std::string const& name) { m_labels[name] = this->currentAddress(); } +void* BaseAssembler::getLabel(std::string const& name) const { + if (m_labels.find(name) == m_labels.end()) { + return nullptr; + } + return reinterpret_cast(m_labels.at(name)); +} + void BaseAssembler::updateLabels() {} \ No newline at end of file diff --git a/src/assembler/BaseAssembler.hpp b/src/assembler/BaseAssembler.hpp index 32dca0b..14e7e79 100644 --- a/src/assembler/BaseAssembler.hpp +++ b/src/assembler/BaseAssembler.hpp @@ -47,6 +47,8 @@ namespace tulip::hook { void label(std::string const& name); + void* getLabel(std::string const& name) const; + virtual void updateLabels(); }; } \ No newline at end of file diff --git a/src/convention/Windows64Convention.cpp b/src/convention/Windows64Convention.cpp index feba403..83ea916 100644 --- a/src/convention/Windows64Convention.cpp +++ b/src/convention/Windows64Convention.cpp @@ -48,6 +48,7 @@ void Windows64Convention::generateIntoDefault(BaseAssembler& a_, AbstractFunctio size_t stackParamSize = getStackParamSize(function); auto const paddedSize = (stackParamSize % 16) ? stackParamSize + 8 : stackParamSize; // + 0x20 for the shadow space before the first arg + a.label("convention-alloc-small"); a.sub(RSP, paddedSize + 0x20); if (stackParamSize > 0) { // theres stack args, so we need to copy them over diff --git a/src/generator/ArmV7Generator.cpp b/src/generator/ArmV7Generator.cpp index a4e3321..05737ca 100644 --- a/src/generator/ArmV7Generator.cpp +++ b/src/generator/ArmV7Generator.cpp @@ -105,7 +105,7 @@ std::vector ArmV7HandlerGenerator::intervenerBytes(uint64_t address) { return std::move(a.m_buffer); } -Result<> ArmV7HandlerGenerator::generateTrampoline(uint64_t target) { +Result ArmV7HandlerGenerator::generateTrampoline(uint64_t target) { auto origin = new CodeMemBlock((uint64_t)Target::get().getRealPtr(m_address), target); auto relocated = new CodeMemBlock(); // idk about arm thumb stuff help me @@ -129,5 +129,5 @@ Result<> ArmV7HandlerGenerator::generateTrampoline(uint64_t target) { if (relocated->size == 0) { return Err("Failed to relocate original function"); } - return Ok(); + return Ok(FunctionData{m_trampoline, relocated->size}); } diff --git a/src/generator/ArmV7Generator.hpp b/src/generator/ArmV7Generator.hpp index 7a9ce32..47e2bf0 100644 --- a/src/generator/ArmV7Generator.hpp +++ b/src/generator/ArmV7Generator.hpp @@ -10,7 +10,7 @@ namespace tulip::hook { public: using HandlerGenerator::HandlerGenerator; - Result<> generateTrampoline(uint64_t target) override; + Result generateTrampoline(uint64_t target) override; std::vector handlerBytes(uint64_t address) override; std::vector intervenerBytes(uint64_t address) override; diff --git a/src/generator/ArmV8Generator.cpp b/src/generator/ArmV8Generator.cpp index 4b94cb0..f210d7e 100644 --- a/src/generator/ArmV8Generator.cpp +++ b/src/generator/ArmV8Generator.cpp @@ -125,7 +125,7 @@ std::vector ArmV8HandlerGenerator::intervenerBytes(uint64_t address) { return std::move(a.m_buffer); } -Result<> ArmV8HandlerGenerator::generateTrampoline(uint64_t target) { +Result ArmV8HandlerGenerator::generateTrampoline(uint64_t target) { auto origin = new CodeMemBlock(reinterpret_cast(m_address), target); auto relocated = new CodeMemBlock(); auto originBuffer = m_address; @@ -148,5 +148,5 @@ Result<> ArmV8HandlerGenerator::generateTrampoline(uint64_t target) { if (relocated->size == 0) { return Err("Failed to relocate original function"); } - return Ok(); + return Ok(FunctionData{m_trampoline, relocated->size}); } diff --git a/src/generator/ArmV8Generator.hpp b/src/generator/ArmV8Generator.hpp index 5512389..78fd70b 100644 --- a/src/generator/ArmV8Generator.hpp +++ b/src/generator/ArmV8Generator.hpp @@ -10,7 +10,7 @@ namespace tulip::hook { public: using HandlerGenerator::HandlerGenerator; - Result<> generateTrampoline(uint64_t target) override; + Result generateTrampoline(uint64_t target) override; std::vector handlerBytes(uint64_t address) override; std::vector intervenerBytes(uint64_t address) override; diff --git a/src/generator/Generator.cpp b/src/generator/Generator.cpp index 8d1a2f2..6ee18f7 100644 --- a/src/generator/Generator.cpp +++ b/src/generator/Generator.cpp @@ -17,13 +17,13 @@ WrapperGenerator::WrapperGenerator(void* address, WrapperMetadata const& metadat m_address(address), m_metadata(metadata) {} -Result<> HandlerGenerator::generateHandler() { +Result HandlerGenerator::generateHandler() { auto address = reinterpret_cast(m_handler); auto encode = this->handlerBytes(address); TULIP_HOOK_UNWRAP(Target::get().writeMemory(m_handler, encode.data(), encode.size())); - return Ok(); + return Ok(FunctionData{m_handler, encode.size()}); } Result> HandlerGenerator::generateIntervener() { @@ -33,7 +33,7 @@ Result> HandlerGenerator::generateIntervener() { return Ok(std::move(encode)); } -Result<> HandlerGenerator::generateTrampoline(uint64_t target) { +Result HandlerGenerator::generateTrampoline(uint64_t target) { TULIP_HOOK_UNWRAP_INTO(auto offsets, this->relocatedBytes(reinterpret_cast(m_trampoline), target)); auto address = reinterpret_cast(m_trampoline) + offsets.m_relocatedBytes.size(); @@ -45,7 +45,7 @@ Result<> HandlerGenerator::generateTrampoline(uint64_t target) { TULIP_HOOK_UNWRAP(Target::get().writeMemory(m_trampoline, merge.data(), merge.size())); - return Ok(); + return Ok(FunctionData{m_trampoline, merge.size()}); } std::vector HandlerGenerator::handlerBytes(uint64_t address) { @@ -61,12 +61,12 @@ Result HandlerGenerator::relocatedBytes(uint64 return Ok(HandlerGenerator::RelocateReturn()); } -Result WrapperGenerator::generateWrapper() { - return Ok(m_address); // only windows needs the wrapper +Result WrapperGenerator::generateWrapper() { + return Ok(FunctionData{m_address, 0}); // only windows needs the wrapper } -Result WrapperGenerator::generateReverseWrapper() { - return Ok(m_address); // only windows needs the wrapper +Result WrapperGenerator::generateReverseWrapper() { + return Ok(FunctionData{m_address, 0}); // only windows needs the wrapper } std::vector WrapperGenerator::wrapperBytes(uint64_t address) { diff --git a/src/generator/Generator.hpp b/src/generator/Generator.hpp index 3fa0109..e21bcfe 100644 --- a/src/generator/Generator.hpp +++ b/src/generator/Generator.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -21,7 +22,7 @@ namespace tulip::hook { virtual ~HandlerGenerator() = default; - virtual Result<> generateHandler(); + virtual Result generateHandler(); virtual Result> generateIntervener(); struct RelocateReturn { @@ -29,7 +30,7 @@ namespace tulip::hook { int64_t m_originalOffset; }; - virtual Result<> generateTrampoline(uint64_t target); + virtual Result generateTrampoline(uint64_t target); virtual std::vector handlerBytes(uint64_t address); virtual std::vector intervenerBytes(uint64_t address); @@ -46,8 +47,8 @@ namespace tulip::hook { WrapperGenerator(void* address, WrapperMetadata const& metadata); - virtual Result generateWrapper(); - virtual Result generateReverseWrapper(); + virtual Result generateWrapper(); + virtual Result generateReverseWrapper(); virtual std::vector wrapperBytes(uint64_t address); virtual std::vector reverseWrapperBytes(uint64_t address); diff --git a/src/generator/X64Generator.cpp b/src/generator/X64Generator.cpp index ae5a921..79c2efe 100644 --- a/src/generator/X64Generator.cpp +++ b/src/generator/X64Generator.cpp @@ -128,7 +128,32 @@ void X64HandlerGenerator::restoreReturnRegisters(X64Assembler& a, size_t size) { #endif } -std::vector X64HandlerGenerator::handlerBytes(uint64_t address) { +#ifdef TULIP_HOOK_WINDOWS +namespace { + size_t getStackParamSize(AbstractFunction const& function) { + size_t stackParamSize = 0; + int regCount = 0; + if (function.m_return.m_kind == AbstractTypeKind::Other) { + regCount += 1; + } + for (auto& param : function.m_parameters) { + if (regCount < 4) { + regCount++; + } else { + stackParamSize += 8; + } + } + return stackParamSize; + } + size_t getPaddedStackParamSize(AbstractFunction const& function) { + auto stackParamSize = getStackParamSize(function); + return (stackParamSize % 16) ? stackParamSize + 8 : stackParamSize; + } +} +#endif + +Result X64HandlerGenerator::generateHandler() { + auto address = reinterpret_cast(m_handler); X64Assembler a(address); RegMem64 m; using enum X64Register; @@ -143,23 +168,21 @@ std::vector X64HandlerGenerator::handlerBytes(uint64_t address) { a.nop(); } + a.label("handler-push"); a.push(RBP); a.mov(RBP, RSP); + // shadow space + a.label("handler-alloc-small"); a.sub(RSP, 0x40); // preserve registers const auto preservedSize = preserveRegisters(a); - // shadow space - a.sub(RSP, 0x20); - // set the parameters a.mov(FIRST_PARAM, "content"); // call the pre handler, incrementing a.callip("handlerPre"); - a.add(RSP, 0x20); - // store rax (next function ptr) in the shadow space for a bit a.mov(m[RBP - 0x10], RAX); @@ -203,7 +226,70 @@ std::vector X64HandlerGenerator::handlerBytes(uint64_t address) { a.updateLabels(); - return std::move(a.m_buffer); + auto codeSize = a.m_buffer.size(); + + #ifdef TULIP_HOOK_WINDOWS + + // UNWIND_INFO structure & RUNTIME_FUNCTION structure + + { + auto const offsetBegin = address & 0xffff; + auto const offsetEnd = (address + a.m_buffer.size()) & 0xffff; + + auto const pushOffset = reinterpret_cast(a.getLabel("handler-push")) & 0xffff; + auto const allocOffset = reinterpret_cast(a.getLabel("handler-alloc-small")) & 0xffff; + auto const conventionOffset = reinterpret_cast(a.getLabel("convention-alloc-small")) & 0xffff; + auto const hasConvention = conventionOffset != 0; + auto const prologSize = static_cast(hasConvention ? conventionOffset : allocOffset); + + + // RUNTIME_FUNCTION + + a.write32(offsetBegin); // BeginAddress + a.write32(offsetEnd); // EndAddress + a.write32(offsetEnd + 0xc); // UnwindData + + // UNWIND_INFO + + a.write8( + 0x1 | // Version : 3 + 0x10 // Flags : 5 + ); + a.write8(prologSize); // SizeOfProlog + a.write8(hasConvention ? 3 : 2); // CountOfUnwindCodes + a.write8( + 0x0 | // FrameRegister : 4 + 0x0 // FrameOffset : 4 + ); + // UNWIND_CODE[] + + a.write8(pushOffset); // CodeOffset + a.write8( + 0x50 | // UnwindOp : 4 + 0x0 // OpInfo : 4 + ); + + a.write8(allocOffset); // CodeOffset + a.write8( + (((0x40 >> 3) - 1) << 4) | // UnwindOp : 4 + 0x2 // OpInfo : 4 + ); + + if (hasConvention) { + auto padded = getPaddedStackParamSize(m_metadata.m_abstract); + a.write8(conventionOffset); // CodeOffset + a.write8( + (((padded >> 3) - 1) << 4) | // UnwindOp : 4 + 0x2 // OpInfo : 4 + ); + } + } + + #endif + + TULIP_HOOK_UNWRAP(Target::get().writeMemory(m_handler, a.m_buffer.data(), a.m_buffer.size())); + + return Ok(FunctionData{m_handler, codeSize}); } std::vector X64HandlerGenerator::intervenerBytes(uint64_t address) { @@ -371,10 +457,22 @@ Result<> X64HandlerGenerator::relocateRIPInstruction(cs_insn* insn, uint8_t* buf return X86HandlerGenerator::relocateRIPInstruction(insn, buffer, trampolineAddress, originalAddress, disp); } -std::vector X64WrapperGenerator::wrapperBytes(uint64_t address) { +Result X64WrapperGenerator::generateWrapper() { + if (!m_metadata.m_convention->needsWrapper(m_metadata.m_abstract)) { + return Ok(m_address); + } + + // this is silly, butt + auto codeSize = this->wrapperBytes(0).size(); + auto areaSize = (codeSize + (0x20 - codeSize) % 0x20); + + TULIP_HOOK_UNWRAP_INTO(auto area, Target::get().allocateArea(areaSize)); + auto address = reinterpret_cast(area); + X64Assembler a(address); using enum X64Register; + a.label("wrapper-push"); a.push(RBP); a.mov(RBP, RSP); @@ -398,7 +496,61 @@ std::vector X64WrapperGenerator::wrapperBytes(uint64_t address) { a.updateLabels(); - return std::move(a.m_buffer); + auto codeSize2 = a.m_buffer.size(); + +#ifdef TULIP_HOOK_WINDOWS + + { + auto const offsetBegin = address & 0xffff; + auto const offsetEnd = (address + a.m_buffer.size()) & 0xffff; + + auto const pushOffset = reinterpret_cast(a.getLabel("wrapper-push")) & 0xffff; + auto const conventionOffset = reinterpret_cast(a.getLabel("convention-alloc-small")) & 0xffff; + auto const hasConvention = conventionOffset != 0; + auto const prologSize = static_cast(hasConvention ? conventionOffset : pushOffset); + + + // RUNTIME_FUNCTION + + a.write32(offsetBegin); // BeginAddress + a.write32(offsetEnd); // EndAddress + a.write32(offsetEnd + 0xc); // UnwindData + + // UNWIND_INFO + + a.write8( + 0x1 | // Version : 3 + 0x10 // Flags : 5 + ); + a.write8(prologSize); // SizeOfProlog + a.write8(hasConvention ? 2 : 1); // CountOfUnwindCodes + a.write8( + 0x0 | // FrameRegister : 4 + 0x0 // FrameOffset : 4 + ); + // UNWIND_CODE[] + + a.write8(pushOffset); // CodeOffset + a.write8( + 0x50 | // UnwindOp : 4 + 0x0 // OpInfo : 4 + ); + + if (hasConvention) { + auto padded = getPaddedStackParamSize(m_metadata.m_abstract); + a.write8(conventionOffset); // CodeOffset + a.write8( + (((padded >> 3) - 1) << 4) | // UnwindOp : 4 + 0x2 // OpInfo : 4 + ); + } + } + +#endif + + TULIP_HOOK_UNWRAP(Target::get().writeMemory(area, a.m_buffer.data(), a.m_buffer.size())); + + return Ok(FunctionData{area, codeSize2}); } // std::vector X64WrapperGenerator::reverseWrapperBytes(uint64_t address) { @@ -420,11 +572,12 @@ std::vector X64WrapperGenerator::wrapperBytes(uint64_t address) { // return std::move(a.m_buffer); // } -Result<> X64HandlerGenerator::generateTrampoline(uint64_t target) { +Result X64HandlerGenerator::generateTrampoline(uint64_t target) { X64Assembler a(reinterpret_cast(m_trampoline)); using enum X64Register; if (m_metadata.m_convention->needsWrapper(m_metadata.m_abstract)) { + a.label("trampoline-push"); a.push(RBP); a.mov(RBP, RSP); m_metadata.m_convention->generateIntoOriginal(a, m_metadata.m_abstract); @@ -457,11 +610,65 @@ Result<> X64HandlerGenerator::generateTrampoline(uint64_t target) { a.updateLabels(); auto codeSize = a.m_buffer.size(); - auto areaSize = (codeSize + (0x20 - codeSize) % 0x20); + + +#ifdef TULIP_HOOK_WINDOWS + + { + auto const address = reinterpret_cast(m_trampoline); + auto const offsetBegin = address & 0xffff; + auto const offsetEnd = (address + a.m_buffer.size()) & 0xffff; + + auto const pushOffset = reinterpret_cast(a.getLabel("trampoline-push")) & 0xffff; + auto const conventionOffset = reinterpret_cast(a.getLabel("convention-alloc-small")) & 0xffff; + auto const hasConvention = conventionOffset != 0; + auto const prologSize = static_cast(hasConvention ? conventionOffset : pushOffset); + + + // RUNTIME_FUNCTION + + a.write32(offsetBegin); // BeginAddress + a.write32(offsetEnd); // EndAddress + a.write32(offsetEnd + 0xc); // UnwindData + + // UNWIND_INFO + + a.write8( + 0x1 | // Version : 3 + 0x10 // Flags : 5 + ); + a.write8(prologSize); // SizeOfProlog + a.write8(hasConvention ? 2 : 1); // CountOfUnwindCodes + a.write8( + 0x0 | // FrameRegister : 4 + 0x0 // FrameOffset : 4 + ); + // UNWIND_CODE[] + + a.write8(pushOffset); // CodeOffset + a.write8( + 0x50 | // UnwindOp : 4 + 0x0 // OpInfo : 4 + ); + + if (hasConvention) { + auto padded = getPaddedStackParamSize(m_metadata.m_abstract); + a.write8(conventionOffset); // CodeOffset + a.write8( + (((padded >> 3) - 1) << 4) | // UnwindOp : 4 + 0x2 // OpInfo : 4 + ); + } + } + +#endif + + // auto codeSize = a.m_buffer.size(); + // auto areaSize = (codeSize + (0x20 - codeSize) % 0x20); TULIP_HOOK_UNWRAP(Target::get().writeMemory(m_trampoline, a.m_buffer.data(), a.m_buffer.size())); - return Ok(); + return Ok(FunctionData{m_trampoline, codeSize}); } Result<> X64HandlerGenerator::relocateBranchInstruction(cs_insn* insn, uint8_t* buffer, uint64_t& trampolineAddress, uint64_t& originalAddress, int64_t targetAddress) { diff --git a/src/generator/X64Generator.hpp b/src/generator/X64Generator.hpp index 2b0876e..71ea97d 100644 --- a/src/generator/X64Generator.hpp +++ b/src/generator/X64Generator.hpp @@ -3,6 +3,8 @@ #include "X86Generator.hpp" #include +#include +#include namespace tulip::hook { class X64Assembler; @@ -11,10 +13,12 @@ namespace tulip::hook { public: using X86HandlerGenerator::X86HandlerGenerator; - std::vector handlerBytes(uint64_t address) override; + // std::vector handlerBytes(uint64_t address) override; std::vector intervenerBytes(uint64_t address) override; - Result<> generateTrampoline(uint64_t target) override; + Result generateHandler() override; + + Result generateTrampoline(uint64_t target) override; Result<> relocateRIPInstruction(cs_insn* insn, uint8_t* buffer, uint64_t& trampolineAddress, uint64_t& originalAddress, int64_t disp) override; Result<> relocateBranchInstruction(cs_insn* insn, uint8_t* buffer, uint64_t& trampolineAddress, uint64_t& originalAddress, int64_t targetAddress) override; @@ -31,7 +35,9 @@ namespace tulip::hook { public: using X86WrapperGenerator::X86WrapperGenerator; - std::vector wrapperBytes(uint64_t address) override; + // std::vector wrapperBytes(uint64_t address) override; + + Result generateWrapper() override; // std::vector reverseWrapperBytes(uint64_t address) override; }; } diff --git a/src/generator/X86Generator.cpp b/src/generator/X86Generator.cpp index 661ec82..7ab4a13 100644 --- a/src/generator/X86Generator.cpp +++ b/src/generator/X86Generator.cpp @@ -132,7 +132,7 @@ std::vector X86WrapperGenerator::wrapperBytes(uint64_t address) { // return std::move(a.m_buffer); // } -Result X86WrapperGenerator::generateWrapper() { +Result X86WrapperGenerator::generateWrapper() { if (!m_metadata.m_convention->needsWrapper(m_metadata.m_abstract)) { return Ok(m_address); } @@ -146,7 +146,7 @@ Result X86WrapperGenerator::generateWrapper() { TULIP_HOOK_UNWRAP(Target::get().writeMemory(area, code.data(), codeSize)); - return Ok(area); + return Ok(FunctionData{area, codeSize}); } // Result X86WrapperGenerator::generateReverseWrapper() { @@ -166,7 +166,7 @@ Result X86WrapperGenerator::generateWrapper() { // return Ok(area); // } -Result<> X86HandlerGenerator::generateTrampoline(uint64_t target) { +Result X86HandlerGenerator::generateTrampoline(uint64_t target) { X86Assembler a(reinterpret_cast(m_trampoline)); RegMem32 m; using enum X86Register; @@ -191,7 +191,7 @@ Result<> X86HandlerGenerator::generateTrampoline(uint64_t target) { TULIP_HOOK_UNWRAP(Target::get().writeMemory(m_trampoline, a.m_buffer.data(), a.m_buffer.size())); - return Ok(); + return Ok(FunctionData{m_trampoline, codeSize}); } Result X86HandlerGenerator::relocatedBytes(uint64_t baseAddress, uint64_t target) { diff --git a/src/generator/X86Generator.hpp b/src/generator/X86Generator.hpp index 615abf9..1a7a085 100644 --- a/src/generator/X86Generator.hpp +++ b/src/generator/X86Generator.hpp @@ -16,7 +16,7 @@ namespace tulip::hook { std::vector handlerBytes(uint64_t address) override; std::vector intervenerBytes(uint64_t address) override; - Result<> generateTrampoline(uint64_t target) override; + Result generateTrampoline(uint64_t target) override; virtual Result<> relocateInstruction(cs_insn* insn, uint8_t* buffer, uint64_t& trampolineAddress, uint64_t& originalAddress); virtual Result<> relocateRIPInstruction(cs_insn* insn, uint8_t* buffer, uint64_t& trampolineAddress, uint64_t& originalAddress, int64_t disp); @@ -27,7 +27,7 @@ namespace tulip::hook { public: using WrapperGenerator::WrapperGenerator; - Result generateWrapper() override; + Result generateWrapper() override; // Result generateReverseWrapper() override; std::vector wrapperBytes(uint64_t address) override; diff --git a/src/target/Windows64Target.cpp b/src/target/Windows64Target.cpp index fd9a9ae..d5e17b9 100644 --- a/src/target/Windows64Target.cpp +++ b/src/target/Windows64Target.cpp @@ -7,11 +7,65 @@ using namespace tulip::hook; #if defined(TULIP_HOOK_WINDOWS) && defined(TULIP_HOOK_X64) +#include +#include "../Pool.hpp" +#include "../Handler.hpp" +#include "../Wrapper.hpp" + Target& Target::get() { static Windows64Target ret; return ret; } +Result<> Windows64Target::allocatePage() { + m_allocatedPage = VirtualAlloc(nullptr, 0x10000, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READ); + + if (!m_allocatedPage) { + return Err("Unable to allocate memory: " + std::to_string(GetLastError())); + } + + m_currentOffset = 0; + m_remainingOffset = 0x10000; + + RtlInstallFunctionTableCallback( + reinterpret_cast(m_allocatedPage) | 0x3, + reinterpret_cast(m_allocatedPage), + 0x10000, + +[](DWORD64 controlPc, PVOID context) -> PRUNTIME_FUNCTION { + for (auto& [handle, handler] : Pool::get().m_handlers) { + auto handlerBegin = reinterpret_cast(handler->m_handler); + auto handlerEnd = handlerBegin + handler->m_handlerSize; + + auto tramplineBegin = reinterpret_cast(handler->m_trampoline); + auto tramplineEnd = tramplineBegin + handler->m_trampolineSize; + + if (controlPc >= handlerBegin && controlPc < handlerEnd) { + return reinterpret_cast(handlerEnd); + } + + if (controlPc >= tramplineBegin && controlPc < tramplineEnd) { + return reinterpret_cast(tramplineEnd); + } + } + + for (auto& [handle, wrapper] : Wrapper::get().m_wrappers) { + auto wrapperBegin = reinterpret_cast(wrapper.m_address); + auto wrapperEnd = wrapperBegin + wrapper.m_size; + + if (controlPc >= wrapperBegin && controlPc < wrapperEnd) { + return reinterpret_cast(wrapperEnd); + } + } + + return nullptr; + }, + nullptr, + nullptr + ); + + return Ok(); +} + Result Windows64Target::openCapstone() { cs_err status; diff --git a/src/target/Windows64Target.hpp b/src/target/Windows64Target.hpp index 3276b7e..ca52e69 100644 --- a/src/target/Windows64Target.hpp +++ b/src/target/Windows64Target.hpp @@ -14,6 +14,7 @@ namespace tulip::hook { Result openCapstone() override; + Result<> allocatePage() override; std::unique_ptr getHandlerGenerator( void* address, void* trampoline, void* handler, void* content, HandlerMetadata const& metadata ) override; From 9d07ec8e84a7042c3d69b783ac3d5dd74c84364c Mon Sep 17 00:00:00 2001 From: altalk23 <45172705+altalk23@users.noreply.github.com> Date: Tue, 18 Jun 2024 00:59:41 +0300 Subject: [PATCH 2/7] test and fix until the crashlog appears --- src/assembler/X64Assembler.cpp | 7 ++ src/assembler/X64Assembler.hpp | 2 + src/assembler/X86Assembler.cpp | 26 +++- src/convention/Windows64Convention.cpp | 2 +- src/generator/X64Generator.cpp | 163 ++++++++++++------------- src/target/Windows64Target.cpp | 10 ++ 6 files changed, 121 insertions(+), 89 deletions(-) diff --git a/src/assembler/X64Assembler.cpp b/src/assembler/X64Assembler.cpp index 9b62c6c..03f9f19 100644 --- a/src/assembler/X64Assembler.cpp +++ b/src/assembler/X64Assembler.cpp @@ -203,4 +203,11 @@ void X64Assembler::shl(X64Register reg, uint8_t value) { void X64Assembler::xchg(X64Register reg, X64Register reg2) { rex(this, reg, reg2, true); X86Assembler::xchg(x86reg(reg), x86reg(reg2)); +} + +void X64Assembler::align16() { + auto align = 16 - (this->currentAddress() % 16); + for (auto i = 0; i < align; i++) { + this->write8(0x90); + } } \ No newline at end of file diff --git a/src/assembler/X64Assembler.hpp b/src/assembler/X64Assembler.hpp index 3a369c3..853cd0f 100644 --- a/src/assembler/X64Assembler.hpp +++ b/src/assembler/X64Assembler.hpp @@ -105,5 +105,7 @@ namespace tulip::hook { void shl(X64Register reg, uint8_t value); void xchg(X64Register reg, X64Register reg2); + + void align16(); }; } \ No newline at end of file diff --git a/src/assembler/X86Assembler.cpp b/src/assembler/X86Assembler.cpp index 6919898..bf5bec9 100644 --- a/src/assembler/X86Assembler.cpp +++ b/src/assembler/X86Assembler.cpp @@ -83,15 +83,29 @@ void X86Assembler::encodeModRM(X86Operand op, uint8_t digit) { } void X86Assembler::add(X86Register reg, int32_t value) { - this->write8(0x81); - this->write8(0xC0 | regIdx(reg)); - this->write32(value); + if (value > -0x80 && value < 0x7f) { + this->write8(0x83); + this->write8(0xC0 | regIdx(reg)); + this->write8(value); + } + else { + this->write8(0x81); + this->write8(0xC0 | regIdx(reg)); + this->write32(value); + } } void X86Assembler::sub(X86Register reg, int32_t value) { - this->write8(0x81); - this->write8(0xE8 | regIdx(reg)); - this->write32(value); + if (value > -0x80 && value < 0x7f) { + this->write8(0x83); + this->write8(0xE8 | regIdx(reg)); + this->write8(value); + } + else { + this->write8(0x81); + this->write8(0xE8 | regIdx(reg)); + this->write32(value); + } } void X86Assembler::push(X86Register reg) { diff --git a/src/convention/Windows64Convention.cpp b/src/convention/Windows64Convention.cpp index 83ea916..017ab91 100644 --- a/src/convention/Windows64Convention.cpp +++ b/src/convention/Windows64Convention.cpp @@ -48,8 +48,8 @@ void Windows64Convention::generateIntoDefault(BaseAssembler& a_, AbstractFunctio size_t stackParamSize = getStackParamSize(function); auto const paddedSize = (stackParamSize % 16) ? stackParamSize + 8 : stackParamSize; // + 0x20 for the shadow space before the first arg - a.label("convention-alloc-small"); a.sub(RSP, paddedSize + 0x20); + a.label("convention-alloc-small"); if (stackParamSize > 0) { // theres stack args, so we need to copy them over diff --git a/src/generator/X64Generator.cpp b/src/generator/X64Generator.cpp index 79c2efe..7b9d9ae 100644 --- a/src/generator/X64Generator.cpp +++ b/src/generator/X64Generator.cpp @@ -25,19 +25,19 @@ size_t X64HandlerGenerator::preserveRegisters(X64Assembler& a) { RegMem64 m; #ifdef TULIP_HOOK_WINDOWS constexpr auto PRESERVE_SIZE = 0x80; - a.sub(RSP, PRESERVE_SIZE); - - a.mov(m[RSP + 0x58], R9); - a.mov(m[RSP + 0x50], R8); - a.mov(m[RSP + 0x48], RDX); - a.mov(m[RSP + 0x40], RCX); - a.movaps(m[RSP + 0x30], XMM3); - a.movaps(m[RSP + 0x20], XMM2); - a.movaps(m[RSP + 0x10], XMM1); - a.movaps(m[RSP + 0x00], XMM0); + // a.sub(RSP, PRESERVE_SIZE); + + a.mov(m[RSP + 0x78], R9); + a.mov(m[RSP + 0x70], R8); + a.mov(m[RSP + 0x68], RDX); + a.mov(m[RSP + 0x60], RCX); + a.movaps(m[RSP + 0x50], XMM3); + a.movaps(m[RSP + 0x40], XMM2); + a.movaps(m[RSP + 0x30], XMM1); + a.movaps(m[RSP + 0x20], XMM0); #else constexpr auto PRESERVE_SIZE = 0xc0; - a.sub(RSP, PRESERVE_SIZE); + // a.sub(RSP, PRESERVE_SIZE); a.mov(m[RSP + 0xa8], R9); a.mov(m[RSP + 0xa0], R8); @@ -60,16 +60,16 @@ void X64HandlerGenerator::restoreRegisters(X64Assembler& a, size_t size) { using enum X64Register; RegMem64 m; #ifdef TULIP_HOOK_WINDOWS - a.movaps(XMM0, m[RSP + 0x00]); - a.movaps(XMM1, m[RSP + 0x10]); - a.movaps(XMM2, m[RSP + 0x20]); - a.movaps(XMM3, m[RSP + 0x30]); - a.mov(RCX, m[RSP + 0x40]); - a.mov(RDX, m[RSP + 0x48]); - a.mov(R8, m[RSP + 0x50]); - a.mov(R9, m[RSP + 0x58]); - - a.add(RSP, size); + a.movaps(XMM0, m[RSP + 0x20]); + a.movaps(XMM1, m[RSP + 0x30]); + a.movaps(XMM2, m[RSP + 0x40]); + a.movaps(XMM3, m[RSP + 0x50]); + a.mov(RCX, m[RSP + 0x60]); + a.mov(RDX, m[RSP + 0x68]); + a.mov(R8, m[RSP + 0x70]); + a.mov(R9, m[RSP + 0x78]); + + // a.add(RSP, size); #else a.movaps(XMM0, m[RSP + 0x00]); a.movaps(XMM1, m[RSP + 0x10]); @@ -86,7 +86,7 @@ void X64HandlerGenerator::restoreRegisters(X64Assembler& a, size_t size) { a.mov(R8, m[RSP + 0xa0]); a.mov(R9, m[RSP + 0xa8]); - a.add(RSP, size); + // a.add(RSP, size); #endif } @@ -95,13 +95,13 @@ size_t X64HandlerGenerator::preserveReturnRegisters(X64Assembler& a) { RegMem64 m; #ifdef TULIP_HOOK_WINDOWS constexpr auto PRESERVE_SIZE = 0x40; - a.sub(RSP, PRESERVE_SIZE); + // a.sub(RSP, PRESERVE_SIZE); - a.movaps(m[RSP + 0x00], XMM0); - a.mov(RSP + 0x10, RAX); + a.movaps(m[RSP + 0x20], XMM0); + a.mov(RSP + 0x30, RAX); #else constexpr auto PRESERVE_SIZE = 0x40; - a.sub(RSP, PRESERVE_SIZE); + // a.sub(RSP, PRESERVE_SIZE); a.movaps(m[RSP + 0x00], XMM0); a.movaps(m[RSP + 0x10], XMM1); @@ -114,17 +114,17 @@ void X64HandlerGenerator::restoreReturnRegisters(X64Assembler& a, size_t size) { using enum X64Register; RegMem64 m; #ifdef TULIP_HOOK_WINDOWS - a.mov(RAX, m[RSP + 0x10]); - a.movaps(XMM0, m[RSP + 0x00]); + a.mov(RAX, m[RSP + 0x30]); + a.movaps(XMM0, m[RSP + 0x20]); - a.add(RSP, size); + // a.add(RSP, size); #else a.mov(RDX, m[RSP + 0x28]); a.mov(RAX, m[RSP + 0x20]); a.movaps(XMM1, m[RSP + 0x10]); a.movaps(XMM0, m[RSP + 0x00]); - a.add(RSP, size); + // a.add(RSP, size); #endif } @@ -168,12 +168,12 @@ Result X64HandlerGenerator::generateHandler() { a.nop(); } - a.label("handler-push"); a.push(RBP); + a.label("handler-push"); a.mov(RBP, RSP); // shadow space - a.label("handler-alloc-small"); - a.sub(RSP, 0x40); + a.sub(RSP, 0xc0); + a.label("handler-alloc-mid"); // preserve registers const auto preservedSize = preserveRegisters(a); @@ -211,7 +211,7 @@ Result X64HandlerGenerator::generateHandler() { restoreReturnRegisters(a, returnPreservedSize); // done! - a.add(RSP, 0x40); + a.add(RSP, 0xc0); a.pop(RBP); a.ret(); @@ -226,6 +226,8 @@ Result X64HandlerGenerator::generateHandler() { a.updateLabels(); + a.align16(); + auto codeSize = a.m_buffer.size(); #ifdef TULIP_HOOK_WINDOWS @@ -237,10 +239,9 @@ Result X64HandlerGenerator::generateHandler() { auto const offsetEnd = (address + a.m_buffer.size()) & 0xffff; auto const pushOffset = reinterpret_cast(a.getLabel("handler-push")) & 0xffff; - auto const allocOffset = reinterpret_cast(a.getLabel("handler-alloc-small")) & 0xffff; + auto const allocOffset = reinterpret_cast(a.getLabel("handler-alloc-mid")) & 0xffff; auto const conventionOffset = reinterpret_cast(a.getLabel("convention-alloc-small")) & 0xffff; - auto const hasConvention = conventionOffset != 0; - auto const prologSize = static_cast(hasConvention ? conventionOffset : allocOffset); + auto const prologSize = conventionOffset; // RUNTIME_FUNCTION @@ -253,36 +254,35 @@ Result X64HandlerGenerator::generateHandler() { a.write8( 0x1 | // Version : 3 - 0x10 // Flags : 5 + 0x0 // Flags : 5 ); a.write8(prologSize); // SizeOfProlog - a.write8(hasConvention ? 3 : 2); // CountOfUnwindCodes + a.write8(4); // CountOfUnwindCodes a.write8( 0x0 | // FrameRegister : 4 0x0 // FrameOffset : 4 ); // UNWIND_CODE[] - a.write8(pushOffset); // CodeOffset + auto padded = 0x20 + getPaddedStackParamSize(m_metadata.m_abstract); + a.write8(conventionOffset); // CodeOffset a.write8( - 0x50 | // UnwindOp : 4 - 0x0 // OpInfo : 4 + (((padded >> 3) - 1) << 4) | // UnwindOp : 4 + 0x2 // OpInfo : 4 ); a.write8(allocOffset); // CodeOffset a.write8( - (((0x40 >> 3) - 1) << 4) | // UnwindOp : 4 - 0x2 // OpInfo : 4 + 0x0 | // UnwindOp : 4 + 0x1 // OpInfo : 4 ); + a.write16(0xc0 >> 3); // UWOP_ALLOC_LARGE continuation - if (hasConvention) { - auto padded = getPaddedStackParamSize(m_metadata.m_abstract); - a.write8(conventionOffset); // CodeOffset - a.write8( - (((padded >> 3) - 1) << 4) | // UnwindOp : 4 - 0x2 // OpInfo : 4 - ); - } + a.write8(pushOffset); // CodeOffset + a.write8( + 0x50 | // UnwindOp : 4 + 0x0 // OpInfo : 4 + ); } #endif @@ -472,8 +472,8 @@ Result X64WrapperGenerator::generateWrapper() { X64Assembler a(address); using enum X64Register; - a.label("wrapper-push"); a.push(RBP); + a.label("wrapper-push"); a.mov(RBP, RSP); m_metadata.m_convention->generateIntoOriginal(a, m_metadata.m_abstract); @@ -496,6 +496,8 @@ Result X64WrapperGenerator::generateWrapper() { a.updateLabels(); + a.align16(); + auto codeSize2 = a.m_buffer.size(); #ifdef TULIP_HOOK_WINDOWS @@ -506,8 +508,7 @@ Result X64WrapperGenerator::generateWrapper() { auto const pushOffset = reinterpret_cast(a.getLabel("wrapper-push")) & 0xffff; auto const conventionOffset = reinterpret_cast(a.getLabel("convention-alloc-small")) & 0xffff; - auto const hasConvention = conventionOffset != 0; - auto const prologSize = static_cast(hasConvention ? conventionOffset : pushOffset); + auto const prologSize = conventionOffset; // RUNTIME_FUNCTION @@ -520,30 +521,29 @@ Result X64WrapperGenerator::generateWrapper() { a.write8( 0x1 | // Version : 3 - 0x10 // Flags : 5 + 0x0 // Flags : 5 ); a.write8(prologSize); // SizeOfProlog - a.write8(hasConvention ? 2 : 1); // CountOfUnwindCodes + a.write8(2); // CountOfUnwindCodes a.write8( 0x0 | // FrameRegister : 4 0x0 // FrameOffset : 4 ); // UNWIND_CODE[] + auto padded = 0x20 + getPaddedStackParamSize(m_metadata.m_abstract); + a.write8(conventionOffset); // CodeOffset + a.write8( + (((padded >> 3) - 1) << 4) | // UnwindOp : 4 + 0x2 // OpInfo : 4 + ); + a.write8(pushOffset); // CodeOffset a.write8( 0x50 | // UnwindOp : 4 0x0 // OpInfo : 4 ); - if (hasConvention) { - auto padded = getPaddedStackParamSize(m_metadata.m_abstract); - a.write8(conventionOffset); // CodeOffset - a.write8( - (((padded >> 3) - 1) << 4) | // UnwindOp : 4 - 0x2 // OpInfo : 4 - ); - } } #endif @@ -577,8 +577,8 @@ Result X64HandlerGenerator::generateTrampoline(uint64_t target) { using enum X64Register; if (m_metadata.m_convention->needsWrapper(m_metadata.m_abstract)) { - a.label("trampoline-push"); a.push(RBP); + a.label("trampoline-push"); a.mov(RBP, RSP); m_metadata.m_convention->generateIntoOriginal(a, m_metadata.m_abstract); @@ -589,6 +589,8 @@ Result X64HandlerGenerator::generateTrampoline(uint64_t target) { a.ret(); } + auto codeSizeFake = a.m_buffer.size(); + a.label("relocated"); TULIP_HOOK_UNWRAP_INTO(auto code, this->relocatedBytes(a.currentAddress(), target)); @@ -609,20 +611,19 @@ Result X64HandlerGenerator::generateTrampoline(uint64_t target) { a.updateLabels(); - auto codeSize = a.m_buffer.size(); + a.align16(); #ifdef TULIP_HOOK_WINDOWS - { + if (m_metadata.m_convention->needsWrapper(m_metadata.m_abstract)) { auto const address = reinterpret_cast(m_trampoline); auto const offsetBegin = address & 0xffff; auto const offsetEnd = (address + a.m_buffer.size()) & 0xffff; auto const pushOffset = reinterpret_cast(a.getLabel("trampoline-push")) & 0xffff; auto const conventionOffset = reinterpret_cast(a.getLabel("convention-alloc-small")) & 0xffff; - auto const hasConvention = conventionOffset != 0; - auto const prologSize = static_cast(hasConvention ? conventionOffset : pushOffset); + auto const prologSize = conventionOffset; // RUNTIME_FUNCTION @@ -635,30 +636,28 @@ Result X64HandlerGenerator::generateTrampoline(uint64_t target) { a.write8( 0x1 | // Version : 3 - 0x10 // Flags : 5 + 0x0 // Flags : 5 ); a.write8(prologSize); // SizeOfProlog - a.write8(hasConvention ? 2 : 1); // CountOfUnwindCodes + a.write8(2); // CountOfUnwindCodes a.write8( 0x0 | // FrameRegister : 4 0x0 // FrameOffset : 4 ); // UNWIND_CODE[] + auto padded = 0x20 + getPaddedStackParamSize(m_metadata.m_abstract); + a.write8(conventionOffset); // CodeOffset + a.write8( + (((padded >> 3) - 1) << 4) | // UnwindOp : 4 + 0x2 // OpInfo : 4 + ); + a.write8(pushOffset); // CodeOffset a.write8( 0x50 | // UnwindOp : 4 0x0 // OpInfo : 4 ); - - if (hasConvention) { - auto padded = getPaddedStackParamSize(m_metadata.m_abstract); - a.write8(conventionOffset); // CodeOffset - a.write8( - (((padded >> 3) - 1) << 4) | // UnwindOp : 4 - 0x2 // OpInfo : 4 - ); - } } #endif @@ -668,7 +667,7 @@ Result X64HandlerGenerator::generateTrampoline(uint64_t target) { TULIP_HOOK_UNWRAP(Target::get().writeMemory(m_trampoline, a.m_buffer.data(), a.m_buffer.size())); - return Ok(FunctionData{m_trampoline, codeSize}); + return Ok(FunctionData{m_trampoline, codeSizeFake}); } Result<> X64HandlerGenerator::relocateBranchInstruction(cs_insn* insn, uint8_t* buffer, uint64_t& trampolineAddress, uint64_t& originalAddress, int64_t targetAddress) { diff --git a/src/target/Windows64Target.cpp b/src/target/Windows64Target.cpp index d5e17b9..45f33d2 100644 --- a/src/target/Windows64Target.cpp +++ b/src/target/Windows64Target.cpp @@ -11,6 +11,7 @@ using namespace tulip::hook; #include "../Pool.hpp" #include "../Handler.hpp" #include "../Wrapper.hpp" +#include Target& Target::get() { static Windows64Target ret; @@ -40,10 +41,16 @@ Result<> Windows64Target::allocatePage() { auto tramplineEnd = tramplineBegin + handler->m_trampolineSize; if (controlPc >= handlerBegin && controlPc < handlerEnd) { + // std::stringstream ss; + // ss << "Control PC: " << std::hex << controlPc << " Handler Begin: " << handlerBegin << " Handler End: " << handlerEnd; + // MessageBoxA(nullptr, ss.str().c_str(), "Error Loading Geode", MB_ICONERROR); return reinterpret_cast(handlerEnd); } if (controlPc >= tramplineBegin && controlPc < tramplineEnd) { + // std::stringstream ss; + // ss << "Control PC: " << std::hex << controlPc << " Trampline Begin: " << tramplineBegin << " Trampline End: " << tramplineEnd; + // MessageBoxA(nullptr, ss.str().c_str(), "Error Loading Geode", MB_ICONERROR); return reinterpret_cast(tramplineEnd); } } @@ -53,6 +60,9 @@ Result<> Windows64Target::allocatePage() { auto wrapperEnd = wrapperBegin + wrapper.m_size; if (controlPc >= wrapperBegin && controlPc < wrapperEnd) { + // std::stringstream ss; + // ss << "Control PC: " << std::hex << controlPc << " Wrapper Begin: " << wrapperBegin << " Wrapper End: " << wrapperEnd; + // MessageBoxA(nullptr, ss.str().c_str(), "Error Loading Geode", MB_ICONERROR); return reinterpret_cast(wrapperEnd); } } From 6a86cd5c38936a12a73ebf9295c0a43bfec5918f Mon Sep 17 00:00:00 2001 From: altalk23 <45172705+altalk23@users.noreply.github.com> Date: Tue, 18 Jun 2024 11:17:02 +0300 Subject: [PATCH 3/7] yeah someone has to cleanup this shit --- src/assembler/X86Assembler.cpp | 4 +- src/convention/Windows64Convention.cpp | 8 +-- src/generator/X64Generator.cpp | 67 +++++++++++++++----------- src/target/Windows64Target.cpp | 38 +++++++++++++++ 4 files changed, 84 insertions(+), 33 deletions(-) diff --git a/src/assembler/X86Assembler.cpp b/src/assembler/X86Assembler.cpp index bf5bec9..5a39223 100644 --- a/src/assembler/X86Assembler.cpp +++ b/src/assembler/X86Assembler.cpp @@ -83,7 +83,7 @@ void X86Assembler::encodeModRM(X86Operand op, uint8_t digit) { } void X86Assembler::add(X86Register reg, int32_t value) { - if (value > -0x80 && value < 0x7f) { + if (value >= -0x80 && value <= 0x7f) { this->write8(0x83); this->write8(0xC0 | regIdx(reg)); this->write8(value); @@ -96,7 +96,7 @@ void X86Assembler::add(X86Register reg, int32_t value) { } void X86Assembler::sub(X86Register reg, int32_t value) { - if (value > -0x80 && value < 0x7f) { + if (value >= -0x80 && value <= 0x7f) { this->write8(0x83); this->write8(0xE8 | regIdx(reg)); this->write8(value); diff --git a/src/convention/Windows64Convention.cpp b/src/convention/Windows64Convention.cpp index 017ab91..153a5cc 100644 --- a/src/convention/Windows64Convention.cpp +++ b/src/convention/Windows64Convention.cpp @@ -36,8 +36,8 @@ void Windows64Convention::generateDefaultCleanup(BaseAssembler& a_, AbstractFunc auto& a = static_cast(a_); using enum X64Register; - size_t paddedSize = getPaddedStackParamSize(function); - a.add(RSP, paddedSize + 0x20); + // size_t paddedSize = getPaddedStackParamSize(function); + // a.add(RSP, paddedSize + 0x20); } void Windows64Convention::generateIntoDefault(BaseAssembler& a_, AbstractFunction const& function) { @@ -48,8 +48,8 @@ void Windows64Convention::generateIntoDefault(BaseAssembler& a_, AbstractFunctio size_t stackParamSize = getStackParamSize(function); auto const paddedSize = (stackParamSize % 16) ? stackParamSize + 8 : stackParamSize; // + 0x20 for the shadow space before the first arg - a.sub(RSP, paddedSize + 0x20); - a.label("convention-alloc-small"); + // a.sub(RSP, paddedSize + 0x20); + // a.label("convention-alloc-small"); if (stackParamSize > 0) { // theres stack args, so we need to copy them over diff --git a/src/generator/X64Generator.cpp b/src/generator/X64Generator.cpp index 7b9d9ae..f08a7d6 100644 --- a/src/generator/X64Generator.cpp +++ b/src/generator/X64Generator.cpp @@ -164,9 +164,9 @@ Result X64HandlerGenerator::generateHandler() { constexpr auto FIRST_PARAM = RDI; #endif - for (size_t i = 0; i < 8; ++i) { - a.nop(); - } + // for (size_t i = 0; i < 8; ++i) { + // a.nop(); + // } a.push(RBP); a.label("handler-push"); @@ -240,8 +240,8 @@ Result X64HandlerGenerator::generateHandler() { auto const pushOffset = reinterpret_cast(a.getLabel("handler-push")) & 0xffff; auto const allocOffset = reinterpret_cast(a.getLabel("handler-alloc-mid")) & 0xffff; - auto const conventionOffset = reinterpret_cast(a.getLabel("convention-alloc-small")) & 0xffff; - auto const prologSize = conventionOffset; + // auto const conventionOffset = reinterpret_cast(a.getLabel("convention-alloc-small")) & 0xffff; + auto const prologSize = allocOffset; // RUNTIME_FUNCTION @@ -257,19 +257,19 @@ Result X64HandlerGenerator::generateHandler() { 0x0 // Flags : 5 ); a.write8(prologSize); // SizeOfProlog - a.write8(4); // CountOfUnwindCodes + a.write8(3); // CountOfUnwindCodes a.write8( 0x0 | // FrameRegister : 4 0x0 // FrameOffset : 4 ); // UNWIND_CODE[] - auto padded = 0x20 + getPaddedStackParamSize(m_metadata.m_abstract); - a.write8(conventionOffset); // CodeOffset - a.write8( - (((padded >> 3) - 1) << 4) | // UnwindOp : 4 - 0x2 // OpInfo : 4 - ); + // auto padded = 0x20 + getPaddedStackParamSize(m_metadata.m_abstract); + // a.write8(conventionOffset); // CodeOffset + // a.write8( + // (((padded >> 3) - 1) << 4) | // UnwindOp : 4 + // 0x2 // OpInfo : 4 + // ); a.write8(allocOffset); // CodeOffset a.write8( @@ -476,6 +476,10 @@ Result X64WrapperGenerator::generateWrapper() { a.label("wrapper-push"); a.mov(RBP, RSP); + // shadow space + a.sub(RSP, 0xc0); + a.label("wrapper-alloc-mid"); + m_metadata.m_convention->generateIntoOriginal(a, m_metadata.m_abstract); auto difference = a.currentAddress() - reinterpret_cast(m_address) + 5; @@ -507,8 +511,9 @@ Result X64WrapperGenerator::generateWrapper() { auto const offsetEnd = (address + a.m_buffer.size()) & 0xffff; auto const pushOffset = reinterpret_cast(a.getLabel("wrapper-push")) & 0xffff; - auto const conventionOffset = reinterpret_cast(a.getLabel("convention-alloc-small")) & 0xffff; - auto const prologSize = conventionOffset; + auto const allocOffset = reinterpret_cast(a.getLabel("wrapper-alloc-mid")) & 0xffff; + // auto const conventionOffset = reinterpret_cast(a.getLabel("convention-alloc-small")) & 0xffff; + auto const prologSize = allocOffset; // RUNTIME_FUNCTION @@ -524,26 +529,25 @@ Result X64WrapperGenerator::generateWrapper() { 0x0 // Flags : 5 ); a.write8(prologSize); // SizeOfProlog - a.write8(2); // CountOfUnwindCodes + a.write8(3); // CountOfUnwindCodes a.write8( 0x0 | // FrameRegister : 4 0x0 // FrameOffset : 4 ); // UNWIND_CODE[] - auto padded = 0x20 + getPaddedStackParamSize(m_metadata.m_abstract); - a.write8(conventionOffset); // CodeOffset + a.write8(allocOffset); // CodeOffset a.write8( - (((padded >> 3) - 1) << 4) | // UnwindOp : 4 - 0x2 // OpInfo : 4 + 0x0 | // UnwindOp : 4 + 0x1 // OpInfo : 4 ); + a.write16(0xc0 >> 3); // UWOP_ALLOC_LARGE continuation a.write8(pushOffset); // CodeOffset a.write8( 0x50 | // UnwindOp : 4 0x0 // OpInfo : 4 ); - } #endif @@ -580,11 +584,19 @@ Result X64HandlerGenerator::generateTrampoline(uint64_t target) { a.push(RBP); a.label("trampoline-push"); a.mov(RBP, RSP); + + // shadow space + a.sub(RSP, 0xc0); + a.label("trampoline-alloc-mid"); + m_metadata.m_convention->generateIntoOriginal(a, m_metadata.m_abstract); a.call("relocated"); m_metadata.m_convention->generateOriginalCleanup(a, m_metadata.m_abstract); + + a.add(RSP, 0xc0); + a.pop(RBP); a.ret(); } @@ -622,8 +634,9 @@ Result X64HandlerGenerator::generateTrampoline(uint64_t target) { auto const offsetEnd = (address + a.m_buffer.size()) & 0xffff; auto const pushOffset = reinterpret_cast(a.getLabel("trampoline-push")) & 0xffff; - auto const conventionOffset = reinterpret_cast(a.getLabel("convention-alloc-small")) & 0xffff; - auto const prologSize = conventionOffset; + auto const allocOffset = reinterpret_cast(a.getLabel("trampoline-alloc-mid")) & 0xffff; + // auto const conventionOffset = reinterpret_cast(a.getLabel("convention-alloc-small")) & 0xffff; + auto const prologSize = allocOffset; // RUNTIME_FUNCTION @@ -639,19 +652,19 @@ Result X64HandlerGenerator::generateTrampoline(uint64_t target) { 0x0 // Flags : 5 ); a.write8(prologSize); // SizeOfProlog - a.write8(2); // CountOfUnwindCodes + a.write8(3); // CountOfUnwindCodes a.write8( 0x0 | // FrameRegister : 4 0x0 // FrameOffset : 4 ); // UNWIND_CODE[] - auto padded = 0x20 + getPaddedStackParamSize(m_metadata.m_abstract); - a.write8(conventionOffset); // CodeOffset + a.write8(allocOffset); // CodeOffset a.write8( - (((padded >> 3) - 1) << 4) | // UnwindOp : 4 - 0x2 // OpInfo : 4 + 0x0 | // UnwindOp : 4 + 0x1 // OpInfo : 4 ); + a.write16(0xc0 >> 3); // UWOP_ALLOC_LARGE continuation a.write8(pushOffset); // CodeOffset a.write8( diff --git a/src/target/Windows64Target.cpp b/src/target/Windows64Target.cpp index 45f33d2..ed97a73 100644 --- a/src/target/Windows64Target.cpp +++ b/src/target/Windows64Target.cpp @@ -18,6 +18,44 @@ Target& Target::get() { return ret; } +PVOID CustomFunctionTableAccess64(HANDLE hProcess, DWORD64 AddrBase) { + for (auto& [handle, handler] : Pool::get().m_handlers) { + auto handlerBegin = reinterpret_cast(handler->m_handler); + auto handlerEnd = handlerBegin + handler->m_handlerSize; + + auto tramplineBegin = reinterpret_cast(handler->m_trampoline); + auto tramplineEnd = tramplineBegin + handler->m_trampolineSize; + + if (AddrBase >= handlerBegin && AddrBase < handlerEnd) { + // std::stringstream ss; + // ss << "Control PC: " << std::hex << AddrBase << " Handler Begin: " << handlerBegin << " Handler End: " << handlerEnd; + // MessageBoxA(nullptr, ss.str().c_str(), "Error Loading Geode", MB_ICONERROR); + return reinterpret_cast(handlerEnd); + } + + if (AddrBase >= tramplineBegin && AddrBase < tramplineEnd) { + // std::stringstream ss; + // ss << "Control PC: " << std::hex << AddrBase << " Trampline Begin: " << tramplineBegin << " Trampline End: " << tramplineEnd; + // MessageBoxA(nullptr, ss.str().c_str(), "Error Loading Geode", MB_ICONERROR); + return reinterpret_cast(tramplineEnd); + } + } + + for (auto& [handle, wrapper] : Wrapper::get().m_wrappers) { + auto wrapperBegin = reinterpret_cast(wrapper.m_address); + auto wrapperEnd = wrapperBegin + wrapper.m_size; + + if (AddrBase >= wrapperBegin && AddrBase < wrapperEnd) { + // std::stringstream ss; + // ss << "Control PC: " << std::hex << AddrBase << " Wrapper Begin: " << wrapperBegin << " Wrapper End: " << wrapperEnd; + // MessageBoxA(nullptr, ss.str().c_str(), "Error Loading Geode", MB_ICONERROR); + return reinterpret_cast(wrapperEnd); + } + } + + return nullptr; +} + Result<> Windows64Target::allocatePage() { m_allocatedPage = VirtualAlloc(nullptr, 0x10000, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READ); From f4ed8ee4d8a99087908799eca71425bf45a599f0 Mon Sep 17 00:00:00 2001 From: altalk23 <45172705+altalk23@users.noreply.github.com> Date: Tue, 18 Jun 2024 11:27:28 +0300 Subject: [PATCH 4/7] fix include --- include/tulip/FunctionData.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/tulip/FunctionData.hpp b/include/tulip/FunctionData.hpp index cd09e7d..d8b9c0f 100644 --- a/include/tulip/FunctionData.hpp +++ b/include/tulip/FunctionData.hpp @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace tulip::hook { struct FunctionData { From aa66657cf0ce93fe1909f972b7a62e1cc2981bfd Mon Sep 17 00:00:00 2001 From: altalk23 <45172705+altalk23@users.noreply.github.com> Date: Tue, 18 Jun 2024 11:29:55 +0300 Subject: [PATCH 5/7] fix oks --- src/generator/X64Generator.cpp | 2 +- src/generator/X86Generator.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/generator/X64Generator.cpp b/src/generator/X64Generator.cpp index f08a7d6..51be19a 100644 --- a/src/generator/X64Generator.cpp +++ b/src/generator/X64Generator.cpp @@ -459,7 +459,7 @@ Result<> X64HandlerGenerator::relocateRIPInstruction(cs_insn* insn, uint8_t* buf Result X64WrapperGenerator::generateWrapper() { if (!m_metadata.m_convention->needsWrapper(m_metadata.m_abstract)) { - return Ok(m_address); + return Ok(FunctionData{m_address, 0}); } // this is silly, butt diff --git a/src/generator/X86Generator.cpp b/src/generator/X86Generator.cpp index 7ab4a13..b92ff43 100644 --- a/src/generator/X86Generator.cpp +++ b/src/generator/X86Generator.cpp @@ -134,7 +134,7 @@ std::vector X86WrapperGenerator::wrapperBytes(uint64_t address) { Result X86WrapperGenerator::generateWrapper() { if (!m_metadata.m_convention->needsWrapper(m_metadata.m_abstract)) { - return Ok(m_address); + return Ok(FunctionData{m_address, 0}); } // this is silly, butt From e15aaee993af7e13b0b9cc1edb0a0d8a23128f99 Mon Sep 17 00:00:00 2001 From: dankmeme01 <42031238+dankmeme01@users.noreply.github.com> Date: Tue, 18 Jun 2024 20:26:33 +0200 Subject: [PATCH 6/7] silly alk forgot to add back the shadow space --- src/generator/X64Generator.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/generator/X64Generator.cpp b/src/generator/X64Generator.cpp index 51be19a..3a3ee17 100644 --- a/src/generator/X64Generator.cpp +++ b/src/generator/X64Generator.cpp @@ -255,7 +255,7 @@ Result X64HandlerGenerator::generateHandler() { a.write8( 0x1 | // Version : 3 0x0 // Flags : 5 - ); + ); a.write8(prologSize); // SizeOfProlog a.write8(3); // CountOfUnwindCodes a.write8( @@ -461,14 +461,14 @@ Result X64WrapperGenerator::generateWrapper() { if (!m_metadata.m_convention->needsWrapper(m_metadata.m_abstract)) { return Ok(FunctionData{m_address, 0}); } - + // this is silly, butt auto codeSize = this->wrapperBytes(0).size(); auto areaSize = (codeSize + (0x20 - codeSize) % 0x20); TULIP_HOOK_UNWRAP_INTO(auto area, Target::get().allocateArea(areaSize)); auto address = reinterpret_cast(area); - + X64Assembler a(address); using enum X64Register; @@ -492,6 +492,8 @@ Result X64WrapperGenerator::generateWrapper() { m_metadata.m_convention->generateOriginalCleanup(a, m_metadata.m_abstract); + a.add(RSP, 0xc0); + a.pop(RBP); a.ret(); @@ -527,7 +529,7 @@ Result X64WrapperGenerator::generateWrapper() { a.write8( 0x1 | // Version : 3 0x0 // Flags : 5 - ); + ); a.write8(prologSize); // SizeOfProlog a.write8(3); // CountOfUnwindCodes a.write8( @@ -625,7 +627,7 @@ Result X64HandlerGenerator::generateTrampoline(uint64_t target) { a.align16(); - + #ifdef TULIP_HOOK_WINDOWS if (m_metadata.m_convention->needsWrapper(m_metadata.m_abstract)) { @@ -650,7 +652,7 @@ Result X64HandlerGenerator::generateTrampoline(uint64_t target) { a.write8( 0x1 | // Version : 3 0x0 // Flags : 5 - ); + ); a.write8(prologSize); // SizeOfProlog a.write8(3); // CountOfUnwindCodes a.write8( From d869c7edd15d69341dc5d1c635afc665090b197b Mon Sep 17 00:00:00 2001 From: dankmeme01 <42031238+dankmeme01@users.noreply.github.com> Date: Wed, 19 Jun 2024 18:26:41 +0200 Subject: [PATCH 7/7] attempt to fix wrappers overlapping --- src/generator/X64Generator.cpp | 54 ++++++++++++++++++++++++---------- src/generator/X64Generator.hpp | 6 +++- 2 files changed, 44 insertions(+), 16 deletions(-) diff --git a/src/generator/X64Generator.cpp b/src/generator/X64Generator.cpp index 3a3ee17..bf61494 100644 --- a/src/generator/X64Generator.cpp +++ b/src/generator/X64Generator.cpp @@ -457,18 +457,7 @@ Result<> X64HandlerGenerator::relocateRIPInstruction(cs_insn* insn, uint8_t* buf return X86HandlerGenerator::relocateRIPInstruction(insn, buffer, trampolineAddress, originalAddress, disp); } -Result X64WrapperGenerator::generateWrapper() { - if (!m_metadata.m_convention->needsWrapper(m_metadata.m_abstract)) { - return Ok(FunctionData{m_address, 0}); - } - - // this is silly, butt - auto codeSize = this->wrapperBytes(0).size(); - auto areaSize = (codeSize + (0x20 - codeSize) % 0x20); - - TULIP_HOOK_UNWRAP_INTO(auto area, Target::get().allocateArea(areaSize)); - auto address = reinterpret_cast(area); - +std::vector X64WrapperGenerator::wrapperBytes(uint64_t address) { X64Assembler a(address); using enum X64Register; @@ -504,9 +493,12 @@ Result X64WrapperGenerator::generateWrapper() { a.align16(); - auto codeSize2 = a.m_buffer.size(); + return std::move(a.m_buffer); +} #ifdef TULIP_HOOK_WINDOWS +std::vector X64WrapperGenerator::unwindInfoBytes(uint64_t address) { + X64Assembler a(address); { auto const offsetBegin = address & 0xffff; @@ -520,6 +512,8 @@ Result X64WrapperGenerator::generateWrapper() { // RUNTIME_FUNCTION + a.label("wrapper-unwind-info"); + a.write32(offsetBegin); // BeginAddress a.write32(offsetEnd); // EndAddress a.write32(offsetEnd + 0xc); // UnwindData @@ -552,11 +546,41 @@ Result X64WrapperGenerator::generateWrapper() { ); } + return std::move(a.m_buffer); +} +#endif + +Result X64WrapperGenerator::generateWrapper() { + if (!m_metadata.m_convention->needsWrapper(m_metadata.m_abstract)) { + return Ok(FunctionData{m_address, 0}); + } + + // this is silly, butt + auto codeSize = this->wrapperBytes(0).size(); + +#ifdef TULIP_HOOK_WINDOWS + auto unwindInfoSize = this->unwindInfoBytes(0).size(); + auto totalSize = codeSize + unwindInfoSize; +#else + auto totalSize = codeSize; +#endif + + auto areaSize = (totalSize + (0x20 - totalSize) % 0x20); + + TULIP_HOOK_UNWRAP_INTO(auto area, Target::get().allocateArea(areaSize)); + auto address = reinterpret_cast(area); + + auto code = this->wrapperBytes(address); + codeSize = code.size(); + +#ifdef TULIP_HOOK_WINDOWS + auto unwindInfo = this->unwindInfoBytes(address + codeSize); + code.insert(code.end(), unwindInfo.begin(), unwindInfo.end()); #endif - TULIP_HOOK_UNWRAP(Target::get().writeMemory(area, a.m_buffer.data(), a.m_buffer.size())); + TULIP_HOOK_UNWRAP(Target::get().writeMemory(area, code.data(), code.size())); - return Ok(FunctionData{area, codeSize2}); + return Ok(FunctionData{area, codeSize}); } // std::vector X64WrapperGenerator::reverseWrapperBytes(uint64_t address) { diff --git a/src/generator/X64Generator.hpp b/src/generator/X64Generator.hpp index 71ea97d..ec8ee78 100644 --- a/src/generator/X64Generator.hpp +++ b/src/generator/X64Generator.hpp @@ -35,7 +35,11 @@ namespace tulip::hook { public: using X86WrapperGenerator::X86WrapperGenerator; - // std::vector wrapperBytes(uint64_t address) override; + std::vector wrapperBytes(uint64_t address) override; + +#ifdef TULIP_HOOK_WINDOWS + std::vector unwindInfoBytes(uint64_t address); +#endif Result generateWrapper() override; // std::vector reverseWrapperBytes(uint64_t address) override;