diff --git a/.github/workflows/build-matrix.yaml b/.github/workflows/build-matrix.yaml index 59601c577fe..e96f7766c06 100644 --- a/.github/workflows/build-matrix.yaml +++ b/.github/workflows/build-matrix.yaml @@ -52,11 +52,11 @@ jobs: cmakePreset: "Release-macos-clang" cachePrefix: "" - # Q4 2023 there will hopefully be native arm64 runners - # https://github.com/github/roadmap/issues/528 - # build_macos_arm: - # name: "🍎 MacOS" - # uses: ./.github/workflows/macos-build-arm.yaml - # with: - # cmakePreset: "Release-macos-clang" - # cachePrefix: "" + # There are ARM64 macOS runners, but they aren't free _yet_ + # limited to the large runners right now + build_macos_arm: + name: "🍎 MacOS" + uses: ./.github/workflows/macos-build-arm.yaml + with: + cmakePreset: "Release-macos-arm64-clang" + cachePrefix: "" diff --git a/.github/workflows/macos-build-arm.yaml b/.github/workflows/macos-build-arm.yaml index cc7b56ac29f..9a387559508 100644 --- a/.github/workflows/macos-build-arm.yaml +++ b/.github/workflows/macos-build-arm.yaml @@ -13,7 +13,7 @@ on: jobs: build: name: ARM - runs-on: macos-latest + runs-on: macos-12 timeout-minutes: 120 env: # overrides: https://github.com/mbitsnbites/buildcache/blob/master/doc/configuration.md @@ -27,11 +27,13 @@ jobs: - name: Checkout Repository uses: actions/checkout@v4 - - name: Set up ARM64 environment - run: sudo softwareupdate --install-rosetta --agree-to-license + # TODO - not relevant on an intel runner + # - name: Set up ARM64 environment + # run: sudo softwareupdate --install-rosetta --agree-to-license - name: Install Package Dependencies - run: arch -arm64 brew install cmake ninja + # TODO - arch -arm64 + run: brew install cmake ninja - name: Setup Buildcache uses: mikehardy/buildcache-action@v2.1.0 @@ -51,8 +53,9 @@ jobs: - name: Build Project run: cmake --build build --parallel $((`sysctl -n hw.logicalcpu`)) - - name: Run Tests - run: ./test.sh + # TODO - soon TM + # - name: Run Tests + # run: ./test.sh - name: Upload artifact 
uses: actions/upload-artifact@v3 diff --git a/.vscode/launch.json b/.vscode/launch.json index 9d77fdb9505..ab489aa64ea 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -4,6 +4,22 @@ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "configurations": [ + { + "name": "Run C++ Tests LLDB", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/goalc-test", + "args": [ + "--gtest_brief=0", + "--gtest_filter=*CodeTester*", + "--gtest_break_on_failure" + ], + "stopAtEntry": false, + "cwd": "${workspaceFolder}", + "environment": [], + "externalConsole": false, + "MIMode": "lldb" + }, { "name": "Append File Docs", "type": "python", @@ -11,7 +27,9 @@ "program": "${workspaceFolder}/scripts/ci/lint-characters.py", "console": "integratedTerminal", "cwd": "${workspaceFolder}", - "args": ["--fix"] + "args": [ + "--fix" + ] }, ] -} +} \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 791677828ca..f434926f55c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -103,7 +103,6 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") message(STATUS "AppleClang detected - Setting Defaults") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \ - -march=native \ -Wall \ -Winit-self \ -ggdb \ @@ -121,6 +120,11 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") -fdiagnostics-color=always" ) + # TODO - make a proper flag for arm compiling + if(NOT CMAKE_CXX_COMPILER_TARGET STREQUAL "arm64-apple-darwin") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") + endif() + # additional c++ flags for release mode for our projects if(CMAKE_BUILD_TYPE MATCHES "Release") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") diff --git a/CMakePresets.json b/CMakePresets.json index 2e75ce28cbf..3b2a9940e56 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -154,6 +154,16 @@ "description": "Build with Clang as Release without Debug Symbols", "inherits": ["base-macos-release", "base-clang"] }, 
+ { + "name": "Release-macos-arm64-clang", + "displayName": "MacOS ARM64 Release (clang)", + "description": "Build with Clang, cross compiled for ARM64, as Release without Debug Symbols", + "inherits": ["base-macos-release", "base-clang"], + "cacheVariables": { + "CMAKE_C_COMPILER_TARGET": "arm64-apple-darwin", + "CMAKE_CXX_COMPILER_TARGET": "arm64-apple-darwin" + } + }, { "name": "Release-macos-clang-static", "displayName": "MacOS Static Release (clang)", diff --git a/Taskfile.yml b/Taskfile.yml index ab9394ac423..8715cacde5f 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -92,6 +92,14 @@ tasks: ignore_error: true - cmd: npx prettier --write ./decompiler/config/jak2/**/*.jsonc ignore_error: true + gen-cmake: + desc: "Generate the CMake" + cmds: + - "cmake -B build --preset={{.CMAKE_PRESET}}" + build: + desc: "Build the project using the generated CMake" + cmds: + - "cmake --build build --parallel {{.CMAKE_NUM_THREADS}}" # DECOMPILING decomp: cmds: @@ -172,3 +180,6 @@ tasks: type-test: cmds: - cmd: '{{.GOALCTEST_BIN_RELEASE_DIR}}/goalc-test --gtest_brief=0 --gtest_filter="*Jak2TypeConsistency*" --gtest_break_on_failure' + tests-filtered: + cmds: + - cmd: '{{.GOALCTEST_BIN_RELEASE_DIR}}/goalc-test --gtest_brief=0 --gtest_filter="*{{.FILTER}}*" --gtest_break_on_failure' diff --git a/common/util/os.cpp b/common/util/os.cpp index 0b49c25d4bd..56c33582c8b 100644 --- a/common/util/os.cpp +++ b/common/util/os.cpp @@ -30,6 +30,7 @@ void __cpuidex(int result[4], int eax, int ecx) { : "0"(eax), "2"(ecx)); } #else +// TODO - implement ARM64 detection, check for NEON instead of AVX // for now, just return 0's. 
void __cpuidex(int result[4], int eax, int ecx) { lg::warn("cpuid not implemented on this platform"); diff --git a/game/CMakeLists.txt b/game/CMakeLists.txt index 30e2778c363..cfb503270f9 100644 --- a/game/CMakeLists.txt +++ b/game/CMakeLists.txt @@ -10,13 +10,13 @@ else() message(STATUS "Non-ARM64 architecture detected") endif() -if(ARM64_ARCH) +# TODO - kinda a gross hack, should pass in an explicit flag later +if(ARM64_ARCH OR CMAKE_CXX_COMPILER_TARGET STREQUAL "arm64-apple-darwin") # Add your ARM64-specific configuration or build options here set(OG_ASM_FUNCS_FILE kernel/asm_funcs_arm64.s) enable_language(ASM) set(CMAKE_ASM_SOURCE_FILE_EXTENSIONS ${CMAKE_ASM_SOURCE_FILE_EXTENSIONS} s) - # set(CMAKE_ASM_COMPILE_OBJECT "${CMAKE_ASM_COMPILER} -o ") - set_source_files_properties(${OG_ASM_FUNCS_FILE} PROPERTIES COMPILE_FLAGS "-g") + set_source_files_properties(${OG_ASM_FUNCS_FILE} PROPERTIES COMPILE_FLAGS "-arch arm64 -g") else() set(OG_ASM_FUNCS_FILE kernel/asm_funcs_x86_64.asm) enable_language(ASM_NASM) diff --git a/game/graphics/opengl_renderer/background/Shrub.cpp b/game/graphics/opengl_renderer/background/Shrub.cpp index 623d5bb4f0c..e57bad33fc0 100644 --- a/game/graphics/opengl_renderer/background/Shrub.cpp +++ b/game/graphics/opengl_renderer/background/Shrub.cpp @@ -293,7 +293,7 @@ void Shrub::render_tree(int idx, interp_time_of_day_slow(settings.camera.itimes, *tree.colors, m_color_result.data()); } #else - interp_time_of_day_slow(settings.itimes, *tree.colors, m_color_result.data()); + interp_time_of_day_slow(settings.camera.itimes, *tree.colors, m_color_result.data()); #endif tree.perf.tod_time.add(interp_timer.getSeconds()); diff --git a/game/graphics/opengl_renderer/background/TFragment.cpp b/game/graphics/opengl_renderer/background/TFragment.cpp index a4efc782869..b92b7478dbb 100644 --- a/game/graphics/opengl_renderer/background/TFragment.cpp +++ b/game/graphics/opengl_renderer/background/TFragment.cpp @@ -430,7 +430,7 @@ void TFragment::render_tree(int 
geom, interp_time_of_day_slow(settings.camera.itimes, *tree.colors, m_color_result.data()); } #else - interp_time_of_day_slow(settings.itimes, *tree.colors, m_color_result.data()); + interp_time_of_day_slow(settings.camera.itimes, *tree.colors, m_color_result.data()); #endif glActiveTexture(GL_TEXTURE10); glBindTexture(GL_TEXTURE_1D, tree.time_of_day_texture); diff --git a/game/graphics/opengl_renderer/background/Tie3.cpp b/game/graphics/opengl_renderer/background/Tie3.cpp index 51dc5894518..35eca4bf014 100644 --- a/game/graphics/opengl_renderer/background/Tie3.cpp +++ b/game/graphics/opengl_renderer/background/Tie3.cpp @@ -434,7 +434,7 @@ void Tie3::setup_tree(int idx, interp_time_of_day_slow(settings.camera.itimes, *tree.colors, m_color_result.data()); } #else - interp_time_of_day_slow(settings.itimes, *tree.colors, m_color_result.data()); + interp_time_of_day_slow(settings.camera.itimes, *tree.colors, m_color_result.data()); #endif glActiveTexture(GL_TEXTURE10); diff --git a/game/runtime.cpp b/game/runtime.cpp index 88190e77077..c81b787ec1a 100644 --- a/game/runtime.cpp +++ b/game/runtime.cpp @@ -145,6 +145,13 @@ void deci2_runner(SystemThreadInterface& iface) { void ee_runner(SystemThreadInterface& iface) { prof().root_event(); // Allocate Main RAM. Must have execute enabled. 
+ // TODO Apple Silicon - You cannot make a page be RWX, + // or more specifically it can't be both writable and executable at the same time + // + // https://github.com/zherczeg/sljit/issues/99 + // + // The solution to this is to flip-flop between permissions, or perhaps have two threads + // one that has writing permission, and another with executable permission if (EE_MEM_LOW_MAP) { g_ee_main_mem = (u8*)mmap((void*)0x10000000, EE_MAIN_MEM_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, diff --git a/game/system/hid/input_bindings.h b/game/system/hid/input_bindings.h index 20650849e3f..b2207ab9590 100644 --- a/game/system/hid/input_bindings.h +++ b/game/system/hid/input_bindings.h @@ -336,7 +336,7 @@ extern const InputBindingGroups DEFAULT_MOUSE_BINDS; // So there are some potential solutions but this doesn't feel high priority and this was always an // issue. struct CommandBinding { - enum Source { CONTROLLER, KEYBOARD, MOUSE }; + enum class Source { CONTROLLER, KEYBOARD, MOUSE }; CommandBinding(const u32 _host_key, std::function _command) : host_key(_host_key), command(_command){}; diff --git a/goalc/CMakeLists.txt b/goalc/CMakeLists.txt index f2ae00b0959..fdcfd61d3ad 100644 --- a/goalc/CMakeLists.txt +++ b/goalc/CMakeLists.txt @@ -53,6 +53,8 @@ add_library(compiler data_compiler/DataObjectGenerator.cpp debugger/Debugger.cpp debugger/DebugInfo.cpp + emitter/IGenX86.cpp + emitter/IGenARM64.cpp listener/Listener.cpp listener/MemoryMap.cpp make/MakeSystem.cpp diff --git a/goalc/compiler/CodeGenerator.cpp b/goalc/compiler/CodeGenerator.cpp index a39eb250283..51d7db76fe9 100644 --- a/goalc/compiler/CodeGenerator.cpp +++ b/goalc/compiler/CodeGenerator.cpp @@ -88,7 +88,7 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) { // count how many xmm's we have to backup int n_xmm_backups = 0; for (auto& saved_reg : allocs.used_saved_regs) { - if (saved_reg.is_xmm()) { + if (saved_reg.is_128bit_simd()) { n_xmm_backups++; } } @@ -105,7 +105,7 @@ void 
CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) { // back up xmms int i = 0; for (auto& saved_reg : allocs.used_saved_regs) { - if (saved_reg.is_xmm()) { + if (saved_reg.is_128bit_simd()) { int offset = i * XMM_SIZE; m_gen.add_instr_no_ir(f_rec, IGen::store128_xmm128_reg_offset(RSP, saved_reg, offset), InstructionInfo::Kind::PROLOGUE); @@ -116,7 +116,7 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) { } else { // back up xmms (currently not aligned) for (auto& saved_reg : allocs.used_saved_regs) { - if (saved_reg.is_xmm()) { + if (saved_reg.is_128bit_simd()) { m_gen.add_instr_no_ir(f_rec, IGen::sub_gpr64_imm8s(RSP, XMM_SIZE), InstructionInfo::Kind::PROLOGUE); m_gen.add_instr_no_ir(f_rec, IGen::store128_gpr64_xmm128(RSP, saved_reg), @@ -183,12 +183,12 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) { m_gen.add_instr(IGen::load64_gpr64_plus_s32( op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE, RSP), i_rec); - } else if (op.reg.is_xmm() && op.reg_class == RegClass::FLOAT) { + } else if (op.reg.is_128bit_simd() && op.reg_class == RegClass::FLOAT) { // load xmm32 off of the stack m_gen.add_instr(IGen::load_reg_offset_xmm32( op.reg, RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE), i_rec); - } else if (op.reg.is_xmm() && + } else if (op.reg.is_128bit_simd() && (op.reg_class == RegClass::VECTOR_FLOAT || op.reg_class == RegClass::INT_128)) { m_gen.add_instr(IGen::load128_xmm128_reg_offset( op.reg, RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE), @@ -210,12 +210,12 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) { m_gen.add_instr(IGen::store64_gpr64_plus_s32( RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE, op.reg), i_rec); - } else if (op.reg.is_xmm() && op.reg_class == RegClass::FLOAT) { + } else if (op.reg.is_128bit_simd() && op.reg_class == RegClass::FLOAT) { // store xmm32 on the stack m_gen.add_instr(IGen::store_reg_offset_xmm32( RSP, op.reg, allocs.get_slot_for_spill(op.slot) 
* GPR_SIZE), i_rec); - } else if (op.reg.is_xmm() && + } else if (op.reg.is_128bit_simd() && (op.reg_class == RegClass::VECTOR_FLOAT || op.reg_class == RegClass::INT_128)) { m_gen.add_instr(IGen::store128_xmm128_reg_offset( RSP, op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE), @@ -254,7 +254,7 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) { int j = n_xmm_backups; for (int i = int(allocs.used_saved_regs.size()); i-- > 0;) { auto& saved_reg = allocs.used_saved_regs.at(i); - if (saved_reg.is_xmm()) { + if (saved_reg.is_128bit_simd()) { j--; int offset = j * XMM_SIZE; m_gen.add_instr_no_ir(f_rec, IGen::load128_xmm128_reg_offset(saved_reg, RSP, offset), @@ -268,7 +268,7 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) { } else { for (int i = int(allocs.used_saved_regs.size()); i-- > 0;) { auto& saved_reg = allocs.used_saved_regs.at(i); - if (saved_reg.is_xmm()) { + if (saved_reg.is_128bit_simd()) { m_gen.add_instr_no_ir(f_rec, IGen::load128_xmm128_gpr64(saved_reg, RSP), InstructionInfo::Kind::EPILOGUE); m_gen.add_instr_no_ir(f_rec, IGen::add_gpr64_imm8s(RSP, XMM_SIZE), diff --git a/goalc/compiler/IR.cpp b/goalc/compiler/IR.cpp index 37d81a340ad..f9e954d7ac3 100644 --- a/goalc/compiler/IR.cpp +++ b/goalc/compiler/IR.cpp @@ -240,7 +240,7 @@ void IR_LoadSymbolPointer::do_codegen(emitter::ObjectGenerator* gen, auto dest_reg = get_reg(m_dest, allocs, irec); if (m_name == "#f") { static_assert(false_symbol_offset() == 0, "false symbol location"); - if (dest_reg.is_xmm()) { + if (dest_reg.is_128bit_simd()) { gen->add_instr(IGen::movq_xmm64_gpr64(dest_reg, gRegInfo.get_st_reg()), irec); } else { gen->add_instr(IGen::mov_gpr64_gpr64(dest_reg, gRegInfo.get_st_reg()), irec); @@ -862,7 +862,8 @@ RegAllocInstr IR_ConditionalBranch::to_rai() { void IR_ConditionalBranch::do_codegen(emitter::ObjectGenerator* gen, const AllocationResult& allocs, emitter::IR_Record irec) { - Instruction jump_instr(0); +#ifndef __aarch64__ + Instruction 
jump_instr = InstructionX86(0); ASSERT(m_resolved); switch (condition.kind) { case ConditionKind::EQUAL: @@ -916,6 +917,9 @@ void IR_ConditionalBranch::do_codegen(emitter::ObjectGenerator* gen, auto jump_rec = gen->add_instr(jump_instr, irec); gen->link_instruction_jump(jump_rec, gen->get_future_ir_record_in_same_func(irec, label.idx)); +#else +// TODO - ARM64 +#endif } ///////////////////// diff --git a/goalc/compiler/compilation/Function.cpp b/goalc/compiler/compilation/Function.cpp index 1fb519a7fb5..b781f5bc333 100644 --- a/goalc/compiler/compilation/Function.cpp +++ b/goalc/compiler/compilation/Function.cpp @@ -591,7 +591,7 @@ Val* Compiler::compile_real_function_call(const goos::Object& form, auto cc = get_function_calling_convention(function->type(), m_ts); RegClass ret_reg_class = RegClass::GPR_64; - if (cc.return_reg && cc.return_reg->is_xmm()) { + if (cc.return_reg && cc.return_reg->is_128bit_simd()) { ret_reg_class = RegClass::INT_128; } @@ -625,7 +625,7 @@ Val* Compiler::compile_real_function_call(const goos::Object& form, const auto& arg = args.at(i); auto reg = cc.arg_regs.at(i); arg_outs.push_back( - env->make_ireg(arg->type(), reg.is_xmm() ? RegClass::INT_128 : RegClass::GPR_64)); + env->make_ireg(arg->type(), reg.is_128bit_simd() ? 
RegClass::INT_128 : RegClass::GPR_64)); arg_outs.back()->mark_as_settable(); env->emit_ir(form, arg_outs.back(), arg); } diff --git a/goalc/debugger/disassemble.h b/goalc/debugger/disassemble.h index c3679d56f04..0a04d6274c3 100644 --- a/goalc/debugger/disassemble.h +++ b/goalc/debugger/disassemble.h @@ -22,10 +22,10 @@ struct InstructionInfo { int ir_idx = -1; int offset = -1; - InstructionInfo(const emitter::Instruction& _instruction, Kind _kind) + InstructionInfo(const emitter::Instruction _instruction, Kind _kind) : instruction(_instruction), kind(_kind) {} - InstructionInfo(const emitter::Instruction& _instruction, Kind _kind, int _ir_idx) + InstructionInfo(const emitter::Instruction _instruction, Kind _kind, int _ir_idx) : instruction(_instruction), kind(_kind), ir_idx(_ir_idx) {} }; diff --git a/goalc/emitter/CodeTester.cpp b/goalc/emitter/CodeTester.cpp index f8f1216f572..f2dd8d9cf91 100644 --- a/goalc/emitter/CodeTester.cpp +++ b/goalc/emitter/CodeTester.cpp @@ -50,7 +50,7 @@ std::string CodeTester::dump_to_hex_string(bool nospace) { /*! * Add an instruction to the buffer. */ -void CodeTester::emit(const Instruction& instr) { +void CodeTester::emit(const emitter::Instruction& instr) { code_buffer_size += instr.emit(code_buffer + code_buffer_size); ASSERT(code_buffer_size <= code_buffer_capacity); } @@ -67,11 +67,18 @@ void CodeTester::emit_return() { * Pops RSP always, which is weird, but doesn't cause issues. */ void CodeTester::emit_pop_all_gprs(bool exclude_rax) { +#ifndef __aarch64__ for (int i = 16; i-- > 0;) { if (i != RAX || !exclude_rax) { emit(IGen::pop_gpr64(i)); } } +#else + // TODO find uses for excluding RAX + for (int i = 0; i < 32; i++) { + emit(IGen::pop_gpr64(i)); + } +#endif } /*! @@ -79,11 +86,18 @@ void CodeTester::emit_pop_all_gprs(bool exclude_rax) { * Pushes RSP always, which is weird, but doesn't cause issues. 
*/ void CodeTester::emit_push_all_gprs(bool exclude_rax) { +#ifndef __aarch64__ for (int i = 0; i < 16; i++) { if (i != RAX || !exclude_rax) { emit(IGen::push_gpr64(i)); } } +#else + // TODO find uses for excluding RAX + for (int i = 0; i < 32; i++) { + emit(IGen::push_gpr64(i)); + } +#endif } /*! @@ -119,7 +133,14 @@ void CodeTester::clear() { * Execute the buffered code with no arguments, return the value of RAX. */ u64 CodeTester::execute() { +#if defined(__APPLE__) && defined(__aarch64__) + mprotect(code_buffer, code_buffer_capacity, PROT_EXEC | PROT_READ); + auto ret = ((u64(*)())code_buffer)(); + mprotect(code_buffer, code_buffer_capacity, PROT_WRITE | PROT_READ); + return ret; +#else return ((u64(*)())code_buffer)(); +#endif } /*! @@ -127,15 +148,34 @@ u64 CodeTester::execute() { * arguments will appear in (will handle windows/linux differences) */ u64 CodeTester::execute(u64 in0, u64 in1, u64 in2, u64 in3) { +#if defined(__APPLE__) && defined(__aarch64__) + mprotect(code_buffer, code_buffer_capacity, PROT_EXEC | PROT_READ); + auto ret = ((u64(*)(u64, u64, u64, u64))code_buffer)(in0, in1, in2, in3); + mprotect(code_buffer, code_buffer_capacity, PROT_WRITE | PROT_READ); + return ret; +#else return ((u64(*)(u64, u64, u64, u64))code_buffer)(in0, in1, in2, in3); +#endif } /*! * Allocate a code buffer of the given size. 
*/ void CodeTester::init_code_buffer(int capacity) { +// TODO Apple Silicon - You cannot make a page be RWX, +// or more specifically it can't be both writable and executable at the same time +// +// https://github.com/zherczeg/sljit/issues/99 +// +// The solution to this is to flip-flop between permissions, or perhaps have two threads +// one that has writing permission, and another with executable permission +#if defined(__APPLE__) && defined(__aarch64__) + code_buffer = + (u8*)mmap(nullptr, capacity, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); +#else code_buffer = (u8*)mmap(nullptr, capacity, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); +#endif if (code_buffer == (u8*)(-1)) { ASSERT_MSG(false, "[CodeTester] Failed to map memory!"); } diff --git a/goalc/emitter/IGen.h b/goalc/emitter/IGen.h index 334666c62e0..cc79c996fec 100644 --- a/goalc/emitter/IGen.h +++ b/goalc/emitter/IGen.h @@ -8,2757 +8,804 @@ #include "common/util/Assert.h" namespace emitter { -class IGen { - public: - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // MOVES - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - /*! - * Move data from src to dst. Moves all 64-bits of the GPR. - */ - static Instruction mov_gpr64_gpr64(Register dst, Register src) { - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - Instruction instr(0x89); - instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); - return instr; - } - - /*! - * Move a 64-bit constant into a register. - */ - static Instruction mov_gpr64_u64(Register dst, uint64_t val) { - ASSERT(dst.is_gpr()); - bool rex_b = false; - auto dst_hw_id = dst.hw_id(); - if (dst_hw_id >= 8) { - dst_hw_id -= 8; - rex_b = true; - } - Instruction instr(0xb8 + dst_hw_id); - instr.set(REX(true, false, false, rex_b)); - instr.set(Imm(8, val)); - return instr; - } - - /*! - * Move a 32-bit constant into a register. Zeros the upper 32 bits. 
- */ - static Instruction mov_gpr64_u32(Register dst, uint64_t val) { - ASSERT(val <= UINT32_MAX); - ASSERT(dst.is_gpr()); - auto dst_hw_id = dst.hw_id(); - bool rex_b = false; - if (dst_hw_id >= 8) { - dst_hw_id -= 8; - rex_b = true; - } - - Instruction instr(0xb8 + dst_hw_id); - if (rex_b) { - instr.set(REX(false, false, false, rex_b)); - } - instr.set(Imm(4, val)); - return instr; - } - - /*! - * Move a signed 32-bit constant into a register. Sign extends for the upper 32 bits. - * When possible prefer mov_gpr64_u32. (use this only for negative values...) - * This is always bigger than mov_gpr64_u32, but smaller than a mov_gpr_u64. - */ - static Instruction mov_gpr64_s32(Register dst, int64_t val) { - ASSERT(val >= INT32_MIN && val <= INT32_MAX); - ASSERT(dst.is_gpr()); - Instruction instr(0xc7); - instr.set_modrm_and_rex(0, dst.hw_id(), 3, true); - instr.set(Imm(4, val)); - return instr; - } - - /*! - * Move 32-bits of xmm to 32 bits of gpr (no sign extension). - */ - static Instruction movd_gpr32_xmm32(Register dst, Register src) { - ASSERT(dst.is_gpr()); - ASSERT(src.is_xmm()); - Instruction instr(0x66); - instr.set_op2(0x0f); - instr.set_op3(0x7e); - instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - /*! - * Move 32-bits of gpr to 32-bits of xmm (no sign extension) - */ - static Instruction movd_xmm32_gpr32(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_gpr()); - Instruction instr(0x66); - instr.set_op2(0x0f); - instr.set_op3(0x6e); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - /*! - * Move 64-bits of xmm to 64 bits of gpr (no sign extension). 
- */ - static Instruction movq_gpr64_xmm64(Register dst, Register src) { - ASSERT(dst.is_gpr()); - ASSERT(src.is_xmm()); - Instruction instr(0x66); - instr.set_op2(0x0f); - instr.set_op3(0x7e); - instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); - instr.swap_op0_rex(); - return instr; - } - - /*! - * Move 64-bits of gpr to 64-bits of xmm (no sign extension) - */ - static Instruction movq_xmm64_gpr64(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_gpr()); - Instruction instr(0x66); - instr.set_op2(0x0f); - instr.set_op3(0x6e); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); - instr.swap_op0_rex(); - return instr; - } - - /*! - * Move 32-bits between xmm's - */ - static Instruction mov_xmm32_xmm32(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x10); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - // todo - GPR64 -> XMM64 (zext) - // todo - XMM -> GPR64 - - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // GOAL Loads and Stores - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - /*! - * movsx dst, BYTE PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. 
- */ - static Instruction load8s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0xf); - instr.set_op2(0xbe); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, - false); - return instr; - } - - static Instruction store8_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0x88); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id()); - if (value.id() > RBX) { - instr.add_rex(); - } - return instr; - } - - static Instruction load8s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0xf); - instr.set_op2(0xbe); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - static Instruction store8_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, +namespace IGen { +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// MOVES +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +/*! + * Move data from src to dst. Moves all 64-bits of the GPR. + */ +extern Instruction mov_gpr64_gpr64(Register dst, Register src); + +/*! + * Move a 64-bit constant into a register. + */ +extern Instruction mov_gpr64_u64(Register dst, uint64_t val); + +/*! + * Move a 32-bit constant into a register. Zeros the upper 32 bits. + */ +extern Instruction mov_gpr64_u32(Register dst, uint64_t val); + +/*! 
+ * Move a signed 32-bit constant into a register. Sign extends for the upper 32 bits. + * When possible prefer mov_gpr64_u32. (use this only for negative values...) + * This is always bigger than mov_gpr64_u32, but smaller than a mov_gpr_u64. + */ +extern Instruction mov_gpr64_s32(Register dst, int64_t val); + +/*! + * Move 32-bits of xmm to 32 bits of gpr (no sign extension). + */ +extern Instruction movd_gpr32_xmm32(Register dst, Register src); + +/*! + * Move 32-bits of gpr to 32-bits of xmm (no sign extension) + */ +extern Instruction movd_xmm32_gpr32(Register dst, Register src); + +/*! + * Move 64-bits of xmm to 64 bits of gpr (no sign extension). + */ +extern Instruction movq_gpr64_xmm64(Register dst, Register src); + +/*! + * Move 64-bits of gpr to 64-bits of xmm (no sign extension) + */ +extern Instruction movq_xmm64_gpr64(Register dst, Register src); + +/*! + * Move 32-bits between xmm's + */ +extern Instruction mov_xmm32_xmm32(Register dst, Register src); + +// todo - GPR64 -> XMM64 (zext) +// todo - XMM -> GPR64 + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// GOAL Loads and Stores +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * movsx dst, BYTE PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. + */ +extern Instruction load8s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2); + +extern Instruction store8_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value); + +extern Instruction load8s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction store8_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset); + +extern Instruction load8s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction store8_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset); + +/*! 
+ * movzx dst, BYTE PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. + */ +extern Instruction load8u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2); + +extern Instruction load8u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction load8u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset); + +/*! + * movsx dst, WORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. + */ +extern Instruction load16s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2); + +extern Instruction store16_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value); + +extern Instruction store16_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset); + +extern Instruction store16_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, Register addr2, Register value, - s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x88); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - if (value.id() > RBX) { - instr.add_rex(); - } - return instr; - } - - static Instruction load8s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0xf); - instr.set_op2(0xbe); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - static Instruction 
store8_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, - Register addr2, - Register value, - s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x88); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - if (value.id() > RBX) { - instr.add_rex(); - } - return instr; - } - - /*! - * movzx dst, BYTE PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. - */ - static Instruction load8u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0xf); - instr.set_op2(0xb6); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, - false); - return instr; - } - - static Instruction load8u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + s64 offset); + +extern Instruction load16s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction load16s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, Register addr1, Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0xf); - instr.set_op2(0xb6); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - static Instruction load8u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); 
- ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0xf); - instr.set_op2(0xb6); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - /*! - * movsx dst, WORD PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. - */ - static Instruction load16s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0xf); - instr.set_op2(0xbf); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, - false); - return instr; - } - - static Instruction store16_gpr64_gpr64_plus_gpr64(Register addr1, - Register addr2, - Register value) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0x66); - instr.set_op2(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id()); - instr.swap_op0_rex(); // why????? - return instr; - } - - static Instruction store16_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, - Register addr2, - Register value, - s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x66); - instr.set_op2(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - instr.swap_op0_rex(); // why????? 
- return instr; - } - - static Instruction store16_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, - Register addr2, - Register value, - s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x66); - instr.set_op2(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - instr.swap_op0_rex(); // why????? - return instr; - } - - static Instruction load16s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0xf); - instr.set_op2(0xbf); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - static Instruction load16s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0xf); - instr.set_op2(0xbf); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - /*! - * movzx dst, WORD PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. 
- */ - static Instruction load16u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0xf); - instr.set_op2(0xb7); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, - false); - return instr; - } - - static Instruction load16u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0xf); - instr.set_op2(0xb7); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - static Instruction load16u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0xf); - instr.set_op2(0xb7); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - /*! - * movsxd dst, DWORD PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. 
- */ - static Instruction load32s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0x63); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true); - return instr; - } - - static Instruction store32_gpr64_gpr64_plus_gpr64(Register addr1, - Register addr2, - Register value) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id()); - return instr; - } - - static Instruction load32s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x63); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - static Instruction store32_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, - Register addr2, - Register value, - s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - return instr; - } - - static Instruction load32s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - 
ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x63); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - static Instruction store32_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, - Register addr2, - Register value, - s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - return instr; - } - - /*! - * movzxd dst, DWORD PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. - */ - static Instruction load32u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0x8b); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id()); - return instr; - } - - static Instruction load32u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x8b); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - return instr; - } - - static Instruction load32u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - 
ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x8b); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - return instr; - } - - /*! - * mov dst, QWORD PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. - */ - static Instruction load64_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0x8b); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true); - return instr; - } - - static Instruction store64_gpr64_gpr64_plus_gpr64(Register addr1, - Register addr2, - Register value) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - true); - return instr; - } - - static Instruction load64_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + s64 offset); + +/*! + * movzx dst, WORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. 
+ */ +extern Instruction load16u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2); + +extern Instruction load16u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction load16u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, Register addr1, Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x8b); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - static Instruction store64_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, - Register addr2, - Register value, - s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - static Instruction load64_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x8b); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - static Instruction store64_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, - Register addr2, - Register value, - s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= 
INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - static Instruction store_goal_vf(Register addr, Register value, Register off, s64 offset) { - if (offset == 0) { - return storevf_gpr64_plus_gpr64(value, addr, off); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return storevf_gpr64_plus_gpr64_plus_s8(value, addr, off, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return storevf_gpr64_plus_gpr64_plus_s32(value, addr, off, offset); - } - ASSERT(false); - return {0}; - } - - static Instruction store_goal_gpr(Register addr, - Register value, - Register off, - int offset, - int size) { - switch (size) { - case 1: - if (offset == 0) { - return store8_gpr64_gpr64_plus_gpr64(addr, off, value); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return store8_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return store8_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); - } else { - ASSERT(false); - } - case 2: - if (offset == 0) { - return store16_gpr64_gpr64_plus_gpr64(addr, off, value); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return store16_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return store16_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); - } else { - ASSERT(false); - } - case 4: - if (offset == 0) { - return store32_gpr64_gpr64_plus_gpr64(addr, off, value); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return store32_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return store32_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); - } else { - ASSERT(false); - } - case 8: - if (offset == 0) { - return 
store64_gpr64_gpr64_plus_gpr64(addr, off, value); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return store64_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return store64_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); - } else { - ASSERT(false); - } - default: - ASSERT(false); - return {0}; - } - } - - static Instruction load_goal_xmm128(Register dst, Register addr, Register off, int offset) { - if (offset == 0) { - return loadvf_gpr64_plus_gpr64(dst, addr, off); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return loadvf_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return loadvf_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - } else { - ASSERT(false); - return {0}; - } - } - - /*! - * Load memory at addr + offset, where addr is a GOAL pointer and off is the offset register. - * This will pick the appropriate fancy addressing mode instruction. 
- */ - static Instruction load_goal_gpr(Register dst, - Register addr, - Register off, - int offset, - int size, - bool sign_extend) { - switch (size) { - case 1: - if (offset == 0) { - if (sign_extend) { - return load8s_gpr64_gpr64_plus_gpr64(dst, addr, off); - } else { - return load8u_gpr64_gpr64_plus_gpr64(dst, addr, off); - } - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - if (sign_extend) { - return load8s_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - } else { - return load8u_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - } - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - if (sign_extend) { - return load8s_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - } else { - return load8u_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - } - } else { - ASSERT(false); - } - case 2: - if (offset == 0) { - if (sign_extend) { - return load16s_gpr64_gpr64_plus_gpr64(dst, addr, off); - } else { - return load16u_gpr64_gpr64_plus_gpr64(dst, addr, off); - } - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - if (sign_extend) { - return load16s_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - } else { - return load16u_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - } - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - if (sign_extend) { - return load16s_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - } else { - return load16u_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - } - } else { - ASSERT(false); - } - case 4: - if (offset == 0) { - if (sign_extend) { - return load32s_gpr64_gpr64_plus_gpr64(dst, addr, off); - } else { - return load32u_gpr64_gpr64_plus_gpr64(dst, addr, off); - } - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - if (sign_extend) { - return load32s_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - } else { - return load32u_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - } - } else if (offset >= INT32_MIN && offset 
<= INT32_MAX) { - if (sign_extend) { - return load32s_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - } else { - return load32u_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - } - } else { - ASSERT(false); - } - case 8: - if (offset == 0) { - return load64_gpr64_gpr64_plus_gpr64(dst, addr, off); - - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return load64_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return load64_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - - } else { - ASSERT(false); - } - default: - ASSERT(false); - return {0}; - } - } - - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // LOADS n' STORES - XMM32 - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - static Instruction store32_xmm32_gpr64_plus_gpr64(Register addr1, - Register addr2, - Register xmm_value) { - ASSERT(xmm_value.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); + s64 offset); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x11); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(xmm_value.hw_id(), addr1.hw_id(), addr2.hw_id()); +/*! + * movsxd dst, DWORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. 
+ */ +extern Instruction load32s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2); - instr.swap_op0_rex(); - return instr; - } +extern Instruction store32_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value); - static Instruction load32_xmm32_gpr64_plus_gpr64(Register xmm_dest, - Register addr1, - Register addr2) { - ASSERT(xmm_dest.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x10); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(xmm_dest.hw_id(), addr1.hw_id(), addr2.hw_id()); - - instr.swap_op0_rex(); - return instr; - } - - static Instruction store32_xmm32_gpr64_plus_gpr64_plus_s8(Register addr1, - Register addr2, - Register xmm_value, - s64 offset) { - ASSERT(xmm_value.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x11); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(xmm_value.hw_id(), addr1.hw_id(), - addr2.hw_id(), offset, false); - - instr.swap_op0_rex(); - return instr; - } - - static Instruction load32_xmm32_gpr64_plus_gpr64_plus_s8(Register xmm_dest, +extern Instruction load32s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction store32_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset); + +extern Instruction load32s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, Register addr1, Register addr2, - s64 offset) { - ASSERT(xmm_dest.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x10); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(xmm_dest.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - - instr.swap_op0_rex(); - return instr; - } - - static 
Instruction store32_xmm32_gpr64_plus_gpr64_plus_s32(Register addr1, - Register addr2, - Register xmm_value, - s64 offset) { - ASSERT(xmm_value.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x11); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(xmm_value.hw_id(), addr1.hw_id(), - addr2.hw_id(), offset, false); - - instr.swap_op0_rex(); - return instr; - } - - static Instruction lea_reg_plus_off32(Register dest, Register base, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(base.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x8d); - instr.set_modrm_rex_sib_for_reg_reg_disp(dest.hw_id(), 2, base.hw_id(), true); - instr.set(Imm(4, offset)); - return instr; - } - - static Instruction lea_reg_plus_off8(Register dest, Register base, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(base.is_gpr()); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x8d); - instr.set_modrm_rex_sib_for_reg_reg_disp(dest.hw_id(), 1, base.hw_id(), true); - instr.set(Imm(1, offset)); - return instr; - } - - static Instruction lea_reg_plus_off(Register dest, Register base, s64 offset) { - if (offset >= INT8_MIN && offset <= INT8_MAX) { - return lea_reg_plus_off8(dest, base, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return lea_reg_plus_off32(dest, base, offset); - } else { - ASSERT(false); - return {0}; - } - } - - static Instruction store32_xmm32_gpr64_plus_s32(Register base, Register xmm_value, s64 offset) { - ASSERT(xmm_value.is_xmm()); - ASSERT(base.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x11); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 2, base.hw_id(), false); - instr.set(Imm(4, offset)); - instr.swap_op0_rex(); - return instr; - } - - static Instruction 
store32_xmm32_gpr64_plus_s8(Register base, Register xmm_value, s64 offset) { - ASSERT(xmm_value.is_xmm()); - ASSERT(base.is_gpr()); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x11); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 1, base.hw_id(), false); - instr.set(Imm(1, offset)); - instr.swap_op0_rex(); - return instr; - } - - static Instruction load32_xmm32_gpr64_plus_gpr64_plus_s32(Register xmm_dest, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(xmm_dest.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x10); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(xmm_dest.hw_id(), addr1.hw_id(), - addr2.hw_id(), offset, false); - - instr.swap_op0_rex(); - return instr; - } - - static Instruction load32_xmm32_gpr64_plus_s32(Register xmm_dest, Register base, s64 offset) { - ASSERT(xmm_dest.is_xmm()); - ASSERT(base.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x10); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 2, base.hw_id(), false); - instr.set(Imm(4, offset)); - instr.swap_op0_rex(); - return instr; - } - - static Instruction load32_xmm32_gpr64_plus_s8(Register xmm_dest, Register base, s64 offset) { - ASSERT(xmm_dest.is_xmm()); - ASSERT(base.is_gpr()); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x10); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 1, base.hw_id(), false); - instr.set(Imm(1, offset)); - instr.swap_op0_rex(); - return instr; - } - - static Instruction load_goal_xmm32(Register xmm_dest, Register addr, Register off, s64 offset) { - if (offset == 0) { - return load32_xmm32_gpr64_plus_gpr64(xmm_dest, addr, off); - } else if 
(offset >= INT8_MIN && offset <= INT8_MAX) { - return load32_xmm32_gpr64_plus_gpr64_plus_s8(xmm_dest, addr, off, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return load32_xmm32_gpr64_plus_gpr64_plus_s32(xmm_dest, addr, off, offset); - } else { - ASSERT(false); - return {0}; - } - } - - static Instruction store_goal_xmm32(Register addr, Register xmm_value, Register off, s64 offset) { - if (offset == 0) { - return store32_xmm32_gpr64_plus_gpr64(addr, off, xmm_value); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return store32_xmm32_gpr64_plus_gpr64_plus_s8(addr, off, xmm_value, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return store32_xmm32_gpr64_plus_gpr64_plus_s32(addr, off, xmm_value, offset); - } else { - ASSERT(false); - return {0}; - } - } - - static Instruction store_reg_offset_xmm32(Register base, Register xmm_value, s64 offset) { - ASSERT(base.is_gpr()); - ASSERT(xmm_value.is_xmm()); - if (offset >= INT8_MIN && offset <= INT8_MAX) { - return store32_xmm32_gpr64_plus_s8(base, xmm_value, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return store32_xmm32_gpr64_plus_s32(base, xmm_value, offset); - } else { - ASSERT(false); - return {0}; - } - } - - static Instruction load_reg_offset_xmm32(Register xmm_dest, Register base, s64 offset) { - ASSERT(base.is_gpr()); - ASSERT(xmm_dest.is_xmm()); - if (offset >= INT8_MIN && offset <= INT8_MAX) { - return load32_xmm32_gpr64_plus_s8(xmm_dest, base, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return load32_xmm32_gpr64_plus_s32(xmm_dest, base, offset); - } else { - ASSERT(false); - return {0}; - } - } - - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // LOADS n' STORES - XMM128 - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - /*! 
- * Store a 128-bit xmm into an address stored in a register, no offset - */ - static Instruction store128_gpr64_xmm128(Register gpr_addr, Register xmm_value) { - ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_value.is_xmm()); - Instruction instr(0x66); - // Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x7f); - instr.set_modrm_and_rex_for_reg_addr(xmm_value.hw_id(), gpr_addr.hw_id(), false); - instr.swap_op0_rex(); - return instr; - } - - static Instruction store128_gpr64_xmm128_s32(Register gpr_addr, Register xmm_value, s64 offset) { - ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_value.is_xmm()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x66); - // Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x7f); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 2, gpr_addr.hw_id(), false); - instr.set(Imm(4, offset)); - instr.swap_op0_rex(); - return instr; - } - - static Instruction store128_gpr64_xmm128_s8(Register gpr_addr, Register xmm_value, s64 offset) { - ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_value.is_xmm()); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x66); - // Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x7f); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 1, gpr_addr.hw_id(), false); - instr.set(Imm(1, offset)); - instr.swap_op0_rex(); - return instr; - } - - static Instruction load128_xmm128_gpr64(Register xmm_dest, Register gpr_addr) { - ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_dest.is_xmm()); - Instruction instr(0x66); - // Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x6f); - instr.set_modrm_and_rex_for_reg_addr(xmm_dest.hw_id(), gpr_addr.hw_id(), false); - instr.swap_op0_rex(); - return instr; - } - - static Instruction load128_xmm128_gpr64_s32(Register xmm_dest, Register gpr_addr, s64 offset) { - ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_dest.is_xmm()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - 
Instruction instr(0x66); - // Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x6f); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 2, gpr_addr.hw_id(), false); - instr.set(Imm(4, offset)); - instr.swap_op0_rex(); - return instr; - } - - static Instruction load128_xmm128_gpr64_s8(Register xmm_dest, Register gpr_addr, s64 offset) { - ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_dest.is_xmm()); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x66); - // Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x6f); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 1, gpr_addr.hw_id(), false); - instr.set(Imm(1, offset)); - instr.swap_op0_rex(); - return instr; - } - - static Instruction load128_xmm128_reg_offset(Register xmm_dest, Register base, s64 offset) { - if (offset == 0) { - return load128_xmm128_gpr64(xmm_dest, base); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return load128_xmm128_gpr64_s8(xmm_dest, base, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return load128_xmm128_gpr64_s32(xmm_dest, base, offset); - } else { - ASSERT(false); - return {0}; - } - } - - static Instruction store128_xmm128_reg_offset(Register base, Register xmm_val, s64 offset) { - if (offset == 0) { - return store128_gpr64_xmm128(base, xmm_val); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return store128_gpr64_xmm128_s8(base, xmm_val, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return store128_gpr64_xmm128_s32(base, xmm_val, offset); - } else { - ASSERT(false); - return {0}; - } - } - - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // RIP loads and stores - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - static Instruction load64_rip_s32(Register dest, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x8b); - instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); - return 
instr; - } - - static Instruction load32s_rip_s32(Register dest, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x63); - instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); - return instr; - } - - static Instruction load32u_rip_s32(Register dest, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x8b); - instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, false); - return instr; - } - - static Instruction load16u_rip_s32(Register dest, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0xf); - instr.set_op2(0xb7); - instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); - return instr; - } - - static Instruction load16s_rip_s32(Register dest, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0xf); - instr.set_op2(0xbf); - instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); - return instr; - } - - static Instruction load8u_rip_s32(Register dest, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0xf); - instr.set_op2(0xb6); - instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); - return instr; - } - - static Instruction load8s_rip_s32(Register dest, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0xf); - instr.set_op2(0xbe); - instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); - return instr; - } - - static Instruction static_load(Register dest, s64 offset, int size, bool sign_extend) { - switch (size) { - case 1: - if (sign_extend) { - return load8s_rip_s32(dest, offset); - } else { - return load8u_rip_s32(dest, offset); - } - break; - case 2: - if (sign_extend) { - return load16s_rip_s32(dest, 
offset); - } else { - return load16u_rip_s32(dest, offset); - } - break; - case 4: - if (sign_extend) { - return load32s_rip_s32(dest, offset); - } else { - return load32u_rip_s32(dest, offset); - } - break; - case 8: - return load64_rip_s32(dest, offset); - default: - ASSERT(false); - } - } - - static Instruction store64_rip_s32(Register src, s64 offset) { - ASSERT(src.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x89); - instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, true); - return instr; - } - - static Instruction store32_rip_s32(Register src, s64 offset) { - ASSERT(src.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x89); - instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, false); - return instr; - } - - static Instruction store16_rip_s32(Register src, s64 offset) { - ASSERT(src.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x66); - instr.set_op2(0x89); - instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, false); - instr.swap_op0_rex(); - return instr; - } - - static Instruction store8_rip_s32(Register src, s64 offset) { - ASSERT(src.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x88); - instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, false); - if (src.id() > RBX) { - instr.add_rex(); - } - return instr; - } - - static Instruction static_store(Register value, s64 offset, int size) { - switch (size) { - case 1: - return store8_rip_s32(value, offset); - case 2: - return store16_rip_s32(value, offset); - case 4: - return store32_rip_s32(value, offset); - case 8: - return store64_rip_s32(value, offset); - default: - ASSERT(false); - } - } - - 
static Instruction static_addr(Register dst, s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x8d); - instr.set_modrm_and_rex_for_rip_plus_s32(dst.hw_id(), offset, true); - return instr; - } - - static Instruction static_load_xmm32(Register xmm_dest, s64 offset) { - ASSERT(xmm_dest.is_xmm()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x10); - instr.set_modrm_and_rex_for_rip_plus_s32(xmm_dest.hw_id(), offset, false); - - instr.swap_op0_rex(); - return instr; - } - - static Instruction static_store_xmm32(Register xmm_value, s64 offset) { - ASSERT(xmm_value.is_xmm()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x11); - instr.set_modrm_and_rex_for_rip_plus_s32(xmm_value.hw_id(), offset, false); - - instr.swap_op0_rex(); - return instr; - } - - // TODO, special load/stores of 128 bit values. - - // TODO, consider specialized stack loads and stores? - static Instruction load64_gpr64_plus_s32(Register dst_reg, int32_t offset, Register src_reg) { - ASSERT(dst_reg.is_gpr()); - ASSERT(src_reg.is_gpr()); - Instruction instr(0x8b); - instr.set_modrm_rex_sib_for_reg_reg_disp(dst_reg.hw_id(), 2, src_reg.hw_id(), true); - instr.set_disp(Imm(4, offset)); - return instr; - } - - /*! - * Store 64-bits from gpr into memory located at 64-bit reg + 32-bit signed offset. - */ - static Instruction store64_gpr64_plus_s32(Register addr, int32_t offset, Register value) { - ASSERT(addr.is_gpr()); - ASSERT(value.is_gpr()); - Instruction instr(0x89); - instr.set_modrm_rex_sib_for_reg_reg_disp(value.hw_id(), 2, addr.hw_id(), true); - instr.set_disp(Imm(4, offset)); - return instr; - } - - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // FUNCTION STUFF - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - /*! - * Function return. 
Pops the 64-bit return address (real) off the stack and jumps to it. - */ - static Instruction ret() { return Instruction(0xc3); } - - /*! - * Instruction to push gpr (64-bits) onto the stack - */ - static Instruction push_gpr64(Register reg) { - ASSERT(reg.is_gpr()); - if (reg.hw_id() >= 8) { - auto i = Instruction(0x50 + reg.hw_id() - 8); - i.set(REX(false, false, false, true)); - return i; - } - return Instruction(0x50 + reg.hw_id()); - } - - /*! - * Instruction to pop 64 bit gpr from the stack - */ - static Instruction pop_gpr64(Register reg) { - ASSERT(reg.is_gpr()); - if (reg.hw_id() >= 8) { - auto i = Instruction(0x58 + reg.hw_id() - 8); - i.set(REX(false, false, false, true)); - return i; - } - return Instruction(0x58 + reg.hw_id()); - } - - /*! - * Call a function stored in a 64-bit gpr - */ - static Instruction call_r64(Register reg_) { - ASSERT(reg_.is_gpr()); - auto reg = reg_.hw_id(); - Instruction instr(0xff); - if (reg >= 8) { - instr.set(REX(false, false, false, true)); - reg -= 8; - } - ASSERT(reg < 8); - ModRM mrm; - mrm.rm = reg; - mrm.reg_op = 2; - mrm.mod = 3; - instr.set(mrm); - return instr; - } - - /*! - * Jump to an x86-64 address stored in a 64-bit gpr. 
- */ - static Instruction jmp_r64(Register reg_) { - ASSERT(reg_.is_gpr()); - auto reg = reg_.hw_id(); - Instruction instr(0xff); - if (reg >= 8) { - instr.set(REX(false, false, false, true)); - reg -= 8; - } - ASSERT(reg < 8); - ModRM mrm; - mrm.rm = reg; - mrm.reg_op = 4; - mrm.mod = 3; - instr.set(mrm); - return instr; - } - - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // INTEGER MATH - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - static Instruction sub_gpr64_imm8s(Register reg, int64_t imm) { - ASSERT(reg.is_gpr()); - ASSERT(imm >= INT8_MIN && imm <= INT8_MAX); - // SUB r/m64, imm8 : REX.W + 83 /5 ib - Instruction instr(0x83); - instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); - instr.set(Imm(1, imm)); - return instr; - } - - static Instruction sub_gpr64_imm32s(Register reg, int64_t imm) { - ASSERT(reg.is_gpr()); - ASSERT(imm >= INT32_MIN && imm <= INT32_MAX); - Instruction instr(0x81); - instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); - instr.set(Imm(4, imm)); - return instr; - } - - static Instruction add_gpr64_imm8s(Register reg, int64_t v) { - ASSERT(v >= INT8_MIN && v <= INT8_MAX); - Instruction instr(0x83); - instr.set_modrm_and_rex(0, reg.hw_id(), 3, true); - instr.set(Imm(1, v)); - return instr; - } - - static Instruction add_gpr64_imm32s(Register reg, int64_t v) { - ASSERT(v >= INT32_MIN && v <= INT32_MAX); - Instruction instr(0x81); - instr.set_modrm_and_rex(0, reg.hw_id(), 3, true); - instr.set(Imm(4, v)); - return instr; - } - - static Instruction add_gpr64_imm(Register reg, int64_t imm) { - if (imm >= INT8_MIN && imm <= INT8_MAX) { - return add_gpr64_imm8s(reg, imm); - } else if (imm >= INT32_MIN && imm <= INT32_MAX) { - return add_gpr64_imm32s(reg, imm); - } else { - throw std::runtime_error("Invalid `add` with reg[" + reg.print() + "]/imm[" + - std::to_string(imm) + "]"); - } - } - - static Instruction sub_gpr64_imm(Register reg, int64_t imm) { - if (imm >= INT8_MIN && imm <= INT8_MAX) { - return sub_gpr64_imm8s(reg, imm); - } else if (imm >= 
INT32_MIN && imm <= INT32_MAX) { - return sub_gpr64_imm32s(reg, imm); - } else { - throw std::runtime_error("Invalid `sub` with reg[" + reg.print() + "]/imm[" + - std::to_string(imm) + "]"); - } - } - - static Instruction add_gpr64_gpr64(Register dst, Register src) { - Instruction instr(0x01); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); - return instr; - } - - static Instruction sub_gpr64_gpr64(Register dst, Register src) { - Instruction instr(0x29); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); - return instr; - } - - /*! - * Multiply gprs (32-bit, signed). - * (Note - probably worth doing imul on gpr64's to implement the EE's unsigned multiply) - */ - static Instruction imul_gpr32_gpr32(Register dst, Register src) { - Instruction instr(0xf); - instr.set_op2(0xaf); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - return instr; - } - - /*! - * Multiply gprs (64-bit, signed). - * DANGER - this treats all operands as 64-bit. This is not like the EE. - */ - static Instruction imul_gpr64_gpr64(Register dst, Register src) { - Instruction instr(0xf); - instr.set_op2(0xaf); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); - return instr; - } - - /*! - * Divide (idiv, 32 bit) - */ - static Instruction idiv_gpr32(Register reg) { - Instruction instr(0xf7); - ASSERT(reg.is_gpr()); - instr.set_modrm_and_rex(7, reg.hw_id(), 3, false); - return instr; - } - - static Instruction unsigned_div_gpr32(Register reg) { - Instruction instr(0xf7); - ASSERT(reg.is_gpr()); - instr.set_modrm_and_rex(6, reg.hw_id(), 3, false); - return instr; - } - - /*! - * Convert doubleword to quadword for division. - */ - static Instruction cdq() { - Instruction instr(0x99); - return instr; - } - - /*! - * Move from gpr32 to gpr64, with sign extension. 
- * Needed for multiplication/divsion madness. - */ - static Instruction movsx_r64_r32(Register dst, Register src) { - Instruction instr(0x63); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); - return instr; - } - - /*! - * Compare gpr64. This sets the flags for the jumps. - * todo UNTESTED - */ - static Instruction cmp_gpr64_gpr64(Register a, Register b) { - Instruction instr(0x3b); - ASSERT(a.is_gpr()); - ASSERT(b.is_gpr()); - instr.set_modrm_and_rex(a.hw_id(), b.hw_id(), 3, true); - return instr; - } - - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // BIT STUFF - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - /*! - * Or of two gprs - */ - static Instruction or_gpr64_gpr64(Register dst, Register src) { - Instruction instr(0x0b); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); - return instr; - } - - /*! - * And of two gprs - */ - static Instruction and_gpr64_gpr64(Register dst, Register src) { - Instruction instr(0x23); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); - return instr; - } - - /*! - * Xor of two gprs - */ - static Instruction xor_gpr64_gpr64(Register dst, Register src) { - Instruction instr(0x33); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); - return instr; - } - - /*! - * Bitwise not a gpr - */ - static Instruction not_gpr64(Register reg) { - Instruction instr(0xf7); - ASSERT(reg.is_gpr()); - instr.set_modrm_and_rex(2, reg.hw_id(), 3, true); - return instr; - } - - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // SHIFTS - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - /*! - * Shift 64-bit gpr left by CL register - */ - static Instruction shl_gpr64_cl(Register reg) { - ASSERT(reg.is_gpr()); - Instruction instr(0xd3); - instr.set_modrm_and_rex(4, reg.hw_id(), 3, true); - return instr; - } - - /*! 
- * Shift 64-bit gpr right (logical) by CL register - */ - static Instruction shr_gpr64_cl(Register reg) { - ASSERT(reg.is_gpr()); - Instruction instr(0xd3); - instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); - return instr; - } - - /*! - * Shift 64-bit gpr right (arithmetic) by CL register - */ - static Instruction sar_gpr64_cl(Register reg) { - ASSERT(reg.is_gpr()); - Instruction instr(0xd3); - instr.set_modrm_and_rex(7, reg.hw_id(), 3, true); - return instr; - } - - /*! - * Shift 64-ptr left (logical) by the constant shift amount "sa". - */ - static Instruction shl_gpr64_u8(Register reg, uint8_t sa) { - ASSERT(reg.is_gpr()); - Instruction instr(0xc1); - instr.set_modrm_and_rex(4, reg.hw_id(), 3, true); - instr.set(Imm(1, sa)); - return instr; - } - - /*! - * Shift 64-ptr right (logical) by the constant shift amount "sa". - */ - static Instruction shr_gpr64_u8(Register reg, uint8_t sa) { - ASSERT(reg.is_gpr()); - Instruction instr(0xc1); - instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); - instr.set(Imm(1, sa)); - return instr; - } - - /*! - * Shift 64-ptr right (arithmetic) by the constant shift amount "sa". - */ - static Instruction sar_gpr64_u8(Register reg, uint8_t sa) { - ASSERT(reg.is_gpr()); - Instruction instr(0xc1); - instr.set_modrm_and_rex(7, reg.hw_id(), 3, true); - instr.set(Imm(1, sa)); - return instr; - } - - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // CONTROL FLOW - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - /*! - * Jump, 32-bit constant offset. The offset is by default 0 and must be patched later. - */ - static Instruction jmp_32() { - Instruction instr(0xe9); - instr.set(Imm(4, 0)); - return instr; - } - - /*! - * Jump if equal. - */ - static Instruction je_32() { - Instruction instr(0x0f); - instr.set_op2(0x84); - instr.set(Imm(4, 0)); - return instr; - } - - /*! - * Jump not equal. - */ - static Instruction jne_32() { - Instruction instr(0x0f); - instr.set_op2(0x85); - instr.set(Imm(4, 0)); - return instr; - } - - /*! 
- * Jump less than or equal. - */ - static Instruction jle_32() { - Instruction instr(0x0f); - instr.set_op2(0x8e); - instr.set(Imm(4, 0)); - return instr; - } - - /*! - * Jump greater than or equal. - */ - static Instruction jge_32() { - Instruction instr(0x0f); - instr.set_op2(0x8d); - instr.set(Imm(4, 0)); - return instr; - } - - /*! - * Jump less than - */ - static Instruction jl_32() { - Instruction instr(0x0f); - instr.set_op2(0x8c); - instr.set(Imm(4, 0)); - return instr; - } - - /*! - * Jump greater than - */ - static Instruction jg_32() { - Instruction instr(0x0f); - instr.set_op2(0x8f); - instr.set(Imm(4, 0)); - return instr; - } - - /*! - * Jump below or equal - */ - static Instruction jbe_32() { - Instruction instr(0x0f); - instr.set_op2(0x86); - instr.set(Imm(4, 0)); - return instr; - } - - /*! - * Jump above or equal - */ - static Instruction jae_32() { - Instruction instr(0x0f); - instr.set_op2(0x83); - instr.set(Imm(4, 0)); - return instr; - } - - /*! - * Jump below - */ - static Instruction jb_32() { - Instruction instr(0x0f); - instr.set_op2(0x82); - instr.set(Imm(4, 0)); - return instr; - } - - /*! - * Jump above - */ - static Instruction ja_32() { - Instruction instr(0x0f); - instr.set_op2(0x87); - instr.set(Imm(4, 0)); - return instr; - } - - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // FLOAT MATH - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - /*! - * Compare two floats and set flag register for jump (ucomiss) - */ - static Instruction cmp_flt_flt(Register a, Register b) { - ASSERT(a.is_xmm()); - ASSERT(b.is_xmm()); - Instruction instr(0x0f); - instr.set_op2(0x2e); - instr.set_modrm_and_rex(a.hw_id(), b.hw_id(), 3, false); - return instr; - } - - static Instruction sqrts_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x51); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - /*! 
- * Multiply two floats in xmm's - */ - static Instruction mulss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x59); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - /*! - * Divide two floats in xmm's - */ - static Instruction divss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x5e); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - /*! - * Subtract two floats in xmm's - */ - static Instruction subss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x5c); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - /*! - * Add two floats in xmm's - */ - static Instruction addss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x58); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - /*! - * Floating point minimum. - */ - static Instruction minss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x5d); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - /*! - * Floating point maximum. 
- */ - static Instruction maxss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x5f); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - /*! - * Convert GPR int32 to XMM float (single precision) - */ - static Instruction int32_to_float(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_gpr()); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x2a); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - /*! - * Convert XMM float to GPR int32(single precision) (truncate) - */ - static Instruction float_to_int32(Register dst, Register src) { - ASSERT(dst.is_gpr()); - ASSERT(src.is_xmm()); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x2c); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - static Instruction nop() { - // NOP - Instruction instr(0x90); - return instr; - } - - // TODO - rsqrt / abs / sqrt - - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // UTILITIES - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - /*! - * A "null" instruction. This instruction does not generate any bytes - * but can be referred to by a label. Useful to insert in place of a real instruction - * if the real instruction has been optimized out. 
- */ - static Instruction null() { - Instruction i(0); - i.m_flags |= Instruction::kIsNull; - return i; - } - - ///////////////////////////// - // AVX (VF - Vector Float) // - ///////////////////////////// - - static Instruction nop_vf() { - Instruction instr(0xd9); // FNOP - instr.set_op2(0xd0); - return instr; - } - - static Instruction wait_vf() { - Instruction instr(0x9B); // FWAIT / WAIT - return instr; - } - - static Instruction mov_vf_vf(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - - if (src.hw_id() >= 8 && dst.hw_id() < 8) { - // in this case, we can use the 0x29 encoding, which swaps src and dst, in order to use the - // 2 byte VEX prefix, where the 0x28 encoding would require an extra byte. - // compilers/assemblers seem to prefer 0x28, unless 0x29 would save you a byte. - Instruction instr(0x29); - instr.set_vex_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, VEX3::LeadingBytes::P_0F, false); - return instr; - } else { - Instruction instr(0x28); - instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, VEX3::LeadingBytes::P_0F, false); - return instr; - } - } - - static Instruction loadvf_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0x28); - instr.set_vex_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - VEX3::LeadingBytes::P_0F, false); - return instr; - } - - static Instruction loadvf_gpr64_plus_gpr64_plus_s8(Register dst, + s64 offset); + +extern Instruction store32_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset); + +/*! + * movzxd dst, DWORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. 
+ */ +extern Instruction load32u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2); + +extern Instruction load32u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction load32u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset); + +/*! + * mov dst, QWORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. + */ +extern Instruction load64_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2); + +extern Instruction store64_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value); + +extern Instruction load64_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction store64_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset); + +extern Instruction load64_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction store64_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset); + +extern Instruction store_goal_vf(Register addr, Register value, Register off, s64 offset); + +extern Instruction store_goal_gpr(Register addr, + Register value, + Register off, + int offset, + int size); + +extern Instruction load_goal_xmm128(Register dst, Register addr, Register off, int offset); + +/*! + * Load memory at addr + offset, where addr is a GOAL pointer and off is the offset register. + * This will pick the appropriate fancy addressing mode instruction. 
+ */ +extern Instruction load_goal_gpr(Register dst, + Register addr, + Register off, + int offset, + int size, + bool sign_extend); + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// LOADS n' STORES - XMM32 +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +extern Instruction store32_xmm32_gpr64_plus_gpr64(Register addr1, + Register addr2, + Register xmm_value); + +extern Instruction load32_xmm32_gpr64_plus_gpr64(Register xmm_dest, Register addr1, Register addr2); + +extern Instruction store32_xmm32_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register xmm_value, + s64 offset); + +extern Instruction load32_xmm32_gpr64_plus_gpr64_plus_s8(Register xmm_dest, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction store32_xmm32_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register xmm_value, + s64 offset); + +extern Instruction lea_reg_plus_off32(Register dest, Register base, s64 offset); + +extern Instruction lea_reg_plus_off8(Register dest, Register base, s64 offset); + +extern Instruction lea_reg_plus_off(Register dest, Register base, s64 offset); + +extern Instruction store32_xmm32_gpr64_plus_s32(Register base, Register xmm_value, s64 offset); + +extern Instruction store32_xmm32_gpr64_plus_s8(Register base, Register xmm_value, s64 offset); + +extern Instruction load32_xmm32_gpr64_plus_gpr64_plus_s32(Register xmm_dest, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction load32_xmm32_gpr64_plus_s32(Register xmm_dest, Register base, s64 offset); + +extern Instruction load32_xmm32_gpr64_plus_s8(Register xmm_dest, Register base, s64 offset); + +extern Instruction load_goal_xmm32(Register xmm_dest, Register addr, Register off, s64 offset); + +extern Instruction store_goal_xmm32(Register addr, Register xmm_value, Register off, s64 offset); + +extern Instruction store_reg_offset_xmm32(Register base, Register xmm_value, s64 offset); + +extern Instruction load_reg_offset_xmm32(Register xmm_dest, Register base, s64 
offset); + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// LOADS n' STORES - XMM128 +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * Store a 128-bit xmm into an address stored in a register, no offset + */ +extern Instruction store128_gpr64_xmm128(Register gpr_addr, Register xmm_value); + +extern Instruction store128_gpr64_xmm128_s32(Register gpr_addr, Register xmm_value, s64 offset); + +extern Instruction store128_gpr64_xmm128_s8(Register gpr_addr, Register xmm_value, s64 offset); + +extern Instruction load128_xmm128_gpr64(Register xmm_dest, Register gpr_addr); + +extern Instruction load128_xmm128_gpr64_s32(Register xmm_dest, Register gpr_addr, s64 offset); + +extern Instruction load128_xmm128_gpr64_s8(Register xmm_dest, Register gpr_addr, s64 offset); + +extern Instruction load128_xmm128_reg_offset(Register xmm_dest, Register base, s64 offset); + +extern Instruction store128_xmm128_reg_offset(Register base, Register xmm_val, s64 offset); + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// RIP loads and stores +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +extern Instruction load64_rip_s32(Register dest, s64 offset); + +extern Instruction load32s_rip_s32(Register dest, s64 offset); + +extern Instruction load32u_rip_s32(Register dest, s64 offset); + +extern Instruction load16u_rip_s32(Register dest, s64 offset); + +extern Instruction load16s_rip_s32(Register dest, s64 offset); + +extern Instruction load8u_rip_s32(Register dest, s64 offset); + +extern Instruction load8s_rip_s32(Register dest, s64 offset); + +extern Instruction static_load(Register dest, s64 offset, int size, bool sign_extend); + +extern Instruction store64_rip_s32(Register src, s64 offset); + +extern Instruction store32_rip_s32(Register src, s64 offset); + +extern Instruction store16_rip_s32(Register src, s64 offset); + +extern Instruction store8_rip_s32(Register src, s64 offset); + +extern Instruction static_store(Register value, s64 offset, int size); + +extern Instruction static_addr(Register dst, s64 
offset); + +extern Instruction static_load_xmm32(Register xmm_dest, s64 offset); + +extern Instruction static_store_xmm32(Register xmm_value, s64 offset); + +// TODO, special load/stores of 128 bit values. + +// TODO, consider specialized stack loads and stores? +extern Instruction load64_gpr64_plus_s32(Register dst_reg, int32_t offset, Register src_reg); + +/*! + * Store 64-bits from gpr into memory located at 64-bit reg + 32-bit signed offset. + */ +extern Instruction store64_gpr64_plus_s32(Register addr, int32_t offset, Register value); + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// FUNCTION STUFF +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +/*! + * Function return. Pops the 64-bit return address (real) off the stack and jumps to it. + */ +extern Instruction ret(); + +/*! + * Instruction to push gpr (64-bits) onto the stack + */ +extern Instruction push_gpr64(Register reg); + +/*! + * Instruction to pop 64 bit gpr from the stack + */ +extern Instruction pop_gpr64(Register reg); + +/*! + * Call a function stored in a 64-bit gpr + */ +extern Instruction call_r64(Register reg_); + +/*! + * Jump to an x86-64 address stored in a 64-bit gpr. + */ +extern Instruction jmp_r64(Register reg_); + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// INTEGER MATH +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +extern Instruction sub_gpr64_imm8s(Register reg, int64_t imm); + +extern Instruction sub_gpr64_imm32s(Register reg, int64_t imm); + +extern Instruction add_gpr64_imm8s(Register reg, int64_t v); + +extern Instruction add_gpr64_imm32s(Register reg, int64_t v); + +extern Instruction add_gpr64_imm(Register reg, int64_t imm); + +extern Instruction sub_gpr64_imm(Register reg, int64_t imm); + +extern Instruction add_gpr64_gpr64(Register dst, Register src); + +extern Instruction sub_gpr64_gpr64(Register dst, Register src); + +/*! + * Multiply gprs (32-bit, signed). 
+ * (Note - probably worth doing imul on gpr64's to implement the EE's unsigned multiply) + */ +extern Instruction imul_gpr32_gpr32(Register dst, Register src); + +/*! + * Multiply gprs (64-bit, signed). + * DANGER - this treats all operands as 64-bit. This is not like the EE. + */ +extern Instruction imul_gpr64_gpr64(Register dst, Register src); + +/*! + * Divide (idiv, 32 bit) + */ +extern Instruction idiv_gpr32(Register reg); + +extern Instruction unsigned_div_gpr32(Register reg); + +/*! + * Convert doubleword to quadword for division. + */ +extern Instruction cdq(); + +/*! + * Move from gpr32 to gpr64, with sign extension. + * Needed for multiplication/divsion madness. + */ +extern Instruction movsx_r64_r32(Register dst, Register src); + +/*! + * Compare gpr64. This sets the flags for the jumps. + * todo UNTESTED + */ +extern Instruction cmp_gpr64_gpr64(Register a, Register b); + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// BIT STUFF +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * Or of two gprs + */ +extern Instruction or_gpr64_gpr64(Register dst, Register src); + +/*! + * And of two gprs + */ +extern Instruction and_gpr64_gpr64(Register dst, Register src); + +/*! + * Xor of two gprs + */ +extern Instruction xor_gpr64_gpr64(Register dst, Register src); + +/*! + * Bitwise not a gpr + */ +extern Instruction not_gpr64(Register reg); + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// SHIFTS +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * Shift 64-bit gpr left by CL register + */ +extern Instruction shl_gpr64_cl(Register reg); + +/*! + * Shift 64-bit gpr right (logical) by CL register + */ +extern Instruction shr_gpr64_cl(Register reg); + +/*! + * Shift 64-bit gpr right (arithmetic) by CL register + */ +extern Instruction sar_gpr64_cl(Register reg); + +/*! + * Shift 64-ptr left (logical) by the constant shift amount "sa". + */ +extern Instruction shl_gpr64_u8(Register reg, uint8_t sa); + +/*! + * Shift 64-ptr right (logical) by the constant shift amount "sa". 
+ */ +extern Instruction shr_gpr64_u8(Register reg, uint8_t sa); + +/*! + * Shift 64-ptr right (arithmetic) by the constant shift amount "sa". + */ +extern Instruction sar_gpr64_u8(Register reg, uint8_t sa); + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// CONTROL FLOW +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * Jump, 32-bit constant offset. The offset is by default 0 and must be patched later. + */ +extern Instruction jmp_32(); + +/*! + * Jump if equal. + */ +extern Instruction je_32(); + +/*! + * Jump not equal. + */ +extern Instruction jne_32(); + +/*! + * Jump less than or equal. + */ +extern Instruction jle_32(); + +/*! + * Jump greater than or equal. + */ +extern Instruction jge_32(); + +/*! + * Jump less than + */ +extern Instruction jl_32(); + +/*! + * Jump greater than + */ +extern Instruction jg_32(); + +/*! + * Jump below or equal + */ +extern Instruction jbe_32(); + +/*! + * Jump above or equal + */ +extern Instruction jae_32(); + +/*! + * Jump below + */ +extern Instruction jb_32(); + +/*! + * Jump above + */ +extern Instruction ja_32(); + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// FLOAT MATH +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * Compare two floats and set flag register for jump (ucomiss) + */ +extern Instruction cmp_flt_flt(Register a, Register b); + +extern Instruction sqrts_xmm(Register dst, Register src); + +/*! + * Multiply two floats in xmm's + */ +extern Instruction mulss_xmm_xmm(Register dst, Register src); + +/*! + * Divide two floats in xmm's + */ +extern Instruction divss_xmm_xmm(Register dst, Register src); + +/*! + * Subtract two floats in xmm's + */ +extern Instruction subss_xmm_xmm(Register dst, Register src); + +/*! + * Add two floats in xmm's + */ +extern Instruction addss_xmm_xmm(Register dst, Register src); + +/*! + * Floating point minimum. + */ +extern Instruction minss_xmm_xmm(Register dst, Register src); + +/*! + * Floating point maximum. + */ +extern Instruction maxss_xmm_xmm(Register dst, Register src); + +/*! 
+ * Convert GPR int32 to XMM float (single precision) + */ +extern Instruction int32_to_float(Register dst, Register src); + +/*! + * Convert XMM float to GPR int32(single precision) (truncate) + */ +extern Instruction float_to_int32(Register dst, Register src); + +extern Instruction nop(); + +// TODO - rsqrt / abs / sqrt + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// UTILITIES +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * A "null" instruction. This instruction does not generate any bytes + * but can be referred to by a label. Useful to insert in place of a real instruction + * if the real instruction has been optimized out. + */ +extern Instruction null(); + +///////////////////////////// +// AVX (VF - Vector Float) // +///////////////////////////// + +extern Instruction nop_vf(); + +extern Instruction wait_vf(); + +extern Instruction mov_vf_vf(Register dst, Register src); + +extern Instruction loadvf_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2); + +extern Instruction loadvf_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction loadvf_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction storevf_gpr64_plus_gpr64(Register value, Register addr1, Register addr2); + +extern Instruction storevf_gpr64_plus_gpr64_plus_s8(Register value, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction storevf_gpr64_plus_gpr64_plus_s32(Register value, Register addr1, Register addr2, - s64 offset) { - ASSERT(dst.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x28); - instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, VEX3::LeadingBytes::P_0F, false); - return instr; - } - - static Instruction 
loadvf_gpr64_plus_gpr64_plus_s32(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x28); - instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, VEX3::LeadingBytes::P_0F, false); - return instr; - } - - static Instruction storevf_gpr64_plus_gpr64(Register value, Register addr1, Register addr2) { - ASSERT(value.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0x29); - instr.set_vex_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - VEX3::LeadingBytes::P_0F, false); - return instr; - } - - static Instruction storevf_gpr64_plus_gpr64_plus_s8(Register value, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(value.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x29); - instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s8( - value.hw_id(), addr1.hw_id(), addr2.hw_id(), offset, VEX3::LeadingBytes::P_0F, false); - return instr; - } - - static Instruction storevf_gpr64_plus_gpr64_plus_s32(Register value, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(value.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x29); - instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s32( - value.hw_id(), addr1.hw_id(), addr2.hw_id(), offset, VEX3::LeadingBytes::P_0F, false); - return instr; - } - - static Instruction 
loadvf_rip_plus_s32(Register dest, s64 offset) { - ASSERT(dest.is_xmm()); - ASSERT(offset >= INT32_MIN); - ASSERT(offset <= INT32_MAX); - Instruction instr(0x28); - instr.set_vex_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset); - return instr; - } - - // TODO - rip relative loads and stores. - - static Instruction blend_vf(Register dst, Register src1, Register src2, u8 mask) { - ASSERT(!(mask & 0b11110000)); - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); - Instruction instr(0x0c); // VBLENDPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F_3A, - src1.hw_id(), false, VexPrefix::P_66); - instr.set(Imm(1, mask)); - return instr; - } - - static Instruction shuffle_vf(Register dst, Register src, u8 dx, u8 dy, u8 dz, u8 dw) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - ASSERT(dx < 4); - ASSERT(dy < 4); - ASSERT(dz < 4); - ASSERT(dw < 4); - u8 imm = dx + (dy << 2) + (dz << 4) + (dw << 6); - return swizzle_vf(dst, src, imm); - - // SSE encoding version: - // Instruction instr(0x0f); - // instr.set_op2(0xc6); - // instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - // instr.set(Imm(1, imm)); - // return instr; - } - - /* - Generic Swizzle (re-arrangment of packed FPs) operation, the control bytes are quite involved. - Here's a brief run-down: - - 8-bits / 4 groups of 2 bits - - Right-to-left, each group is used to determine which element in `src` gets copied into - `dst`'s element (W->X). 
- - GROUP OPTIONS - - 00b - Copy the least-significant element (X) - - 01b - Copy the second element (from the right) (Y) - - 10b - Copy the third element (from the right) (Z) - - 11b - Copy the most significant element (W) - Examples - ; xmm1 = (1.5, 2.5, 3.5, 4.5) (W,Z,Y,X in x86 land) - SHUFPS xmm1, xmm1, 0xff ; Copy the most significant element to all positions - > (1.5, 1.5, 1.5, 1.5) - SHUFPS xmm1, xmm1, 0x39 ; Rotate right - > (4.5, 1.5, 2.5, 3.5) - */ - static Instruction swizzle_vf(Register dst, Register src, u8 controlBytes) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - Instruction instr(0xC6); // VSHUFPS - - // we use the AVX "VEX" encoding here. This is a three-operand form, - // but we just set both source - // to the same register. It seems like this is one byte longer but is faster maybe? - instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, src.hw_id()); - instr.set(Imm(1, controlBytes)); - return instr; - } - - /* - Splats a single element in 'src' to all elements in 'dst' - For example (pseudocode): - xmm1 = (1.5, 2.5, 3.5, 4.5) - xmm2 = (1, 2, 3, 4) - splat_vf(xmm1, xmm2, XMM_ELEMENT::X); - xmm1 = (4, 4, 4, 4) - */ - static Instruction splat_vf(Register dst, Register src, Register::VF_ELEMENT element) { - switch (element) { - case Register::VF_ELEMENT::X: // Least significant element - return swizzle_vf(dst, src, 0b00000000); - break; - case Register::VF_ELEMENT::Y: - return swizzle_vf(dst, src, 0b01010101); - break; - case Register::VF_ELEMENT::Z: - return swizzle_vf(dst, src, 0b10101010); - break; - case Register::VF_ELEMENT::W: // Most significant element - return swizzle_vf(dst, src, 0b11111111); - break; - default: - ASSERT(false); - return {0}; - } - } - - static Instruction xor_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); - Instruction instr(0x57); // VXORPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), 
VEX3::LeadingBytes::P_0F, src1.hw_id()); - return instr; - } - - static Instruction sub_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); - Instruction instr(0x5c); // VSUBPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); - return instr; - } - - static Instruction add_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); - Instruction instr(0x58); // VADDPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); - return instr; - } - - static Instruction mul_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); - Instruction instr(0x59); // VMULPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); - return instr; - } - - static Instruction max_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); - Instruction instr(0x5F); // VMAXPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); - return instr; - } - - static Instruction min_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); - Instruction instr(0x5D); // VMINPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); - return instr; - } - - static Instruction div_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); - Instruction instr(0x5E); // VDIVPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); - return instr; - } - - static Instruction sqrt_vf(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); 
- Instruction instr(0x51); // VSQRTPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0b0); - return instr; - } - - static Instruction itof_vf(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - Instruction instr(0x5b); // VCVTDQ2PS - instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0); - return instr; - } - - static Instruction ftoi_vf(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.F3.0F.WIG 5B /r VCVTTPS2DQ xmm1, xmm2/m128 - Instruction instr(0x5b); // VCVTTPS2DQ - instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false, - VexPrefix::P_F3); - return instr; - } - - static Instruction pw_sra(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.66.0F.WIG 72 /4 ib VPSRAD xmm1, xmm2, imm8 - Instruction instr(0x72); - instr.set_vex_modrm_and_rex(4, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, - VexPrefix::P_66); - instr.set(Imm(1, imm)); - return instr; - } - - static Instruction pw_srl(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.66.0F.WIG 72 /2 ib VPSRLD xmm1, xmm2, imm8 - Instruction instr(0x72); - instr.set_vex_modrm_and_rex(2, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, - VexPrefix::P_66); - instr.set(Imm(1, imm)); - return instr; - } - - static Instruction ph_srl(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.66.0F.WIG 71 /2 ib VPSRLW - Instruction instr(0x71); - instr.set_vex_modrm_and_rex(2, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, - VexPrefix::P_66); - instr.set(Imm(1, imm)); - return instr; - } - - static Instruction pw_sll(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.66.0F.WIG 72 /6 ib VPSLLD xmm1, xmm2, imm8 - Instruction instr(0x72); - 
instr.set_vex_modrm_and_rex(6, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, - VexPrefix::P_66); - instr.set(Imm(1, imm)); - return instr; - } - static Instruction ph_sll(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.66.0F.WIG 71 /6 ib VPSLLW xmm1, xmm2, imm8 - Instruction instr(0x71); - instr.set_vex_modrm_and_rex(6, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, - VexPrefix::P_66); - instr.set(Imm(1, imm)); - return instr; - } - - static Instruction parallel_add_byte(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG FC /r VPADDB xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0xFC); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction parallel_bitwise_or(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0xEB); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction parallel_bitwise_xor(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0xEF); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction parallel_bitwise_and(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction 
instr(0xDB); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - // Reminder - a word in MIPS = 32bits = a DWORD in x86 - // MIPS || x86 - // ----------------------- - // byte || byte - // halfword || word - // word || dword - // doubleword || quadword - - // -- Unpack High Data Instructions - static Instruction pextub_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 68/r VPUNPCKHBW xmm1,xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x68); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction pextuh_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 69/r VPUNPCKHWD xmm1,xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x69); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction pextuw_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 6A/r VPUNPCKHDQ xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x6a); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - // -- Unpack Low Data Instructions - static Instruction pextlb_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 60/r VPUNPCKLBW xmm1,xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x60); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, 
src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction pextlh_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 61/r VPUNPCKLWD xmm1,xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x61); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction pextlw_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 62/r VPUNPCKLDQ xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x62); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - // Equal to than comparison as 16 bytes (8 bits) - static Instruction parallel_compare_e_b(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 74 /r VPCMPEQB xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x74); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - // Equal to than comparison as 8 halfwords (16 bits) - static Instruction parallel_compare_e_h(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 75 /r VPCMPEQW xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x75); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - // Equal to than comparison as 4 words (32 bits) - static Instruction parallel_compare_e_w(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - 
ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 76 /r VPCMPEQD xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x76); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - // Greater than comparison as 16 bytes (8 bits) - static Instruction parallel_compare_gt_b(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 64 /r VPCMPGTB xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x64); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - // Greater than comparison as 8 halfwords (16 bits) - static Instruction parallel_compare_gt_h(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 65 /r VPCMPGTW xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x65); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - // Greater than comparison as 4 words (32 bits) - static Instruction parallel_compare_gt_w(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 66 /r VPCMPGTD xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x66); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction vpunpcklqdq(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 6C/r VPUNPCKLQDQ xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x6c); - 
instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction pcpyld_swapped(Register dst, Register src0, Register src1) { - return vpunpcklqdq(dst, src0, src1); - } - - static Instruction pcpyud(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 6D/r VPUNPCKHQDQ xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x6d); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction vpsubd(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG FA /r VPSUBD xmm1, xmm2, xmm3/m128 - // reg, vec, r/m - Instruction instr(0xfa); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction vpsrldq(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.66.0F.WIG 73 /3 ib VPSRLDQ xmm1, xmm2, imm8 - Instruction instr(0x73); - instr.set_vex_modrm_and_rex(3, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, - VexPrefix::P_66); - instr.set(Imm(1, imm)); - return instr; - } - - static Instruction vpslldq(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.66.0F.WIG 73 /7 ib VPSLLDQ xmm1, xmm2, imm8 - Instruction instr(0x73); - instr.set_vex_modrm_and_rex(7, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, - VexPrefix::P_66); - instr.set(Imm(1, imm)); - return instr; - } - - static Instruction vpshuflw(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.F2.0F.WIG 70 /r ib VPSHUFLW xmm1, xmm2/m128, imm8 - Instruction instr(0x70); - 
instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false, - VexPrefix::P_F2); - instr.set(Imm(1, imm)); - return instr; - } - - static Instruction vpshufhw(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.F3.0F.WIG 70 /r ib VPSHUFHW xmm1, xmm2/m128, imm8 - Instruction instr(0x70); - instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false, - VexPrefix::P_F3); - instr.set(Imm(1, imm)); - return instr; - } - - static Instruction vpackuswb(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 67 /r VPACKUSWB xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - - Instruction instr(0x67); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } -}; + s64 offset); + +extern Instruction loadvf_rip_plus_s32(Register dest, s64 offset); + +// TODO - rip relative loads and stores. + +extern Instruction blend_vf(Register dst, Register src1, Register src2, u8 mask); + +extern Instruction shuffle_vf(Register dst, Register src, u8 dx, u8 dy, u8 dz, u8 dw); + +/* + Generic Swizzle (re-arrangment of packed FPs) operation, the control bytes are quite involved. + Here's a brief run-down: + - 8-bits / 4 groups of 2 bits + - Right-to-left, each group is used to determine which element in `src` gets copied into + `dst`'s element (W->X). 
+ - GROUP OPTIONS + - 00b - Copy the least-significant element (X) + - 01b - Copy the second element (from the right) (Y) + - 10b - Copy the third element (from the right) (Z) + - 11b - Copy the most significant element (W) + Examples + ; xmm1 = (1.5, 2.5, 3.5, 4.5) (W,Z,Y,X in x86 land) + SHUFPS xmm1, xmm1, 0xff ; Copy the most significant element to all positions + > (1.5, 1.5, 1.5, 1.5) + SHUFPS xmm1, xmm1, 0x39 ; Rotate right + > (4.5, 1.5, 2.5, 3.5) + */ +extern Instruction swizzle_vf(Register dst, Register src, u8 controlBytes); + +/* + Splats a single element in 'src' to all elements in 'dst' + For example (pseudocode): + xmm1 = (1.5, 2.5, 3.5, 4.5) + xmm2 = (1, 2, 3, 4) + splat_vf(xmm1, xmm2, XMM_ELEMENT::X); + xmm1 = (4, 4, 4, 4) + */ +extern Instruction splat_vf(Register dst, Register src, Register::VF_ELEMENT element); + +extern Instruction xor_vf(Register dst, Register src1, Register src2); + +extern Instruction sub_vf(Register dst, Register src1, Register src2); + +extern Instruction add_vf(Register dst, Register src1, Register src2); + +extern Instruction mul_vf(Register dst, Register src1, Register src2); + +extern Instruction max_vf(Register dst, Register src1, Register src2); + +extern Instruction min_vf(Register dst, Register src1, Register src2); + +extern Instruction div_vf(Register dst, Register src1, Register src2); + +extern Instruction sqrt_vf(Register dst, Register src); + +extern Instruction itof_vf(Register dst, Register src); + +extern Instruction ftoi_vf(Register dst, Register src); + +extern Instruction pw_sra(Register dst, Register src, u8 imm); + +extern Instruction pw_srl(Register dst, Register src, u8 imm); + +extern Instruction ph_srl(Register dst, Register src, u8 imm); + +extern Instruction pw_sll(Register dst, Register src, u8 imm); + +extern Instruction ph_sll(Register dst, Register src, u8 imm); + +extern Instruction parallel_add_byte(Register dst, Register src0, Register src1); + +extern Instruction 
parallel_bitwise_or(Register dst, Register src0, Register src1); + +extern Instruction parallel_bitwise_xor(Register dst, Register src0, Register src1); + +extern Instruction parallel_bitwise_and(Register dst, Register src0, Register src1); + +// Reminder - a word in MIPS = 32bits = a DWORD in x86 +// MIPS || x86 +// ----------------------- +// byte || byte +// halfword || word +// word || dword +// doubleword || quadword + +// -- Unpack High Data Instructions +extern Instruction pextub_swapped(Register dst, Register src0, Register src1); + +extern Instruction pextuh_swapped(Register dst, Register src0, Register src1); + +extern Instruction pextuw_swapped(Register dst, Register src0, Register src1); + +// -- Unpack Low Data Instructions +extern Instruction pextlb_swapped(Register dst, Register src0, Register src1); + +extern Instruction pextlh_swapped(Register dst, Register src0, Register src1); + +extern Instruction pextlw_swapped(Register dst, Register src0, Register src1); + +// Equal to than comparison as 16 bytes (8 bits) +extern Instruction parallel_compare_e_b(Register dst, Register src0, Register src1); + +// Equal to than comparison as 8 halfwords (16 bits) +extern Instruction parallel_compare_e_h(Register dst, Register src0, Register src1); + +// Equal to than comparison as 4 words (32 bits) +extern Instruction parallel_compare_e_w(Register dst, Register src0, Register src1); + +// Greater than comparison as 16 bytes (8 bits) +extern Instruction parallel_compare_gt_b(Register dst, Register src0, Register src1); + +// Greater than comparison as 8 halfwords (16 bits) +extern Instruction parallel_compare_gt_h(Register dst, Register src0, Register src1); + +// Greater than comparison as 4 words (32 bits) +extern Instruction parallel_compare_gt_w(Register dst, Register src0, Register src1); + +extern Instruction vpunpcklqdq(Register dst, Register src0, Register src1); + +extern Instruction pcpyld_swapped(Register dst, Register src0, Register src1); + +extern 
Instruction pcpyud(Register dst, Register src0, Register src1); + +extern Instruction vpsubd(Register dst, Register src0, Register src1); + +extern Instruction vpsrldq(Register dst, Register src, u8 imm); + +extern Instruction vpslldq(Register dst, Register src, u8 imm); + +extern Instruction vpshuflw(Register dst, Register src, u8 imm); + +extern Instruction vpshufhw(Register dst, Register src, u8 imm); + +extern Instruction vpackuswb(Register dst, Register src0, Register src1); +}; // namespace IGen } // namespace emitter diff --git a/goalc/emitter/IGenARM64.cpp b/goalc/emitter/IGenARM64.cpp new file mode 100644 index 00000000000..9212edbee4e --- /dev/null +++ b/goalc/emitter/IGenARM64.cpp @@ -0,0 +1,960 @@ + +#include "goalc/emitter/Instruction.h" +#ifdef __aarch64__ +#include + +#include "IGen.h" + +// https://armconverter.com/?code=ret +// https://developer.arm.com/documentation/ddi0487/latest + +namespace emitter { +namespace IGen { +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// MOVES +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +Instruction mov_gpr64_gpr64(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction mov_gpr64_u64(Register dst, uint64_t val) { + return Instruction(0b0); +} + +Instruction mov_gpr64_u32(Register dst, uint64_t val) { + return Instruction(0b0); +} + +Instruction mov_gpr64_s32(Register dst, int64_t val) { + return Instruction(0b0); +} + +Instruction movd_gpr32_xmm32(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction movd_xmm32_gpr32(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction movq_gpr64_xmm64(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction movq_xmm64_gpr64(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction mov_xmm32_xmm32(Register dst, Register src) { + return Instruction(0b0); +} + +// todo - GPR64 -> XMM64 (zext) +// todo - XMM -> GPR64 + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// GOAL Loads and Stores 
+//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +Instruction load8s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + return Instruction(0b0); +} + +Instruction store8_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value) { + return Instruction(0b0); +} + +Instruction load8s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + return Instruction(0b0); +} + +Instruction store8_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset) { + return Instruction(0b0); +} + +Instruction load8s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + return Instruction(0b0); +} + +Instruction store8_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset) { + return Instruction(0b0); +} + +Instruction load8u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + return Instruction(0b0); +} + +Instruction load8u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + return Instruction(0b0); +} + +Instruction load8u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + return Instruction(0b0); +} + +Instruction load16s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + return Instruction(0b0); +} + +Instruction store16_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value) { + return Instruction(0b0); +} + +Instruction store16_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset) { + return Instruction(0b0); +} + +Instruction store16_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset) { + return Instruction(0b0); +} + +Instruction load16s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + return 
Instruction(0b0); +} + +Instruction load16s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + return Instruction(0b0); +} + +Instruction load16u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + return Instruction(0b0); +} + +Instruction load16u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + return Instruction(0b0); +} + +Instruction load16u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + return Instruction(0b0); +} + +Instruction load32s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + return Instruction(0b0); +} + +Instruction store32_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value) { + return Instruction(0b0); +} + +Instruction load32s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + return Instruction(0b0); +} + +Instruction store32_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset) { + return Instruction(0b0); +} + +Instruction load32s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + return Instruction(0b0); +} + +Instruction store32_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset) { + return Instruction(0b0); +} + +Instruction load32u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + return Instruction(0b0); +} + +Instruction load32u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + return Instruction(0b0); +} + +Instruction load32u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + return Instruction(0b0); +} + +Instruction load64_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + return Instruction(0b0); 
+} + +Instruction store64_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value) { + return Instruction(0b0); +} + +Instruction load64_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + return Instruction(0b0); +} + +Instruction store64_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset) { + return Instruction(0b0); +} + +Instruction load64_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + return Instruction(0b0); +} + +Instruction store64_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset) { + return Instruction(0b0); +} + +Instruction store_goal_vf(Register addr, Register value, Register off, s64 offset) { + return Instruction(0b0); +} + +Instruction store_goal_gpr(Register addr, Register value, Register off, int offset, int size) { + return Instruction(0b0); +} + +Instruction load_goal_xmm128(Register dst, Register addr, Register off, int offset) { + return Instruction(0b0); +} + +Instruction load_goal_gpr(Register dst, + Register addr, + Register off, + int offset, + int size, + bool sign_extend) { + return Instruction(0b0); +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// LOADS n' STORES - XMM32 +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +Instruction store32_xmm32_gpr64_plus_gpr64(Register addr1, Register addr2, Register xmm_value) { + return Instruction(0b0); +} + +Instruction load32_xmm32_gpr64_plus_gpr64(Register xmm_dest, Register addr1, Register addr2) { + return Instruction(0b0); +} + +Instruction store32_xmm32_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register xmm_value, + s64 offset) { + return Instruction(0b0); +} + +Instruction load32_xmm32_gpr64_plus_gpr64_plus_s8(Register xmm_dest, + Register addr1, + Register addr2, + s64 offset) { + return Instruction(0b0); +} + +Instruction store32_xmm32_gpr64_plus_gpr64_plus_s32(Register addr1, 
+ Register addr2, + Register xmm_value, + s64 offset) { + return Instruction(0b0); +} + +Instruction lea_reg_plus_off32(Register dest, Register base, s64 offset) { + return Instruction(0b0); +} + +Instruction lea_reg_plus_off8(Register dest, Register base, s64 offset) { + return Instruction(0b0); +} + +Instruction lea_reg_plus_off(Register dest, Register base, s64 offset) { + return Instruction(0b0); +} + +Instruction store32_xmm32_gpr64_plus_s32(Register base, Register xmm_value, s64 offset) { + return Instruction(0b0); +} + +Instruction store32_xmm32_gpr64_plus_s8(Register base, Register xmm_value, s64 offset) { + return Instruction(0b0); +} + +Instruction load32_xmm32_gpr64_plus_gpr64_plus_s32(Register xmm_dest, + Register addr1, + Register addr2, + s64 offset) { + return Instruction(0b0); +} + +Instruction load32_xmm32_gpr64_plus_s32(Register xmm_dest, Register base, s64 offset) { + return Instruction(0b0); +} + +Instruction load32_xmm32_gpr64_plus_s8(Register xmm_dest, Register base, s64 offset) { + return Instruction(0b0); +} + +Instruction load_goal_xmm32(Register xmm_dest, Register addr, Register off, s64 offset) { + return Instruction(0b0); +} + +Instruction store_goal_xmm32(Register addr, Register xmm_value, Register off, s64 offset) { + return Instruction(0b0); +} + +Instruction store_reg_offset_xmm32(Register base, Register xmm_value, s64 offset) { + return Instruction(0b0); +} + +Instruction load_reg_offset_xmm32(Register xmm_dest, Register base, s64 offset) { + return Instruction(0b0); +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// LOADS n' STORES - SIMD (128-bit, QWORDS) +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +Instruction store128_gpr64_xmm128(Register gpr_addr, Register xmm_value) { + return Instruction(0b0); +} + +Instruction store128_gpr64_xmm128_s32(Register gpr_addr, Register xmm_value, s64 offset) { + return Instruction(0b0); +} + +Instruction store128_gpr64_xmm128_s8(Register gpr_addr, Register xmm_value, s64 offset) { + return 
Instruction(0b0); +} + +Instruction load128_xmm128_gpr64(Register xmm_dest, Register gpr_addr) { + return Instruction(0b0); +} + +Instruction load128_xmm128_gpr64_s32(Register xmm_dest, Register gpr_addr, s64 offset) { + return Instruction(0b0); +} + +Instruction load128_xmm128_gpr64_s8(Register xmm_dest, Register gpr_addr, s64 offset) { + return Instruction(0b0); +} + +Instruction load128_xmm128_reg_offset(Register xmm_dest, Register base, s64 offset) { + return Instruction(0b0); +} + +Instruction store128_xmm128_reg_offset(Register base, Register xmm_val, s64 offset) { + return Instruction(0b0); +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// RIP loads and stores +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +Instruction load64_rip_s32(Register dest, s64 offset) { + return Instruction(0b0); +} + +Instruction load32s_rip_s32(Register dest, s64 offset) { + return Instruction(0b0); +} + +Instruction load32u_rip_s32(Register dest, s64 offset) { + return Instruction(0b0); +} + +Instruction load16u_rip_s32(Register dest, s64 offset) { + return Instruction(0b0); +} + +Instruction load16s_rip_s32(Register dest, s64 offset) { + return Instruction(0b0); +} + +Instruction load8u_rip_s32(Register dest, s64 offset) { + return Instruction(0b0); +} + +Instruction load8s_rip_s32(Register dest, s64 offset) { + return Instruction(0b0); +} + +Instruction static_load(Register dest, s64 offset, int size, bool sign_extend) { + return Instruction(0b0); +} + +Instruction store64_rip_s32(Register src, s64 offset) { + return Instruction(0b0); +} + +Instruction store32_rip_s32(Register src, s64 offset) { + return Instruction(0b0); +} + +Instruction store16_rip_s32(Register src, s64 offset) { + return Instruction(0b0); +} + +Instruction store8_rip_s32(Register src, s64 offset) { + return Instruction(0b0); +} + +Instruction static_store(Register value, s64 offset, int size) { + return Instruction(0b0); +} + +Instruction static_addr(Register dst, s64 offset) { + return Instruction(0b0); +} + 
+Instruction static_load_xmm32(Register xmm_dest, s64 offset) { + return Instruction(0b0); +} + +Instruction static_store_xmm32(Register xmm_value, s64 offset) { + return Instruction(0b0); +} + +// TODO, special load/stores of 128 bit values. + +// TODO, consider specialized stack loads and stores? +Instruction load64_gpr64_plus_s32(Register dst_reg, int32_t offset, Register src_reg) { + return Instruction(0b0); +} + +Instruction store64_gpr64_plus_s32(Register addr, int32_t offset, Register value) { + return Instruction(0b0); +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// FUNCTION STUFF +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +Instruction ret() { + // pg. 1850 + return Instruction(0b11010110010111110000001111000000); +} + +Instruction push_gpr64(Register reg) { + // pg. 1998 + ASSERT(reg.is_gpr()); + // TODO - is hw_id needed? + return Instruction(0b11111000001); // TODO - finish +} + +Instruction pop_gpr64(Register reg) { + // pg. 1998 + ASSERT(reg.is_gpr()); + // TODO - is hw_id needed? 
+ return Instruction(0b11111000011); // TODO - finish +} + +Instruction call_r64(Register reg_) { + return Instruction(0b0); +} + +Instruction jmp_r64(Register reg_) { + return Instruction(0b0); +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// INTEGER MATH +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +Instruction sub_gpr64_imm8s(Register reg, int64_t imm) { + return Instruction(0b0); +} + +Instruction sub_gpr64_imm32s(Register reg, int64_t imm) { + return Instruction(0b0); +} + +Instruction add_gpr64_imm8s(Register reg, int64_t v) { + return Instruction(0b0); +} + +Instruction add_gpr64_imm32s(Register reg, int64_t v) { + return Instruction(0b0); +} + +Instruction add_gpr64_imm(Register reg, int64_t imm) { + return Instruction(0b0); +} + +Instruction sub_gpr64_imm(Register reg, int64_t imm) { + return Instruction(0b0); +} + +Instruction add_gpr64_gpr64(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction sub_gpr64_gpr64(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction imul_gpr32_gpr32(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction imul_gpr64_gpr64(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction idiv_gpr32(Register reg) { + return Instruction(0b0); +} + +Instruction unsigned_div_gpr32(Register reg) { + return Instruction(0b0); +} + +Instruction cdq() { + return Instruction(0b0); +} + +Instruction movsx_r64_r32(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction cmp_gpr64_gpr64(Register a, Register b) { + return Instruction(0b0); +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// BIT STUFF +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +Instruction or_gpr64_gpr64(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction and_gpr64_gpr64(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction xor_gpr64_gpr64(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction not_gpr64(Register reg) { + return 
Instruction(0b0); +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// SHIFTS +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +Instruction shl_gpr64_cl(Register reg) { + return Instruction(0b0); +} + +Instruction shr_gpr64_cl(Register reg) { + return Instruction(0b0); +} + +Instruction sar_gpr64_cl(Register reg) { + return Instruction(0b0); +} + +Instruction shl_gpr64_u8(Register reg, uint8_t sa) { + return Instruction(0b0); +} + +Instruction shr_gpr64_u8(Register reg, uint8_t sa) { + return Instruction(0b0); +} + +Instruction sar_gpr64_u8(Register reg, uint8_t sa) { + return Instruction(0b0); +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// CONTROL FLOW +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +Instruction jmp_32() { + return Instruction(0b0); +} + +Instruction je_32() { + return Instruction(0b0); +} + +Instruction jne_32() { + return Instruction(0b0); +} + +Instruction jle_32() { + return Instruction(0b0); +} + +Instruction jge_32() { + return Instruction(0b0); +} + +Instruction jl_32() { + return Instruction(0b0); +} + +Instruction jg_32() { + return Instruction(0b0); +} + +Instruction jbe_32() { + return Instruction(0b0); +} + +Instruction jae_32() { + return Instruction(0b0); +} + +Instruction jb_32() { + return Instruction(0b0); +} + +Instruction ja_32() { + return Instruction(0b0); +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// FLOAT MATH +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +Instruction cmp_flt_flt(Register a, Register b) { + return Instruction(0b0); +} + +Instruction sqrts_xmm(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction mulss_xmm_xmm(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction divss_xmm_xmm(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction subss_xmm_xmm(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction addss_xmm_xmm(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction minss_xmm_xmm(Register dst, Register src) { + return 
Instruction(0b0); +} + +Instruction maxss_xmm_xmm(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction int32_to_float(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction float_to_int32(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction nop() { + return Instruction(0b0); +} + +// TODO - rsqrt / abs / sqrt + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// UTILITIES +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +Instruction null() { + return Instruction(0b0); +} + +///////////////////////////// +// AVX (VF - Vector Float) // +///////////////////////////// + +Instruction nop_vf() { + return Instruction(0b0); +} + +Instruction wait_vf() { + return Instruction(0b0); +} + +Instruction mov_vf_vf(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction loadvf_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + return Instruction(0b0); +} + +Instruction loadvf_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + return Instruction(0b0); +} + +Instruction loadvf_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + return Instruction(0b0); +} + +Instruction storevf_gpr64_plus_gpr64(Register value, Register addr1, Register addr2) { + return Instruction(0b0); +} + +Instruction storevf_gpr64_plus_gpr64_plus_s8(Register value, + Register addr1, + Register addr2, + s64 offset) { + return Instruction(0b0); +} + +Instruction storevf_gpr64_plus_gpr64_plus_s32(Register value, + Register addr1, + Register addr2, + s64 offset) { + return Instruction(0b0); +} + +Instruction loadvf_rip_plus_s32(Register dest, s64 offset) { + return Instruction(0b0); +} + +// TODO - rip relative loads and stores. 
+ +Instruction blend_vf(Register dst, Register src1, Register src2, u8 mask) { + return Instruction(0b0); +} + +Instruction shuffle_vf(Register dst, Register src, u8 dx, u8 dy, u8 dz, u8 dw) { + return Instruction(0b0); +} + +Instruction swizzle_vf(Register dst, Register src, u8 controlBytes) { + return Instruction(0b0); +} + +Instruction splat_vf(Register dst, Register src, Register::VF_ELEMENT element) { + return Instruction(0b0); +} + +Instruction xor_vf(Register dst, Register src1, Register src2) { + return Instruction(0b0); +} + +Instruction sub_vf(Register dst, Register src1, Register src2) { + return Instruction(0b0); +} + +Instruction add_vf(Register dst, Register src1, Register src2) { + return Instruction(0b0); +} + +Instruction mul_vf(Register dst, Register src1, Register src2) { + return Instruction(0b0); +} + +Instruction max_vf(Register dst, Register src1, Register src2) { + return Instruction(0b0); +} + +Instruction min_vf(Register dst, Register src1, Register src2) { + return Instruction(0b0); +} + +Instruction div_vf(Register dst, Register src1, Register src2) { + return Instruction(0b0); +} + +Instruction sqrt_vf(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction itof_vf(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction ftoi_vf(Register dst, Register src) { + return Instruction(0b0); +} + +Instruction pw_sra(Register dst, Register src, u8 imm) { + return Instruction(0b0); +} + +Instruction pw_srl(Register dst, Register src, u8 imm) { + return Instruction(0b0); +} + +Instruction ph_srl(Register dst, Register src, u8 imm) { + return Instruction(0b0); +} + +Instruction pw_sll(Register dst, Register src, u8 imm) { + return Instruction(0b0); +} +Instruction ph_sll(Register dst, Register src, u8 imm) { + return Instruction(0b0); +} + +Instruction parallel_add_byte(Register dst, Register src0, Register src1) { + return Instruction(0b0); +} + +Instruction parallel_bitwise_or(Register dst, Register src0, 
Register src1) { + return Instruction(0b0); +} + +Instruction parallel_bitwise_xor(Register dst, Register src0, Register src1) { + return Instruction(0b0); +} + +Instruction parallel_bitwise_and(Register dst, Register src0, Register src1) { + return Instruction(0b0); +} + +Instruction pextub_swapped(Register dst, Register src0, Register src1) { + return Instruction(0b0); +} + +Instruction pextuh_swapped(Register dst, Register src0, Register src1) { + return Instruction(0b0); +} + +Instruction pextuw_swapped(Register dst, Register src0, Register src1) { + return Instruction(0b0); +} + +Instruction pextlb_swapped(Register dst, Register src0, Register src1) { + return Instruction(0b0); +} + +Instruction pextlh_swapped(Register dst, Register src0, Register src1) { + return Instruction(0b0); +} + +Instruction pextlw_swapped(Register dst, Register src0, Register src1) { + return Instruction(0b0); +} + +Instruction parallel_compare_e_b(Register dst, Register src0, Register src1) { + return Instruction(0b0); +} + +Instruction parallel_compare_e_h(Register dst, Register src0, Register src1) { + return Instruction(0b0); +} + +Instruction parallel_compare_e_w(Register dst, Register src0, Register src1) { + return Instruction(0b0); +} + +Instruction parallel_compare_gt_b(Register dst, Register src0, Register src1) { + return Instruction(0b0); +} + +Instruction parallel_compare_gt_h(Register dst, Register src0, Register src1) { + return Instruction(0b0); +} + +Instruction parallel_compare_gt_w(Register dst, Register src0, Register src1) { + return Instruction(0b0); +} + +Instruction vpunpcklqdq(Register dst, Register src0, Register src1) { + return Instruction(0b0); +} + +Instruction pcpyld_swapped(Register dst, Register src0, Register src1) { + return Instruction(0b0); +} + +Instruction pcpyud(Register dst, Register src0, Register src1) { + return Instruction(0b0); +} + +Instruction vpsubd(Register dst, Register src0, Register src1) { + return Instruction(0b0); +} + 
+Instruction vpsrldq(Register dst, Register src, u8 imm) { + return Instruction(0b0); +} + +Instruction vpslldq(Register dst, Register src, u8 imm) { + return Instruction(0b0); +} + +Instruction vpshuflw(Register dst, Register src, u8 imm) { + return Instruction(0b0); +} + +Instruction vpshufhw(Register dst, Register src, u8 imm) { + return Instruction(0b0); +} + +Instruction vpackuswb(Register dst, Register src0, Register src1) { + return Instruction(0b0); +} +} // namespace IGen +} // namespace emitter + +#endif \ No newline at end of file diff --git a/goalc/emitter/IGenX86.cpp b/goalc/emitter/IGenX86.cpp new file mode 100644 index 00000000000..024be6d8b19 --- /dev/null +++ b/goalc/emitter/IGenX86.cpp @@ -0,0 +1,2501 @@ +#ifndef __aarch64__ + +#include "IGen.h" + +namespace emitter { +namespace IGen { +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// MOVES +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +Instruction mov_gpr64_gpr64(Register dst, Register src) { + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); + return instr; +} + +Instruction mov_gpr64_u64(Register dst, uint64_t val) { + ASSERT(dst.is_gpr()); + bool rex_b = false; + auto dst_hw_id = dst.hw_id(); + if (dst_hw_id >= 8) { + dst_hw_id -= 8; + rex_b = true; + } + InstructionX86 instr(0xb8 + dst_hw_id); + instr.set(REX(true, false, false, rex_b)); + instr.set(Imm(8, val)); + return instr; +} + +Instruction mov_gpr64_u32(Register dst, uint64_t val) { + ASSERT(val <= UINT32_MAX); + ASSERT(dst.is_gpr()); + auto dst_hw_id = dst.hw_id(); + bool rex_b = false; + if (dst_hw_id >= 8) { + dst_hw_id -= 8; + rex_b = true; + } + + InstructionX86 instr(0xb8 + dst_hw_id); + if (rex_b) { + instr.set(REX(false, false, false, rex_b)); + } + instr.set(Imm(4, val)); + return instr; +} + +Instruction mov_gpr64_s32(Register dst, int64_t val) { + ASSERT(val >= INT32_MIN && val <= INT32_MAX); + ASSERT(dst.is_gpr()); + InstructionX86 instr(0xc7); + 
instr.set_modrm_and_rex(0, dst.hw_id(), 3, true); + instr.set(Imm(4, val)); + return instr; +} + +Instruction movd_gpr32_xmm32(Register dst, Register src) { + ASSERT(dst.is_gpr()); + ASSERT(src.is_128bit_simd()); + InstructionX86 instr(0x66); + instr.set_op2(0x0f); + instr.set_op3(0x7e); + instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +Instruction movd_xmm32_gpr32(Register dst, Register src) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_gpr()); + InstructionX86 instr(0x66); + instr.set_op2(0x0f); + instr.set_op3(0x6e); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +Instruction movq_gpr64_xmm64(Register dst, Register src) { + ASSERT(dst.is_gpr()); + ASSERT(src.is_128bit_simd()); + InstructionX86 instr(0x66); + instr.set_op2(0x0f); + instr.set_op3(0x7e); + instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); + instr.swap_op0_rex(); + return instr; +} + +Instruction movq_xmm64_gpr64(Register dst, Register src) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_gpr()); + InstructionX86 instr(0x66); + instr.set_op2(0x0f); + instr.set_op3(0x6e); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); + instr.swap_op0_rex(); + return instr; +} + +Instruction mov_xmm32_xmm32(Register dst, Register src) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +// todo - GPR64 -> XMM64 (zext) +// todo - XMM -> GPR64 + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// GOAL Loads and Stores +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +Instruction load8s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + 
ASSERT(addr2 != RSP); + InstructionX86 instr(0xf); + instr.set_op2(0xbe); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, + false); + return instr; +} + +Instruction store8_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x88); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id()); + if (value.id() > RBX) { + instr.add_rex(); + } + return instr; +} + +Instruction load8s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xbe); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction store8_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x88); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + if (value.id() > RBX) { + instr.add_rex(); + } + return instr; +} + +Instruction load8s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= 
INT32_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xbe); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction store8_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x88); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + if (value.id() > RBX) { + instr.add_rex(); + } + return instr; +} + +Instruction load8u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0xf); + instr.set_op2(0xb6); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, + false); + return instr; +} + +Instruction load8u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xb6); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction load8u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + 
InstructionX86 instr(0xf); + instr.set_op2(0xb6); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction load16s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0xf); + instr.set_op2(0xbf); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, + false); + return instr; +} + +Instruction store16_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x66); + instr.set_op2(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id()); + instr.swap_op0_rex(); // why????? + return instr; +} + +Instruction store16_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x66); + instr.set_op2(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + instr.swap_op0_rex(); // why????? 
+ return instr; +} + +Instruction store16_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x66); + instr.set_op2(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + instr.swap_op0_rex(); // why????? + return instr; +} + +Instruction load16s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xbf); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction load16s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xbf); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction load16u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0xf); + instr.set_op2(0xb7); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, + false); + return 
instr; +} + +Instruction load16u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xb7); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction load16u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xb7); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction load32s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x63); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true); + return instr; +} + +Instruction store32_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id()); + return instr; +} + +Instruction load32s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); 
+ ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x63); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction store32_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + return instr; +} + +Instruction load32s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x63); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction store32_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + return instr; +} + +Instruction load32u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + 
InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id()); + return instr; +} + +Instruction load32u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + return instr; +} + +Instruction load32u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + return instr; +} + +Instruction load64_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true); + return instr; +} + +Instruction store64_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id(), true); + return instr; +} + +Instruction load64_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, 
+ s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction store64_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction load64_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction store64_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction store_goal_vf(Register addr, Register value, Register off, s64 offset) { + if (offset == 0) { + return storevf_gpr64_plus_gpr64(value, addr, 
off); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return storevf_gpr64_plus_gpr64_plus_s8(value, addr, off, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return storevf_gpr64_plus_gpr64_plus_s32(value, addr, off, offset); + } + ASSERT(false); + return InstructionX86(0); +} + +Instruction store_goal_gpr(Register addr, Register value, Register off, int offset, int size) { + switch (size) { + case 1: + if (offset == 0) { + return store8_gpr64_gpr64_plus_gpr64(addr, off, value); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store8_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store8_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); + } else { + ASSERT(false); + } + case 2: + if (offset == 0) { + return store16_gpr64_gpr64_plus_gpr64(addr, off, value); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store16_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store16_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); + } else { + ASSERT(false); + } + case 4: + if (offset == 0) { + return store32_gpr64_gpr64_plus_gpr64(addr, off, value); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store32_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store32_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); + } else { + ASSERT(false); + } + case 8: + if (offset == 0) { + return store64_gpr64_gpr64_plus_gpr64(addr, off, value); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store64_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store64_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); + } else { + ASSERT(false); + } + default: + 
ASSERT(false); + return InstructionX86(0); + } +} + +Instruction load_goal_xmm128(Register dst, Register addr, Register off, int offset) { + if (offset == 0) { + return loadvf_gpr64_plus_gpr64(dst, addr, off); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return loadvf_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return loadvf_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + +Instruction load_goal_gpr(Register dst, + Register addr, + Register off, + int offset, + int size, + bool sign_extend) { + switch (size) { + case 1: + if (offset == 0) { + if (sign_extend) { + return load8s_gpr64_gpr64_plus_gpr64(dst, addr, off); + } else { + return load8u_gpr64_gpr64_plus_gpr64(dst, addr, off); + } + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + if (sign_extend) { + return load8s_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } else { + return load8u_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + if (sign_extend) { + return load8s_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } else { + return load8u_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } + } else { + ASSERT(false); + } + case 2: + if (offset == 0) { + if (sign_extend) { + return load16s_gpr64_gpr64_plus_gpr64(dst, addr, off); + } else { + return load16u_gpr64_gpr64_plus_gpr64(dst, addr, off); + } + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + if (sign_extend) { + return load16s_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } else { + return load16u_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + if (sign_extend) { + return load16s_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } else { + return load16u_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, 
off, offset); + } + } else { + ASSERT(false); + } + case 4: + if (offset == 0) { + if (sign_extend) { + return load32s_gpr64_gpr64_plus_gpr64(dst, addr, off); + } else { + return load32u_gpr64_gpr64_plus_gpr64(dst, addr, off); + } + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + if (sign_extend) { + return load32s_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } else { + return load32u_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + if (sign_extend) { + return load32s_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } else { + return load32u_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } + } else { + ASSERT(false); + } + case 8: + if (offset == 0) { + return load64_gpr64_gpr64_plus_gpr64(dst, addr, off); + + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return load64_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return load64_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + + } else { + ASSERT(false); + } + default: + ASSERT(false); + return InstructionX86(0); + } +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// LOADS n' STORES - XMM32 +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +Instruction store32_xmm32_gpr64_plus_gpr64(Register addr1, Register addr2, Register xmm_value) { + ASSERT(xmm_value.is_128bit_simd()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x11); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(xmm_value.hw_id(), addr1.hw_id(), addr2.hw_id()); + + instr.swap_op0_rex(); + return instr; +} + +Instruction load32_xmm32_gpr64_plus_gpr64(Register xmm_dest, Register addr1, Register addr2) { + ASSERT(xmm_dest.is_128bit_simd()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + 
instr.set_modrm_and_rex_for_reg_plus_reg_addr(xmm_dest.hw_id(), addr1.hw_id(), addr2.hw_id()); + + instr.swap_op0_rex(); + return instr; +} + +Instruction store32_xmm32_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register xmm_value, + s64 offset) { + ASSERT(xmm_value.is_128bit_simd()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x11); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(xmm_value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + + instr.swap_op0_rex(); + return instr; +} + +Instruction load32_xmm32_gpr64_plus_gpr64_plus_s8(Register xmm_dest, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(xmm_dest.is_128bit_simd()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(xmm_dest.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + + instr.swap_op0_rex(); + return instr; +} + +Instruction store32_xmm32_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register xmm_value, + s64 offset) { + ASSERT(xmm_value.is_128bit_simd()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x11); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(xmm_value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + + instr.swap_op0_rex(); + return instr; +} + +Instruction lea_reg_plus_off32(Register dest, Register base, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(base.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x8d); + instr.set_modrm_rex_sib_for_reg_reg_disp(dest.hw_id(), 2, base.hw_id(), true); + instr.set(Imm(4, offset)); + return 
instr; +} + +Instruction lea_reg_plus_off8(Register dest, Register base, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(base.is_gpr()); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x8d); + instr.set_modrm_rex_sib_for_reg_reg_disp(dest.hw_id(), 1, base.hw_id(), true); + instr.set(Imm(1, offset)); + return instr; +} + +Instruction lea_reg_plus_off(Register dest, Register base, s64 offset) { + if (offset >= INT8_MIN && offset <= INT8_MAX) { + return lea_reg_plus_off8(dest, base, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return lea_reg_plus_off32(dest, base, offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + +Instruction store32_xmm32_gpr64_plus_s32(Register base, Register xmm_value, s64 offset) { + ASSERT(xmm_value.is_128bit_simd()); + ASSERT(base.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x11); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 2, base.hw_id(), false); + instr.set(Imm(4, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction store32_xmm32_gpr64_plus_s8(Register base, Register xmm_value, s64 offset) { + ASSERT(xmm_value.is_128bit_simd()); + ASSERT(base.is_gpr()); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x11); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 1, base.hw_id(), false); + instr.set(Imm(1, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction load32_xmm32_gpr64_plus_gpr64_plus_s32(Register xmm_dest, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(xmm_dest.is_128bit_simd()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(xmm_dest.hw_id(), 
addr1.hw_id(), addr2.hw_id(), + offset, false); + + instr.swap_op0_rex(); + return instr; +} + +Instruction load32_xmm32_gpr64_plus_s32(Register xmm_dest, Register base, s64 offset) { + ASSERT(xmm_dest.is_128bit_simd()); + ASSERT(base.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 2, base.hw_id(), false); + instr.set(Imm(4, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction load32_xmm32_gpr64_plus_s8(Register xmm_dest, Register base, s64 offset) { + ASSERT(xmm_dest.is_128bit_simd()); + ASSERT(base.is_gpr()); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 1, base.hw_id(), false); + instr.set(Imm(1, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction load_goal_xmm32(Register xmm_dest, Register addr, Register off, s64 offset) { + if (offset == 0) { + return load32_xmm32_gpr64_plus_gpr64(xmm_dest, addr, off); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return load32_xmm32_gpr64_plus_gpr64_plus_s8(xmm_dest, addr, off, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return load32_xmm32_gpr64_plus_gpr64_plus_s32(xmm_dest, addr, off, offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + +Instruction store_goal_xmm32(Register addr, Register xmm_value, Register off, s64 offset) { + if (offset == 0) { + return store32_xmm32_gpr64_plus_gpr64(addr, off, xmm_value); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store32_xmm32_gpr64_plus_gpr64_plus_s8(addr, off, xmm_value, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store32_xmm32_gpr64_plus_gpr64_plus_s32(addr, off, xmm_value, offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + 
+Instruction store_reg_offset_xmm32(Register base, Register xmm_value, s64 offset) { + ASSERT(base.is_gpr()); + ASSERT(xmm_value.is_128bit_simd()); + if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store32_xmm32_gpr64_plus_s8(base, xmm_value, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store32_xmm32_gpr64_plus_s32(base, xmm_value, offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + +Instruction load_reg_offset_xmm32(Register xmm_dest, Register base, s64 offset) { + ASSERT(base.is_gpr()); + ASSERT(xmm_dest.is_128bit_simd()); + if (offset >= INT8_MIN && offset <= INT8_MAX) { + return load32_xmm32_gpr64_plus_s8(xmm_dest, base, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return load32_xmm32_gpr64_plus_s32(xmm_dest, base, offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// LOADS n' STORES - XMM128 +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +Instruction store128_gpr64_xmm128(Register gpr_addr, Register xmm_value) { + ASSERT(gpr_addr.is_gpr()); + ASSERT(xmm_value.is_128bit_simd()); + InstructionX86 instr(0x66); + // InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x7f); + instr.set_modrm_and_rex_for_reg_addr(xmm_value.hw_id(), gpr_addr.hw_id(), false); + instr.swap_op0_rex(); + return instr; +} + +Instruction store128_gpr64_xmm128_s32(Register gpr_addr, Register xmm_value, s64 offset) { + ASSERT(gpr_addr.is_gpr()); + ASSERT(xmm_value.is_128bit_simd()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x66); + // InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x7f); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 2, gpr_addr.hw_id(), false); + instr.set(Imm(4, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction store128_gpr64_xmm128_s8(Register gpr_addr, Register xmm_value, s64 offset) { + ASSERT(gpr_addr.is_gpr()); + 
ASSERT(xmm_value.is_128bit_simd()); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x66); + // InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x7f); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 1, gpr_addr.hw_id(), false); + instr.set(Imm(1, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction load128_xmm128_gpr64(Register xmm_dest, Register gpr_addr) { + ASSERT(gpr_addr.is_gpr()); + ASSERT(xmm_dest.is_128bit_simd()); + InstructionX86 instr(0x66); + // InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x6f); + instr.set_modrm_and_rex_for_reg_addr(xmm_dest.hw_id(), gpr_addr.hw_id(), false); + instr.swap_op0_rex(); + return instr; +} + +Instruction load128_xmm128_gpr64_s32(Register xmm_dest, Register gpr_addr, s64 offset) { + ASSERT(gpr_addr.is_gpr()); + ASSERT(xmm_dest.is_128bit_simd()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x66); + // InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x6f); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 2, gpr_addr.hw_id(), false); + instr.set(Imm(4, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction load128_xmm128_gpr64_s8(Register xmm_dest, Register gpr_addr, s64 offset) { + ASSERT(gpr_addr.is_gpr()); + ASSERT(xmm_dest.is_128bit_simd()); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x66); + // InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x6f); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 1, gpr_addr.hw_id(), false); + instr.set(Imm(1, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction load128_xmm128_reg_offset(Register xmm_dest, Register base, s64 offset) { + if (offset == 0) { + return load128_xmm128_gpr64(xmm_dest, base); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return load128_xmm128_gpr64_s8(xmm_dest, base, offset); + } else if (offset >= 
INT32_MIN && offset <= INT32_MAX) { + return load128_xmm128_gpr64_s32(xmm_dest, base, offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + +Instruction store128_xmm128_reg_offset(Register base, Register xmm_val, s64 offset) { + if (offset == 0) { + return store128_gpr64_xmm128(base, xmm_val); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store128_gpr64_xmm128_s8(base, xmm_val, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store128_gpr64_xmm128_s32(base, xmm_val, offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// RIP loads and stores +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +Instruction load64_rip_s32(Register dest, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; +} + +Instruction load32s_rip_s32(Register dest, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x63); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; +} + +Instruction load32u_rip_s32(Register dest, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, false); + return instr; +} + +Instruction load16u_rip_s32(Register dest, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xb7); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; +} + +Instruction load16s_rip_s32(Register dest, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xbf); + 
instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; +} + +Instruction load8u_rip_s32(Register dest, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xb6); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; +} + +Instruction load8s_rip_s32(Register dest, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xbe); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; +} + +Instruction static_load(Register dest, s64 offset, int size, bool sign_extend) { + switch (size) { + case 1: + if (sign_extend) { + return load8s_rip_s32(dest, offset); + } else { + return load8u_rip_s32(dest, offset); + } + break; + case 2: + if (sign_extend) { + return load16s_rip_s32(dest, offset); + } else { + return load16u_rip_s32(dest, offset); + } + break; + case 4: + if (sign_extend) { + return load32s_rip_s32(dest, offset); + } else { + return load32u_rip_s32(dest, offset); + } + break; + case 8: + return load64_rip_s32(dest, offset); + default: + ASSERT(false); + } +} + +Instruction store64_rip_s32(Register src, s64 offset) { + ASSERT(src.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, true); + return instr; +} + +Instruction store32_rip_s32(Register src, s64 offset) { + ASSERT(src.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, false); + return instr; +} + +Instruction store16_rip_s32(Register src, s64 offset) { + ASSERT(src.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); 
+ ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x66); + instr.set_op2(0x89); + instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, false); + instr.swap_op0_rex(); + return instr; +} + +Instruction store8_rip_s32(Register src, s64 offset) { + ASSERT(src.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x88); + instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, false); + if (src.id() > RBX) { + instr.add_rex(); + } + return instr; +} + +Instruction static_store(Register value, s64 offset, int size) { + switch (size) { + case 1: + return store8_rip_s32(value, offset); + case 2: + return store16_rip_s32(value, offset); + case 4: + return store32_rip_s32(value, offset); + case 8: + return store64_rip_s32(value, offset); + default: + ASSERT(false); + } +} + +Instruction static_addr(Register dst, s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x8d); + instr.set_modrm_and_rex_for_rip_plus_s32(dst.hw_id(), offset, true); + return instr; +} + +Instruction static_load_xmm32(Register xmm_dest, s64 offset) { + ASSERT(xmm_dest.is_128bit_simd()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_and_rex_for_rip_plus_s32(xmm_dest.hw_id(), offset, false); + + instr.swap_op0_rex(); + return instr; +} + +Instruction static_store_xmm32(Register xmm_value, s64 offset) { + ASSERT(xmm_value.is_128bit_simd()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x11); + instr.set_modrm_and_rex_for_rip_plus_s32(xmm_value.hw_id(), offset, false); + + instr.swap_op0_rex(); + return instr; +} + +// TODO, special load/stores of 128 bit values. + +// TODO, consider specialized stack loads and stores? 
+Instruction load64_gpr64_plus_s32(Register dst_reg, int32_t offset, Register src_reg) { + ASSERT(dst_reg.is_gpr()); + ASSERT(src_reg.is_gpr()); + InstructionX86 instr(0x8b); + instr.set_modrm_rex_sib_for_reg_reg_disp(dst_reg.hw_id(), 2, src_reg.hw_id(), true); + instr.set_disp(Imm(4, offset)); + return instr; +} + +Instruction store64_gpr64_plus_s32(Register addr, int32_t offset, Register value) { + ASSERT(addr.is_gpr()); + ASSERT(value.is_gpr()); + InstructionX86 instr(0x89); + instr.set_modrm_rex_sib_for_reg_reg_disp(value.hw_id(), 2, addr.hw_id(), true); + instr.set_disp(Imm(4, offset)); + return instr; +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// FUNCTION STUFF +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +Instruction ret() { + return InstructionX86(0xc3); +} + +Instruction push_gpr64(Register reg) { + ASSERT(reg.is_gpr()); + if (reg.hw_id() >= 8) { + auto i = InstructionX86(0x50 + reg.hw_id() - 8); + i.set(REX(false, false, false, true)); + return i; + } + return InstructionX86(0x50 + reg.hw_id()); +} + +Instruction pop_gpr64(Register reg) { + ASSERT(reg.is_gpr()); + if (reg.hw_id() >= 8) { + auto i = InstructionX86(0x58 + reg.hw_id() - 8); + i.set(REX(false, false, false, true)); + return i; + } + return InstructionX86(0x58 + reg.hw_id()); +} + +Instruction call_r64(Register reg_) { + ASSERT(reg_.is_gpr()); + auto reg = reg_.hw_id(); + InstructionX86 instr(0xff); + if (reg >= 8) { + instr.set(REX(false, false, false, true)); + reg -= 8; + } + ASSERT(reg < 8); + ModRM mrm; + mrm.rm = reg; + mrm.reg_op = 2; + mrm.mod = 3; + instr.set(mrm); + return instr; +} + +Instruction jmp_r64(Register reg_) { + ASSERT(reg_.is_gpr()); + auto reg = reg_.hw_id(); + InstructionX86 instr(0xff); + if (reg >= 8) { + instr.set(REX(false, false, false, true)); + reg -= 8; + } + ASSERT(reg < 8); + ModRM mrm; + mrm.rm = reg; + mrm.reg_op = 4; + mrm.mod = 3; + instr.set(mrm); + return instr; +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// INTEGER MATH 
+//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +Instruction sub_gpr64_imm8s(Register reg, int64_t imm) { + ASSERT(reg.is_gpr()); + ASSERT(imm >= INT8_MIN && imm <= INT8_MAX); + // SUB r/m64, imm8 : REX.W + 83 /5 ib + InstructionX86 instr(0x83); + instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction sub_gpr64_imm32s(Register reg, int64_t imm) { + ASSERT(reg.is_gpr()); + ASSERT(imm >= INT32_MIN && imm <= INT32_MAX); + InstructionX86 instr(0x81); + instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); + instr.set(Imm(4, imm)); + return instr; +} + +Instruction add_gpr64_imm8s(Register reg, int64_t v) { + ASSERT(v >= INT8_MIN && v <= INT8_MAX); + InstructionX86 instr(0x83); + instr.set_modrm_and_rex(0, reg.hw_id(), 3, true); + instr.set(Imm(1, v)); + return instr; +} + +Instruction add_gpr64_imm32s(Register reg, int64_t v) { + ASSERT(v >= INT32_MIN && v <= INT32_MAX); + InstructionX86 instr(0x81); + instr.set_modrm_and_rex(0, reg.hw_id(), 3, true); + instr.set(Imm(4, v)); + return instr; +} + +Instruction add_gpr64_imm(Register reg, int64_t imm) { + if (imm >= INT8_MIN && imm <= INT8_MAX) { + return add_gpr64_imm8s(reg, imm); + } else if (imm >= INT32_MIN && imm <= INT32_MAX) { + return add_gpr64_imm32s(reg, imm); + } else { + throw std::runtime_error("Invalid `add` with reg[" + reg.print() + "]/imm[" + + std::to_string(imm) + "]"); + } +} + +Instruction sub_gpr64_imm(Register reg, int64_t imm) { + if (imm >= INT8_MIN && imm <= INT8_MAX) { + return sub_gpr64_imm8s(reg, imm); + } else if (imm >= INT32_MIN && imm <= INT32_MAX) { + return sub_gpr64_imm32s(reg, imm); + } else { + throw std::runtime_error("Invalid `sub` with reg[" + reg.print() + "]/imm[" + + std::to_string(imm) + "]"); + } +} + +Instruction add_gpr64_gpr64(Register dst, Register src) { + InstructionX86 instr(0x01); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); + return instr; +} + +Instruction 
sub_gpr64_gpr64(Register dst, Register src) { + InstructionX86 instr(0x29); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); + return instr; +} + +Instruction imul_gpr32_gpr32(Register dst, Register src) { + InstructionX86 instr(0xf); + instr.set_op2(0xaf); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + return instr; +} + +Instruction imul_gpr64_gpr64(Register dst, Register src) { + InstructionX86 instr(0xf); + instr.set_op2(0xaf); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); + return instr; +} + +Instruction idiv_gpr32(Register reg) { + InstructionX86 instr(0xf7); + ASSERT(reg.is_gpr()); + instr.set_modrm_and_rex(7, reg.hw_id(), 3, false); + return instr; +} + +Instruction unsigned_div_gpr32(Register reg) { + InstructionX86 instr(0xf7); + ASSERT(reg.is_gpr()); + instr.set_modrm_and_rex(6, reg.hw_id(), 3, false); + return instr; +} + +Instruction cdq() { + InstructionX86 instr(0x99); + return instr; +} + +Instruction movsx_r64_r32(Register dst, Register src) { + InstructionX86 instr(0x63); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); + return instr; +} + +Instruction cmp_gpr64_gpr64(Register a, Register b) { + InstructionX86 instr(0x3b); + ASSERT(a.is_gpr()); + ASSERT(b.is_gpr()); + instr.set_modrm_and_rex(a.hw_id(), b.hw_id(), 3, true); + return instr; +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// BIT STUFF +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +Instruction or_gpr64_gpr64(Register dst, Register src) { + InstructionX86 instr(0x0b); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); + return instr; +} + +Instruction and_gpr64_gpr64(Register dst, Register src) { + InstructionX86 instr(0x23); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + 
instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); + return instr; +} + +Instruction xor_gpr64_gpr64(Register dst, Register src) { + InstructionX86 instr(0x33); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); + return instr; +} + +Instruction not_gpr64(Register reg) { + InstructionX86 instr(0xf7); + ASSERT(reg.is_gpr()); + instr.set_modrm_and_rex(2, reg.hw_id(), 3, true); + return instr; +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// SHIFTS +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +Instruction shl_gpr64_cl(Register reg) { + ASSERT(reg.is_gpr()); + InstructionX86 instr(0xd3); + instr.set_modrm_and_rex(4, reg.hw_id(), 3, true); + return instr; +} + +Instruction shr_gpr64_cl(Register reg) { + ASSERT(reg.is_gpr()); + InstructionX86 instr(0xd3); + instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); + return instr; +} + +Instruction sar_gpr64_cl(Register reg) { + ASSERT(reg.is_gpr()); + InstructionX86 instr(0xd3); + instr.set_modrm_and_rex(7, reg.hw_id(), 3, true); + return instr; +} + +Instruction shl_gpr64_u8(Register reg, uint8_t sa) { + ASSERT(reg.is_gpr()); + InstructionX86 instr(0xc1); + instr.set_modrm_and_rex(4, reg.hw_id(), 3, true); + instr.set(Imm(1, sa)); + return instr; +} + +Instruction shr_gpr64_u8(Register reg, uint8_t sa) { + ASSERT(reg.is_gpr()); + InstructionX86 instr(0xc1); + instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); + instr.set(Imm(1, sa)); + return instr; +} + +Instruction sar_gpr64_u8(Register reg, uint8_t sa) { + ASSERT(reg.is_gpr()); + InstructionX86 instr(0xc1); + instr.set_modrm_and_rex(7, reg.hw_id(), 3, true); + instr.set(Imm(1, sa)); + return instr; +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// CONTROL FLOW +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +Instruction jmp_32() { + InstructionX86 instr(0xe9); + instr.set(Imm(4, 0)); + return instr; +} + +Instruction je_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x84); + instr.set(Imm(4, 0)); + return instr; 
+} + +Instruction jne_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x85); + instr.set(Imm(4, 0)); + return instr; +} + +Instruction jle_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x8e); + instr.set(Imm(4, 0)); + return instr; +} + +Instruction jge_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x8d); + instr.set(Imm(4, 0)); + return instr; +} + +Instruction jl_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x8c); + instr.set(Imm(4, 0)); + return instr; +} + +Instruction jg_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x8f); + instr.set(Imm(4, 0)); + return instr; +} + +Instruction jbe_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x86); + instr.set(Imm(4, 0)); + return instr; +} + +Instruction jae_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x83); + instr.set(Imm(4, 0)); + return instr; +} + +Instruction jb_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x82); + instr.set(Imm(4, 0)); + return instr; +} + +Instruction ja_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x87); + instr.set(Imm(4, 0)); + return instr; +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// FLOAT MATH +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +Instruction cmp_flt_flt(Register a, Register b) { + ASSERT(a.is_128bit_simd()); + ASSERT(b.is_128bit_simd()); + InstructionX86 instr(0x0f); + instr.set_op2(0x2e); + instr.set_modrm_and_rex(a.hw_id(), b.hw_id(), 3, false); + return instr; +} + +Instruction sqrts_xmm(Register dst, Register src) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x51); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +Instruction mulss_xmm_xmm(Register dst, Register src) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x59); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, 
false); + instr.swap_op0_rex(); + return instr; +} + +Instruction divss_xmm_xmm(Register dst, Register src) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x5e); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +Instruction subss_xmm_xmm(Register dst, Register src) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x5c); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +Instruction addss_xmm_xmm(Register dst, Register src) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x58); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +Instruction minss_xmm_xmm(Register dst, Register src) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x5d); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +Instruction maxss_xmm_xmm(Register dst, Register src) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x5f); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +Instruction int32_to_float(Register dst, Register src) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_gpr()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x2a); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +Instruction float_to_int32(Register dst, Register src) { + ASSERT(dst.is_gpr()); + ASSERT(src.is_128bit_simd()); + 
InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x2c); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +Instruction nop() { + // NOP + InstructionX86 instr(0x90); + return instr; +} + +// TODO - rsqrt / abs / sqrt + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// UTILITIES +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +Instruction null() { + InstructionX86 i(0); + i.m_flags |= InstructionX86::kIsNull; + return i; +} + +///////////////////////////// +// AVX (VF - Vector Float) // +///////////////////////////// + +Instruction nop_vf() { + InstructionX86 instr(0xd9); // FNOP + instr.set_op2(0xd0); + return instr; +} + +Instruction wait_vf() { + InstructionX86 instr(0x9B); // FWAIT / WAIT + return instr; +} + +Instruction mov_vf_vf(Register dst, Register src) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); + + if (src.hw_id() >= 8 && dst.hw_id() < 8) { + // in this case, we can use the 0x29 encoding, which swaps src and dst, in order to use the + // 2 byte VEX prefix, where the 0x28 encoding would require an extra byte. + // compilers/assemblers seem to prefer 0x28, unless 0x29 would save you a byte. 
+ InstructionX86 instr(0x29); + instr.set_vex_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, VEX3::LeadingBytes::P_0F, false); + return instr; + } else { + InstructionX86 instr(0x28); + instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, VEX3::LeadingBytes::P_0F, false); + return instr; + } +} + +Instruction loadvf_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_128bit_simd()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x28); + instr.set_vex_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + VEX3::LeadingBytes::P_0F, false); + return instr; +} + +Instruction loadvf_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_128bit_simd()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x28); + instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, VEX3::LeadingBytes::P_0F, false); + return instr; +} + +Instruction loadvf_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_128bit_simd()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x28); + instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, VEX3::LeadingBytes::P_0F, false); + return instr; +} + +Instruction storevf_gpr64_plus_gpr64(Register value, Register addr1, Register addr2) { + ASSERT(value.is_128bit_simd()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != 
RSP); + InstructionX86 instr(0x29); + instr.set_vex_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + VEX3::LeadingBytes::P_0F, false); + return instr; +} + +Instruction storevf_gpr64_plus_gpr64_plus_s8(Register value, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(value.is_128bit_simd()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x29); + instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, VEX3::LeadingBytes::P_0F, false); + return instr; +} + +Instruction storevf_gpr64_plus_gpr64_plus_s32(Register value, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(value.is_128bit_simd()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x29); + instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, VEX3::LeadingBytes::P_0F, false); + return instr; +} + +Instruction loadvf_rip_plus_s32(Register dest, s64 offset) { + ASSERT(dest.is_128bit_simd()); + ASSERT(offset >= INT32_MIN); + ASSERT(offset <= INT32_MAX); + InstructionX86 instr(0x28); + instr.set_vex_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset); + return instr; +} + +// TODO - rip relative loads and stores. 
+ +Instruction blend_vf(Register dst, Register src1, Register src2, u8 mask) { + ASSERT(!(mask & 0b11110000)); + ASSERT(dst.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + ASSERT(src2.is_128bit_simd()); + InstructionX86 instr(0x0c); // VBLENDPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F_3A, src1.hw_id(), + false, VexPrefix::P_66); + instr.set(Imm(1, mask)); + return instr; +} + +Instruction shuffle_vf(Register dst, Register src, u8 dx, u8 dy, u8 dz, u8 dw) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); + ASSERT(dx < 4); + ASSERT(dy < 4); + ASSERT(dz < 4); + ASSERT(dw < 4); + u8 imm = dx + (dy << 2) + (dz << 4) + (dw << 6); + return swizzle_vf(dst, src, imm); + + // SSE encoding version: + // InstructionX86 instr(0x0f); + // instr.set_op2(0xc6); + // instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + // instr.set(Imm(1, imm)); + // return instr; +} + +Instruction swizzle_vf(Register dst, Register src, u8 controlBytes) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); + InstructionX86 instr(0xC6); // VSHUFPS + + // we use the AVX "VEX" encoding here. This is a three-operand form, + // but we just set both source + // to the same register. It seems like this is one byte longer but is faster maybe? 
+ instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, src.hw_id()); + instr.set(Imm(1, controlBytes)); + return instr; +} + +Instruction splat_vf(Register dst, Register src, Register::VF_ELEMENT element) { + switch (element) { + case Register::VF_ELEMENT::X: // Least significant element + return swizzle_vf(dst, src, 0b00000000); + break; + case Register::VF_ELEMENT::Y: + return swizzle_vf(dst, src, 0b01010101); + break; + case Register::VF_ELEMENT::Z: + return swizzle_vf(dst, src, 0b10101010); + break; + case Register::VF_ELEMENT::W: // Most significant element + return swizzle_vf(dst, src, 0b11111111); + break; + default: + ASSERT(false); + return InstructionX86(0); + } +} + +Instruction xor_vf(Register dst, Register src1, Register src2) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + ASSERT(src2.is_128bit_simd()); + InstructionX86 instr(0x57); // VXORPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); + return instr; +} + +Instruction sub_vf(Register dst, Register src1, Register src2) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + ASSERT(src2.is_128bit_simd()); + InstructionX86 instr(0x5c); // VSUBPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); + return instr; +} + +Instruction add_vf(Register dst, Register src1, Register src2) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + ASSERT(src2.is_128bit_simd()); + InstructionX86 instr(0x58); // VADDPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); + return instr; +} + +Instruction mul_vf(Register dst, Register src1, Register src2) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + ASSERT(src2.is_128bit_simd()); + InstructionX86 instr(0x59); // VMULPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); + return instr; 
+} + +Instruction max_vf(Register dst, Register src1, Register src2) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + ASSERT(src2.is_128bit_simd()); + InstructionX86 instr(0x5F); // VMAXPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); + return instr; +} + +Instruction min_vf(Register dst, Register src1, Register src2) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + ASSERT(src2.is_128bit_simd()); + InstructionX86 instr(0x5D); // VMINPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); + return instr; +} + +Instruction div_vf(Register dst, Register src1, Register src2) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + ASSERT(src2.is_128bit_simd()); + InstructionX86 instr(0x5E); // VDIVPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); + return instr; +} + +Instruction sqrt_vf(Register dst, Register src) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); + InstructionX86 instr(0x51); // VSQRTPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0b0); + return instr; +} + +Instruction itof_vf(Register dst, Register src) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); + InstructionX86 instr(0x5b); // VCVTDQ2PS + instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0); + return instr; +} + +Instruction ftoi_vf(Register dst, Register src) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); + // VEX.128.F3.0F.WIG 5B /r VCVTTPS2DQ xmm1, xmm2/m128 + InstructionX86 instr(0x5b); // VCVTTPS2DQ + instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false, + VexPrefix::P_F3); + return instr; +} + +Instruction pw_sra(Register dst, Register src, u8 imm) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); + // VEX.128.66.0F.WIG 72 
/4 ib VPSRAD xmm1, xmm2, imm8 + InstructionX86 instr(0x72); + instr.set_vex_modrm_and_rex(4, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction pw_srl(Register dst, Register src, u8 imm) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); + // VEX.128.66.0F.WIG 72 /2 ib VPSRLD xmm1, xmm2, imm8 + InstructionX86 instr(0x72); + instr.set_vex_modrm_and_rex(2, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction ph_srl(Register dst, Register src, u8 imm) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); + // VEX.128.66.0F.WIG 71 /2 ib VPSRLW + InstructionX86 instr(0x71); + instr.set_vex_modrm_and_rex(2, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction pw_sll(Register dst, Register src, u8 imm) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); + // VEX.128.66.0F.WIG 72 /6 ib VPSLLD xmm1, xmm2, imm8 + InstructionX86 instr(0x72); + instr.set_vex_modrm_and_rex(6, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; +} +Instruction ph_sll(Register dst, Register src, u8 imm) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); + // VEX.128.66.0F.WIG 71 /6 ib VPSLLW xmm1, xmm2, imm8 + InstructionX86 instr(0x71); + instr.set_vex_modrm_and_rex(6, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction parallel_add_byte(Register dst, Register src0, Register src1) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + // VEX.128.66.0F.WIG FC /r VPADDB xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0xFC); + instr.set_vex_modrm_and_rex(dst.hw_id(), 
src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction parallel_bitwise_or(Register dst, Register src0, Register src1) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + // VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0xEB); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction parallel_bitwise_xor(Register dst, Register src0, Register src1) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + // VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0xEF); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction parallel_bitwise_and(Register dst, Register src0, Register src1) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + // VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0xDB); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction pextub_swapped(Register dst, Register src0, Register src1) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + // VEX.128.66.0F.WIG 68/r VPUNPCKHBW xmm1,xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x68); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction pextuh_swapped(Register dst, Register src0, Register src1) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + // 
VEX.128.66.0F.WIG 69/r VPUNPCKHWD xmm1,xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x69); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction pextuw_swapped(Register dst, Register src0, Register src1) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + // VEX.128.66.0F.WIG 6A/r VPUNPCKHDQ xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x6a); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction pextlb_swapped(Register dst, Register src0, Register src1) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + // VEX.128.66.0F.WIG 60/r VPUNPCKLBW xmm1,xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x60); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction pextlh_swapped(Register dst, Register src0, Register src1) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + // VEX.128.66.0F.WIG 61/r VPUNPCKLWD xmm1,xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x61); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction pextlw_swapped(Register dst, Register src0, Register src1) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + // VEX.128.66.0F.WIG 62/r VPUNPCKLDQ xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x62); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction parallel_compare_e_b(Register 
dst, Register src0, Register src1) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + // VEX.128.66.0F.WIG 74 /r VPCMPEQB xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x74); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction parallel_compare_e_h(Register dst, Register src0, Register src1) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + // VEX.128.66.0F.WIG 75 /r VPCMPEQW xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x75); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction parallel_compare_e_w(Register dst, Register src0, Register src1) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + // VEX.128.66.0F.WIG 76 /r VPCMPEQD xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x76); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction parallel_compare_gt_b(Register dst, Register src0, Register src1) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + // VEX.128.66.0F.WIG 64 /r VPCMPGTB xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x64); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction parallel_compare_gt_h(Register dst, Register src0, Register src1) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + // VEX.128.66.0F.WIG 65 /r VPCMPGTW xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x65); + 
instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction parallel_compare_gt_w(Register dst, Register src0, Register src1) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + // VEX.128.66.0F.WIG 66 /r VPCMPGTD xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x66); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction vpunpcklqdq(Register dst, Register src0, Register src1) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + // VEX.128.66.0F.WIG 6C/r VPUNPCKLQDQ xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x6c); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction pcpyld_swapped(Register dst, Register src0, Register src1) { + return vpunpcklqdq(dst, src0, src1); +} + +Instruction pcpyud(Register dst, Register src0, Register src1) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + // VEX.128.66.0F.WIG 6D/r VPUNPCKHQDQ xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x6d); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction vpsubd(Register dst, Register src0, Register src1) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + // VEX.128.66.0F.WIG FA /r VPSUBD xmm1, xmm2, xmm3/m128 + // reg, vec, r/m + InstructionX86 instr(0xfa); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction vpsrldq(Register dst, Register 
src, u8 imm) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); + // VEX.128.66.0F.WIG 73 /3 ib VPSRLDQ xmm1, xmm2, imm8 + InstructionX86 instr(0x73); + instr.set_vex_modrm_and_rex(3, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction vpslldq(Register dst, Register src, u8 imm) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); + // VEX.128.66.0F.WIG 73 /7 ib VPSLLDQ xmm1, xmm2, imm8 + InstructionX86 instr(0x73); + instr.set_vex_modrm_and_rex(7, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction vpshuflw(Register dst, Register src, u8 imm) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); + // VEX.128.F2.0F.WIG 70 /r ib VPSHUFLW xmm1, xmm2/m128, imm8 + InstructionX86 instr(0x70); + instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false, + VexPrefix::P_F2); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction vpshufhw(Register dst, Register src, u8 imm) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); + // VEX.128.F3.0F.WIG 70 /r ib VPSHUFHW xmm1, xmm2/m128, imm8 + InstructionX86 instr(0x70); + instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false, + VexPrefix::P_F3); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction vpackuswb(Register dst, Register src0, Register src1) { + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + // VEX.128.66.0F.WIG 67 /r VPACKUSWB xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + + InstructionX86 instr(0x67); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} +} // namespace IGen +} // namespace emitter + +#endif \ No newline at end of file diff --git a/goalc/emitter/Instruction.h 
b/goalc/emitter/Instruction.h index b2bd0357abd..e22055a6d3d 100644 --- a/goalc/emitter/Instruction.h +++ b/goalc/emitter/Instruction.h @@ -1,12 +1,43 @@ #pragma once -#ifndef JAK_INSTRUCTION_H -#define JAK_INSTRUCTION_H +#include #include "common/common_types.h" #include "common/util/Assert.h" namespace emitter { +/*! + * A high-level description of a opcode. It can emit itself. + */ +template +struct InstructionImpl { + /*! + * Emit into a buffer and return how many bytes written (can be zero) + */ + u8 emit(u8* buffer) const { return static_cast(this)->emit(buffer); } + + u8 length() const { return static_cast(this)->length(); } +}; + +// TODO probably separate these because x86 has a ton + +struct InstructionARM64 : InstructionImpl { + // The ARM instruction stream is a sequence of word-aligned words. Each ARM instruction is a + // single 32-bit word in that stream. The encoding of an ARM instruction is: + // TODO + // https://iitd-plos.github.io/col718/ref/arm-instructionset.pdf + u32 instruction_encoding; + + InstructionARM64(u32 encoding) : instruction_encoding(encoding) {} + + uint8_t emit(uint8_t* buffer) const { + memcpy(buffer, &instruction_encoding, 4); + return 4; + } + + uint8_t length() const { return 4; } +}; + /*! * The ModRM byte */ @@ -133,13 +164,7 @@ struct VEX2 { : R(r), reg_id(_reg_id), prefix(_prefix), L(l) {} }; -/*! - * A high-level description of an x86-64 opcode. It can emit itself. 
- */ -struct Instruction { - Instruction(uint8_t opcode) : op(opcode) {} - uint8_t op; - +struct InstructionX86 : InstructionImpl { enum Flags { kOp2Set = (1 << 0), kOp3Set = (1 << 1), @@ -151,23 +176,27 @@ struct Instruction { kSetImm = (1 << 7), }; + InstructionX86(u8 opcode) : op(opcode) {} + + u8 op; + u8 m_flags = 0; - uint8_t op2; + u8 op2; - uint8_t op3; + u8 op3; u8 n_vex = 0; - uint8_t vex[3] = {0, 0, 0}; + u8 vex[3] = {0, 0, 0}; // the rex byte - uint8_t m_rex = 0; + u8 m_rex = 0; // the modrm byte - uint8_t m_modrm = 0; + u8 m_modrm = 0; // the sib byte - uint8_t m_sib = 0; + u8 m_sib = 0; // the displacement Imm disp; @@ -924,9 +953,6 @@ struct Instruction { return offset; } - /*! - * Emit into a buffer and return how many bytes written (can be zero) - */ uint8_t emit(uint8_t* buffer) const { if (m_flags & kIsNull) return 0; @@ -1015,6 +1041,11 @@ struct Instruction { return count; } }; -} // namespace emitter -#endif // JAK_INSTRUCTION_H +#ifdef __aarch64__ +using Instruction = InstructionARM64; +#else +using Instruction = InstructionX86; +#endif + +} // namespace emitter diff --git a/goalc/emitter/ObjectGenerator.cpp b/goalc/emitter/ObjectGenerator.cpp index 64b6e764ecb..c98bcca2b14 100644 --- a/goalc/emitter/ObjectGenerator.cpp +++ b/goalc/emitter/ObjectGenerator.cpp @@ -386,6 +386,7 @@ void ObjectGenerator::handle_temp_static_ptr_links(int seg) { * m_jump_temp_links_by_seg patching after memory layout is done */ void ObjectGenerator::handle_temp_jump_links(int seg) { +#ifndef __aarch64__ for (const auto& link : m_jump_temp_links_by_seg.at(seg)) { // we need to compute three offsets, all relative to the start of data. // 1). the location of the patch (the immediate of the opcode) @@ -411,6 +412,9 @@ void ObjectGenerator::handle_temp_jump_links(int seg) { patch_data(seg, patch_location, dest_rip - source_rip); } +#else +// TODO - ARM64 +#endif } /*! 
@@ -419,6 +423,7 @@ void ObjectGenerator::handle_temp_jump_links(int seg) { * after memory layout is done and before link tables are generated */ void ObjectGenerator::handle_temp_instr_sym_links(int seg) { +#ifndef __aarch64__ for (const auto& links : m_symbol_instr_temp_links_by_seg.at(seg)) { const auto& sym_name = links.first; for (const auto& link : links.second) { @@ -436,6 +441,9 @@ void ObjectGenerator::handle_temp_instr_sym_links(int seg) { m_sym_links_by_seg.at(seg)[sym_name].push_back(offset_of_instruction + offset_in_instruction); } } +#else +// TODO - ARM64 +#endif } void ObjectGenerator::handle_temp_rip_func_links(int seg) { @@ -539,6 +547,7 @@ void ObjectGenerator::emit_link_ptr(int seg) { } void ObjectGenerator::emit_link_rip(int seg) { +#ifndef __aarch64__ auto& out = m_link_by_seg.at(seg); for (auto& rec : m_rip_links_by_seg.at(seg)) { // kind (u8) @@ -564,6 +573,9 @@ void ObjectGenerator::emit_link_rip(int seg) { src_func.instruction_to_byte_in_data.at(rec.instr.instr_id) + src_instr.offset_of_disp(), out); } +#else +// TODO - ARM64 +#endif } void ObjectGenerator::emit_link_table(int seg, const TypeSystem* ts) { diff --git a/goalc/emitter/Register.h b/goalc/emitter/Register.h index 44ff8df3323..9adc48db2df 100644 --- a/goalc/emitter/Register.h +++ b/goalc/emitter/Register.h @@ -145,12 +145,25 @@ class Register { // intentionally not explicit so we can use X86_REGs in place of Registers Register(int id) : m_id(id) {} - bool is_xmm() const { return m_id >= XMM0 && m_id <= XMM15; } + bool is_128bit_simd() const { +#ifndef __aarch64__ + return m_id >= XMM0 && m_id <= XMM15; +#else + return m_id >= Q0 && m_id <= Q31; +#endif + } - bool is_gpr() const { return m_id >= RAX && m_id <= R15; } + bool is_gpr() const { +#ifndef __aarch64__ + return m_id >= RAX && m_id <= R15; +#else + return m_id >= X0 && m_id <= X30; +#endif + } int hw_id() const { - if (is_xmm()) { + // TODO - ARM64, even needed? 
+ if (is_128bit_simd()) { return m_id - XMM0; } else if (is_gpr()) { return m_id - RAX; diff --git a/scripts/tasks/Taskfile_darwin.yml b/scripts/tasks/Taskfile_darwin.yml index def3dba28ea..753a1897182 100644 --- a/scripts/tasks/Taskfile_darwin.yml +++ b/scripts/tasks/Taskfile_darwin.yml @@ -9,3 +9,5 @@ vars: OFFLINETEST_BIN_RELEASE_DIR: './build' GOALCTEST_BIN_RELEASE_DIR: './build' EXE_FILE_EXTENSION: '' + CMAKE_PRESET: 'Release-macos-clang' + CMAKE_NUM_THREADS: '$((`sysctl -n hw.logicalcpu`))' diff --git a/scripts/tasks/Taskfile_linux.yml b/scripts/tasks/Taskfile_linux.yml index def3dba28ea..28db06884c3 100644 --- a/scripts/tasks/Taskfile_linux.yml +++ b/scripts/tasks/Taskfile_linux.yml @@ -9,3 +9,5 @@ vars: OFFLINETEST_BIN_RELEASE_DIR: './build' GOALCTEST_BIN_RELEASE_DIR: './build' EXE_FILE_EXTENSION: '' + CMAKE_PRESET: 'TODO' + CMAKE_NUM_THREADS: '$((`sysctl -n hw.logicalcpu`))' diff --git a/scripts/tasks/Taskfile_windows.yml b/scripts/tasks/Taskfile_windows.yml index 597e5f175ce..a4f3824ab52 100644 --- a/scripts/tasks/Taskfile_windows.yml +++ b/scripts/tasks/Taskfile_windows.yml @@ -9,3 +9,5 @@ vars: OFFLINETEST_BIN_RELEASE_DIR: './out/build/Release/bin' GOALCTEST_BIN_RELEASE_DIR: './out/build/Release/bin' EXE_FILE_EXTENSION: '.exe' + CMAKE_PRESET: 'TODO' + CMAKE_NUM_THREADS: '$((`sysctl -n hw.logicalcpu`))' diff --git a/test/test_CodeTester.cpp b/test/test_CodeTester.cpp index a70c1e77eb1..4df693677e8 100644 --- a/test/test_CodeTester.cpp +++ b/test/test_CodeTester.cpp @@ -50,7 +50,7 @@ TEST(CodeTester, execute_push_pop_gprs) { tester.execute(); } -TEST(CodeTester, xmm_store_128) { +TEST(CodeTester, simd_store_128) { CodeTester tester; tester.init_code_buffer(256); // movdqa [rbx], xmm3 diff --git a/test/test_emitter.cpp b/test/test_emitter.cpp index b39ce889a7e..46ecedfd49f 100644 --- a/test/test_emitter.cpp +++ b/test/test_emitter.cpp @@ -1,3901 +1,3903 @@ -#include "goalc/emitter/CodeTester.h" -#include "goalc/emitter/IGen.h" -#include "gtest/gtest.h" - 
-using namespace emitter; - -TEST(EmitterIntegerMath, add_gpr64_imm8s) { - CodeTester tester; - tester.init_code_buffer(256); - - std::vector vals = {0, 1, -1, INT32_MIN, INT32_MAX, INT64_MIN, INT64_MAX}; - std::vector imms = {0, 1, -1, INT8_MIN, INT8_MAX}; - - // test the ones that aren't rsp - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (auto val : vals) { - for (auto imm : imms) { - auto expected = val + imm; - - tester.clear(); - tester.emit_push_all_gprs(true); - - // move initial value to register - tester.emit(IGen::mov_gpr64_gpr64(i, tester.get_c_abi_arg_reg(0))); - // do the add - tester.emit(IGen::add_gpr64_imm8s(i, imm)); - // move for return - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - auto result = tester.execute_ret(val, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } - - tester.clear(); - tester.emit(IGen::add_gpr64_imm8s(RSP, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 83 c4 0c"); -} - -TEST(EmitterIntegerMath, add_gpr64_imm32s) { - CodeTester tester; - tester.init_code_buffer(256); - - std::vector vals = {0, 1, -1, INT32_MIN, INT32_MAX, INT64_MIN, INT64_MAX}; - std::vector imms = {0, 1, -1, INT8_MIN, INT8_MAX, INT32_MIN, INT32_MAX}; - - // test the ones that aren't rsp - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (auto val : vals) { - for (auto imm : imms) { - auto expected = val + imm; - - tester.clear(); - tester.emit_push_all_gprs(true); - - // move initial value to register - tester.emit(IGen::mov_gpr64_gpr64(i, tester.get_c_abi_arg_reg(0))); - // do the add - tester.emit(IGen::add_gpr64_imm32s(i, imm)); - // move for return - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - auto result = tester.execute_ret(val, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } - - tester.clear(); - tester.emit(IGen::add_gpr64_imm32s(RSP, 12)); - 
EXPECT_EQ(tester.dump_to_hex_string(), "48 81 c4 0c 00 00 00"); -} - -TEST(EmitterIntegerMath, sub_gpr64_imm8s) { - CodeTester tester; - tester.init_code_buffer(256); - - std::vector vals = {0, 1, -1, INT32_MIN, INT32_MAX, INT64_MIN, INT64_MAX}; - std::vector imms = {0, 1, -1, INT8_MIN, INT8_MAX}; - - // test the ones that aren't rsp - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (auto val : vals) { - for (auto imm : imms) { - auto expected = val - imm; - - tester.clear(); - tester.emit_push_all_gprs(true); - - // move initial value to register - tester.emit(IGen::mov_gpr64_gpr64(i, tester.get_c_abi_arg_reg(0))); - // do the add - tester.emit(IGen::sub_gpr64_imm8s(i, imm)); - // move for return - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - auto result = tester.execute_ret(val, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } - - tester.clear(); - tester.emit(IGen::sub_gpr64_imm8s(RSP, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 83 ec 0c"); -} - -TEST(EmitterIntegerMath, sub_gpr64_imm32s) { - CodeTester tester; - tester.init_code_buffer(256); - - std::vector vals = {0, 1, -1, INT32_MIN, INT32_MAX, INT64_MIN, INT64_MAX}; - std::vector imms = {0, 1, -1, INT8_MIN, INT8_MAX, INT32_MIN, INT32_MAX}; - - // test the ones that aren't rsp - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (auto val : vals) { - for (auto imm : imms) { - auto expected = val - imm; - - tester.clear(); - tester.emit_push_all_gprs(true); - - // move initial value to register - tester.emit(IGen::mov_gpr64_gpr64(i, tester.get_c_abi_arg_reg(0))); - // do the add - tester.emit(IGen::sub_gpr64_imm32s(i, imm)); - // move for return - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - auto result = tester.execute_ret(val, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } - - tester.clear(); - 
tester.emit(IGen::sub_gpr64_imm32s(RSP, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 81 ec 0c 00 00 00"); -} - -TEST(EmitterIntegerMath, add_gpr64_gpr64) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, - INT64_MAX, 117, 32, -348473, 83747382}; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - for (auto v1 : vals) { - for (auto v2 : vals) { - auto expected = v1 + v2; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, v1)); - tester.emit(IGen::mov_gpr64_u64(j, v2)); - tester.emit(IGen::add_gpr64_gpr64(i, j)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } - } -} - -TEST(EmitterIntegerMath, sub_gpr64_gpr64) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, - INT64_MAX, 117, 32, -348473, 83747382}; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - for (auto v1 : vals) { - for (auto v2 : vals) { - auto expected = v1 - v2; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, v1)); - tester.emit(IGen::mov_gpr64_u64(j, v2)); - tester.emit(IGen::sub_gpr64_gpr64(i, j)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } - } -} - -TEST(EmitterIntegerMath, mul_gpr32_gpr32) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = { - 0, 1, -2, -20, 123123, INT32_MIN, INT32_MAX, INT32_MIN + 1, INT32_MAX - 1}; - - for (int i = 0; i < 16; i++) { - if (i == RSP) { - 
continue; - } - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - for (auto v1 : vals) { - for (auto v2 : vals) { - // this is kind of weird behavior, but it's what the PS2 CPU does, I think. - // the lower 32-bits of the result are sign extended, even if this sign doesn't match - // the sign of the real product. This is true for both signed and unsigned multiply. - auto expected = ((s64(v1) * s64(v2)) << 32) >> 32; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, (s64)v1)); - tester.emit(IGen::mov_gpr64_u64(j, (s64)v2)); - tester.emit(IGen::imul_gpr32_gpr32(i, j)); - tester.emit(IGen::movsx_r64_r32(RAX, i)); // weird PS2 sign extend. - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } - } -} - -TEST(EmitterIntegerMath, or_gpr64_gpr64) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, - INT64_MAX, 117, 32, -348473, 83747382}; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - for (auto v1 : vals) { - for (auto v2 : vals) { - auto expected = v1 | v2; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, v1)); - tester.emit(IGen::mov_gpr64_u64(j, v2)); - tester.emit(IGen::or_gpr64_gpr64(i, j)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } - } -} - -TEST(EmitterIntegerMath, and_gpr64_gpr64) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, - INT64_MAX, 117, 32, -348473, 83747382}; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (int j = 0; j < 16; j++) { - if (j == RSP 
|| j == i) { - continue; - } - for (auto v1 : vals) { - for (auto v2 : vals) { - auto expected = v1 & v2; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, v1)); - tester.emit(IGen::mov_gpr64_u64(j, v2)); - tester.emit(IGen::and_gpr64_gpr64(i, j)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } - } -} - -TEST(EmitterIntegerMath, xor_gpr64_gpr64) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, - INT64_MAX, 117, 32, -348473, 83747382}; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - for (auto v1 : vals) { - for (auto v2 : vals) { - auto expected = v1 ^ v2; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, v1)); - tester.emit(IGen::mov_gpr64_u64(j, v2)); - tester.emit(IGen::xor_gpr64_gpr64(i, j)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } - } -} - -TEST(EmitterIntegerMath, not_gpr64) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, - INT64_MAX, 117, 32, -348473, 83747382}; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (auto v1 : vals) { - auto expected = ~v1; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, v1)); - tester.emit(IGen::not_gpr64(i)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } -} - -TEST(EmitterIntegerMath, shl_gpr64_cl) { - 
CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, - INT64_MAX, 117, 32, -348473, 83747382}; - std::vector sas = {0, 1, 23, 53, 64}; - - for (int i = 0; i < 16; i++) { - if (i == RSP || i == RCX) { - continue; - } - for (auto v : vals) { - for (auto sa : sas) { - auto expected = v << sa; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, v)); - tester.emit(IGen::mov_gpr64_u64(RCX, sa)); - tester.emit(IGen::shl_gpr64_cl(i)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } -} - -TEST(EmitterIntegerMath, shr_gpr64_cl) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = {0, 1, u64(-2), u64(INT32_MIN), INT32_MAX, u64(INT64_MIN), - INT64_MAX, 117, 32, u64(-348473), 83747382}; - std::vector sas = {0, 1, 23, 53, 64}; - - for (int i = 0; i < 16; i++) { - if (i == RSP || i == RCX) { - continue; - } - for (auto v : vals) { - for (auto sa : sas) { - auto expected = v >> sa; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, v)); - tester.emit(IGen::mov_gpr64_u64(RCX, sa)); - tester.emit(IGen::shr_gpr64_cl(i)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } -} - -TEST(EmitterIntegerMath, sar_gpr64_cl) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, - INT64_MAX, 117, 32, -348473, 83747382}; - std::vector sas = {0, 1, 23, 53, 64}; - - for (int i = 0; i < 16; i++) { - if (i == RSP || i == RCX) { - continue; - } - for (auto v : vals) { - for (auto sa : sas) { - auto expected = v >> sa; - tester.clear(); - tester.emit_push_all_gprs(true); - 
tester.emit(IGen::mov_gpr64_u64(i, v)); - tester.emit(IGen::mov_gpr64_u64(RCX, sa)); - tester.emit(IGen::sar_gpr64_cl(i)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } -} - -TEST(EmitterIntegerMath, shl_gpr64_u8) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, - INT64_MAX, 117, 32, -348473, 83747382}; - std::vector sas = {0, 1, 23, 53, 64}; - - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (auto v : vals) { - for (auto sa : sas) { - auto expected = v << sa; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, v)); - tester.emit(IGen::shl_gpr64_u8(i, sa)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } -} - -TEST(EmitterIntegerMath, shr_gpr64_u8) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = {0, 1, u64(-2), u64(INT32_MIN), INT32_MAX, u64(INT64_MIN), - INT64_MAX, 117, 32, u64(-348473), 83747382}; - std::vector sas = {0, 1, 23, 53, 64}; - - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (auto v : vals) { - for (auto sa : sas) { - auto expected = v >> sa; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, v)); - tester.emit(IGen::shr_gpr64_u8(i, sa)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } -} - -TEST(EmitterIntegerMath, sar_gpr64_u8) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, - INT64_MAX, 117, 32, -348473, 83747382}; - 
std::vector sas = {0, 1, 23, 53, 64}; - - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (auto v : vals) { - for (auto sa : sas) { - auto expected = v >> sa; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, v)); - tester.emit(IGen::sar_gpr64_u8(i, sa)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } -} - -TEST(EmitterIntegerMath, jumps) { - CodeTester tester; - tester.init_code_buffer(256); - - std::vector reads; - - auto x = IGen::jmp_32(); - reads.push_back(tester.size() + x.offset_of_imm()); - tester.emit(x); - - x = IGen::je_32(); - reads.push_back(tester.size() + x.offset_of_imm()); - tester.emit(x); - - x = IGen::jne_32(); - reads.push_back(tester.size() + x.offset_of_imm()); - tester.emit(x); - - x = IGen::jle_32(); - reads.push_back(tester.size() + x.offset_of_imm()); - tester.emit(x); - - x = IGen::jge_32(); - reads.push_back(tester.size() + x.offset_of_imm()); - tester.emit(x); - - x = IGen::jl_32(); - reads.push_back(tester.size() + x.offset_of_imm()); - tester.emit(x); - - x = IGen::jg_32(); - reads.push_back(tester.size() + x.offset_of_imm()); - tester.emit(x); - - x = IGen::jbe_32(); - reads.push_back(tester.size() + x.offset_of_imm()); - tester.emit(x); - - x = IGen::jae_32(); - reads.push_back(tester.size() + x.offset_of_imm()); - tester.emit(x); - - x = IGen::jb_32(); - reads.push_back(tester.size() + x.offset_of_imm()); - tester.emit(x); - - x = IGen::ja_32(); - reads.push_back(tester.size() + x.offset_of_imm()); - tester.emit(x); - - for (auto off : reads) { - EXPECT_EQ(0, tester.read(off)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "E9000000000F84000000000F85000000000F8E000000000F8D000000000F8C000000000F8F000000000F86" - "000000000F83000000000F82000000000F8700000000"); -} - -TEST(EmitterIntegerMath, null) { - auto instr = 
IGen::null(); - EXPECT_EQ(0, instr.emit(nullptr)); -} - -TEST(EmitterLoadsAndStores, load_constant_64_and_move_gpr_gpr_64) { - std::vector u64_constants = {0, UINT64_MAX, INT64_MAX, 7, 12}; - - // test we can load a 64-bit constant into all gprs, move it to any other gpr, and return it. - // rsp is skipping because that's the stack pointer and would prevent us from popping gprs after - - CodeTester tester; - tester.init_code_buffer(256); - - for (auto constant : u64_constants) { - for (int r1 = 0; r1 < 16; r1++) { - if (r1 == RSP) { - continue; - } - - for (int r2 = 0; r2 < 16; r2++) { - if (r2 == RSP) { - continue; - } - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(r1, constant)); - tester.emit(IGen::mov_gpr64_gpr64(r2, r1)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, r2)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - EXPECT_EQ(tester.execute(), constant); - } - } - } -} - -TEST(EmitterLoadsAndStores, load_constant_32_unsigned) { - std::vector u64_constants = {0, UINT32_MAX, INT32_MAX, 7, 12}; - - // test loading 32-bit constants, with all upper 32-bits zero. - // this uses a different opcode than 64-bit loads. - CodeTester tester; - tester.init_code_buffer(256); - - for (auto constant : u64_constants) { - for (int r1 = 0; r1 < 16; r1++) { - if (r1 == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(r1, UINT64_MAX)); - tester.emit(IGen::mov_gpr64_u32(r1, constant)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, r1)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - EXPECT_EQ(tester.execute(), constant); - } - } -} - -TEST(EmitterLoadsAndStores, load_constant_32_signed) { - std::vector s32_constants = {0, 1, INT32_MAX, INT32_MIN, 12, -1}; - - // test loading signed 32-bit constants. for values < 0 this will sign extend. 
- CodeTester tester; - tester.init_code_buffer(256); - - for (auto constant : s32_constants) { - for (int r1 = 0; r1 < 16; r1++) { - if (r1 == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_s32(r1, constant)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, r1)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - EXPECT_EQ(tester.execute(), constant); - } - } -} - -TEST(EmitterLoadsAndStores, load8s_gpr64_goal_ptr_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f be 04 1e"); - - tester.clear(); - tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(R12, RBX, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "4c 0f be 24 1e"); - - tester.clear(); - tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(R12, R15, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "4e 0f be 24 3e"); - - tester.clear(); - tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(R12, R15, R14)); - EXPECT_EQ(tester.dump_to_hex_string(), "4f 0f be 24 3e"); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 
0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(k, i, j)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; - - // run! - EXPECT_EQ(s64(tester.execute((u64)memory, 3, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 2, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 4, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 5, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load8s_gpr64_gpr64_gpr64_s8) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f be 44 1e fd"); - - auto instr = IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! 
- tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; - - // run! - EXPECT_EQ(s64(tester.execute((u64)memory, 3 + 3, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 2 + 3, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 5 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load8s_gpr64_gpr64_gpr64_s32) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f be 84 1e fd ff ff ff"); - - auto instr = IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; - - // run! 
- EXPECT_EQ(s64(tester.execute((u64)memory, 3 + 3, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 2 + 3, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 5 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load8u_gpr64_goal_ptr_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b6 04 1e"); - - tester.clear(); - tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64(R12, RBX, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "4c 0f b6 24 1e"); - - tester.clear(); - tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64(R12, R15, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "4e 0f b6 24 3e"); - - tester.clear(); - tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64(R12, R15, R14)); - EXPECT_EQ(tester.dump_to_hex_string(), "4f 0f b6 24 3e"); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(k, i, j)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! 
- tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; - - // run! - EXPECT_EQ(s64(tester.execute((u64)memory, 3, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 2, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 4, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 5, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load8u_gpr64_gpr64_gpr64_s8) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b6 44 1e fd"); - - auto instr = IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; - - // run! 
- EXPECT_EQ(s64(tester.execute((u64)memory, 3 + 3, 0, 0)), 0xfe); - EXPECT_EQ(s64(tester.execute((u64)memory, 2 + 3, 0, 0)), 0xfd); - EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), 0xff); - EXPECT_EQ(s64(tester.execute((u64)memory, 5 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load8u_gpr64_gpr64_gpr64_s32) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b6 84 1e fd ff ff ff"); - - auto instr = IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; - - // run! 
- EXPECT_EQ(s64(tester.execute((u64)memory, 3 + 3, 0, 0)), 0xfe); - EXPECT_EQ(s64(tester.execute((u64)memory, 2 + 3, 0, 0)), 0xfd); - EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), 0xff); - EXPECT_EQ(s64(tester.execute((u64)memory, 5 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load16s_gpr64_goal_ptr_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f bf 04 1e"); - - tester.clear(); - tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(R12, RBX, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "4c 0f bf 24 1e"); - - tester.clear(); - tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(R12, R15, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "4e 0f bf 24 3e"); - - tester.clear(); - tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(R12, R15, R14)); - EXPECT_EQ(tester.dump_to_hex_string(), "4f 0f bf 24 3e"); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(k, i, j)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! 
- tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s16 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; - - // run! - EXPECT_EQ(s64(tester.execute((u64)memory, 6, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 4, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 8, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 10, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load16s_gpr64_gpr64_plus_gpr64_plus_s8) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f bf 44 1e fd"); - - auto instr = IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u16 memory[8] = {0, 0, 0xfffd, 0xfffe, 0xffff, 0, 0, 0}; - - // run! 
- EXPECT_EQ(s64(tester.execute((u64)memory, 6 + 3, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 10 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load16s_gpr64_gpr64_plus_gpr64_plus_s32) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f bf 84 1e fd ff ff ff"); - - auto instr = IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u16 memory[8] = {0, 0, 0xfffd, 0xfffe, 0xffff, 0, 0, 0}; - - // run! 
- EXPECT_EQ(s64(tester.execute((u64)memory, 6 + 3, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 10 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load16u_gpr64_goal_ptr_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b7 04 1e"); - - tester.clear(); - tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(R12, RBX, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "4c 0f b7 24 1e"); - - tester.clear(); - tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(R12, R15, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "4e 0f b7 24 3e"); - - tester.clear(); - tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(R12, R15, R14)); - EXPECT_EQ(tester.dump_to_hex_string(), "4f 0f b7 24 3e"); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(k, i, j)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! 
- tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s16 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; - - // run! - EXPECT_EQ(s64(tester.execute((u64)memory, 6, 0, 0)), 0xfffe); - EXPECT_EQ(s64(tester.execute((u64)memory, 4, 0, 0)), 0xfffd); - EXPECT_EQ(s64(tester.execute((u64)memory, 8, 0, 0)), 0xffff); - EXPECT_EQ(s64(tester.execute((u64)memory, 10, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load16u_gpr64_gpr64_plus_gpr64_plus_s8) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b7 44 1e fd"); - - auto instr = IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u16 memory[8] = {0, 0, 0xfffd, 0xfffe, 0xffff, 0, 0, 0}; - - // run! 
- EXPECT_EQ(s64(tester.execute((u64)memory, 6 + 3, 0, 0)), 0xfffe); - EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), 0xfffd); - EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), 0xffff); - EXPECT_EQ(s64(tester.execute((u64)memory, 10 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load16u_gpr64_gpr64_plus_gpr64_plus_s32) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b7 84 1e fd ff ff ff"); - - auto instr = IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u16 memory[8] = {0, 0, 0xfffd, 0xfffe, 0xffff, 0, 0, 0}; - - // run! 
- EXPECT_EQ(s64(tester.execute((u64)memory, 6 + 3, 0, 0)), 0xfffe); - EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), 0xfffd); - EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), 0xffff); - EXPECT_EQ(s64(tester.execute((u64)memory, 10 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load32s_gpr64_goal_ptr_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 63 04 1e"); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64(k, i, j)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s32 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; - - // run! 
- EXPECT_EQ(s64(tester.execute((u64)memory, 12, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 8, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 16, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 20, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load32s_gpr64_gpr64_plus_gpr64_plus_s8) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 63 44 1e fd"); - - auto instr = IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u32 memory[8] = {0, 0, 0xfffffffd, 0xfffffffe, 0xffffffff, 0, 0, 0}; - - // run! 
- EXPECT_EQ(s64(tester.execute((u64)memory, 12 + 3, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 20 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load32s_gpr64_gpr64_plus_gpr64_plus_s32) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 63 84 1e fd ff ff ff"); - - auto instr = IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u32 memory[8] = {0, 0, 0xfffffffd, 0xfffffffe, 0xffffffff, 0, 0, 0}; - - // run! 
- EXPECT_EQ(s64(tester.execute((u64)memory, 12 + 3, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 20 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load32u_gpr64_goal_ptr_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "8b 04 1e"); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64(k, i, j)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s32 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; - - // run! 
- EXPECT_EQ(s64(tester.execute((u64)memory, 12, 0, 0)), 0xfffffffe); - EXPECT_EQ(s64(tester.execute((u64)memory, 8, 0, 0)), 0xfffffffd); - EXPECT_EQ(s64(tester.execute((u64)memory, 16, 0, 0)), 0xffffffff); - EXPECT_EQ(s64(tester.execute((u64)memory, 20, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load32u_gpr64_gpr64_plus_gpr64_plus_s8) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "8b 44 1e fd"); - - auto instr = IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s32 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; - - // run! 
- EXPECT_EQ(s64(tester.execute((u64)memory, 12 + 3, 0, 0)), 0xfffffffe); - EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), 0xfffffffd); - EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), 0xffffffff); - EXPECT_EQ(s64(tester.execute((u64)memory, 20 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load32u_gpr64_gpr64_plus_gpr64_plus_s32) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "8b 84 1e fd ff ff ff"); - - auto instr = IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u32 memory[8] = {0, 0, 0xfffffffd, 0xfffffffe, 0xffffffff, 0, 0, 0}; - - // run! 
- EXPECT_EQ(s64(tester.execute((u64)memory, 12 + 3, 0, 0)), 0xfffffffe); - EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), 0xfffffffd); - EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), 0xffffffff); - EXPECT_EQ(s64(tester.execute((u64)memory, 20 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load64_gpr64_goal_ptr_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 8b 04 1e"); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64(k, i, j)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s64 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; - - // run! 
- EXPECT_EQ(s64(tester.execute((u64)memory, 24, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 16, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 32, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 40, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load64_gpr64_gpr64_plus_gpr64_plus_s8) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 8b 44 1e fd"); - - auto instr = IGen::load64_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s64 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; - - // run! 
- EXPECT_EQ(s64(tester.execute((u64)memory, 24 + 3, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 32 + 3, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 40 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load64_gpr64_gpr64_plus_gpr64_plus_s32) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 8b 84 1e fd ff ff ff"); - - auto instr = IGen::load64_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s64 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; - - // run! 
- EXPECT_EQ(s64(tester.execute((u64)memory, 24 + 3, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 32 + 3, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 40 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store8_gpr64_gpr64_plus_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64(RAX, RCX, RDX)); - EXPECT_EQ(tester.dump_to_hex_string(), "88 14 01"); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store! - tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64(i, j, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s8 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! 
- tester.execute((u64)memory, 3, 0xffffffffffffff07, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], 7); - EXPECT_EQ(memory[4], 1); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store8_gpr64_gpr64_plus_gpr64_plus_s8) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "88 54 01 0c"); - - auto instr = IGen::store8_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store - tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64_plus_s8(i, j, k, -3)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s8 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! 
- tester.execute((u64)memory, 6, 0xffffffffffffff07, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], 7); - EXPECT_EQ(memory[4], 1); - - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store8_gpr64_gpr64_plus_gpr64_plus_s32) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "88 94 01 0c 00 00 00"); - - auto instr = IGen::store8_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store - tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64_plus_s32(i, j, k, -3)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s8 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! 
- tester.execute((u64)memory, 6, 0xffffffffffffff07, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], 7); - EXPECT_EQ(memory[4], 1); - - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store16_gpr64_gpr64_plus_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64(RCX, RAX, R8)); - EXPECT_EQ(tester.dump_to_hex_string(), "66 44 89 04 08"); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store! - tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64(i, j, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s16 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! 
- tester.execute((u64)memory, 6, 0xffffffffffffff07, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], s16(0xff07)); - EXPECT_EQ(memory[4], 1); - - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store16_gpr64_gpr64_plus_gpr64_plus_s8) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, R8, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "66 44 89 44 01 0c"); - - auto instr = IGen::store16_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store - tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64_plus_s8(i, j, k, -3)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s16 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! 
- tester.execute((u64)memory, 6 + 3, 0xffffffffffffff07, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], s16(0xff07)); - EXPECT_EQ(memory[4], 1); - - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store16_gpr64_gpr64_plus_gpr64_plus_s32) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, R8, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "66 44 89 84 01 0c 00 00 00"); - - auto instr = IGen::store16_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store - tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64_plus_s32(i, j, k, -3)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s16 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! 
- tester.execute((u64)memory, 6 + 3, 0xffffffffffffff07, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], s16(0xff07)); - EXPECT_EQ(memory[4], 1); - - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store32_gpr64_gpr64_plus_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64(RCX, RAX, R8)); - EXPECT_EQ(tester.dump_to_hex_string(), "44 89 04 08"); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store! - tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64(i, j, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s32 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! 
- tester.execute((u64)memory, 12, 0xffffffff12341234, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], 0x12341234); - EXPECT_EQ(memory[4], 1); - - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store32_gpr64_gpr64_plus_gpr64_plus_s8) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, R8, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "44 89 44 01 0c"); - - auto instr = IGen::store32_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store - tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64_plus_s8(i, j, k, -3)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s32 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! 
- tester.execute((u64)memory, 12 + 3, 0xffffffffffffff07, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], s32(0xffffff07)); - EXPECT_EQ(memory[4], 1); - - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store32_gpr64_gpr64_plus_gpr64_plus_s32) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, R8, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "44 89 84 01 0c 00 00 00"); - - auto instr = IGen::store32_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store - tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64_plus_s32(i, j, k, -3)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s32 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! 
- tester.execute((u64)memory, 12 + 3, 0xffffffffffffff07, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], s32(0xffffff07)); - EXPECT_EQ(memory[4], 1); - - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store64_gpr64_gpr64_plus_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64(RCX, RAX, R8)); - EXPECT_EQ(tester.dump_to_hex_string(), "4c 89 04 08"); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store! - tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64(i, j, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s64 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! 
- tester.execute((u64)memory, 24, 0xffffffff12341234, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], 0xffffffff12341234); - EXPECT_EQ(memory[4], 1); - - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store64_gpr64_gpr64_plus_gpr64_plus_s8) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, R8, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "4c 89 44 01 0c"); - - auto instr = IGen::store64_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store - tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64_plus_s8(i, j, k, -3)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s64 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! 
- tester.execute((u64)memory, 24 + 3, 0xffffffffffffff07, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], s64(0xffffffffffffff07)); - EXPECT_EQ(memory[4], 1); - - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store64_gpr64_gpr64_plus_gpr64_plus_s32) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, R8, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "4c 89 84 01 0c 00 00 00"); - - auto instr = IGen::store64_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store - tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64_plus_s32(i, j, k, -3)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s64 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! 
- tester.execute((u64)memory, 24 + 3, 0xffffffffffffff07, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], s64(0xffffffffffffff07)); - EXPECT_EQ(memory[4], 1); - - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load64_rip) { - CodeTester tester; - tester.init_code_buffer(256); - tester.emit(IGen::load64_rip_s32(RAX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 8b 05 0c 00 00 00"); - - tester.clear(); - for (int i = 0; i < 16; i++) { - tester.emit(IGen::load64_rip_s32(i, 12)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "488B050C000000488B0D0C000000488B150C000000488B1D0C000000488B250C000000488B2D0C00000048" - "8B350C000000488B3D0C0000004C8B050C0000004C8B0D0C0000004C8B150C0000004C8B1D0C0000004C8B" - "250C0000004C8B2D0C0000004C8B350C0000004C8B3D0C000000"); -} - -TEST(EmitterLoadsAndStores, load32s_rip) { - CodeTester tester; - tester.init_code_buffer(256); - tester.emit(IGen::load32s_rip_s32(RAX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 63 05 0c 00 00 00"); - - tester.clear(); - for (int i = 0; i < 16; i++) { - tester.emit(IGen::load32s_rip_s32(i, 12)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "4863050C00000048630D0C0000004863150C00000048631D0C0000004863250C00000048632D0C00000048" - "63350C00000048633D0C0000004C63050C0000004C630D0C0000004C63150C0000004C631D0C0000004C63" - "250C0000004C632D0C0000004C63350C0000004C633D0C000000"); -} - -TEST(EmitterLoadsAndStores, load32u_rip) { - CodeTester tester; - tester.init_code_buffer(256); - tester.emit(IGen::load32u_rip_s32(RAX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "8b 05 0c 00 00 00"); - - tester.clear(); - for (int i = 0; i < 16; i++) { - tester.emit(IGen::load32u_rip_s32(i, 12)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "8B050C0000008B0D0C0000008B150C0000008B1D0C0000008B250C0000008B2D0C0000008B350C0000008B" - "3D0C000000448B050C000000448B0D0C000000448B150C000000448B1D0C000000448B250C000000448B2D" - "0C000000448B350C000000448B3D0C000000"); -} - 
-TEST(EmitterLoadsAndStores, load16u_rip) { - CodeTester tester; - tester.init_code_buffer(256); - tester.emit(IGen::load16u_rip_s32(RAX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b7 05 0c 00 00 00"); - - tester.clear(); - for (int i = 0; i < 16; i++) { - tester.emit(IGen::load16u_rip_s32(i, 12)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "480FB7050C000000480FB70D0C000000480FB7150C000000480FB71D0C000000480FB7250C000000480FB7" - "2D0C000000480FB7350C000000480FB73D0C0000004C0FB7050C0000004C0FB70D0C0000004C0FB7150C00" - "00004C0FB71D0C0000004C0FB7250C0000004C0FB72D0C0000004C0FB7350C0000004C0FB73D0C000000"); -} - -TEST(EmitterLoadsAndStores, load16s_rip) { - CodeTester tester; - tester.init_code_buffer(256); - tester.emit(IGen::load16s_rip_s32(RAX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f bf 05 0c 00 00 00"); - - tester.clear(); - for (int i = 0; i < 16; i++) { - tester.emit(IGen::load16s_rip_s32(i, 12)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "480FBF050C000000480FBF0D0C000000480FBF150C000000480FBF1D0C000000480FBF250C000000480FBF" - "2D0C000000480FBF350C000000480FBF3D0C0000004C0FBF050C0000004C0FBF0D0C0000004C0FBF150C00" - "00004C0FBF1D0C0000004C0FBF250C0000004C0FBF2D0C0000004C0FBF350C0000004C0FBF3D0C000000"); -} - -TEST(EmitterLoadsAndStores, load8s_rip) { - CodeTester tester; - tester.init_code_buffer(256); - tester.emit(IGen::load8s_rip_s32(RAX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f be 05 0c 00 00 00"); - - tester.clear(); - for (int i = 0; i < 16; i++) { - tester.emit(IGen::load8s_rip_s32(i, 12)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "480FBE050C000000480FBE0D0C000000480FBE150C000000480FBE1D0C000000480FBE250C000000480FBE" - "2D0C000000480FBE350C000000480FBE3D0C0000004C0FBE050C0000004C0FBE0D0C0000004C0FBE150C00" - "00004C0FBE1D0C0000004C0FBE250C0000004C0FBE2D0C0000004C0FBE350C0000004C0FBE3D0C000000"); -} - -TEST(EmitterLoadsAndStores, load8u_rip) { - CodeTester tester; - 
tester.init_code_buffer(256); - tester.emit(IGen::load8u_rip_s32(RAX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b6 05 0c 00 00 00"); - - tester.clear(); - for (int i = 0; i < 16; i++) { - tester.emit(IGen::load8u_rip_s32(i, 12)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "480FB6050C000000480FB60D0C000000480FB6150C000000480FB61D0C000000480FB6250C000000480FB6" - "2D0C000000480FB6350C000000480FB63D0C0000004C0FB6050C0000004C0FB60D0C0000004C0FB6150C00" - "00004C0FB61D0C0000004C0FB6250C0000004C0FB62D0C0000004C0FB6350C0000004C0FB63D0C000000"); -} - -TEST(EmitterLoadsAndStores, store64_rip_s32) { - CodeTester tester; - tester.init_code_buffer(256); - tester.emit(IGen::store64_rip_s32(RAX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 89 05 0c 00 00 00"); - - tester.clear(); - for (int i = 0; i < 16; i++) { - tester.emit(IGen::store64_rip_s32(i, 12)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "4889050C00000048890D0C0000004889150C00000048891D0C0000004889250C00000048892D0C00000048" - "89350C00000048893D0C0000004C89050C0000004C890D0C0000004C89150C0000004C891D0C0000004C89" - "250C0000004C892D0C0000004C89350C0000004C893D0C000000"); -} - -TEST(EmitterLoadsAndStores, store32_rip_s32) { - CodeTester tester; - tester.init_code_buffer(256); - tester.emit(IGen::store32_rip_s32(RAX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "89 05 0c 00 00 00"); - - tester.clear(); - for (int i = 0; i < 16; i++) { - tester.emit(IGen::store32_rip_s32(i, 12)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "89050C000000890D0C00000089150C000000891D0C00000089250C000000892D0C00000089350C00000089" - "3D0C0000004489050C00000044890D0C0000004489150C00000044891D0C0000004489250C00000044892D" - "0C0000004489350C00000044893D0C000000"); -} - -TEST(EmitterLoadsAndStores, store16_rip_s32) { - CodeTester tester; - tester.init_code_buffer(256); - tester.emit(IGen::store16_rip_s32(RAX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "66 89 05 0c 00 00 00"); - - 
tester.clear(); - for (int i = 0; i < 16; i++) { - tester.emit(IGen::store16_rip_s32(i, 12)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "6689050C00000066890D0C0000006689150C00000066891D0C0000006689250C00000066892D0C00000066" - "89350C00000066893D0C000000664489050C0000006644890D0C000000664489150C0000006644891D0C00" - "0000664489250C0000006644892D0C000000664489350C0000006644893D0C000000"); -} - -TEST(EmitterLoadsAndStores, store8_rip_s32) { - CodeTester tester; - tester.init_code_buffer(256); - tester.emit(IGen::store8_rip_s32(RAX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "88 05 0c 00 00 00"); - - tester.clear(); - for (int i = 0; i < 16; i++) { - tester.emit(IGen::store8_rip_s32(i, 12)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "88050C000000880D0C00000088150C000000881D0C0000004088250C00000040882D0C0000004088350C00" - "000040883D0C0000004488050C00000044880D0C0000004488150C00000044881D0C0000004488250C0000" - "0044882D0C0000004488350C00000044883D0C000000"); -} - -TEST(EmitterLoadsAndStores, static_addr) { - CodeTester tester; - tester.init_code_buffer(512); - - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, 12345)); // load test reg with junk - int start_of_lea = tester.size(); - auto lea_instr = IGen::static_addr(i, INT32_MAX); - tester.emit(lea_instr); - // patch instruction to lea the start of this code + 1. 
- tester.write(-start_of_lea - lea_instr.length() + 1, - start_of_lea + lea_instr.offset_of_disp()); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - auto result = tester.execute(); - EXPECT_EQ(result, (u64)(tester.data()) + 1); - } -} - -#ifdef __linux__ -TEST(EmitterXmm32, load32_xmm32_gpr64_plus_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64(XMM3, RAX, RBX)); - EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 10 1c 03"); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - for (int k = 0; k < 16; k++) { - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // fill k with junk - tester.emit(IGen::mov_gpr64_u64(i, (iter & 1) ? 0 : UINT64_MAX)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // load into k - tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64(XMM0 + k, i, j)); - // move to return - tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - - // prepare the memory: - float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; - - // run! 
- EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 3 * sizeof(float), 0, 0), 3.45f); - EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 2 * sizeof(float), 0, 0), 1.23f); - EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 4 * sizeof(float), 0, 0), 5.67f); - EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 5 * sizeof(float), 0, 0), 0); - - iter++; - } - } - } -} - -TEST(EmitterXmm32, load32_xmm32_gpr64_plus_gpr64_plus_s8) { - CodeTester tester; - tester.init_code_buffer(512); - tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64_plus_s8(XMM3, RAX, RBX, -1)); - EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 10 5c 03 ff"); - - auto instr = IGen::load32_xmm32_gpr64_plus_gpr64_plus_s8(XMM3, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - for (int k = 0; k < 16; k++) { - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // fill k with junk - tester.emit(IGen::mov_gpr64_u64(i, (iter & 1) ? 0 : UINT64_MAX)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // load into k - tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64_plus_s8(XMM0 + k, i, j, -3)); - // move to return - tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - - // prepare the memory: - float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; - - // run! 
- EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 3 * sizeof(float) + 3, 0, 0), 3.45f); - EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 2 * sizeof(float) + 3, 0, 0), 1.23f); - EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 4 * sizeof(float) + 3, 0, 0), 5.67f); - EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 5 * sizeof(float) + 3, 0, 0), 0); - - iter++; - } - } - } -} - -TEST(EmitterXmm32, load32_xmm32_gpr64_plus_gpr64_plus_s32) { - CodeTester tester; - tester.init_code_buffer(512); - tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64_plus_s32(XMM3, RAX, RBX, -1)); - EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 10 9c 03 ff ff ff ff"); - - auto instr = IGen::load32_xmm32_gpr64_plus_gpr64_plus_s32(XMM3, RBX, RSI, -1234); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -1234); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - for (int k = 0; k < 16; k++) { - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // fill k with junk - tester.emit(IGen::mov_gpr64_u64(i, (iter & 1) ? 0 : UINT64_MAX)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - s64 offset = (iter & 1) ? INT32_MAX : INT32_MIN; - - // load into k - tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64_plus_s32(XMM0 + k, i, j, offset)); - // move to return - tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - - // prepare the memory: - float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; - - // run! 
- EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 3 * sizeof(float) - offset, 0, 0), - 3.45f); - EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 2 * sizeof(float) - offset, 0, 0), - 1.23f); - EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 4 * sizeof(float) - offset, 0, 0), - 5.67f); - EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 5 * sizeof(float) - offset, 0, 0), - 0); - iter++; - } - } - } -} - -namespace { -template -float as_float(T x) { - float result; - memcpy(&result, &x, sizeof(float)); - return result; -} - -u32 as_u32(float x) { - u32 result; - memcpy(&result, &x, 4); - return result; -} -} // namespace - -TEST(EmitterXmm32, store32_xmm32_gpr64_plus_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64(RAX, RBX, XMM7)); - EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 11 3c 03"); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - // push args to the stack - - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); // addr2 - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); // addr1 - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); // value - - // pop value into addr1 GPR - tester.emit(IGen::pop_gpr64(i)); - // move to XMM - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); - - // pop addrs - tester.emit(IGen::pop_gpr64(i)); - tester.emit(IGen::pop_gpr64(j)); - - // store - tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64(i, j, XMM0 + k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - - // prepare the memory: - float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; - - // run! 
- tester.execute((u64)memory, 12, as_u32(1.234f), 0); - EXPECT_FLOAT_EQ(memory[2], 1.23f); - EXPECT_FLOAT_EQ(memory[3], 1.234f); - EXPECT_FLOAT_EQ(memory[4], 5.67f); - - iter++; - } - } - } -} - -TEST(EmitterXmm32, store32_xmm32_gpr64_plus_gpr64_plus_s8) { - CodeTester tester; - tester.init_code_buffer(512); - tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64_plus_s8(RAX, RBX, XMM3, -1)); - EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 11 5c 03 ff"); - - auto instr = IGen::store32_xmm32_gpr64_plus_gpr64_plus_s8(RBX, RSI, XMM3, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - for (int k = 0; k < 16; k++) { - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); // addr2 - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); // addr1 - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); // value - - // pop value into addr1 GPR - tester.emit(IGen::pop_gpr64(i)); - // move to XMM - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); - - // pop addrs - tester.emit(IGen::pop_gpr64(i)); - tester.emit(IGen::pop_gpr64(j)); - - s64 offset = (iter & 1) ? INT8_MAX : INT8_MIN; - - // load into k - tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64_plus_s8(i, j, XMM0 + k, offset)); - - // move to return - tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - - // prepare the memory: - float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; - - // run! 
- tester.execute((u64)memory, 12 - offset, as_u32(1.234f), 0); - EXPECT_FLOAT_EQ(memory[2], 1.23f); - EXPECT_FLOAT_EQ(memory[3], 1.234f); - EXPECT_FLOAT_EQ(memory[4], 5.67f); - - iter++; - } - } - } -} - -TEST(EmitterXmm32, store32_xmm32_gpr64_plus_gpr64_plus_s32) { - CodeTester tester; - tester.init_code_buffer(512); - tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64_plus_s32(RAX, RBX, XMM3, -1)); - EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 11 9c 03 ff ff ff ff"); - - auto instr = IGen::store32_xmm32_gpr64_plus_gpr64_plus_s32(RBX, RSI, XMM3, -1234); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -1234); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - for (int k = 0; k < 16; k++) { - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); // addr2 - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); // addr1 - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); // value - - // pop value into addr1 GPR - tester.emit(IGen::pop_gpr64(i)); - // move to XMM - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); - - // pop addrs - tester.emit(IGen::pop_gpr64(i)); - tester.emit(IGen::pop_gpr64(j)); - - s64 offset = (iter & 1) ? INT32_MAX : INT32_MIN; - - // load into k - tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64_plus_s32(i, j, XMM0 + k, offset)); - - // move to return - tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - - // prepare the memory: - float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; - - // run! 
- tester.execute((u64)memory, 12 - offset, as_u32(1.234f), 0); - EXPECT_FLOAT_EQ(memory[2], 1.23f); - EXPECT_FLOAT_EQ(memory[3], 1.234f); - EXPECT_FLOAT_EQ(memory[4], 5.67f); - - iter++; - } - } - } -} - -TEST(EmitterXmm32, static_load_xmm32) { - CodeTester tester; - tester.init_code_buffer(512); - for (int i = 0; i < 16; i++) { - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - - auto loc_of_load = tester.size(); - auto load_instr = IGen::static_load_xmm32(XMM0 + i, INT32_MAX); - - tester.emit(load_instr); - tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + i)); - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - auto loc_of_float = tester.emit_data(float(1.2345f)); - - // patch offset - tester.write(loc_of_float - loc_of_load - load_instr.length(), - loc_of_load + load_instr.offset_of_disp()); - - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_FLOAT_EQ(result, 1.2345f); - } -} - -TEST(EmitterXmm32, static_store_xmm32) { - CodeTester tester; - tester.init_code_buffer(512); - for (int i = 0; i < 16; i++) { - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, tester.get_c_abi_arg_reg(0))); - - auto loc_of_store = tester.size(); - auto store_instr = IGen::static_store_xmm32(XMM0 + i, INT32_MAX); - - tester.emit(store_instr); - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - auto loc_of_float = tester.emit_data(float(1.2345f)); - - tester.write(loc_of_float - loc_of_store - store_instr.length(), - loc_of_store + store_instr.offset_of_disp()); - tester.execute(as_u32(-44.567f), 0, 0, 0); - EXPECT_FLOAT_EQ(-44.567f, tester.read(loc_of_float)); - } -} - -TEST(EmitterXmm32, ucomiss) { - CodeTester tester; - tester.init_code_buffer(512); - tester.emit(IGen::cmp_flt_flt(XMM13, XMM14)); - EXPECT_EQ("45 0f 2e ee", tester.dump_to_hex_string()); -} - -TEST(EmitterXmm32, mul) { - 
CodeTester tester; - tester.init_code_buffer(512); - - std::vector vals = {0.f, 1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, 7.545f}; - - for (auto f : vals) { - for (auto g : vals) { - for (int i = 0; i < 16; i++) { - for (int j = 0; j < 16; j++) { - if (i == j) { - continue; - } - auto expected = f * g; - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - u64 val = 0; - memcpy(&val, &f, sizeof(float)); - tester.emit(IGen::mov_gpr64_u64(RAX, val)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); - memcpy(&val, &g, sizeof(float)); - tester.emit(IGen::mov_gpr64_u64(RAX, val)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + j, RAX)); - tester.emit(IGen::mulss_xmm_xmm(XMM0 + j, XMM0 + i)); - tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + j)); - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_FLOAT_EQ(result, expected); - } - } - } - } -} - -TEST(EmitterXmm32, div) { - CodeTester tester; - tester.init_code_buffer(512); - - std::vector vals = {1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, 7.545f}; - - for (auto f : vals) { - for (auto g : vals) { - for (int i = 0; i < 16; i++) { - for (int j = 0; j < 16; j++) { - if (i == j) { - continue; - } - auto expected = g / f; - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - u64 val = 0; - memcpy(&val, &f, sizeof(float)); - tester.emit(IGen::mov_gpr64_u64(RAX, val)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); - memcpy(&val, &g, sizeof(float)); - tester.emit(IGen::mov_gpr64_u64(RAX, val)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + j, RAX)); - tester.emit(IGen::divss_xmm_xmm(XMM0 + j, XMM0 + i)); - tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + j)); - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_FLOAT_EQ(result, expected); - } - } - } - } -} - -TEST(EmitterXmm32, 
add) { - CodeTester tester; - tester.init_code_buffer(512); - - std::vector vals = {0.f, 1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, 7.545f}; - for (auto f : vals) { - for (auto g : vals) { - for (int i = 0; i < 16; i++) { - for (int j = 0; j < 16; j++) { - if (i == j) { - continue; - } - auto expected = g + f; - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - u64 val = 0; - memcpy(&val, &f, sizeof(float)); - tester.emit(IGen::mov_gpr64_u64(RAX, val)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); - memcpy(&val, &g, sizeof(float)); - tester.emit(IGen::mov_gpr64_u64(RAX, val)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + j, RAX)); - tester.emit(IGen::addss_xmm_xmm(XMM0 + j, XMM0 + i)); - tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + j)); - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_FLOAT_EQ(result, expected); - } - } - } - } -} - -TEST(EmitterXmm32, sub) { - CodeTester tester; - tester.init_code_buffer(512); - - std::vector vals = {0.f, 1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, 7.545f}; - - for (auto f : vals) { - for (auto g : vals) { - for (int i = 0; i < 16; i++) { - for (int j = 0; j < 16; j++) { - if (i == j) { - continue; - } - auto expected = g - f; - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - u64 val = 0; - memcpy(&val, &f, sizeof(float)); - tester.emit(IGen::mov_gpr64_u64(RAX, val)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); - memcpy(&val, &g, sizeof(float)); - tester.emit(IGen::mov_gpr64_u64(RAX, val)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + j, RAX)); - tester.emit(IGen::subss_xmm_xmm(XMM0 + j, XMM0 + i)); - tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + j)); - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_FLOAT_EQ(result, expected); - } - } - } - } -} - 
-TEST(EmitterXmm32, float_to_int) { - CodeTester tester; - tester.init_code_buffer(512); - - std::vector vals = {0.f, 1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, - 7.545f, 0.1f, 0.9f, -0.1f, -0.9f}; - - for (auto g : vals) { - for (int i = 0; i < 16; i++) { - for (int j = 0; j < 16; j++) { - if (j == RSP) { - continue; - } - s32 expected = g; - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - u64 val = 0; - memcpy(&val, &g, sizeof(float)); - tester.emit(IGen::mov_gpr64_u64(RAX, val)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); - tester.emit(IGen::float_to_int32(j, XMM0 + i)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, j)); - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } -} - -TEST(EmitterXmm32, int_to_float) { - CodeTester tester; - tester.init_code_buffer(512); - - std::vector vals = {0, 1, -1, INT32_MAX, -3457343, 7, INT32_MIN}; - - for (auto g : vals) { - for (int i = 0; i < 16; i++) { - for (int j = 0; j < 16; j++) { - if (j == RSP) { - continue; - } - float expected = g; - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(j, g)); - tester.emit(IGen::int32_to_float(XMM0 + i, j)); - tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + i)); - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } -} - -TEST(EmitterSlow, xmm32_move) { - std::vector u32_constants = {0, INT32_MAX, UINT32_MAX, 17}; - - // test moving between xmms (32-bit) and gprs. 
- CodeTester tester; - tester.init_code_buffer(512); - - for (auto constant : u32_constants) { - for (int r1 = 0; r1 < 16; r1++) { - if (r1 == RSP) { - continue; - } - for (int r2 = 0; r2 < 16; r2++) { - if (r2 == RSP) { - continue; - } - for (int r3 = 0; r3 < 16; r3++) { - for (int r4 = 0; r4 < 16; r4++) { - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - // move constant to gpr - tester.emit(IGen::mov_gpr64_u32(r1, constant)); - // move gpr to xmm - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + r3, r1)); - // move xmm to xmm - tester.emit(IGen::mov_xmm32_xmm32(XMM0 + r4, XMM0 + r3)); - // move xmm to gpr - tester.emit(IGen::movd_gpr32_xmm32(r2, XMM0 + r4)); - // return! - tester.emit(IGen::mov_gpr64_gpr64(RAX, r2)); - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - } - } - } - } - } - // todo - finish this test -} -#endif - -TEST(Emitter, LEA) { - CodeTester tester; - tester.init_code_buffer(1024); - tester.emit(IGen::lea_reg_plus_off(RDI, RSP, -3)); - tester.emit(IGen::lea_reg_plus_off(RDI, R12, -3)); - tester.emit(IGen::lea_reg_plus_off(R13, RSP, -3)); - tester.emit(IGen::lea_reg_plus_off(R13, R12, -3)); - tester.emit(IGen::lea_reg_plus_off(RDI, RSP, -300)); - tester.emit(IGen::lea_reg_plus_off(RDI, R12, -300)); - tester.emit(IGen::lea_reg_plus_off(R13, RSP, -300)); - tester.emit(IGen::lea_reg_plus_off(R13, R12, -300)); - EXPECT_EQ(tester.dump_to_hex_string(true), - "488D7C24FD498D7C24FD4C8D6C24FD4D8D6C24FD488DBC24D4FEFFFF498DBC24D4FEFFFF4C8DAC24D4FEFF" - "FF4D8DAC24D4FEFFFF"); -} - -TEST(EmitterXMM, StackLoad32) { - CodeTester tester; - tester.init_code_buffer(1024); - tester.emit(IGen::load32_xmm32_gpr64_plus_s32(XMM0 + 3, RSP, -1234)); - tester.emit(IGen::load32_xmm32_gpr64_plus_s32(XMM0 + 13, RSP, -1234)); - EXPECT_EQ(tester.dump_to_hex_string(true), "F30F109C242EFBFFFFF3440F10AC242EFBFFFF"); -} - -TEST(EmitterXMM, StackLoad8) { - CodeTester tester; - tester.init_code_buffer(1024); - 
tester.emit(IGen::load32_xmm32_gpr64_plus_s8(XMM0 + 3, RSP, -12)); - tester.emit(IGen::load32_xmm32_gpr64_plus_s8(XMM0 + 13, RSP, -12)); - EXPECT_EQ(tester.dump_to_hex_string(true), "F30F105C24F4F3440F106C24F4"); -} - -TEST(EmitterXMM, StackLoadFull32) { - CodeTester tester; - tester.init_code_buffer(1024); - tester.emit(IGen::load128_xmm128_gpr64_s32(XMM0 + 3, RSP, -1234)); - tester.emit(IGen::load128_xmm128_gpr64_s32(XMM0 + 13, RSP, -1234)); - EXPECT_EQ(tester.dump_to_hex_string(true), "660F6F9C242EFBFFFF66440F6FAC242EFBFFFF"); -} - -TEST(EmitterXMM, StackLoadFull8) { - CodeTester tester; - tester.init_code_buffer(1024); - tester.emit(IGen::load128_xmm128_gpr64_s8(XMM0 + 3, RSP, -12)); - tester.emit(IGen::load128_xmm128_gpr64_s8(XMM0 + 13, RSP, -12)); - EXPECT_EQ(tester.dump_to_hex_string(true), "660F6F5C24F466440F6F6C24F4"); -} - -TEST(EmitterXMM, StackStore32) { - CodeTester tester; - tester.init_code_buffer(1024); - tester.emit(IGen::store32_xmm32_gpr64_plus_s32(RSP, XMM0 + 3, -1234)); - tester.emit(IGen::store32_xmm32_gpr64_plus_s32(RSP, XMM0 + 13, -1234)); - EXPECT_EQ(tester.dump_to_hex_string(true), "F30F119C242EFBFFFFF3440F11AC242EFBFFFF"); -} - -TEST(EmitterXMM, StackStore8) { - CodeTester tester; - tester.init_code_buffer(1024); - tester.emit(IGen::store32_xmm32_gpr64_plus_s8(RSP, XMM0 + 3, -12)); - tester.emit(IGen::store32_xmm32_gpr64_plus_s8(RSP, XMM0 + 13, -12)); - EXPECT_EQ(tester.dump_to_hex_string(true), "F30F115C24F4F3440F116C24F4"); -} - -TEST(EmitterXMM, StackStoreFull32) { - CodeTester tester; - tester.init_code_buffer(1024); - tester.emit(IGen::store128_gpr64_xmm128_s32(RSP, XMM0 + 3, -1234)); - tester.emit(IGen::store128_gpr64_xmm128_s32(RSP, XMM0 + 13, -1234)); - EXPECT_EQ(tester.dump_to_hex_string(true), "660F7F9C242EFBFFFF66440F7FAC242EFBFFFF"); -} - -TEST(EmitterXMM, StackStoreFull8) { - CodeTester tester; - tester.init_code_buffer(1024); - tester.emit(IGen::store128_gpr64_xmm128_s8(RSP, XMM0 + 3, -12)); - 
tester.emit(IGen::store128_gpr64_xmm128_s8(RSP, XMM0 + 13, -12)); - EXPECT_EQ(tester.dump_to_hex_string(true), "660F7F5C24F466440F7F6C24F4"); -} - -TEST(EmitterXMM, SqrtS) { - CodeTester tester; - tester.init_code_buffer(1024); - tester.emit(IGen::sqrts_xmm(XMM0 + 1, XMM0 + 2)); - tester.emit(IGen::sqrts_xmm(XMM0 + 11, XMM0 + 2)); - tester.emit(IGen::sqrts_xmm(XMM0 + 1, XMM0 + 12)); - tester.emit(IGen::sqrts_xmm(XMM0 + 11, XMM0 + 12)); - EXPECT_EQ(tester.dump_to_hex_string(true), "F30F51CAF3440F51DAF3410F51CCF3450F51DC"); -} +// #include "goalc/emitter/CodeTester.h" +// #include "goalc/emitter/IGen.h" +// #include "gtest/gtest.h" + +// using namespace emitter; + +// TEST(EmitterIntegerMath, add_gpr64_imm8s) { +// CodeTester tester; +// tester.init_code_buffer(256); + +// std::vector vals = {0, 1, -1, INT32_MIN, INT32_MAX, INT64_MIN, INT64_MAX}; +// std::vector imms = {0, 1, -1, INT8_MIN, INT8_MAX}; + +// // test the ones that aren't rsp +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (auto val : vals) { +// for (auto imm : imms) { +// auto expected = val + imm; + +// tester.clear(); +// tester.emit_push_all_gprs(true); + +// // move initial value to register +// tester.emit(IGen::mov_gpr64_gpr64(i, tester.get_c_abi_arg_reg(0))); +// // do the add +// tester.emit(IGen::add_gpr64_imm8s(i, imm)); +// // move for return +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// auto result = tester.execute_ret(val, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } + +// tester.clear(); +// tester.emit(IGen::add_gpr64_imm8s(RSP, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 83 c4 0c"); +// } + +// TEST(EmitterIntegerMath, add_gpr64_imm32s) { +// CodeTester tester; +// tester.init_code_buffer(256); + +// std::vector vals = {0, 1, -1, INT32_MIN, INT32_MAX, INT64_MIN, INT64_MAX}; +// std::vector imms = {0, 1, -1, INT8_MIN, INT8_MAX, INT32_MIN, INT32_MAX}; + +// 
// test the ones that aren't rsp +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (auto val : vals) { +// for (auto imm : imms) { +// auto expected = val + imm; + +// tester.clear(); +// tester.emit_push_all_gprs(true); + +// // move initial value to register +// tester.emit(IGen::mov_gpr64_gpr64(i, tester.get_c_abi_arg_reg(0))); +// // do the add +// tester.emit(IGen::add_gpr64_imm32s(i, imm)); +// // move for return +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// auto result = tester.execute_ret(val, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } + +// tester.clear(); +// tester.emit(IGen::add_gpr64_imm32s(RSP, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 81 c4 0c 00 00 00"); +// } + +// TEST(EmitterIntegerMath, sub_gpr64_imm8s) { +// CodeTester tester; +// tester.init_code_buffer(256); + +// std::vector vals = {0, 1, -1, INT32_MIN, INT32_MAX, INT64_MIN, INT64_MAX}; +// std::vector imms = {0, 1, -1, INT8_MIN, INT8_MAX}; + +// // test the ones that aren't rsp +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (auto val : vals) { +// for (auto imm : imms) { +// auto expected = val - imm; + +// tester.clear(); +// tester.emit_push_all_gprs(true); + +// // move initial value to register +// tester.emit(IGen::mov_gpr64_gpr64(i, tester.get_c_abi_arg_reg(0))); +// // do the add +// tester.emit(IGen::sub_gpr64_imm8s(i, imm)); +// // move for return +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// auto result = tester.execute_ret(val, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } + +// tester.clear(); +// tester.emit(IGen::sub_gpr64_imm8s(RSP, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 83 ec 0c"); +// } + +// TEST(EmitterIntegerMath, sub_gpr64_imm32s) { +// CodeTester tester; +// tester.init_code_buffer(256); + +// 
std::vector vals = {0, 1, -1, INT32_MIN, INT32_MAX, INT64_MIN, INT64_MAX}; +// std::vector imms = {0, 1, -1, INT8_MIN, INT8_MAX, INT32_MIN, INT32_MAX}; + +// // test the ones that aren't rsp +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (auto val : vals) { +// for (auto imm : imms) { +// auto expected = val - imm; + +// tester.clear(); +// tester.emit_push_all_gprs(true); + +// // move initial value to register +// tester.emit(IGen::mov_gpr64_gpr64(i, tester.get_c_abi_arg_reg(0))); +// // do the add +// tester.emit(IGen::sub_gpr64_imm32s(i, imm)); +// // move for return +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// auto result = tester.execute_ret(val, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } + +// tester.clear(); +// tester.emit(IGen::sub_gpr64_imm32s(RSP, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 81 ec 0c 00 00 00"); +// } + +// TEST(EmitterIntegerMath, add_gpr64_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, +// INT64_MAX, 117, 32, -348473, 83747382}; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } +// for (auto v1 : vals) { +// for (auto v2 : vals) { +// auto expected = v1 + v2; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, v1)); +// tester.emit(IGen::mov_gpr64_u64(j, v2)); +// tester.emit(IGen::add_gpr64_gpr64(i, j)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } +// } + +// TEST(EmitterIntegerMath, sub_gpr64_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = {0, 1, -2, 
INT32_MIN, INT32_MAX, INT64_MIN, +// INT64_MAX, 117, 32, -348473, 83747382}; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } +// for (auto v1 : vals) { +// for (auto v2 : vals) { +// auto expected = v1 - v2; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, v1)); +// tester.emit(IGen::mov_gpr64_u64(j, v2)); +// tester.emit(IGen::sub_gpr64_gpr64(i, j)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } +// } + +// TEST(EmitterIntegerMath, mul_gpr32_gpr32) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = { +// 0, 1, -2, -20, 123123, INT32_MIN, INT32_MAX, INT32_MIN + 1, INT32_MAX - 1}; + +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } +// for (auto v1 : vals) { +// for (auto v2 : vals) { +// // this is kind of weird behavior, but it's what the PS2 CPU does, I think. +// // the lower 32-bits of the result are sign extended, even if this sign doesn't match +// // the sign of the real product. This is true for both signed and unsigned multiply. +// auto expected = ((s64(v1) * s64(v2)) << 32) >> 32; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, (s64)v1)); +// tester.emit(IGen::mov_gpr64_u64(j, (s64)v2)); +// tester.emit(IGen::imul_gpr32_gpr32(i, j)); +// tester.emit(IGen::movsx_r64_r32(RAX, i)); // weird PS2 sign extend. 
+// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } +// } + +// TEST(EmitterIntegerMath, or_gpr64_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, +// INT64_MAX, 117, 32, -348473, 83747382}; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } +// for (auto v1 : vals) { +// for (auto v2 : vals) { +// auto expected = v1 | v2; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, v1)); +// tester.emit(IGen::mov_gpr64_u64(j, v2)); +// tester.emit(IGen::or_gpr64_gpr64(i, j)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } +// } + +// TEST(EmitterIntegerMath, and_gpr64_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, +// INT64_MAX, 117, 32, -348473, 83747382}; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } +// for (auto v1 : vals) { +// for (auto v2 : vals) { +// auto expected = v1 & v2; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, v1)); +// tester.emit(IGen::mov_gpr64_u64(j, v2)); +// tester.emit(IGen::and_gpr64_gpr64(i, j)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } +// } + +// TEST(EmitterIntegerMath, xor_gpr64_gpr64) { +// CodeTester tester; 
+// tester.init_code_buffer(256); +// std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, +// INT64_MAX, 117, 32, -348473, 83747382}; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } +// for (auto v1 : vals) { +// for (auto v2 : vals) { +// auto expected = v1 ^ v2; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, v1)); +// tester.emit(IGen::mov_gpr64_u64(j, v2)); +// tester.emit(IGen::xor_gpr64_gpr64(i, j)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } +// } + +// TEST(EmitterIntegerMath, not_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, +// INT64_MAX, 117, 32, -348473, 83747382}; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (auto v1 : vals) { +// auto expected = ~v1; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, v1)); +// tester.emit(IGen::not_gpr64(i)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } + +// TEST(EmitterIntegerMath, shl_gpr64_cl) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, +// INT64_MAX, 117, 32, -348473, 83747382}; +// std::vector sas = {0, 1, 23, 53, 64}; + +// for (int i = 0; i < 16; i++) { +// if (i == RSP || i == RCX) { +// continue; +// } +// for (auto v : vals) { +// for (auto sa : sas) { +// auto expected = v << sa; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// 
tester.emit(IGen::mov_gpr64_u64(i, v)); +// tester.emit(IGen::mov_gpr64_u64(RCX, sa)); +// tester.emit(IGen::shl_gpr64_cl(i)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } + +// TEST(EmitterIntegerMath, shr_gpr64_cl) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = {0, 1, u64(-2), u64(INT32_MIN), INT32_MAX, u64(INT64_MIN), +// INT64_MAX, 117, 32, u64(-348473), 83747382}; +// std::vector sas = {0, 1, 23, 53, 64}; + +// for (int i = 0; i < 16; i++) { +// if (i == RSP || i == RCX) { +// continue; +// } +// for (auto v : vals) { +// for (auto sa : sas) { +// auto expected = v >> sa; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, v)); +// tester.emit(IGen::mov_gpr64_u64(RCX, sa)); +// tester.emit(IGen::shr_gpr64_cl(i)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } + +// TEST(EmitterIntegerMath, sar_gpr64_cl) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, +// INT64_MAX, 117, 32, -348473, 83747382}; +// std::vector sas = {0, 1, 23, 53, 64}; + +// for (int i = 0; i < 16; i++) { +// if (i == RSP || i == RCX) { +// continue; +// } +// for (auto v : vals) { +// for (auto sa : sas) { +// auto expected = v >> sa; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, v)); +// tester.emit(IGen::mov_gpr64_u64(RCX, sa)); +// tester.emit(IGen::sar_gpr64_cl(i)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, 
expected); +// } +// } +// } +// } + +// TEST(EmitterIntegerMath, shl_gpr64_u8) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, +// INT64_MAX, 117, 32, -348473, 83747382}; +// std::vector sas = {0, 1, 23, 53, 64}; + +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (auto v : vals) { +// for (auto sa : sas) { +// auto expected = v << sa; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, v)); +// tester.emit(IGen::shl_gpr64_u8(i, sa)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } + +// TEST(EmitterIntegerMath, shr_gpr64_u8) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = {0, 1, u64(-2), u64(INT32_MIN), INT32_MAX, u64(INT64_MIN), +// INT64_MAX, 117, 32, u64(-348473), 83747382}; +// std::vector sas = {0, 1, 23, 53, 64}; + +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (auto v : vals) { +// for (auto sa : sas) { +// auto expected = v >> sa; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, v)); +// tester.emit(IGen::shr_gpr64_u8(i, sa)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } + +// TEST(EmitterIntegerMath, sar_gpr64_u8) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, +// INT64_MAX, 117, 32, -348473, 83747382}; +// std::vector sas = {0, 1, 23, 53, 64}; + +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (auto v : vals) { +// for (auto sa : sas) { +// 
auto expected = v >> sa; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, v)); +// tester.emit(IGen::sar_gpr64_u8(i, sa)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } + +// TEST(EmitterIntegerMath, jumps) { +// CodeTester tester; +// tester.init_code_buffer(256); + +// std::vector reads; + +// auto x = IGen::jmp_32(); +// reads.push_back(tester.size() + x.offset_of_imm()); +// tester.emit(x); + +// x = IGen::je_32(); +// reads.push_back(tester.size() + x.offset_of_imm()); +// tester.emit(x); + +// x = IGen::jne_32(); +// reads.push_back(tester.size() + x.offset_of_imm()); +// tester.emit(x); + +// x = IGen::jle_32(); +// reads.push_back(tester.size() + x.offset_of_imm()); +// tester.emit(x); + +// x = IGen::jge_32(); +// reads.push_back(tester.size() + x.offset_of_imm()); +// tester.emit(x); + +// x = IGen::jl_32(); +// reads.push_back(tester.size() + x.offset_of_imm()); +// tester.emit(x); + +// x = IGen::jg_32(); +// reads.push_back(tester.size() + x.offset_of_imm()); +// tester.emit(x); + +// x = IGen::jbe_32(); +// reads.push_back(tester.size() + x.offset_of_imm()); +// tester.emit(x); + +// x = IGen::jae_32(); +// reads.push_back(tester.size() + x.offset_of_imm()); +// tester.emit(x); + +// x = IGen::jb_32(); +// reads.push_back(tester.size() + x.offset_of_imm()); +// tester.emit(x); + +// x = IGen::ja_32(); +// reads.push_back(tester.size() + x.offset_of_imm()); +// tester.emit(x); + +// for (auto off : reads) { +// EXPECT_EQ(0, tester.read(off)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "E9000000000F84000000000F85000000000F8E000000000F8D000000000F8C000000000F8F000000000F86" +// "000000000F83000000000F82000000000F8700000000"); +// } + +// TEST(EmitterIntegerMath, null) { +// auto instr = IGen::null(); +// EXPECT_EQ(0, 
instr.emit(nullptr)); +// } + +// TEST(EmitterLoadsAndStores, load_constant_64_and_move_gpr_gpr_64) { +// std::vector u64_constants = {0, UINT64_MAX, INT64_MAX, 7, 12}; + +// // test we can load a 64-bit constant into all gprs, move it to any other gpr, and return it. +// // rsp is skipping because that's the stack pointer and would prevent us from popping gprs +// after + +// CodeTester tester; +// tester.init_code_buffer(256); + +// for (auto constant : u64_constants) { +// for (int r1 = 0; r1 < 16; r1++) { +// if (r1 == RSP) { +// continue; +// } + +// for (int r2 = 0; r2 < 16; r2++) { +// if (r2 == RSP) { +// continue; +// } +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(r1, constant)); +// tester.emit(IGen::mov_gpr64_gpr64(r2, r1)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, r2)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// EXPECT_EQ(tester.execute(), constant); +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load_constant_32_unsigned) { +// std::vector u64_constants = {0, UINT32_MAX, INT32_MAX, 7, 12}; + +// // test loading 32-bit constants, with all upper 32-bits zero. +// // this uses a different opcode than 64-bit loads. +// CodeTester tester; +// tester.init_code_buffer(256); + +// for (auto constant : u64_constants) { +// for (int r1 = 0; r1 < 16; r1++) { +// if (r1 == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(r1, UINT64_MAX)); +// tester.emit(IGen::mov_gpr64_u32(r1, constant)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, r1)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// EXPECT_EQ(tester.execute(), constant); +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load_constant_32_signed) { +// std::vector s32_constants = {0, 1, INT32_MAX, INT32_MIN, 12, -1}; + +// // test loading signed 32-bit constants. for values < 0 this will sign extend. 
+// CodeTester tester; +// tester.init_code_buffer(256); + +// for (auto constant : s32_constants) { +// for (int r1 = 0; r1 < 16; r1++) { +// if (r1 == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_s32(r1, constant)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, r1)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// EXPECT_EQ(tester.execute(), constant); +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load8s_gpr64_goal_ptr_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f be 04 1e"); + +// tester.clear(); +// tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(R12, RBX, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4c 0f be 24 1e"); + +// tester.clear(); +// tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(R12, R15, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4e 0f be 24 3e"); + +// tester.clear(); +// tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(R12, R15, R14)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4f 0f be 24 3e"); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 
0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(k, i, j)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 3, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 2, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 5, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load8s_gpr64_gpr64_gpr64_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f be 44 1e fd"); + +// auto instr = IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 
0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 3 + 3, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 2 + 3, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 5 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load8s_gpr64_gpr64_gpr64_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f be 84 1e fd ff ff ff"); + +// auto instr = IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 
0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 3 + 3, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 2 + 3, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 5 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load8u_gpr64_goal_ptr_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b6 04 1e"); + +// tester.clear(); +// tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64(R12, RBX, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4c 0f b6 24 1e"); + +// tester.clear(); +// tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64(R12, R15, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4e 0f b6 24 3e"); + +// tester.clear(); +// tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64(R12, R15, R14)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4f 0f b6 24 3e"); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// 
tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(k, i, j)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 3, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 2, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 5, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load8u_gpr64_gpr64_gpr64_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b6 44 1e fd"); + +// auto instr = IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// 
tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 3 + 3, 0, 0)), 0xfe); +// EXPECT_EQ(s64(tester.execute((u64)memory, 2 + 3, 0, 0)), 0xfd); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), 0xff); +// EXPECT_EQ(s64(tester.execute((u64)memory, 5 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load8u_gpr64_gpr64_gpr64_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b6 84 1e fd ff ff ff"); + +// auto instr = IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 
0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 3 + 3, 0, 0)), 0xfe); +// EXPECT_EQ(s64(tester.execute((u64)memory, 2 + 3, 0, 0)), 0xfd); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), 0xff); +// EXPECT_EQ(s64(tester.execute((u64)memory, 5 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load16s_gpr64_goal_ptr_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f bf 04 1e"); + +// tester.clear(); +// tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(R12, RBX, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4c 0f bf 24 1e"); + +// tester.clear(); +// tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(R12, R15, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4e 0f bf 24 3e"); + +// tester.clear(); +// tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(R12, R15, R14)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4f 0f bf 24 3e"); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have 
offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(k, i, j)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s16 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 6, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 10, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load16s_gpr64_gpr64_plus_gpr64_plus_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f bf 44 1e fd"); + +// auto instr = IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { 
+// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u16 memory[8] = {0, 0, 0xfffd, 0xfffe, 0xffff, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 6 + 3, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 10 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load16s_gpr64_gpr64_plus_gpr64_plus_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f bf 84 1e fd ff ff ff"); + +// auto instr = IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 
0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u16 memory[8] = {0, 0, 0xfffd, 0xfffe, 0xffff, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 6 + 3, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 10 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load16u_gpr64_goal_ptr_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b7 04 1e"); + +// tester.clear(); +// tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(R12, RBX, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4c 0f b7 24 1e"); + +// tester.clear(); +// tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(R12, R15, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4e 0f b7 24 3e"); + +// tester.clear(); +// tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(R12, R15, R14)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4f 0f b7 24 3e"); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have 
offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(k, i, j)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s16 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 6, 0, 0)), 0xfffe); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4, 0, 0)), 0xfffd); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8, 0, 0)), 0xffff); +// EXPECT_EQ(s64(tester.execute((u64)memory, 10, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load16u_gpr64_gpr64_plus_gpr64_plus_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b7 44 1e fd"); + +// auto instr = IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i 
&& k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u16 memory[8] = {0, 0, 0xfffd, 0xfffe, 0xffff, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 6 + 3, 0, 0)), 0xfffe); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), 0xfffd); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), 0xffff); +// EXPECT_EQ(s64(tester.execute((u64)memory, 10 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load16u_gpr64_gpr64_plus_gpr64_plus_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b7 84 1e fd ff ff ff"); + +// auto instr = IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, 
(iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u16 memory[8] = {0, 0, 0xfffd, 0xfffe, 0xffff, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 6 + 3, 0, 0)), 0xfffe); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), 0xfffd); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), 0xffff); +// EXPECT_EQ(s64(tester.execute((u64)memory, 10 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load32s_gpr64_goal_ptr_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 63 04 1e"); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64(k, i, j)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! 
+// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s32 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 12, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 16, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 20, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load32s_gpr64_gpr64_plus_gpr64_plus_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 63 44 1e fd"); + +// auto instr = IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! 
+// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u32 memory[8] = {0, 0, 0xfffffffd, 0xfffffffe, 0xffffffff, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 12 + 3, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 20 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load32s_gpr64_gpr64_plus_gpr64_plus_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 63 84 1e fd ff ff ff"); + +// auto instr = IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! 
+// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u32 memory[8] = {0, 0, 0xfffffffd, 0xfffffffe, 0xffffffff, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 12 + 3, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 20 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load32u_gpr64_goal_ptr_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "8b 04 1e"); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64(k, i, j)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s32 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + +// // run! 
+// EXPECT_EQ(s64(tester.execute((u64)memory, 12, 0, 0)), 0xfffffffe); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8, 0, 0)), 0xfffffffd); +// EXPECT_EQ(s64(tester.execute((u64)memory, 16, 0, 0)), 0xffffffff); +// EXPECT_EQ(s64(tester.execute((u64)memory, 20, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load32u_gpr64_gpr64_plus_gpr64_plus_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "8b 44 1e fd"); + +// auto instr = IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s32 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + +// // run! 
+// EXPECT_EQ(s64(tester.execute((u64)memory, 12 + 3, 0, 0)), 0xfffffffe); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), 0xfffffffd); +// EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), 0xffffffff); +// EXPECT_EQ(s64(tester.execute((u64)memory, 20 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load32u_gpr64_gpr64_plus_gpr64_plus_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "8b 84 1e fd ff ff ff"); + +// auto instr = IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u32 memory[8] = {0, 0, 0xfffffffd, 0xfffffffe, 0xffffffff, 0, 0, 0}; + +// // run! 
+// EXPECT_EQ(s64(tester.execute((u64)memory, 12 + 3, 0, 0)), 0xfffffffe); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), 0xfffffffd); +// EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), 0xffffffff); +// EXPECT_EQ(s64(tester.execute((u64)memory, 20 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load64_gpr64_goal_ptr_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 8b 04 1e"); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64(k, i, j)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s64 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + +// // run! 
+// EXPECT_EQ(s64(tester.execute((u64)memory, 24, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 16, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 32, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 40, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load64_gpr64_gpr64_plus_gpr64_plus_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 8b 44 1e fd"); + +// auto instr = IGen::load64_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s64 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + +// // run! 
+// EXPECT_EQ(s64(tester.execute((u64)memory, 24 + 3, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 32 + 3, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 40 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load64_gpr64_gpr64_plus_gpr64_plus_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 8b 84 1e fd ff ff ff"); + +// auto instr = IGen::load64_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s64 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + +// // run! 
+// EXPECT_EQ(s64(tester.execute((u64)memory, 24 + 3, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 32 + 3, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 40 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store8_gpr64_gpr64_plus_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64(RAX, RCX, RDX)); +// EXPECT_EQ(tester.dump_to_hex_string(), "88 14 01"); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + +// // store! +// tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64(i, j, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s8 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! 
+// tester.execute((u64)memory, 3, 0xffffffffffffff07, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], 7); +// EXPECT_EQ(memory[4], 1); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store8_gpr64_gpr64_plus_gpr64_plus_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "88 54 01 0c"); + +// auto instr = IGen::store8_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + +// // store +// tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64_plus_s8(i, j, k, -3)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s8 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! 
+// tester.execute((u64)memory, 6, 0xffffffffffffff07, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], 7); +// EXPECT_EQ(memory[4], 1); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store8_gpr64_gpr64_plus_gpr64_plus_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "88 94 01 0c 00 00 00"); + +// auto instr = IGen::store8_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + +// // store +// tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64_plus_s32(i, j, k, -3)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s8 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! 
+// tester.execute((u64)memory, 6, 0xffffffffffffff07, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], 7); +// EXPECT_EQ(memory[4], 1); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store16_gpr64_gpr64_plus_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64(RCX, RAX, R8)); +// EXPECT_EQ(tester.dump_to_hex_string(), "66 44 89 04 08"); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + +// // store! +// tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64(i, j, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s16 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! 
+// tester.execute((u64)memory, 6, 0xffffffffffffff07, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], s16(0xff07)); +// EXPECT_EQ(memory[4], 1); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store16_gpr64_gpr64_plus_gpr64_plus_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, R8, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "66 44 89 44 01 0c"); + +// auto instr = IGen::store16_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + +// // store +// tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64_plus_s8(i, j, k, -3)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s16 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! 
+// tester.execute((u64)memory, 6 + 3, 0xffffffffffffff07, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], s16(0xff07)); +// EXPECT_EQ(memory[4], 1); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store16_gpr64_gpr64_plus_gpr64_plus_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, R8, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "66 44 89 84 01 0c 00 00 00"); + +// auto instr = IGen::store16_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + +// // store +// tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64_plus_s32(i, j, k, -3)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s16 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! 
+// tester.execute((u64)memory, 6 + 3, 0xffffffffffffff07, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], s16(0xff07)); +// EXPECT_EQ(memory[4], 1); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store32_gpr64_gpr64_plus_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64(RCX, RAX, R8)); +// EXPECT_EQ(tester.dump_to_hex_string(), "44 89 04 08"); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + +// // store! +// tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64(i, j, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s32 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! 
+// tester.execute((u64)memory, 12, 0xffffffff12341234, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], 0x12341234); +// EXPECT_EQ(memory[4], 1); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store32_gpr64_gpr64_plus_gpr64_plus_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, R8, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "44 89 44 01 0c"); + +// auto instr = IGen::store32_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + +// // store +// tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64_plus_s8(i, j, k, -3)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s32 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! 
+// tester.execute((u64)memory, 12 + 3, 0xffffffffffffff07, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], s32(0xffffff07)); +// EXPECT_EQ(memory[4], 1); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store32_gpr64_gpr64_plus_gpr64_plus_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, R8, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "44 89 84 01 0c 00 00 00"); + +// auto instr = IGen::store32_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + +// // store +// tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64_plus_s32(i, j, k, -3)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s32 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! 
+// tester.execute((u64)memory, 12 + 3, 0xffffffffffffff07, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], s32(0xffffff07)); +// EXPECT_EQ(memory[4], 1); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store64_gpr64_gpr64_plus_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64(RCX, RAX, R8)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4c 89 04 08"); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + +// // store! +// tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64(i, j, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s64 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! 
+// tester.execute((u64)memory, 24, 0xffffffff12341234, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], 0xffffffff12341234); +// EXPECT_EQ(memory[4], 1); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store64_gpr64_gpr64_plus_gpr64_plus_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, R8, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4c 89 44 01 0c"); + +// auto instr = IGen::store64_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + +// // store +// tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64_plus_s8(i, j, k, -3)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s64 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! 
+// tester.execute((u64)memory, 24 + 3, 0xffffffffffffff07, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], s64(0xffffffffffffff07)); +// EXPECT_EQ(memory[4], 1); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store64_gpr64_gpr64_plus_gpr64_plus_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, R8, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4c 89 84 01 0c 00 00 00"); + +// auto instr = IGen::store64_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + +// // store +// tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64_plus_s32(i, j, k, -3)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s64 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! 
+// tester.execute((u64)memory, 24 + 3, 0xffffffffffffff07, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], s64(0xffffffffffffff07)); +// EXPECT_EQ(memory[4], 1); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load64_rip) { +// CodeTester tester; +// tester.init_code_buffer(256); +// tester.emit(IGen::load64_rip_s32(RAX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 8b 05 0c 00 00 00"); + +// tester.clear(); +// for (int i = 0; i < 16; i++) { +// tester.emit(IGen::load64_rip_s32(i, 12)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "488B050C000000488B0D0C000000488B150C000000488B1D0C000000488B250C000000488B2D0C00000048" +// "8B350C000000488B3D0C0000004C8B050C0000004C8B0D0C0000004C8B150C0000004C8B1D0C0000004C8B" +// "250C0000004C8B2D0C0000004C8B350C0000004C8B3D0C000000"); +// } + +// TEST(EmitterLoadsAndStores, load32s_rip) { +// CodeTester tester; +// tester.init_code_buffer(256); +// tester.emit(IGen::load32s_rip_s32(RAX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 63 05 0c 00 00 00"); + +// tester.clear(); +// for (int i = 0; i < 16; i++) { +// tester.emit(IGen::load32s_rip_s32(i, 12)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "4863050C00000048630D0C0000004863150C00000048631D0C0000004863250C00000048632D0C00000048" +// "63350C00000048633D0C0000004C63050C0000004C630D0C0000004C63150C0000004C631D0C0000004C63" +// "250C0000004C632D0C0000004C63350C0000004C633D0C000000"); +// } + +// TEST(EmitterLoadsAndStores, load32u_rip) { +// CodeTester tester; +// tester.init_code_buffer(256); +// tester.emit(IGen::load32u_rip_s32(RAX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "8b 05 0c 00 00 00"); + +// tester.clear(); +// for (int i = 0; i < 16; i++) { +// tester.emit(IGen::load32u_rip_s32(i, 12)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "8B050C0000008B0D0C0000008B150C0000008B1D0C0000008B250C0000008B2D0C0000008B350C0000008B" +// 
"3D0C000000448B050C000000448B0D0C000000448B150C000000448B1D0C000000448B250C000000448B2D" +// "0C000000448B350C000000448B3D0C000000"); +// } + +// TEST(EmitterLoadsAndStores, load16u_rip) { +// CodeTester tester; +// tester.init_code_buffer(256); +// tester.emit(IGen::load16u_rip_s32(RAX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b7 05 0c 00 00 00"); + +// tester.clear(); +// for (int i = 0; i < 16; i++) { +// tester.emit(IGen::load16u_rip_s32(i, 12)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "480FB7050C000000480FB70D0C000000480FB7150C000000480FB71D0C000000480FB7250C000000480FB7" +// "2D0C000000480FB7350C000000480FB73D0C0000004C0FB7050C0000004C0FB70D0C0000004C0FB7150C00" +// "00004C0FB71D0C0000004C0FB7250C0000004C0FB72D0C0000004C0FB7350C0000004C0FB73D0C000000"); +// } + +// TEST(EmitterLoadsAndStores, load16s_rip) { +// CodeTester tester; +// tester.init_code_buffer(256); +// tester.emit(IGen::load16s_rip_s32(RAX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f bf 05 0c 00 00 00"); + +// tester.clear(); +// for (int i = 0; i < 16; i++) { +// tester.emit(IGen::load16s_rip_s32(i, 12)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "480FBF050C000000480FBF0D0C000000480FBF150C000000480FBF1D0C000000480FBF250C000000480FBF" +// "2D0C000000480FBF350C000000480FBF3D0C0000004C0FBF050C0000004C0FBF0D0C0000004C0FBF150C00" +// "00004C0FBF1D0C0000004C0FBF250C0000004C0FBF2D0C0000004C0FBF350C0000004C0FBF3D0C000000"); +// } + +// TEST(EmitterLoadsAndStores, load8s_rip) { +// CodeTester tester; +// tester.init_code_buffer(256); +// tester.emit(IGen::load8s_rip_s32(RAX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f be 05 0c 00 00 00"); + +// tester.clear(); +// for (int i = 0; i < 16; i++) { +// tester.emit(IGen::load8s_rip_s32(i, 12)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "480FBE050C000000480FBE0D0C000000480FBE150C000000480FBE1D0C000000480FBE250C000000480FBE" +// 
"2D0C000000480FBE350C000000480FBE3D0C0000004C0FBE050C0000004C0FBE0D0C0000004C0FBE150C00" +// "00004C0FBE1D0C0000004C0FBE250C0000004C0FBE2D0C0000004C0FBE350C0000004C0FBE3D0C000000"); +// } + +// TEST(EmitterLoadsAndStores, load8u_rip) { +// CodeTester tester; +// tester.init_code_buffer(256); +// tester.emit(IGen::load8u_rip_s32(RAX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b6 05 0c 00 00 00"); + +// tester.clear(); +// for (int i = 0; i < 16; i++) { +// tester.emit(IGen::load8u_rip_s32(i, 12)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "480FB6050C000000480FB60D0C000000480FB6150C000000480FB61D0C000000480FB6250C000000480FB6" +// "2D0C000000480FB6350C000000480FB63D0C0000004C0FB6050C0000004C0FB60D0C0000004C0FB6150C00" +// "00004C0FB61D0C0000004C0FB6250C0000004C0FB62D0C0000004C0FB6350C0000004C0FB63D0C000000"); +// } + +// TEST(EmitterLoadsAndStores, store64_rip_s32) { +// CodeTester tester; +// tester.init_code_buffer(256); +// tester.emit(IGen::store64_rip_s32(RAX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 89 05 0c 00 00 00"); + +// tester.clear(); +// for (int i = 0; i < 16; i++) { +// tester.emit(IGen::store64_rip_s32(i, 12)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "4889050C00000048890D0C0000004889150C00000048891D0C0000004889250C00000048892D0C00000048" +// "89350C00000048893D0C0000004C89050C0000004C890D0C0000004C89150C0000004C891D0C0000004C89" +// "250C0000004C892D0C0000004C89350C0000004C893D0C000000"); +// } + +// TEST(EmitterLoadsAndStores, store32_rip_s32) { +// CodeTester tester; +// tester.init_code_buffer(256); +// tester.emit(IGen::store32_rip_s32(RAX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "89 05 0c 00 00 00"); + +// tester.clear(); +// for (int i = 0; i < 16; i++) { +// tester.emit(IGen::store32_rip_s32(i, 12)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "89050C000000890D0C00000089150C000000891D0C00000089250C000000892D0C00000089350C00000089" +// 
"3D0C0000004489050C00000044890D0C0000004489150C00000044891D0C0000004489250C00000044892D" +// "0C0000004489350C00000044893D0C000000"); +// } + +// TEST(EmitterLoadsAndStores, store16_rip_s32) { +// CodeTester tester; +// tester.init_code_buffer(256); +// tester.emit(IGen::store16_rip_s32(RAX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "66 89 05 0c 00 00 00"); + +// tester.clear(); +// for (int i = 0; i < 16; i++) { +// tester.emit(IGen::store16_rip_s32(i, 12)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "6689050C00000066890D0C0000006689150C00000066891D0C0000006689250C00000066892D0C00000066" +// "89350C00000066893D0C000000664489050C0000006644890D0C000000664489150C0000006644891D0C00" +// "0000664489250C0000006644892D0C000000664489350C0000006644893D0C000000"); +// } + +// TEST(EmitterLoadsAndStores, store8_rip_s32) { +// CodeTester tester; +// tester.init_code_buffer(256); +// tester.emit(IGen::store8_rip_s32(RAX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "88 05 0c 00 00 00"); + +// tester.clear(); +// for (int i = 0; i < 16; i++) { +// tester.emit(IGen::store8_rip_s32(i, 12)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "88050C000000880D0C00000088150C000000881D0C0000004088250C00000040882D0C0000004088350C00" +// "000040883D0C0000004488050C00000044880D0C0000004488150C00000044881D0C0000004488250C0000" +// "0044882D0C0000004488350C00000044883D0C000000"); +// } + +// TEST(EmitterLoadsAndStores, static_addr) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, 12345)); // load test reg with junk +// int start_of_lea = tester.size(); +// auto lea_instr = IGen::static_addr(i, INT32_MAX); +// tester.emit(lea_instr); +// // patch instruction to lea the start of this code + 1. 
+// tester.write(-start_of_lea - lea_instr.length() + 1, +// start_of_lea + lea_instr.offset_of_disp()); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// auto result = tester.execute(); +// EXPECT_EQ(result, (u64)(tester.data()) + 1); +// } +// } + +// #ifdef __linux__ +// TEST(EmitterXmm32, load32_xmm32_gpr64_plus_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); +// tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64(XMM3, RAX, RBX)); +// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 10 1c 03"); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } +// for (int k = 0; k < 16; k++) { +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // fill k with junk +// tester.emit(IGen::mov_gpr64_u64(i, (iter & 1) ? 0 : UINT64_MAX)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // load into k +// tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64(XMM0 + k, i, j)); +// // move to return +// tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); + +// // prepare the memory: +// float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; + +// // run! 
+// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 3 * sizeof(float), 0, 0), 3.45f); +// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 2 * sizeof(float), 0, 0), 1.23f); +// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 4 * sizeof(float), 0, 0), 5.67f); +// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 5 * sizeof(float), 0, 0), 0); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterXmm32, load32_xmm32_gpr64_plus_gpr64_plus_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); +// tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64_plus_s8(XMM3, RAX, RBX, -1)); +// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 10 5c 03 ff"); + +// auto instr = IGen::load32_xmm32_gpr64_plus_gpr64_plus_s8(XMM3, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } +// for (int k = 0; k < 16; k++) { +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // fill k with junk +// tester.emit(IGen::mov_gpr64_u64(i, (iter & 1) ? 0 : UINT64_MAX)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // load into k +// tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64_plus_s8(XMM0 + k, i, j, -3)); +// // move to return +// tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); + +// // prepare the memory: +// float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; + +// // run! 
+// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 3 * sizeof(float) + 3, 0, +// 0), 3.45f); EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 2 * sizeof(float) + 3, +// 0, 0), 1.23f); EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 4 * sizeof(float) + +// 3, 0, 0), 5.67f); EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 5 * +// sizeof(float) + 3, 0, 0), 0); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterXmm32, load32_xmm32_gpr64_plus_gpr64_plus_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); +// tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64_plus_s32(XMM3, RAX, RBX, -1)); +// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 10 9c 03 ff ff ff ff"); + +// auto instr = IGen::load32_xmm32_gpr64_plus_gpr64_plus_s32(XMM3, RBX, RSI, -1234); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -1234); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } +// for (int k = 0; k < 16; k++) { +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // fill k with junk +// tester.emit(IGen::mov_gpr64_u64(i, (iter & 1) ? 0 : UINT64_MAX)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// s64 offset = (iter & 1) ? INT32_MAX : INT32_MIN; + +// // load into k +// tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64_plus_s32(XMM0 + k, i, j, offset)); +// // move to return +// tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); + +// // return! 
+// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); + +// // prepare the memory: +// float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; + +// // run! +// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 3 * sizeof(float) - offset, 0, 0), +// 3.45f); +// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 2 * sizeof(float) - offset, 0, 0), +// 1.23f); +// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 4 * sizeof(float) - offset, 0, 0), +// 5.67f); +// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 5 * sizeof(float) - offset, 0, 0), +// 0); +// iter++; +// } +// } +// } +// } + +// namespace { +// template +// float as_float(T x) { +// float result; +// memcpy(&result, &x, sizeof(float)); +// return result; +// } + +// u32 as_u32(float x) { +// u32 result; +// memcpy(&result, &x, 4); +// return result; +// } +// } // namespace + +// TEST(EmitterXmm32, store32_xmm32_gpr64_plus_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); +// tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64(RAX, RBX, XMM7)); +// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 11 3c 03"); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// // push args to the stack + +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); // addr2 +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); // addr1 +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); // value + +// // pop value into addr1 GPR +// tester.emit(IGen::pop_gpr64(i)); +// // move to XMM +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); + +// // pop addrs +// tester.emit(IGen::pop_gpr64(i)); +// tester.emit(IGen::pop_gpr64(j)); + +// // store +// tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64(i, 
j, XMM0 + k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); + +// // prepare the memory: +// float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; + +// // run! +// tester.execute((u64)memory, 12, as_u32(1.234f), 0); +// EXPECT_FLOAT_EQ(memory[2], 1.23f); +// EXPECT_FLOAT_EQ(memory[3], 1.234f); +// EXPECT_FLOAT_EQ(memory[4], 5.67f); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterXmm32, store32_xmm32_gpr64_plus_gpr64_plus_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); +// tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64_plus_s8(RAX, RBX, XMM3, -1)); +// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 11 5c 03 ff"); + +// auto instr = IGen::store32_xmm32_gpr64_plus_gpr64_plus_s8(RBX, RSI, XMM3, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } +// for (int k = 0; k < 16; k++) { +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); // addr2 +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); // addr1 +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); // value + +// // pop value into addr1 GPR +// tester.emit(IGen::pop_gpr64(i)); +// // move to XMM +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); + +// // pop addrs +// tester.emit(IGen::pop_gpr64(i)); +// tester.emit(IGen::pop_gpr64(j)); + +// s64 offset = (iter & 1) ? INT8_MAX : INT8_MIN; + +// // load into k +// tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64_plus_s8(i, j, XMM0 + k, offset)); + +// // move to return +// tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); + +// // return! 
+// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); + +// // prepare the memory: +// float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; + +// // run! +// tester.execute((u64)memory, 12 - offset, as_u32(1.234f), 0); +// EXPECT_FLOAT_EQ(memory[2], 1.23f); +// EXPECT_FLOAT_EQ(memory[3], 1.234f); +// EXPECT_FLOAT_EQ(memory[4], 5.67f); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterXmm32, store32_xmm32_gpr64_plus_gpr64_plus_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); +// tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64_plus_s32(RAX, RBX, XMM3, -1)); +// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 11 9c 03 ff ff ff ff"); + +// auto instr = IGen::store32_xmm32_gpr64_plus_gpr64_plus_s32(RBX, RSI, XMM3, -1234); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -1234); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } +// for (int k = 0; k < 16; k++) { +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); // addr2 +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); // addr1 +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); // value + +// // pop value into addr1 GPR +// tester.emit(IGen::pop_gpr64(i)); +// // move to XMM +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); + +// // pop addrs +// tester.emit(IGen::pop_gpr64(i)); +// tester.emit(IGen::pop_gpr64(j)); + +// s64 offset = (iter & 1) ? INT32_MAX : INT32_MIN; + +// // load into k +// tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64_plus_s32(i, j, XMM0 + k, offset)); + +// // move to return +// tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); + +// // return! 
+// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); + +// // prepare the memory: +// float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; + +// // run! +// tester.execute((u64)memory, 12 - offset, as_u32(1.234f), 0); +// EXPECT_FLOAT_EQ(memory[2], 1.23f); +// EXPECT_FLOAT_EQ(memory[3], 1.234f); +// EXPECT_FLOAT_EQ(memory[4], 5.67f); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterXmm32, static_load_xmm32) { +// CodeTester tester; +// tester.init_code_buffer(512); +// for (int i = 0; i < 16; i++) { +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); + +// auto loc_of_load = tester.size(); +// auto load_instr = IGen::static_load_xmm32(XMM0 + i, INT32_MAX); + +// tester.emit(load_instr); +// tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); +// auto loc_of_float = tester.emit_data(float(1.2345f)); + +// // patch offset +// tester.write(loc_of_float - loc_of_load - load_instr.length(), +// loc_of_load + load_instr.offset_of_disp()); + +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_FLOAT_EQ(result, 1.2345f); +// } +// } + +// TEST(EmitterXmm32, static_store_xmm32) { +// CodeTester tester; +// tester.init_code_buffer(512); +// for (int i = 0; i < 16; i++) { +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, tester.get_c_abi_arg_reg(0))); + +// auto loc_of_store = tester.size(); +// auto store_instr = IGen::static_store_xmm32(XMM0 + i, INT32_MAX); + +// tester.emit(store_instr); +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); +// auto loc_of_float = tester.emit_data(float(1.2345f)); + +// tester.write(loc_of_float - loc_of_store - store_instr.length(), +// loc_of_store + store_instr.offset_of_disp()); +// tester.execute(as_u32(-44.567f), 0, 0, 
0); +// EXPECT_FLOAT_EQ(-44.567f, tester.read(loc_of_float)); +// } +// } + +// TEST(EmitterXmm32, ucomiss) { +// CodeTester tester; +// tester.init_code_buffer(512); +// tester.emit(IGen::cmp_flt_flt(XMM13, XMM14)); +// EXPECT_EQ("45 0f 2e ee", tester.dump_to_hex_string()); +// } + +// TEST(EmitterXmm32, mul) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// std::vector vals = {0.f, 1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, 7.545f}; + +// for (auto f : vals) { +// for (auto g : vals) { +// for (int i = 0; i < 16; i++) { +// for (int j = 0; j < 16; j++) { +// if (i == j) { +// continue; +// } +// auto expected = f * g; +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// u64 val = 0; +// memcpy(&val, &f, sizeof(float)); +// tester.emit(IGen::mov_gpr64_u64(RAX, val)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); +// memcpy(&val, &g, sizeof(float)); +// tester.emit(IGen::mov_gpr64_u64(RAX, val)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + j, RAX)); +// tester.emit(IGen::mulss_xmm_xmm(XMM0 + j, XMM0 + i)); +// tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + j)); +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_FLOAT_EQ(result, expected); +// } +// } +// } +// } +// } + +// TEST(EmitterXmm32, div) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// std::vector vals = {1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, 7.545f}; + +// for (auto f : vals) { +// for (auto g : vals) { +// for (int i = 0; i < 16; i++) { +// for (int j = 0; j < 16; j++) { +// if (i == j) { +// continue; +// } +// auto expected = g / f; +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// u64 val = 0; +// memcpy(&val, &f, sizeof(float)); +// tester.emit(IGen::mov_gpr64_u64(RAX, val)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); +// memcpy(&val, &g, sizeof(float)); 
+// tester.emit(IGen::mov_gpr64_u64(RAX, val)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + j, RAX)); +// tester.emit(IGen::divss_xmm_xmm(XMM0 + j, XMM0 + i)); +// tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + j)); +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_FLOAT_EQ(result, expected); +// } +// } +// } +// } +// } + +// TEST(EmitterXmm32, add) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// std::vector vals = {0.f, 1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, 7.545f}; +// for (auto f : vals) { +// for (auto g : vals) { +// for (int i = 0; i < 16; i++) { +// for (int j = 0; j < 16; j++) { +// if (i == j) { +// continue; +// } +// auto expected = g + f; +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// u64 val = 0; +// memcpy(&val, &f, sizeof(float)); +// tester.emit(IGen::mov_gpr64_u64(RAX, val)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); +// memcpy(&val, &g, sizeof(float)); +// tester.emit(IGen::mov_gpr64_u64(RAX, val)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + j, RAX)); +// tester.emit(IGen::addss_xmm_xmm(XMM0 + j, XMM0 + i)); +// tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + j)); +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_FLOAT_EQ(result, expected); +// } +// } +// } +// } +// } + +// TEST(EmitterXmm32, sub) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// std::vector vals = {0.f, 1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, 7.545f}; + +// for (auto f : vals) { +// for (auto g : vals) { +// for (int i = 0; i < 16; i++) { +// for (int j = 0; j < 16; j++) { +// if (i == j) { +// continue; +// } +// auto expected = g - f; +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// u64 val = 0; +// memcpy(&val, &f, 
sizeof(float)); +// tester.emit(IGen::mov_gpr64_u64(RAX, val)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); +// memcpy(&val, &g, sizeof(float)); +// tester.emit(IGen::mov_gpr64_u64(RAX, val)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + j, RAX)); +// tester.emit(IGen::subss_xmm_xmm(XMM0 + j, XMM0 + i)); +// tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + j)); +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_FLOAT_EQ(result, expected); +// } +// } +// } +// } +// } + +// TEST(EmitterXmm32, float_to_int) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// std::vector vals = {0.f, 1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, +// 7.545f, 0.1f, 0.9f, -0.1f, -0.9f}; + +// for (auto g : vals) { +// for (int i = 0; i < 16; i++) { +// for (int j = 0; j < 16; j++) { +// if (j == RSP) { +// continue; +// } +// s32 expected = g; +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// u64 val = 0; +// memcpy(&val, &g, sizeof(float)); +// tester.emit(IGen::mov_gpr64_u64(RAX, val)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); +// tester.emit(IGen::float_to_int32(j, XMM0 + i)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, j)); +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } + +// TEST(EmitterXmm32, int_to_float) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// std::vector vals = {0, 1, -1, INT32_MAX, -3457343, 7, INT32_MIN}; + +// for (auto g : vals) { +// for (int i = 0; i < 16; i++) { +// for (int j = 0; j < 16; j++) { +// if (j == RSP) { +// continue; +// } +// float expected = g; +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(j, g)); +// 
tester.emit(IGen::int32_to_float(XMM0 + i, j)); +// tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } + +// TEST(EmitterSlow, xmm32_move) { +// std::vector u32_constants = {0, INT32_MAX, UINT32_MAX, 17}; + +// // test moving between xmms (32-bit) and gprs. +// CodeTester tester; +// tester.init_code_buffer(512); + +// for (auto constant : u32_constants) { +// for (int r1 = 0; r1 < 16; r1++) { +// if (r1 == RSP) { +// continue; +// } +// for (int r2 = 0; r2 < 16; r2++) { +// if (r2 == RSP) { +// continue; +// } +// for (int r3 = 0; r3 < 16; r3++) { +// for (int r4 = 0; r4 < 16; r4++) { +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// // move constant to gpr +// tester.emit(IGen::mov_gpr64_u32(r1, constant)); +// // move gpr to xmm +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + r3, r1)); +// // move xmm to xmm +// tester.emit(IGen::mov_xmm32_xmm32(XMM0 + r4, XMM0 + r3)); +// // move xmm to gpr +// tester.emit(IGen::movd_gpr32_xmm32(r2, XMM0 + r4)); +// // return! 
+// tester.emit(IGen::mov_gpr64_gpr64(RAX, r2)); +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); +// } +// } +// } +// } +// } +// // todo - finish this test +// } +// #endif + +// TEST(Emitter, LEA) { +// CodeTester tester; +// tester.init_code_buffer(1024); +// tester.emit(IGen::lea_reg_plus_off(RDI, RSP, -3)); +// tester.emit(IGen::lea_reg_plus_off(RDI, R12, -3)); +// tester.emit(IGen::lea_reg_plus_off(R13, RSP, -3)); +// tester.emit(IGen::lea_reg_plus_off(R13, R12, -3)); +// tester.emit(IGen::lea_reg_plus_off(RDI, RSP, -300)); +// tester.emit(IGen::lea_reg_plus_off(RDI, R12, -300)); +// tester.emit(IGen::lea_reg_plus_off(R13, RSP, -300)); +// tester.emit(IGen::lea_reg_plus_off(R13, R12, -300)); +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "488D7C24FD498D7C24FD4C8D6C24FD4D8D6C24FD488DBC24D4FEFFFF498DBC24D4FEFFFF4C8DAC24D4FEFF" +// "FF4D8DAC24D4FEFFFF"); +// } + +// TEST(EmitterXMM, StackLoad32) { +// CodeTester tester; +// tester.init_code_buffer(1024); +// tester.emit(IGen::load32_xmm32_gpr64_plus_s32(XMM0 + 3, RSP, -1234)); +// tester.emit(IGen::load32_xmm32_gpr64_plus_s32(XMM0 + 13, RSP, -1234)); +// EXPECT_EQ(tester.dump_to_hex_string(true), "F30F109C242EFBFFFFF3440F10AC242EFBFFFF"); +// } + +// TEST(EmitterXMM, StackLoad8) { +// CodeTester tester; +// tester.init_code_buffer(1024); +// tester.emit(IGen::load32_xmm32_gpr64_plus_s8(XMM0 + 3, RSP, -12)); +// tester.emit(IGen::load32_xmm32_gpr64_plus_s8(XMM0 + 13, RSP, -12)); +// EXPECT_EQ(tester.dump_to_hex_string(true), "F30F105C24F4F3440F106C24F4"); +// } + +// TEST(EmitterXMM, StackLoadFull32) { +// CodeTester tester; +// tester.init_code_buffer(1024); +// tester.emit(IGen::load128_xmm128_gpr64_s32(XMM0 + 3, RSP, -1234)); +// tester.emit(IGen::load128_xmm128_gpr64_s32(XMM0 + 13, RSP, -1234)); +// EXPECT_EQ(tester.dump_to_hex_string(true), "660F6F9C242EFBFFFF66440F6FAC242EFBFFFF"); +// } + +// TEST(EmitterXMM, StackLoadFull8) { +// CodeTester tester; +// 
tester.init_code_buffer(1024); +// tester.emit(IGen::load128_xmm128_gpr64_s8(XMM0 + 3, RSP, -12)); +// tester.emit(IGen::load128_xmm128_gpr64_s8(XMM0 + 13, RSP, -12)); +// EXPECT_EQ(tester.dump_to_hex_string(true), "660F6F5C24F466440F6F6C24F4"); +// } + +// TEST(EmitterXMM, StackStore32) { +// CodeTester tester; +// tester.init_code_buffer(1024); +// tester.emit(IGen::store32_xmm32_gpr64_plus_s32(RSP, XMM0 + 3, -1234)); +// tester.emit(IGen::store32_xmm32_gpr64_plus_s32(RSP, XMM0 + 13, -1234)); +// EXPECT_EQ(tester.dump_to_hex_string(true), "F30F119C242EFBFFFFF3440F11AC242EFBFFFF"); +// } + +// TEST(EmitterXMM, StackStore8) { +// CodeTester tester; +// tester.init_code_buffer(1024); +// tester.emit(IGen::store32_xmm32_gpr64_plus_s8(RSP, XMM0 + 3, -12)); +// tester.emit(IGen::store32_xmm32_gpr64_plus_s8(RSP, XMM0 + 13, -12)); +// EXPECT_EQ(tester.dump_to_hex_string(true), "F30F115C24F4F3440F116C24F4"); +// } + +// TEST(EmitterXMM, StackStoreFull32) { +// CodeTester tester; +// tester.init_code_buffer(1024); +// tester.emit(IGen::store128_gpr64_xmm128_s32(RSP, XMM0 + 3, -1234)); +// tester.emit(IGen::store128_gpr64_xmm128_s32(RSP, XMM0 + 13, -1234)); +// EXPECT_EQ(tester.dump_to_hex_string(true), "660F7F9C242EFBFFFF66440F7FAC242EFBFFFF"); +// } + +// TEST(EmitterXMM, StackStoreFull8) { +// CodeTester tester; +// tester.init_code_buffer(1024); +// tester.emit(IGen::store128_gpr64_xmm128_s8(RSP, XMM0 + 3, -12)); +// tester.emit(IGen::store128_gpr64_xmm128_s8(RSP, XMM0 + 13, -12)); +// EXPECT_EQ(tester.dump_to_hex_string(true), "660F7F5C24F466440F7F6C24F4"); +// } + +// TEST(EmitterXMM, SqrtS) { +// CodeTester tester; +// tester.init_code_buffer(1024); +// tester.emit(IGen::sqrts_xmm(XMM0 + 1, XMM0 + 2)); +// tester.emit(IGen::sqrts_xmm(XMM0 + 11, XMM0 + 2)); +// tester.emit(IGen::sqrts_xmm(XMM0 + 1, XMM0 + 12)); +// tester.emit(IGen::sqrts_xmm(XMM0 + 11, XMM0 + 12)); +// EXPECT_EQ(tester.dump_to_hex_string(true), "F30F51CAF3440F51DAF3410F51CCF3450F51DC"); +// }