From c63e6bdaaae3f1865c2c32333c68f457815b6b48 Mon Sep 17 00:00:00 2001 From: Tyler Wilding Date: Thu, 26 Oct 2023 13:47:11 -0400 Subject: [PATCH 01/12] arm: fix arm64 compilation errors --- game/graphics/opengl_renderer/background/Shrub.cpp | 2 +- game/graphics/opengl_renderer/background/TFragment.cpp | 2 +- game/graphics/opengl_renderer/background/Tie3.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/game/graphics/opengl_renderer/background/Shrub.cpp b/game/graphics/opengl_renderer/background/Shrub.cpp index 66dba11a46d..172d9613fd3 100644 --- a/game/graphics/opengl_renderer/background/Shrub.cpp +++ b/game/graphics/opengl_renderer/background/Shrub.cpp @@ -240,7 +240,7 @@ void Shrub::render_tree(int idx, interp_time_of_day_slow(settings.camera.itimes, *tree.colors, m_color_result.data()); } #else - interp_time_of_day_slow(settings.itimes, *tree.colors, m_color_result.data()); + interp_time_of_day_slow(settings.camera.itimes, *tree.colors, m_color_result.data()); #endif tree.perf.tod_time.add(interp_timer.getSeconds()); diff --git a/game/graphics/opengl_renderer/background/TFragment.cpp b/game/graphics/opengl_renderer/background/TFragment.cpp index a4efc782869..b92b7478dbb 100644 --- a/game/graphics/opengl_renderer/background/TFragment.cpp +++ b/game/graphics/opengl_renderer/background/TFragment.cpp @@ -430,7 +430,7 @@ void TFragment::render_tree(int geom, interp_time_of_day_slow(settings.camera.itimes, *tree.colors, m_color_result.data()); } #else - interp_time_of_day_slow(settings.itimes, *tree.colors, m_color_result.data()); + interp_time_of_day_slow(settings.camera.itimes, *tree.colors, m_color_result.data()); #endif glActiveTexture(GL_TEXTURE10); glBindTexture(GL_TEXTURE_1D, tree.time_of_day_texture); diff --git a/game/graphics/opengl_renderer/background/Tie3.cpp b/game/graphics/opengl_renderer/background/Tie3.cpp index 51dc5894518..35eca4bf014 100644 --- a/game/graphics/opengl_renderer/background/Tie3.cpp +++ b/game/graphics/opengl_renderer/background/Tie3.cpp @@ -434,7 +434,7 @@ void Tie3::setup_tree(int idx, interp_time_of_day_slow(settings.camera.itimes, *tree.colors, m_color_result.data()); } #else - interp_time_of_day_slow(settings.itimes, *tree.colors, m_color_result.data()); + interp_time_of_day_slow(settings.camera.itimes, *tree.colors, m_color_result.data()); #endif glActiveTexture(GL_TEXTURE10); From 0b667d306a53c90650512267f0c8bffc8d0adaec Mon Sep 17 00:00:00 2001 From: Tyler Wilding Date: Thu, 26 Oct 2023 13:47:21 -0400 Subject: [PATCH 02/12] task: add some tasks to make building via command line faster --- Taskfile.yml | 8 ++++++++ scripts/tasks/Taskfile_darwin.yml | 2 ++ scripts/tasks/Taskfile_linux.yml | 2 ++ scripts/tasks/Taskfile_windows.yml | 2 ++ 4 files changed, 14 insertions(+) diff --git a/Taskfile.yml b/Taskfile.yml index ab9394ac423..af1ccff1639 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -92,6 +92,14 @@ tasks: ignore_error: true - cmd: npx prettier --write ./decompiler/config/jak2/**/*.jsonc ignore_error: true + gen-cmake: + desc: "Generate the CMake" + cmds: + - "cmake -B build --preset={{.CMAKE_PRESET}}" + build: + desc: "Build the project using the generated CMake" + cmds: + - "cmake --build build --parallel {{.CMAKE_NUM_THREADS}}" # DECOMPILING decomp: cmds: diff --git a/scripts/tasks/Taskfile_darwin.yml b/scripts/tasks/Taskfile_darwin.yml index def3dba28ea..753a1897182 100644 --- a/scripts/tasks/Taskfile_darwin.yml +++ b/scripts/tasks/Taskfile_darwin.yml @@ -9,3 +9,5 @@ vars: OFFLINETEST_BIN_RELEASE_DIR: './build' 
GOALCTEST_BIN_RELEASE_DIR: './build' EXE_FILE_EXTENSION: '' + CMAKE_PRESET: 'Release-macos-clang' + CMAKE_NUM_THREADS: '$((`sysctl -n hw.logicalcpu`))' diff --git a/scripts/tasks/Taskfile_linux.yml b/scripts/tasks/Taskfile_linux.yml index def3dba28ea..28db06884c3 100644 --- a/scripts/tasks/Taskfile_linux.yml +++ b/scripts/tasks/Taskfile_linux.yml @@ -9,3 +9,5 @@ vars: OFFLINETEST_BIN_RELEASE_DIR: './build' GOALCTEST_BIN_RELEASE_DIR: './build' EXE_FILE_EXTENSION: '' + CMAKE_PRESET: 'TODO' + CMAKE_NUM_THREADS: '$((`sysctl -n hw.logicalcpu`))' diff --git a/scripts/tasks/Taskfile_windows.yml b/scripts/tasks/Taskfile_windows.yml index 597e5f175ce..a4f3824ab52 100644 --- a/scripts/tasks/Taskfile_windows.yml +++ b/scripts/tasks/Taskfile_windows.yml @@ -9,3 +9,5 @@ vars: OFFLINETEST_BIN_RELEASE_DIR: './out/build/Release/bin' GOALCTEST_BIN_RELEASE_DIR: './out/build/Release/bin' EXE_FILE_EXTENSION: '.exe' + CMAKE_PRESET: 'TODO' + CMAKE_NUM_THREADS: '$((`sysctl -n hw.logicalcpu`))' From 5a0cd2d281607a970b6095b3a9492be534a176b8 Mon Sep 17 00:00:00 2001 From: Tyler Wilding Date: Thu, 26 Oct 2023 20:58:20 -0400 Subject: [PATCH 03/12] arm: start working through `CodeTester` file --- .vscode/launch.json | 22 ++++++++++++++++++++-- Taskfile.yml | 3 +++ common/util/os.cpp | 1 + game/runtime.cpp | 7 +++++++ game/system/hid/input_bindings.h | 2 +- goalc/emitter/CodeTester.cpp | 26 ++++++++++++++++++++++++++ 6 files changed, 58 insertions(+), 3 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 9d77fdb9505..ab489aa64ea 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -4,6 +4,22 @@ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "configurations": [ + { + "name": "Run C++ Tests LLDB", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/goalc-test", + "args": [ + "--gtest_brief=0", + "--gtest_filter=*CodeTester*", + "--gtest_break_on_failure" + ], + "stopAtEntry": false, + "cwd": "${workspaceFolder}", + "environment": [], + "externalConsole": false, + "MIMode": "lldb" + }, { "name": "Append File Docs", "type": "python", @@ -11,7 +27,9 @@ "program": "${workspaceFolder}/scripts/ci/lint-characters.py", "console": "integratedTerminal", "cwd": "${workspaceFolder}", - "args": ["--fix"] + "args": [ + "--fix" + ] }, ] -} +} \ No newline at end of file diff --git a/Taskfile.yml b/Taskfile.yml index af1ccff1639..8715cacde5f 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -180,3 +180,6 @@ tasks: type-test: cmds: - cmd: '{{.GOALCTEST_BIN_RELEASE_DIR}}/goalc-test --gtest_brief=0 --gtest_filter="*Jak2TypeConsistency*" --gtest_break_on_failure' + tests-filtered: + cmds: + - cmd: '{{.GOALCTEST_BIN_RELEASE_DIR}}/goalc-test --gtest_brief=0 --gtest_filter="*{{.FILTER}}*" --gtest_break_on_failure' diff --git a/common/util/os.cpp b/common/util/os.cpp index 0b49c25d4bd..56c33582c8b 100644 --- a/common/util/os.cpp +++ b/common/util/os.cpp @@ -30,6 +30,7 @@ void __cpuidex(int result[4], int eax, int ecx) { : "0"(eax), "2"(ecx)); } #else +// TODO - implement ARM64 detection, check for NEON instead of AVX // for now, just return 0's. void __cpuidex(int result[4], int eax, int ecx) { lg::warn("cpuid not implemented on this platform"); diff --git a/game/runtime.cpp b/game/runtime.cpp index 36f1a9d4ea6..7309b5b83b0 100644 --- a/game/runtime.cpp +++ b/game/runtime.cpp @@ -145,6 +145,13 @@ void deci2_runner(SystemThreadInterface& iface) { void ee_runner(SystemThreadInterface& iface) { prof().root_event(); // Allocate Main RAM. 
Must have execute enabled.
+  // TODO Apple Silicon - You cannot make a page RWX,
+  // or more specifically it can't be both writable and executable at the same time
+  //
+  // https://github.com/zherczeg/sljit/issues/99
+  //
+  // The solution to this is to flip-flop between permissions, or perhaps have two threads:
+  // one that has write permission, and another with execute permission
   if (EE_MEM_LOW_MAP) {
     g_ee_main_mem =
         (u8*)mmap((void*)0x10000000, EE_MAIN_MEM_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE,
diff --git a/game/system/hid/input_bindings.h b/game/system/hid/input_bindings.h
index 20650849e3f..b2207ab9590 100644
--- a/game/system/hid/input_bindings.h
+++ b/game/system/hid/input_bindings.h
@@ -336,7 +336,7 @@ extern const InputBindingGroups DEFAULT_MOUSE_BINDS;
 // So there are some potential solutions but this doesn't feel high priority and this was always an
 // issue.
 struct CommandBinding {
-  enum Source { CONTROLLER, KEYBOARD, MOUSE };
+  enum class Source { CONTROLLER, KEYBOARD, MOUSE };
   CommandBinding(const u32 _host_key, std::function _command)
       : host_key(_host_key), command(_command){};
diff --git a/goalc/emitter/CodeTester.cpp b/goalc/emitter/CodeTester.cpp
index f8f1216f572..2f4aa81b335 100644
--- a/goalc/emitter/CodeTester.cpp
+++ b/goalc/emitter/CodeTester.cpp
@@ -119,7 +119,14 @@ void CodeTester::clear() {
  * Execute the buffered code with no arguments, return the value of RAX.
  */
 u64 CodeTester::execute() {
+#if defined(__APPLE__) && defined(__aarch64__)
+  mprotect(code_buffer, code_buffer_capacity, PROT_EXEC | PROT_READ);
+  auto ret = ((u64(*)())code_buffer)();
+  mprotect(code_buffer, code_buffer_capacity, PROT_WRITE | PROT_READ);
+  return ret;
+#else
   return ((u64(*)())code_buffer)();
+#endif
 }

 /*!
@@ -127,15 +134,34 @@ u64 CodeTester::execute() {
  * arguments will appear in (will handle windows/linux differences)
  */
 u64 CodeTester::execute(u64 in0, u64 in1, u64 in2, u64 in3) {
+#if defined(__APPLE__) && defined(__aarch64__)
+  mprotect(code_buffer, code_buffer_capacity, PROT_EXEC | PROT_READ);
+  auto ret = ((u64(*)(u64, u64, u64, u64))code_buffer)(in0, in1, in2, in3);
+  mprotect(code_buffer, code_buffer_capacity, PROT_WRITE | PROT_READ);
+  return ret;
+#else
   return ((u64(*)(u64, u64, u64, u64))code_buffer)(in0, in1, in2, in3);
+#endif
 }

 /*!
  * Allocate a code buffer of the given size.
 */
 void CodeTester::init_code_buffer(int capacity) {
+// TODO Apple Silicon - You cannot make a page RWX,
+// or more specifically it can't be both writable and executable at the same time
+//
+// https://github.com/zherczeg/sljit/issues/99
+//
+// The solution to this is to flip-flop between permissions, or perhaps have two threads:
+// one that has write permission, and another with execute permission
+#if defined(__APPLE__) && defined(__aarch64__)
+  code_buffer =
+      (u8*)mmap(nullptr, capacity, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+#else
   code_buffer = (u8*)mmap(nullptr, capacity, PROT_EXEC | PROT_READ | PROT_WRITE,
                           MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+#endif
   if (code_buffer == (u8*)(-1)) {
     ASSERT_MSG(false, "[CodeTester] Failed to map memory!");
   }

From dc863b21f2e88f8800eb1838a71d75115ca13114 Mon Sep 17 00:00:00 2001
From: Tyler Wilding
Date: Thu, 26 Oct 2023 20:58:59 -0400
Subject: [PATCH 04/12] arm-wip: start trying to abstract instruction emission code

---
 goalc/CMakeLists.txt | 2 +
 goalc/compiler/IR.cpp | 7 +-
 goalc/emitter/IGen.h | 3527 +++-------
 goalc/emitter/IGenARM64.cpp | 2749 ++++
 goalc/emitter/IGenX86.cpp | 2748 ++++
 goalc/emitter/Instruction.h | 1010 +---
 goalc/emitter/InstructionARM64.h | 29 +
 goalc/emitter/InstructionX86.h | 1011 ++++
 goalc/emitter/ObjectGenerator.cpp | 15 +
 test/test_CodeTester.cpp | 34 +-
 test/test_emitter.cpp | 7802 ++++++++++++++---------
 11 files changed, 11268 insertions(+), 7666 deletions(-)
 create mode 100644 goalc/emitter/IGenARM64.cpp
 create mode 100644 goalc/emitter/IGenX86.cpp
 create mode 100644 goalc/emitter/InstructionARM64.h
 create mode 100644 goalc/emitter/InstructionX86.h

diff --git a/goalc/CMakeLists.txt b/goalc/CMakeLists.txt
index f2ae00b0959..fdcfd61d3ad 100644
--- a/goalc/CMakeLists.txt
+++ b/goalc/CMakeLists.txt
@@ -53,6 +53,8 @@ add_library(compiler
   data_compiler/DataObjectGenerator.cpp
   debugger/Debugger.cpp
   debugger/DebugInfo.cpp
+  emitter/IGenX86.cpp
+  emitter/IGenARM64.cpp
   listener/Listener.cpp
   listener/MemoryMap.cpp
   make/MakeSystem.cpp
diff --git a/goalc/compiler/IR.cpp b/goalc/compiler/IR.cpp
index 37d81a340ad..124e3c20fa6 100644
--- a/goalc/compiler/IR.cpp
+++ b/goalc/compiler/IR.cpp
@@ -862,7 +862,9 @@ RegAllocInstr IR_ConditionalBranch::to_rai() {
 void IR_ConditionalBranch::do_codegen(emitter::ObjectGenerator* gen,
                                       const AllocationResult& allocs,
                                       emitter::IR_Record irec) {
-  Instruction jump_instr(0);
+  Instruction jump_instr;
+  #ifndef __aarch64__
+  jump_instr = InstructionX86(0);
   ASSERT(m_resolved);
   switch (condition.kind) {
     case ConditionKind::EQUAL:
@@ -903,6 +905,9 @@
     default:
       ASSERT(false);
   }
+  #else
+  // TODO - ARM64
+  #endif

   if (condition.is_float) {
     gen->add_instr(
diff --git a/goalc/emitter/IGen.h b/goalc/emitter/IGen.h
index 334666c62e0..cc79c996fec 100644
--- a/goalc/emitter/IGen.h
+++ b/goalc/emitter/IGen.h
@@ -8,2757 +8,804 @@
 #include "common/util/Assert.h"

 namespace emitter {
-class IGen {
- public:
-  //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-  // MOVES
-  //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-  /*!
-   * Move data from src to dst. Moves all 64-bits of the GPR.
-   */
-  static Instruction mov_gpr64_gpr64(Register dst, Register src) {
-    ASSERT(dst.is_gpr());
-    ASSERT(src.is_gpr());
-    Instruction instr(0x89);
-    instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true);
-    return instr;
-  }
-
-  /*!
-   * Move a 64-bit constant into a register.
- */ - static Instruction mov_gpr64_u64(Register dst, uint64_t val) { - ASSERT(dst.is_gpr()); - bool rex_b = false; - auto dst_hw_id = dst.hw_id(); - if (dst_hw_id >= 8) { - dst_hw_id -= 8; - rex_b = true; - } - Instruction instr(0xb8 + dst_hw_id); - instr.set(REX(true, false, false, rex_b)); - instr.set(Imm(8, val)); - return instr; - } - - /*! - * Move a 32-bit constant into a register. Zeros the upper 32 bits. - */ - static Instruction mov_gpr64_u32(Register dst, uint64_t val) { - ASSERT(val <= UINT32_MAX); - ASSERT(dst.is_gpr()); - auto dst_hw_id = dst.hw_id(); - bool rex_b = false; - if (dst_hw_id >= 8) { - dst_hw_id -= 8; - rex_b = true; - } - - Instruction instr(0xb8 + dst_hw_id); - if (rex_b) { - instr.set(REX(false, false, false, rex_b)); - } - instr.set(Imm(4, val)); - return instr; - } - - /*! - * Move a signed 32-bit constant into a register. Sign extends for the upper 32 bits. - * When possible prefer mov_gpr64_u32. (use this only for negative values...) - * This is always bigger than mov_gpr64_u32, but smaller than a mov_gpr_u64. - */ - static Instruction mov_gpr64_s32(Register dst, int64_t val) { - ASSERT(val >= INT32_MIN && val <= INT32_MAX); - ASSERT(dst.is_gpr()); - Instruction instr(0xc7); - instr.set_modrm_and_rex(0, dst.hw_id(), 3, true); - instr.set(Imm(4, val)); - return instr; - } - - /*! - * Move 32-bits of xmm to 32 bits of gpr (no sign extension). - */ - static Instruction movd_gpr32_xmm32(Register dst, Register src) { - ASSERT(dst.is_gpr()); - ASSERT(src.is_xmm()); - Instruction instr(0x66); - instr.set_op2(0x0f); - instr.set_op3(0x7e); - instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - /*! - * Move 32-bits of gpr to 32-bits of xmm (no sign extension) - */ - static Instruction movd_xmm32_gpr32(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_gpr()); - Instruction instr(0x66); - instr.set_op2(0x0f); - instr.set_op3(0x6e); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - /*! - * Move 64-bits of xmm to 64 bits of gpr (no sign extension). - */ - static Instruction movq_gpr64_xmm64(Register dst, Register src) { - ASSERT(dst.is_gpr()); - ASSERT(src.is_xmm()); - Instruction instr(0x66); - instr.set_op2(0x0f); - instr.set_op3(0x7e); - instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); - instr.swap_op0_rex(); - return instr; - } - - /*! - * Move 64-bits of gpr to 64-bits of xmm (no sign extension) - */ - static Instruction movq_xmm64_gpr64(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_gpr()); - Instruction instr(0x66); - instr.set_op2(0x0f); - instr.set_op3(0x6e); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); - instr.swap_op0_rex(); - return instr; - } - - /*! - * Move 32-bits between xmm's - */ - static Instruction mov_xmm32_xmm32(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x10); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - // todo - GPR64 -> XMM64 (zext) - // todo - XMM -> GPR64 - - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // GOAL Loads and Stores - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - /*! - * movsx dst, BYTE PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. 
- */ - static Instruction load8s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0xf); - instr.set_op2(0xbe); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, - false); - return instr; - } - - static Instruction store8_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0x88); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id()); - if (value.id() > RBX) { - instr.add_rex(); - } - return instr; - } - - static Instruction load8s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0xf); - instr.set_op2(0xbe); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - static Instruction store8_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, +namespace IGen { +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// MOVES +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +/*! + * Move data from src to dst. Moves all 64-bits of the GPR. + */ +extern Instruction mov_gpr64_gpr64(Register dst, Register src); + +/*! + * Move a 64-bit constant into a register. + */ +extern Instruction mov_gpr64_u64(Register dst, uint64_t val); + +/*! + * Move a 32-bit constant into a register. Zeros the upper 32 bits. + */ +extern Instruction mov_gpr64_u32(Register dst, uint64_t val); + +/*! + * Move a signed 32-bit constant into a register. Sign extends for the upper 32 bits. + * When possible prefer mov_gpr64_u32. (use this only for negative values...) + * This is always bigger than mov_gpr64_u32, but smaller than a mov_gpr_u64. + */ +extern Instruction mov_gpr64_s32(Register dst, int64_t val); + +/*! + * Move 32-bits of xmm to 32 bits of gpr (no sign extension). + */ +extern Instruction movd_gpr32_xmm32(Register dst, Register src); + +/*! + * Move 32-bits of gpr to 32-bits of xmm (no sign extension) + */ +extern Instruction movd_xmm32_gpr32(Register dst, Register src); + +/*! + * Move 64-bits of xmm to 64 bits of gpr (no sign extension). + */ +extern Instruction movq_gpr64_xmm64(Register dst, Register src); + +/*! + * Move 64-bits of gpr to 64-bits of xmm (no sign extension) + */ +extern Instruction movq_xmm64_gpr64(Register dst, Register src); + +/*! + * Move 32-bits between xmm's + */ +extern Instruction mov_xmm32_xmm32(Register dst, Register src); + +// todo - GPR64 -> XMM64 (zext) +// todo - XMM -> GPR64 + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// GOAL Loads and Stores +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * movsx dst, BYTE PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. 
+ */ +extern Instruction load8s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2); + +extern Instruction store8_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value); + +extern Instruction load8s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction store8_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset); + +extern Instruction load8s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction store8_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset); + +/*! + * movzx dst, BYTE PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. + */ +extern Instruction load8u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2); + +extern Instruction load8u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction load8u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset); + +/*! + * movsx dst, WORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. + */ +extern Instruction load16s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2); + +extern Instruction store16_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value); + +extern Instruction store16_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset); + +extern Instruction store16_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, Register addr2, Register value, - s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x88); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - if (value.id() > RBX) { - instr.add_rex(); - } - return instr; - } - - static Instruction load8s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0xf); - instr.set_op2(0xbe); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - static Instruction store8_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, - Register addr2, - Register value, - s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x88); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - if (value.id() > RBX) { - instr.add_rex(); - } - return instr; - } - - /*! - * movzx dst, BYTE PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. 
- */ - static Instruction load8u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0xf); - instr.set_op2(0xb6); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, - false); - return instr; - } - - static Instruction load8u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + s64 offset); + +extern Instruction load16s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction load16s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, Register addr1, Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0xf); - instr.set_op2(0xb6); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - static Instruction load8u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0xf); - instr.set_op2(0xb6); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - /*! - * movsx dst, WORD PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. - */ - static Instruction load16s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0xf); - instr.set_op2(0xbf); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, - false); - return instr; - } - - static Instruction store16_gpr64_gpr64_plus_gpr64(Register addr1, - Register addr2, - Register value) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0x66); - instr.set_op2(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id()); - instr.swap_op0_rex(); // why????? - return instr; - } - - static Instruction store16_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, - Register addr2, - Register value, - s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x66); - instr.set_op2(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - instr.swap_op0_rex(); // why????? 
- return instr; - } - - static Instruction store16_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, - Register addr2, - Register value, - s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x66); - instr.set_op2(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - instr.swap_op0_rex(); // why????? - return instr; - } - - static Instruction load16s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0xf); - instr.set_op2(0xbf); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - static Instruction load16s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0xf); - instr.set_op2(0xbf); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - /*! - * movzx dst, WORD PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. - */ - static Instruction load16u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0xf); - instr.set_op2(0xb7); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, - false); - return instr; - } - - static Instruction load16u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0xf); - instr.set_op2(0xb7); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - static Instruction load16u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0xf); - instr.set_op2(0xb7); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - /*! - * movsxd dst, DWORD PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. 
- */ - static Instruction load32s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0x63); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true); - return instr; - } - - static Instruction store32_gpr64_gpr64_plus_gpr64(Register addr1, - Register addr2, - Register value) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id()); - return instr; - } - - static Instruction load32s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x63); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - static Instruction store32_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, - Register addr2, - Register value, - s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - return instr; - } - - static Instruction load32s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x63); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - static Instruction store32_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, - Register addr2, - Register value, - s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - return instr; - } - - /*! - * movzxd dst, DWORD PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. 
- */ - static Instruction load32u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0x8b); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id()); - return instr; - } - - static Instruction load32u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x8b); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - return instr; - } - - static Instruction load32u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x8b); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - return instr; - } - - /*! - * mov dst, QWORD PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. - */ - static Instruction load64_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0x8b); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true); - return instr; - } - - static Instruction store64_gpr64_gpr64_plus_gpr64(Register addr1, - Register addr2, - Register value) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - true); - return instr; - } - - static Instruction load64_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + s64 offset); + +/*! + * movzx dst, WORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. 
+ */ +extern Instruction load16u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2); + +extern Instruction load16u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction load16u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, Register addr1, Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x8b); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - static Instruction store64_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, - Register addr2, - Register value, - s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - static Instruction load64_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x8b); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - static Instruction store64_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, - Register addr2, - Register value, - s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; - } - - static Instruction store_goal_vf(Register addr, Register value, Register off, s64 offset) { - if (offset == 0) { - return storevf_gpr64_plus_gpr64(value, addr, off); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return storevf_gpr64_plus_gpr64_plus_s8(value, addr, off, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return storevf_gpr64_plus_gpr64_plus_s32(value, addr, off, offset); - } - ASSERT(false); - return {0}; - } - - static Instruction store_goal_gpr(Register addr, - Register value, - Register off, - int offset, - int size) { - switch (size) { - case 1: - if (offset == 0) { - return store8_gpr64_gpr64_plus_gpr64(addr, off, value); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return store8_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return store8_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); - } else { - ASSERT(false); - } - case 2: - if (offset == 0) { - return store16_gpr64_gpr64_plus_gpr64(addr, off, value); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return store16_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return store16_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); - } else { - 
ASSERT(false); - } - case 4: - if (offset == 0) { - return store32_gpr64_gpr64_plus_gpr64(addr, off, value); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return store32_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return store32_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); - } else { - ASSERT(false); - } - case 8: - if (offset == 0) { - return store64_gpr64_gpr64_plus_gpr64(addr, off, value); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return store64_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return store64_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); - } else { - ASSERT(false); - } - default: - ASSERT(false); - return {0}; - } - } - - static Instruction load_goal_xmm128(Register dst, Register addr, Register off, int offset) { - if (offset == 0) { - return loadvf_gpr64_plus_gpr64(dst, addr, off); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return loadvf_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return loadvf_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - } else { - ASSERT(false); - return {0}; - } - } - - /*! - * Load memory at addr + offset, where addr is a GOAL pointer and off is the offset register. - * This will pick the appropriate fancy addressing mode instruction. - */ - static Instruction load_goal_gpr(Register dst, - Register addr, - Register off, - int offset, - int size, - bool sign_extend) { - switch (size) { - case 1: - if (offset == 0) { - if (sign_extend) { - return load8s_gpr64_gpr64_plus_gpr64(dst, addr, off); - } else { - return load8u_gpr64_gpr64_plus_gpr64(dst, addr, off); - } - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - if (sign_extend) { - return load8s_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - } else { - return load8u_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - } - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - if (sign_extend) { - return load8s_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - } else { - return load8u_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - } - } else { - ASSERT(false); - } - case 2: - if (offset == 0) { - if (sign_extend) { - return load16s_gpr64_gpr64_plus_gpr64(dst, addr, off); - } else { - return load16u_gpr64_gpr64_plus_gpr64(dst, addr, off); - } - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - if (sign_extend) { - return load16s_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - } else { - return load16u_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - } - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - if (sign_extend) { - return load16s_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - } else { - return load16u_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - } - } else { - ASSERT(false); - } - case 4: - if (offset == 0) { - if (sign_extend) { - return load32s_gpr64_gpr64_plus_gpr64(dst, addr, off); - } else { - return load32u_gpr64_gpr64_plus_gpr64(dst, addr, off); - } - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - if (sign_extend) { - return load32s_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - } else { - return load32u_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - } - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - if (sign_extend) { - return 
load32s_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - } else { - return load32u_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - } - } else { - ASSERT(false); - } - case 8: - if (offset == 0) { - return load64_gpr64_gpr64_plus_gpr64(dst, addr, off); - - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return load64_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return load64_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - - } else { - ASSERT(false); - } - default: - ASSERT(false); - return {0}; - } - } - - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // LOADS n' STORES - XMM32 - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - static Instruction store32_xmm32_gpr64_plus_gpr64(Register addr1, - Register addr2, - Register xmm_value) { - ASSERT(xmm_value.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); + s64 offset); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x11); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(xmm_value.hw_id(), addr1.hw_id(), addr2.hw_id()); +/*! + * movsxd dst, DWORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. + */ +extern Instruction load32s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2); - instr.swap_op0_rex(); - return instr; - } +extern Instruction store32_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value); - static Instruction load32_xmm32_gpr64_plus_gpr64(Register xmm_dest, - Register addr1, - Register addr2) { - ASSERT(xmm_dest.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x10); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(xmm_dest.hw_id(), addr1.hw_id(), addr2.hw_id()); - - instr.swap_op0_rex(); - return instr; - } - - static Instruction store32_xmm32_gpr64_plus_gpr64_plus_s8(Register addr1, - Register addr2, - Register xmm_value, - s64 offset) { - ASSERT(xmm_value.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x11); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(xmm_value.hw_id(), addr1.hw_id(), - addr2.hw_id(), offset, false); - - instr.swap_op0_rex(); - return instr; - } - - static Instruction load32_xmm32_gpr64_plus_gpr64_plus_s8(Register xmm_dest, +extern Instruction load32s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction store32_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset); + +extern Instruction load32s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, Register addr1, Register addr2, - s64 offset) { - ASSERT(xmm_dest.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x10); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(xmm_dest.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - - instr.swap_op0_rex(); - return instr; - } - - static Instruction store32_xmm32_gpr64_plus_gpr64_plus_s32(Register addr1, - Register addr2, - Register xmm_value, - s64 offset) { - ASSERT(xmm_value.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - - Instruction instr(0xf3); - instr.set_op2(0x0f); - 
instr.set_op3(0x11); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(xmm_value.hw_id(), addr1.hw_id(), - addr2.hw_id(), offset, false); - - instr.swap_op0_rex(); - return instr; - } - - static Instruction lea_reg_plus_off32(Register dest, Register base, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(base.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x8d); - instr.set_modrm_rex_sib_for_reg_reg_disp(dest.hw_id(), 2, base.hw_id(), true); - instr.set(Imm(4, offset)); - return instr; - } - - static Instruction lea_reg_plus_off8(Register dest, Register base, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(base.is_gpr()); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x8d); - instr.set_modrm_rex_sib_for_reg_reg_disp(dest.hw_id(), 1, base.hw_id(), true); - instr.set(Imm(1, offset)); - return instr; - } - - static Instruction lea_reg_plus_off(Register dest, Register base, s64 offset) { - if (offset >= INT8_MIN && offset <= INT8_MAX) { - return lea_reg_plus_off8(dest, base, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return lea_reg_plus_off32(dest, base, offset); - } else { - ASSERT(false); - return {0}; - } - } - - static Instruction store32_xmm32_gpr64_plus_s32(Register base, Register xmm_value, s64 offset) { - ASSERT(xmm_value.is_xmm()); - ASSERT(base.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x11); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 2, base.hw_id(), false); - instr.set(Imm(4, offset)); - instr.swap_op0_rex(); - return instr; - } - - static Instruction store32_xmm32_gpr64_plus_s8(Register base, Register xmm_value, s64 offset) { - ASSERT(xmm_value.is_xmm()); - ASSERT(base.is_gpr()); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x11); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 1, base.hw_id(), false); - instr.set(Imm(1, offset)); - instr.swap_op0_rex(); - return instr; - } - - static Instruction load32_xmm32_gpr64_plus_gpr64_plus_s32(Register xmm_dest, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(xmm_dest.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x10); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(xmm_dest.hw_id(), addr1.hw_id(), - addr2.hw_id(), offset, false); - - instr.swap_op0_rex(); - return instr; - } - - static Instruction load32_xmm32_gpr64_plus_s32(Register xmm_dest, Register base, s64 offset) { - ASSERT(xmm_dest.is_xmm()); - ASSERT(base.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x10); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 2, base.hw_id(), false); - instr.set(Imm(4, offset)); - instr.swap_op0_rex(); - return instr; - } - - static Instruction load32_xmm32_gpr64_plus_s8(Register xmm_dest, Register base, s64 offset) { - ASSERT(xmm_dest.is_xmm()); - ASSERT(base.is_gpr()); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x10); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 1, base.hw_id(), false); - instr.set(Imm(1, offset)); - instr.swap_op0_rex(); - return instr; - } - - static Instruction load_goal_xmm32(Register xmm_dest, Register addr, 
Register off, s64 offset) { - if (offset == 0) { - return load32_xmm32_gpr64_plus_gpr64(xmm_dest, addr, off); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return load32_xmm32_gpr64_plus_gpr64_plus_s8(xmm_dest, addr, off, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return load32_xmm32_gpr64_plus_gpr64_plus_s32(xmm_dest, addr, off, offset); - } else { - ASSERT(false); - return {0}; - } - } - - static Instruction store_goal_xmm32(Register addr, Register xmm_value, Register off, s64 offset) { - if (offset == 0) { - return store32_xmm32_gpr64_plus_gpr64(addr, off, xmm_value); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return store32_xmm32_gpr64_plus_gpr64_plus_s8(addr, off, xmm_value, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return store32_xmm32_gpr64_plus_gpr64_plus_s32(addr, off, xmm_value, offset); - } else { - ASSERT(false); - return {0}; - } - } - - static Instruction store_reg_offset_xmm32(Register base, Register xmm_value, s64 offset) { - ASSERT(base.is_gpr()); - ASSERT(xmm_value.is_xmm()); - if (offset >= INT8_MIN && offset <= INT8_MAX) { - return store32_xmm32_gpr64_plus_s8(base, xmm_value, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return store32_xmm32_gpr64_plus_s32(base, xmm_value, offset); - } else { - ASSERT(false); - return {0}; - } - } - - static Instruction load_reg_offset_xmm32(Register xmm_dest, Register base, s64 offset) { - ASSERT(base.is_gpr()); - ASSERT(xmm_dest.is_xmm()); - if (offset >= INT8_MIN && offset <= INT8_MAX) { - return load32_xmm32_gpr64_plus_s8(xmm_dest, base, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return load32_xmm32_gpr64_plus_s32(xmm_dest, base, offset); - } else { - ASSERT(false); - return {0}; - } - } - - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // LOADS n' STORES - XMM128 - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - /*! 
- * Store a 128-bit xmm into an address stored in a register, no offset - */ - static Instruction store128_gpr64_xmm128(Register gpr_addr, Register xmm_value) { - ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_value.is_xmm()); - Instruction instr(0x66); - // Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x7f); - instr.set_modrm_and_rex_for_reg_addr(xmm_value.hw_id(), gpr_addr.hw_id(), false); - instr.swap_op0_rex(); - return instr; - } - - static Instruction store128_gpr64_xmm128_s32(Register gpr_addr, Register xmm_value, s64 offset) { - ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_value.is_xmm()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x66); - // Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x7f); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 2, gpr_addr.hw_id(), false); - instr.set(Imm(4, offset)); - instr.swap_op0_rex(); - return instr; - } - - static Instruction store128_gpr64_xmm128_s8(Register gpr_addr, Register xmm_value, s64 offset) { - ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_value.is_xmm()); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x66); - // Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x7f); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 1, gpr_addr.hw_id(), false); - instr.set(Imm(1, offset)); - instr.swap_op0_rex(); - return instr; - } - - static Instruction load128_xmm128_gpr64(Register xmm_dest, Register gpr_addr) { - ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_dest.is_xmm()); - Instruction instr(0x66); - // Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x6f); - instr.set_modrm_and_rex_for_reg_addr(xmm_dest.hw_id(), gpr_addr.hw_id(), false); - instr.swap_op0_rex(); - return instr; - } - - static Instruction load128_xmm128_gpr64_s32(Register xmm_dest, Register gpr_addr, s64 offset) { - ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_dest.is_xmm()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x66); - // Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x6f); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 2, gpr_addr.hw_id(), false); - instr.set(Imm(4, offset)); - instr.swap_op0_rex(); - return instr; - } - - static Instruction load128_xmm128_gpr64_s8(Register xmm_dest, Register gpr_addr, s64 offset) { - ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_dest.is_xmm()); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x66); - // Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x6f); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 1, gpr_addr.hw_id(), false); - instr.set(Imm(1, offset)); - instr.swap_op0_rex(); - return instr; - } - - static Instruction load128_xmm128_reg_offset(Register xmm_dest, Register base, s64 offset) { - if (offset == 0) { - return load128_xmm128_gpr64(xmm_dest, base); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return load128_xmm128_gpr64_s8(xmm_dest, base, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return load128_xmm128_gpr64_s32(xmm_dest, base, offset); - } else { - ASSERT(false); - return {0}; - } - } - - static Instruction store128_xmm128_reg_offset(Register base, Register xmm_val, s64 offset) { - if (offset == 0) { - return store128_gpr64_xmm128(base, xmm_val); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return store128_gpr64_xmm128_s8(base, xmm_val, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return 
store128_gpr64_xmm128_s32(base, xmm_val, offset); - } else { - ASSERT(false); - return {0}; - } - } - - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // RIP loads and stores - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - static Instruction load64_rip_s32(Register dest, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x8b); - instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); - return instr; - } - - static Instruction load32s_rip_s32(Register dest, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x63); - instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); - return instr; - } - - static Instruction load32u_rip_s32(Register dest, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x8b); - instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, false); - return instr; - } - - static Instruction load16u_rip_s32(Register dest, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0xf); - instr.set_op2(0xb7); - instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); - return instr; - } - - static Instruction load16s_rip_s32(Register dest, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0xf); - instr.set_op2(0xbf); - instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); - return instr; - } - - static Instruction load8u_rip_s32(Register dest, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0xf); - instr.set_op2(0xb6); - instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); - return instr; - } - - static Instruction load8s_rip_s32(Register dest, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0xf); - instr.set_op2(0xbe); - instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); - return instr; - } - - static Instruction static_load(Register dest, s64 offset, int size, bool sign_extend) { - switch (size) { - case 1: - if (sign_extend) { - return load8s_rip_s32(dest, offset); - } else { - return load8u_rip_s32(dest, offset); - } - break; - case 2: - if (sign_extend) { - return load16s_rip_s32(dest, offset); - } else { - return load16u_rip_s32(dest, offset); - } - break; - case 4: - if (sign_extend) { - return load32s_rip_s32(dest, offset); - } else { - return load32u_rip_s32(dest, offset); - } - break; - case 8: - return load64_rip_s32(dest, offset); - default: - ASSERT(false); - } - } - - static Instruction store64_rip_s32(Register src, s64 offset) { - ASSERT(src.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x89); - instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, true); - return instr; - } - - static Instruction store32_rip_s32(Register src, s64 offset) { - ASSERT(src.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x89); - instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, false); - return instr; - } - - static Instruction store16_rip_s32(Register src, s64 offset) { - ASSERT(src.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - ASSERT(offset >= 
INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x66); - instr.set_op2(0x89); - instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, false); - instr.swap_op0_rex(); - return instr; - } - - static Instruction store8_rip_s32(Register src, s64 offset) { - ASSERT(src.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x88); - instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, false); - if (src.id() > RBX) { - instr.add_rex(); - } - return instr; - } - - static Instruction static_store(Register value, s64 offset, int size) { - switch (size) { - case 1: - return store8_rip_s32(value, offset); - case 2: - return store16_rip_s32(value, offset); - case 4: - return store32_rip_s32(value, offset); - case 8: - return store64_rip_s32(value, offset); - default: - ASSERT(false); - } - } - - static Instruction static_addr(Register dst, s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x8d); - instr.set_modrm_and_rex_for_rip_plus_s32(dst.hw_id(), offset, true); - return instr; - } - - static Instruction static_load_xmm32(Register xmm_dest, s64 offset) { - ASSERT(xmm_dest.is_xmm()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x10); - instr.set_modrm_and_rex_for_rip_plus_s32(xmm_dest.hw_id(), offset, false); - - instr.swap_op0_rex(); - return instr; - } - - static Instruction static_store_xmm32(Register xmm_value, s64 offset) { - ASSERT(xmm_value.is_xmm()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x11); - instr.set_modrm_and_rex_for_rip_plus_s32(xmm_value.hw_id(), offset, false); - - instr.swap_op0_rex(); - return instr; - } - - // TODO, special load/stores of 128 bit values. - - // TODO, consider specialized stack loads and stores? - static Instruction load64_gpr64_plus_s32(Register dst_reg, int32_t offset, Register src_reg) { - ASSERT(dst_reg.is_gpr()); - ASSERT(src_reg.is_gpr()); - Instruction instr(0x8b); - instr.set_modrm_rex_sib_for_reg_reg_disp(dst_reg.hw_id(), 2, src_reg.hw_id(), true); - instr.set_disp(Imm(4, offset)); - return instr; - } - - /*! - * Store 64-bits from gpr into memory located at 64-bit reg + 32-bit signed offset. - */ - static Instruction store64_gpr64_plus_s32(Register addr, int32_t offset, Register value) { - ASSERT(addr.is_gpr()); - ASSERT(value.is_gpr()); - Instruction instr(0x89); - instr.set_modrm_rex_sib_for_reg_reg_disp(value.hw_id(), 2, addr.hw_id(), true); - instr.set_disp(Imm(4, offset)); - return instr; - } - - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // FUNCTION STUFF - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - /*! - * Function return. Pops the 64-bit return address (real) off the stack and jumps to it. - */ - static Instruction ret() { return Instruction(0xc3); } - - /*! - * Instruction to push gpr (64-bits) onto the stack - */ - static Instruction push_gpr64(Register reg) { - ASSERT(reg.is_gpr()); - if (reg.hw_id() >= 8) { - auto i = Instruction(0x50 + reg.hw_id() - 8); - i.set(REX(false, false, false, true)); - return i; - } - return Instruction(0x50 + reg.hw_id()); - } - - /*! 
- * Instruction to pop 64 bit gpr from the stack - */ - static Instruction pop_gpr64(Register reg) { - ASSERT(reg.is_gpr()); - if (reg.hw_id() >= 8) { - auto i = Instruction(0x58 + reg.hw_id() - 8); - i.set(REX(false, false, false, true)); - return i; - } - return Instruction(0x58 + reg.hw_id()); - } - - /*! - * Call a function stored in a 64-bit gpr - */ - static Instruction call_r64(Register reg_) { - ASSERT(reg_.is_gpr()); - auto reg = reg_.hw_id(); - Instruction instr(0xff); - if (reg >= 8) { - instr.set(REX(false, false, false, true)); - reg -= 8; - } - ASSERT(reg < 8); - ModRM mrm; - mrm.rm = reg; - mrm.reg_op = 2; - mrm.mod = 3; - instr.set(mrm); - return instr; - } - - /*! - * Jump to an x86-64 address stored in a 64-bit gpr. - */ - static Instruction jmp_r64(Register reg_) { - ASSERT(reg_.is_gpr()); - auto reg = reg_.hw_id(); - Instruction instr(0xff); - if (reg >= 8) { - instr.set(REX(false, false, false, true)); - reg -= 8; - } - ASSERT(reg < 8); - ModRM mrm; - mrm.rm = reg; - mrm.reg_op = 4; - mrm.mod = 3; - instr.set(mrm); - return instr; - } - - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // INTEGER MATH - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - static Instruction sub_gpr64_imm8s(Register reg, int64_t imm) { - ASSERT(reg.is_gpr()); - ASSERT(imm >= INT8_MIN && imm <= INT8_MAX); - // SUB r/m64, imm8 : REX.W + 83 /5 ib - Instruction instr(0x83); - instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); - instr.set(Imm(1, imm)); - return instr; - } - - static Instruction sub_gpr64_imm32s(Register reg, int64_t imm) { - ASSERT(reg.is_gpr()); - ASSERT(imm >= INT32_MIN && imm <= INT32_MAX); - Instruction instr(0x81); - instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); - instr.set(Imm(4, imm)); - return instr; - } - - static Instruction add_gpr64_imm8s(Register reg, int64_t v) { - ASSERT(v >= INT8_MIN && v <= INT8_MAX); - Instruction instr(0x83); - instr.set_modrm_and_rex(0, reg.hw_id(), 3, true); - instr.set(Imm(1, v)); - return instr; - } - - static Instruction add_gpr64_imm32s(Register reg, int64_t v) { - ASSERT(v >= INT32_MIN && v <= INT32_MAX); - Instruction instr(0x81); - instr.set_modrm_and_rex(0, reg.hw_id(), 3, true); - instr.set(Imm(4, v)); - return instr; - } - - static Instruction add_gpr64_imm(Register reg, int64_t imm) { - if (imm >= INT8_MIN && imm <= INT8_MAX) { - return add_gpr64_imm8s(reg, imm); - } else if (imm >= INT32_MIN && imm <= INT32_MAX) { - return add_gpr64_imm32s(reg, imm); - } else { - throw std::runtime_error("Invalid `add` with reg[" + reg.print() + "]/imm[" + - std::to_string(imm) + "]"); - } - } - - static Instruction sub_gpr64_imm(Register reg, int64_t imm) { - if (imm >= INT8_MIN && imm <= INT8_MAX) { - return sub_gpr64_imm8s(reg, imm); - } else if (imm >= INT32_MIN && imm <= INT32_MAX) { - return sub_gpr64_imm32s(reg, imm); - } else { - throw std::runtime_error("Invalid `sub` with reg[" + reg.print() + "]/imm[" + - std::to_string(imm) + "]"); - } - } - - static Instruction add_gpr64_gpr64(Register dst, Register src) { - Instruction instr(0x01); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); - return instr; - } - - static Instruction sub_gpr64_gpr64(Register dst, Register src) { - Instruction instr(0x29); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); - return instr; - } - - /*! - * Multiply gprs (32-bit, signed). 
- * (Note - probably worth doing imul on gpr64's to implement the EE's unsigned multiply) - */ - static Instruction imul_gpr32_gpr32(Register dst, Register src) { - Instruction instr(0xf); - instr.set_op2(0xaf); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - return instr; - } - - /*! - * Multiply gprs (64-bit, signed). - * DANGER - this treats all operands as 64-bit. This is not like the EE. - */ - static Instruction imul_gpr64_gpr64(Register dst, Register src) { - Instruction instr(0xf); - instr.set_op2(0xaf); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); - return instr; - } - - /*! - * Divide (idiv, 32 bit) - */ - static Instruction idiv_gpr32(Register reg) { - Instruction instr(0xf7); - ASSERT(reg.is_gpr()); - instr.set_modrm_and_rex(7, reg.hw_id(), 3, false); - return instr; - } - - static Instruction unsigned_div_gpr32(Register reg) { - Instruction instr(0xf7); - ASSERT(reg.is_gpr()); - instr.set_modrm_and_rex(6, reg.hw_id(), 3, false); - return instr; - } - - /*! - * Convert doubleword to quadword for division. - */ - static Instruction cdq() { - Instruction instr(0x99); - return instr; - } - - /*! - * Move from gpr32 to gpr64, with sign extension. - * Needed for multiplication/divsion madness. - */ - static Instruction movsx_r64_r32(Register dst, Register src) { - Instruction instr(0x63); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); - return instr; - } - - /*! - * Compare gpr64. This sets the flags for the jumps. - * todo UNTESTED - */ - static Instruction cmp_gpr64_gpr64(Register a, Register b) { - Instruction instr(0x3b); - ASSERT(a.is_gpr()); - ASSERT(b.is_gpr()); - instr.set_modrm_and_rex(a.hw_id(), b.hw_id(), 3, true); - return instr; - } - - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // BIT STUFF - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - /*! - * Or of two gprs - */ - static Instruction or_gpr64_gpr64(Register dst, Register src) { - Instruction instr(0x0b); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); - return instr; - } - - /*! - * And of two gprs - */ - static Instruction and_gpr64_gpr64(Register dst, Register src) { - Instruction instr(0x23); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); - return instr; - } - - /*! - * Xor of two gprs - */ - static Instruction xor_gpr64_gpr64(Register dst, Register src) { - Instruction instr(0x33); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); - return instr; - } - - /*! - * Bitwise not a gpr - */ - static Instruction not_gpr64(Register reg) { - Instruction instr(0xf7); - ASSERT(reg.is_gpr()); - instr.set_modrm_and_rex(2, reg.hw_id(), 3, true); - return instr; - } - - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // SHIFTS - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - /*! - * Shift 64-bit gpr left by CL register - */ - static Instruction shl_gpr64_cl(Register reg) { - ASSERT(reg.is_gpr()); - Instruction instr(0xd3); - instr.set_modrm_and_rex(4, reg.hw_id(), 3, true); - return instr; - } - - /*! - * Shift 64-bit gpr right (logical) by CL register - */ - static Instruction shr_gpr64_cl(Register reg) { - ASSERT(reg.is_gpr()); - Instruction instr(0xd3); - instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); - return instr; - } - - /*! 
- * Shift 64-bit gpr right (arithmetic) by CL register - */ - static Instruction sar_gpr64_cl(Register reg) { - ASSERT(reg.is_gpr()); - Instruction instr(0xd3); - instr.set_modrm_and_rex(7, reg.hw_id(), 3, true); - return instr; - } - - /*! - * Shift 64-ptr left (logical) by the constant shift amount "sa". - */ - static Instruction shl_gpr64_u8(Register reg, uint8_t sa) { - ASSERT(reg.is_gpr()); - Instruction instr(0xc1); - instr.set_modrm_and_rex(4, reg.hw_id(), 3, true); - instr.set(Imm(1, sa)); - return instr; - } - - /*! - * Shift 64-ptr right (logical) by the constant shift amount "sa". - */ - static Instruction shr_gpr64_u8(Register reg, uint8_t sa) { - ASSERT(reg.is_gpr()); - Instruction instr(0xc1); - instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); - instr.set(Imm(1, sa)); - return instr; - } - - /*! - * Shift 64-ptr right (arithmetic) by the constant shift amount "sa". - */ - static Instruction sar_gpr64_u8(Register reg, uint8_t sa) { - ASSERT(reg.is_gpr()); - Instruction instr(0xc1); - instr.set_modrm_and_rex(7, reg.hw_id(), 3, true); - instr.set(Imm(1, sa)); - return instr; - } - - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // CONTROL FLOW - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - /*! - * Jump, 32-bit constant offset. The offset is by default 0 and must be patched later. - */ - static Instruction jmp_32() { - Instruction instr(0xe9); - instr.set(Imm(4, 0)); - return instr; - } - - /*! - * Jump if equal. - */ - static Instruction je_32() { - Instruction instr(0x0f); - instr.set_op2(0x84); - instr.set(Imm(4, 0)); - return instr; - } - - /*! - * Jump not equal. - */ - static Instruction jne_32() { - Instruction instr(0x0f); - instr.set_op2(0x85); - instr.set(Imm(4, 0)); - return instr; - } - - /*! - * Jump less than or equal. - */ - static Instruction jle_32() { - Instruction instr(0x0f); - instr.set_op2(0x8e); - instr.set(Imm(4, 0)); - return instr; - } - - /*! - * Jump greater than or equal. - */ - static Instruction jge_32() { - Instruction instr(0x0f); - instr.set_op2(0x8d); - instr.set(Imm(4, 0)); - return instr; - } - - /*! - * Jump less than - */ - static Instruction jl_32() { - Instruction instr(0x0f); - instr.set_op2(0x8c); - instr.set(Imm(4, 0)); - return instr; - } - - /*! - * Jump greater than - */ - static Instruction jg_32() { - Instruction instr(0x0f); - instr.set_op2(0x8f); - instr.set(Imm(4, 0)); - return instr; - } - - /*! - * Jump below or equal - */ - static Instruction jbe_32() { - Instruction instr(0x0f); - instr.set_op2(0x86); - instr.set(Imm(4, 0)); - return instr; - } - - /*! - * Jump above or equal - */ - static Instruction jae_32() { - Instruction instr(0x0f); - instr.set_op2(0x83); - instr.set(Imm(4, 0)); - return instr; - } - - /*! - * Jump below - */ - static Instruction jb_32() { - Instruction instr(0x0f); - instr.set_op2(0x82); - instr.set(Imm(4, 0)); - return instr; - } - - /*! - * Jump above - */ - static Instruction ja_32() { - Instruction instr(0x0f); - instr.set_op2(0x87); - instr.set(Imm(4, 0)); - return instr; - } - - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // FLOAT MATH - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - /*! 
- * Compare two floats and set flag register for jump (ucomiss) - */ - static Instruction cmp_flt_flt(Register a, Register b) { - ASSERT(a.is_xmm()); - ASSERT(b.is_xmm()); - Instruction instr(0x0f); - instr.set_op2(0x2e); - instr.set_modrm_and_rex(a.hw_id(), b.hw_id(), 3, false); - return instr; - } - - static Instruction sqrts_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x51); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - /*! - * Multiply two floats in xmm's - */ - static Instruction mulss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x59); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - /*! - * Divide two floats in xmm's - */ - static Instruction divss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x5e); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - /*! - * Subtract two floats in xmm's - */ - static Instruction subss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x5c); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - /*! - * Add two floats in xmm's - */ - static Instruction addss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x58); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - /*! - * Floating point minimum. - */ - static Instruction minss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x5d); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - /*! - * Floating point maximum. - */ - static Instruction maxss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x5f); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - /*! - * Convert GPR int32 to XMM float (single precision) - */ - static Instruction int32_to_float(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_gpr()); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x2a); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - /*! 
- * Convert XMM float to GPR int32(single precision) (truncate) - */ - static Instruction float_to_int32(Register dst, Register src) { - ASSERT(dst.is_gpr()); - ASSERT(src.is_xmm()); - Instruction instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x2c); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; - } - - static Instruction nop() { - // NOP - Instruction instr(0x90); - return instr; - } - - // TODO - rsqrt / abs / sqrt - - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // UTILITIES - //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - /*! - * A "null" instruction. This instruction does not generate any bytes - * but can be referred to by a label. Useful to insert in place of a real instruction - * if the real instruction has been optimized out. - */ - static Instruction null() { - Instruction i(0); - i.m_flags |= Instruction::kIsNull; - return i; - } - - ///////////////////////////// - // AVX (VF - Vector Float) // - ///////////////////////////// - - static Instruction nop_vf() { - Instruction instr(0xd9); // FNOP - instr.set_op2(0xd0); - return instr; - } - - static Instruction wait_vf() { - Instruction instr(0x9B); // FWAIT / WAIT - return instr; - } - - static Instruction mov_vf_vf(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - - if (src.hw_id() >= 8 && dst.hw_id() < 8) { - // in this case, we can use the 0x29 encoding, which swaps src and dst, in order to use the - // 2 byte VEX prefix, where the 0x28 encoding would require an extra byte. - // compilers/assemblers seem to prefer 0x28, unless 0x29 would save you a byte. - Instruction instr(0x29); - instr.set_vex_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, VEX3::LeadingBytes::P_0F, false); - return instr; - } else { - Instruction instr(0x28); - instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, VEX3::LeadingBytes::P_0F, false); - return instr; - } - } - - static Instruction loadvf_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0x28); - instr.set_vex_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - VEX3::LeadingBytes::P_0F, false); - return instr; - } - - static Instruction loadvf_gpr64_plus_gpr64_plus_s8(Register dst, + s64 offset); + +extern Instruction store32_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset); + +/*! + * movzxd dst, DWORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. + */ +extern Instruction load32u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2); + +extern Instruction load32u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction load32u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset); + +/*! + * mov dst, QWORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. 
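+ *
+ * Illustrative call (register constant names assumed, not taken from this patch):
+ *   load64_gpr64_gpr64_plus_gpr64(RAX, RBX, RCX)  ->  mov rax, QWORD PTR [rbx + rcx]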
+ */ +extern Instruction load64_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2); + +extern Instruction store64_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value); + +extern Instruction load64_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction store64_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset); + +extern Instruction load64_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction store64_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset); + +extern Instruction store_goal_vf(Register addr, Register value, Register off, s64 offset); + +extern Instruction store_goal_gpr(Register addr, + Register value, + Register off, + int offset, + int size); + +extern Instruction load_goal_xmm128(Register dst, Register addr, Register off, int offset); + +/*! + * Load memory at addr + offset, where addr is a GOAL pointer and off is the offset register. + * This will pick the appropriate fancy addressing mode instruction. + */ +extern Instruction load_goal_gpr(Register dst, + Register addr, + Register off, + int offset, + int size, + bool sign_extend); + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// LOADS n' STORES - XMM32 +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +extern Instruction store32_xmm32_gpr64_plus_gpr64(Register addr1, + Register addr2, + Register xmm_value); + +extern Instruction load32_xmm32_gpr64_plus_gpr64(Register xmm_dest, Register addr1, Register addr2); + +extern Instruction store32_xmm32_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register xmm_value, + s64 offset); + +extern Instruction load32_xmm32_gpr64_plus_gpr64_plus_s8(Register xmm_dest, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction store32_xmm32_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register xmm_value, + s64 offset); + +extern Instruction lea_reg_plus_off32(Register dest, Register base, s64 offset); + +extern Instruction lea_reg_plus_off8(Register dest, Register base, s64 offset); + +extern Instruction lea_reg_plus_off(Register dest, Register base, s64 offset); + +extern Instruction store32_xmm32_gpr64_plus_s32(Register base, Register xmm_value, s64 offset); + +extern Instruction store32_xmm32_gpr64_plus_s8(Register base, Register xmm_value, s64 offset); + +extern Instruction load32_xmm32_gpr64_plus_gpr64_plus_s32(Register xmm_dest, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction load32_xmm32_gpr64_plus_s32(Register xmm_dest, Register base, s64 offset); + +extern Instruction load32_xmm32_gpr64_plus_s8(Register xmm_dest, Register base, s64 offset); + +extern Instruction load_goal_xmm32(Register xmm_dest, Register addr, Register off, s64 offset); + +extern Instruction store_goal_xmm32(Register addr, Register xmm_value, Register off, s64 offset); + +extern Instruction store_reg_offset_xmm32(Register base, Register xmm_value, s64 offset); + +extern Instruction load_reg_offset_xmm32(Register xmm_dest, Register base, s64 offset); + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// LOADS n' STORES - XMM128 +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! 
+ * Store a 128-bit xmm into an address stored in a register, no offset + */ +extern Instruction store128_gpr64_xmm128(Register gpr_addr, Register xmm_value); + +extern Instruction store128_gpr64_xmm128_s32(Register gpr_addr, Register xmm_value, s64 offset); + +extern Instruction store128_gpr64_xmm128_s8(Register gpr_addr, Register xmm_value, s64 offset); + +extern Instruction load128_xmm128_gpr64(Register xmm_dest, Register gpr_addr); + +extern Instruction load128_xmm128_gpr64_s32(Register xmm_dest, Register gpr_addr, s64 offset); + +extern Instruction load128_xmm128_gpr64_s8(Register xmm_dest, Register gpr_addr, s64 offset); + +extern Instruction load128_xmm128_reg_offset(Register xmm_dest, Register base, s64 offset); + +extern Instruction store128_xmm128_reg_offset(Register base, Register xmm_val, s64 offset); + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// RIP loads and stores +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +extern Instruction load64_rip_s32(Register dest, s64 offset); + +extern Instruction load32s_rip_s32(Register dest, s64 offset); + +extern Instruction load32u_rip_s32(Register dest, s64 offset); + +extern Instruction load16u_rip_s32(Register dest, s64 offset); + +extern Instruction load16s_rip_s32(Register dest, s64 offset); + +extern Instruction load8u_rip_s32(Register dest, s64 offset); + +extern Instruction load8s_rip_s32(Register dest, s64 offset); + +extern Instruction static_load(Register dest, s64 offset, int size, bool sign_extend); + +extern Instruction store64_rip_s32(Register src, s64 offset); + +extern Instruction store32_rip_s32(Register src, s64 offset); + +extern Instruction store16_rip_s32(Register src, s64 offset); + +extern Instruction store8_rip_s32(Register src, s64 offset); + +extern Instruction static_store(Register value, s64 offset, int size); + +extern Instruction static_addr(Register dst, s64 offset); + +extern Instruction static_load_xmm32(Register xmm_dest, s64 offset); + +extern Instruction static_store_xmm32(Register xmm_value, s64 offset); + +// TODO, special load/stores of 128 bit values. + +// TODO, consider specialized stack loads and stores? +extern Instruction load64_gpr64_plus_s32(Register dst_reg, int32_t offset, Register src_reg); + +/*! + * Store 64-bits from gpr into memory located at 64-bit reg + 32-bit signed offset. + */ +extern Instruction store64_gpr64_plus_s32(Register addr, int32_t offset, Register value); + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// FUNCTION STUFF +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +/*! + * Function return. Pops the 64-bit return address (real) off the stack and jumps to it. + */ +extern Instruction ret(); + +/*! + * Instruction to push gpr (64-bits) onto the stack + */ +extern Instruction push_gpr64(Register reg); + +/*! + * Instruction to pop 64 bit gpr from the stack + */ +extern Instruction pop_gpr64(Register reg); + +/*! + * Call a function stored in a 64-bit gpr + */ +extern Instruction call_r64(Register reg_); + +/*! + * Jump to an x86-64 address stored in a 64-bit gpr. 
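+ * For example, jmp_r64(RAX) (register name illustrative) assembles to `jmp rax`,
+ * i.e. opcode 0xff with /4 in the ModRM reg field, matching the x86 implementation
+ * this declaration replaces.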
+ */ +extern Instruction jmp_r64(Register reg_); + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// INTEGER MATH +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +extern Instruction sub_gpr64_imm8s(Register reg, int64_t imm); + +extern Instruction sub_gpr64_imm32s(Register reg, int64_t imm); + +extern Instruction add_gpr64_imm8s(Register reg, int64_t v); + +extern Instruction add_gpr64_imm32s(Register reg, int64_t v); + +extern Instruction add_gpr64_imm(Register reg, int64_t imm); + +extern Instruction sub_gpr64_imm(Register reg, int64_t imm); + +extern Instruction add_gpr64_gpr64(Register dst, Register src); + +extern Instruction sub_gpr64_gpr64(Register dst, Register src); + +/*! + * Multiply gprs (32-bit, signed). + * (Note - probably worth doing imul on gpr64's to implement the EE's unsigned multiply) + */ +extern Instruction imul_gpr32_gpr32(Register dst, Register src); + +/*! + * Multiply gprs (64-bit, signed). + * DANGER - this treats all operands as 64-bit. This is not like the EE. + */ +extern Instruction imul_gpr64_gpr64(Register dst, Register src); + +/*! + * Divide (idiv, 32 bit) + */ +extern Instruction idiv_gpr32(Register reg); + +extern Instruction unsigned_div_gpr32(Register reg); + +/*! + * Convert doubleword to quadword for division. + */ +extern Instruction cdq(); + +/*! + * Move from gpr32 to gpr64, with sign extension. + * Needed for multiplication/divsion madness. + */ +extern Instruction movsx_r64_r32(Register dst, Register src); + +/*! + * Compare gpr64. This sets the flags for the jumps. + * todo UNTESTED + */ +extern Instruction cmp_gpr64_gpr64(Register a, Register b); + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// BIT STUFF +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * Or of two gprs + */ +extern Instruction or_gpr64_gpr64(Register dst, Register src); + +/*! + * And of two gprs + */ +extern Instruction and_gpr64_gpr64(Register dst, Register src); + +/*! + * Xor of two gprs + */ +extern Instruction xor_gpr64_gpr64(Register dst, Register src); + +/*! + * Bitwise not a gpr + */ +extern Instruction not_gpr64(Register reg); + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// SHIFTS +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * Shift 64-bit gpr left by CL register + */ +extern Instruction shl_gpr64_cl(Register reg); + +/*! + * Shift 64-bit gpr right (logical) by CL register + */ +extern Instruction shr_gpr64_cl(Register reg); + +/*! + * Shift 64-bit gpr right (arithmetic) by CL register + */ +extern Instruction sar_gpr64_cl(Register reg); + +/*! + * Shift 64-ptr left (logical) by the constant shift amount "sa". + */ +extern Instruction shl_gpr64_u8(Register reg, uint8_t sa); + +/*! + * Shift 64-ptr right (logical) by the constant shift amount "sa". + */ +extern Instruction shr_gpr64_u8(Register reg, uint8_t sa); + +/*! + * Shift 64-ptr right (arithmetic) by the constant shift amount "sa". + */ +extern Instruction sar_gpr64_u8(Register reg, uint8_t sa); + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// CONTROL FLOW +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * Jump, 32-bit constant offset. The offset is by default 0 and must be patched later. + */ +extern Instruction jmp_32(); + +/*! + * Jump if equal. + */ +extern Instruction je_32(); + +/*! + * Jump not equal. + */ +extern Instruction jne_32(); + +/*! + * Jump less than or equal. + */ +extern Instruction jle_32(); + +/*! + * Jump greater than or equal. + */ +extern Instruction jge_32(); + +/*! + * Jump less than + */ +extern Instruction jl_32(); + +/*! + * Jump greater than + */ +extern Instruction jg_32(); + +/*! 
+ * Jump below or equal + */ +extern Instruction jbe_32(); + +/*! + * Jump above or equal + */ +extern Instruction jae_32(); + +/*! + * Jump below + */ +extern Instruction jb_32(); + +/*! + * Jump above + */ +extern Instruction ja_32(); + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// FLOAT MATH +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * Compare two floats and set flag register for jump (ucomiss) + */ +extern Instruction cmp_flt_flt(Register a, Register b); + +extern Instruction sqrts_xmm(Register dst, Register src); + +/*! + * Multiply two floats in xmm's + */ +extern Instruction mulss_xmm_xmm(Register dst, Register src); + +/*! + * Divide two floats in xmm's + */ +extern Instruction divss_xmm_xmm(Register dst, Register src); + +/*! + * Subtract two floats in xmm's + */ +extern Instruction subss_xmm_xmm(Register dst, Register src); + +/*! + * Add two floats in xmm's + */ +extern Instruction addss_xmm_xmm(Register dst, Register src); + +/*! + * Floating point minimum. + */ +extern Instruction minss_xmm_xmm(Register dst, Register src); + +/*! + * Floating point maximum. + */ +extern Instruction maxss_xmm_xmm(Register dst, Register src); + +/*! + * Convert GPR int32 to XMM float (single precision) + */ +extern Instruction int32_to_float(Register dst, Register src); + +/*! + * Convert XMM float to GPR int32(single precision) (truncate) + */ +extern Instruction float_to_int32(Register dst, Register src); + +extern Instruction nop(); + +// TODO - rsqrt / abs / sqrt + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// UTILITIES +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * A "null" instruction. This instruction does not generate any bytes + * but can be referred to by a label. Useful to insert in place of a real instruction + * if the real instruction has been optimized out. 
+ */ +extern Instruction null(); + +///////////////////////////// +// AVX (VF - Vector Float) // +///////////////////////////// + +extern Instruction nop_vf(); + +extern Instruction wait_vf(); + +extern Instruction mov_vf_vf(Register dst, Register src); + +extern Instruction loadvf_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2); + +extern Instruction loadvf_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction loadvf_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction storevf_gpr64_plus_gpr64(Register value, Register addr1, Register addr2); + +extern Instruction storevf_gpr64_plus_gpr64_plus_s8(Register value, + Register addr1, + Register addr2, + s64 offset); + +extern Instruction storevf_gpr64_plus_gpr64_plus_s32(Register value, Register addr1, Register addr2, - s64 offset) { - ASSERT(dst.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x28); - instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, VEX3::LeadingBytes::P_0F, false); - return instr; - } - - static Instruction loadvf_gpr64_plus_gpr64_plus_s32(Register dst, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(dst.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x28); - instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, VEX3::LeadingBytes::P_0F, false); - return instr; - } - - static Instruction storevf_gpr64_plus_gpr64(Register value, Register addr1, Register addr2) { - ASSERT(value.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - Instruction instr(0x29); - instr.set_vex_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - VEX3::LeadingBytes::P_0F, false); - return instr; - } - - static Instruction storevf_gpr64_plus_gpr64_plus_s8(Register value, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(value.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - Instruction instr(0x29); - instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s8( - value.hw_id(), addr1.hw_id(), addr2.hw_id(), offset, VEX3::LeadingBytes::P_0F, false); - return instr; - } - - static Instruction storevf_gpr64_plus_gpr64_plus_s32(Register value, - Register addr1, - Register addr2, - s64 offset) { - ASSERT(value.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - Instruction instr(0x29); - instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s32( - value.hw_id(), addr1.hw_id(), addr2.hw_id(), offset, VEX3::LeadingBytes::P_0F, false); - return instr; - } - - static Instruction loadvf_rip_plus_s32(Register dest, s64 offset) { - ASSERT(dest.is_xmm()); - ASSERT(offset >= INT32_MIN); - ASSERT(offset <= INT32_MAX); - Instruction instr(0x28); - instr.set_vex_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), 
offset); - return instr; - } - - // TODO - rip relative loads and stores. - - static Instruction blend_vf(Register dst, Register src1, Register src2, u8 mask) { - ASSERT(!(mask & 0b11110000)); - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); - Instruction instr(0x0c); // VBLENDPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F_3A, - src1.hw_id(), false, VexPrefix::P_66); - instr.set(Imm(1, mask)); - return instr; - } - - static Instruction shuffle_vf(Register dst, Register src, u8 dx, u8 dy, u8 dz, u8 dw) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - ASSERT(dx < 4); - ASSERT(dy < 4); - ASSERT(dz < 4); - ASSERT(dw < 4); - u8 imm = dx + (dy << 2) + (dz << 4) + (dw << 6); - return swizzle_vf(dst, src, imm); - - // SSE encoding version: - // Instruction instr(0x0f); - // instr.set_op2(0xc6); - // instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - // instr.set(Imm(1, imm)); - // return instr; - } - - /* - Generic Swizzle (re-arrangment of packed FPs) operation, the control bytes are quite involved. - Here's a brief run-down: - - 8-bits / 4 groups of 2 bits - - Right-to-left, each group is used to determine which element in `src` gets copied into - `dst`'s element (W->X). - - GROUP OPTIONS - - 00b - Copy the least-significant element (X) - - 01b - Copy the second element (from the right) (Y) - - 10b - Copy the third element (from the right) (Z) - - 11b - Copy the most significant element (W) - Examples - ; xmm1 = (1.5, 2.5, 3.5, 4.5) (W,Z,Y,X in x86 land) - SHUFPS xmm1, xmm1, 0xff ; Copy the most significant element to all positions - > (1.5, 1.5, 1.5, 1.5) - SHUFPS xmm1, xmm1, 0x39 ; Rotate right - > (4.5, 1.5, 2.5, 3.5) - */ - static Instruction swizzle_vf(Register dst, Register src, u8 controlBytes) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - Instruction instr(0xC6); // VSHUFPS - - // we use the AVX "VEX" encoding here. This is a three-operand form, - // but we just set both source - // to the same register. It seems like this is one byte longer but is faster maybe? 
- instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, src.hw_id()); - instr.set(Imm(1, controlBytes)); - return instr; - } - - /* - Splats a single element in 'src' to all elements in 'dst' - For example (pseudocode): - xmm1 = (1.5, 2.5, 3.5, 4.5) - xmm2 = (1, 2, 3, 4) - splat_vf(xmm1, xmm2, XMM_ELEMENT::X); - xmm1 = (4, 4, 4, 4) - */ - static Instruction splat_vf(Register dst, Register src, Register::VF_ELEMENT element) { - switch (element) { - case Register::VF_ELEMENT::X: // Least significant element - return swizzle_vf(dst, src, 0b00000000); - break; - case Register::VF_ELEMENT::Y: - return swizzle_vf(dst, src, 0b01010101); - break; - case Register::VF_ELEMENT::Z: - return swizzle_vf(dst, src, 0b10101010); - break; - case Register::VF_ELEMENT::W: // Most significant element - return swizzle_vf(dst, src, 0b11111111); - break; - default: - ASSERT(false); - return {0}; - } - } - - static Instruction xor_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); - Instruction instr(0x57); // VXORPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); - return instr; - } - - static Instruction sub_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); - Instruction instr(0x5c); // VSUBPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); - return instr; - } - - static Instruction add_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); - Instruction instr(0x58); // VADDPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); - return instr; - } - - static Instruction mul_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); - Instruction instr(0x59); // VMULPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); - return instr; - } - - static Instruction max_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); - Instruction instr(0x5F); // VMAXPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); - return instr; - } - - static Instruction min_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); - Instruction instr(0x5D); // VMINPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); - return instr; - } - - static Instruction div_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); - Instruction instr(0x5E); // VDIVPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); - return instr; - } - - static Instruction sqrt_vf(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - Instruction instr(0x51); // VSQRTPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0b0); - return instr; - } - - static Instruction itof_vf(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - Instruction instr(0x5b); // VCVTDQ2PS - instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0); - return 
instr; - } - - static Instruction ftoi_vf(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.F3.0F.WIG 5B /r VCVTTPS2DQ xmm1, xmm2/m128 - Instruction instr(0x5b); // VCVTTPS2DQ - instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false, - VexPrefix::P_F3); - return instr; - } - - static Instruction pw_sra(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.66.0F.WIG 72 /4 ib VPSRAD xmm1, xmm2, imm8 - Instruction instr(0x72); - instr.set_vex_modrm_and_rex(4, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, - VexPrefix::P_66); - instr.set(Imm(1, imm)); - return instr; - } - - static Instruction pw_srl(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.66.0F.WIG 72 /2 ib VPSRLD xmm1, xmm2, imm8 - Instruction instr(0x72); - instr.set_vex_modrm_and_rex(2, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, - VexPrefix::P_66); - instr.set(Imm(1, imm)); - return instr; - } - - static Instruction ph_srl(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.66.0F.WIG 71 /2 ib VPSRLW - Instruction instr(0x71); - instr.set_vex_modrm_and_rex(2, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, - VexPrefix::P_66); - instr.set(Imm(1, imm)); - return instr; - } - - static Instruction pw_sll(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.66.0F.WIG 72 /6 ib VPSLLD xmm1, xmm2, imm8 - Instruction instr(0x72); - instr.set_vex_modrm_and_rex(6, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, - VexPrefix::P_66); - instr.set(Imm(1, imm)); - return instr; - } - static Instruction ph_sll(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.66.0F.WIG 71 /6 ib VPSLLW xmm1, xmm2, imm8 - Instruction instr(0x71); - instr.set_vex_modrm_and_rex(6, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, - VexPrefix::P_66); - instr.set(Imm(1, imm)); - return instr; - } - - static Instruction parallel_add_byte(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG FC /r VPADDB xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0xFC); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction parallel_bitwise_or(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0xEB); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction parallel_bitwise_xor(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0xEF); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction parallel_bitwise_and(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, 
xmm3/m128 - // reg, vex, r/m - Instruction instr(0xDB); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - // Reminder - a word in MIPS = 32bits = a DWORD in x86 - // MIPS || x86 - // ----------------------- - // byte || byte - // halfword || word - // word || dword - // doubleword || quadword - - // -- Unpack High Data Instructions - static Instruction pextub_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 68/r VPUNPCKHBW xmm1,xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x68); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction pextuh_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 69/r VPUNPCKHWD xmm1,xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x69); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction pextuw_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 6A/r VPUNPCKHDQ xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x6a); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - // -- Unpack Low Data Instructions - static Instruction pextlb_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 60/r VPUNPCKLBW xmm1,xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x60); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction pextlh_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 61/r VPUNPCKLWD xmm1,xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x61); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction pextlw_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 62/r VPUNPCKLDQ xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x62); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - // Equal to than comparison as 16 bytes (8 bits) - static Instruction parallel_compare_e_b(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 74 /r VPCMPEQB xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x74); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - // Equal to than comparison as 8 halfwords (16 bits) - static Instruction parallel_compare_e_h(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - 
ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 75 /r VPCMPEQW xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x75); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - // Equal to than comparison as 4 words (32 bits) - static Instruction parallel_compare_e_w(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 76 /r VPCMPEQD xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x76); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - // Greater than comparison as 16 bytes (8 bits) - static Instruction parallel_compare_gt_b(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 64 /r VPCMPGTB xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x64); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - // Greater than comparison as 8 halfwords (16 bits) - static Instruction parallel_compare_gt_h(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 65 /r VPCMPGTW xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x65); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - // Greater than comparison as 4 words (32 bits) - static Instruction parallel_compare_gt_w(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 66 /r VPCMPGTD xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x66); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction vpunpcklqdq(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 6C/r VPUNPCKLQDQ xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x6c); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction pcpyld_swapped(Register dst, Register src0, Register src1) { - return vpunpcklqdq(dst, src0, src1); - } - - static Instruction pcpyud(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 6D/r VPUNPCKHQDQ xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0x6d); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction vpsubd(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG FA /r VPSUBD xmm1, xmm2, xmm3/m128 - // reg, vec, r/m - Instruction instr(0xfa); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction vpsrldq(Register dst, Register src, 
u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.66.0F.WIG 73 /3 ib VPSRLDQ xmm1, xmm2, imm8 - Instruction instr(0x73); - instr.set_vex_modrm_and_rex(3, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, - VexPrefix::P_66); - instr.set(Imm(1, imm)); - return instr; - } - - static Instruction vpslldq(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.66.0F.WIG 73 /7 ib VPSLLDQ xmm1, xmm2, imm8 - Instruction instr(0x73); - instr.set_vex_modrm_and_rex(7, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, - VexPrefix::P_66); - instr.set(Imm(1, imm)); - return instr; - } - - static Instruction vpshuflw(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.F2.0F.WIG 70 /r ib VPSHUFLW xmm1, xmm2/m128, imm8 - Instruction instr(0x70); - instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false, - VexPrefix::P_F2); - instr.set(Imm(1, imm)); - return instr; - } - - static Instruction vpshufhw(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.F3.0F.WIG 70 /r ib VPSHUFHW xmm1, xmm2/m128, imm8 - Instruction instr(0x70); - instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false, - VexPrefix::P_F3); - instr.set(Imm(1, imm)); - return instr; - } - - static Instruction vpackuswb(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 67 /r VPACKUSWB xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - - Instruction instr(0x67); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } -}; + s64 offset); + +extern Instruction loadvf_rip_plus_s32(Register dest, s64 offset); + +// TODO - rip relative loads and stores. + +extern Instruction blend_vf(Register dst, Register src1, Register src2, u8 mask); + +extern Instruction shuffle_vf(Register dst, Register src, u8 dx, u8 dy, u8 dz, u8 dw); + +/* + Generic Swizzle (re-arrangment of packed FPs) operation, the control bytes are quite involved. + Here's a brief run-down: + - 8-bits / 4 groups of 2 bits + - Right-to-left, each group is used to determine which element in `src` gets copied into + `dst`'s element (W->X). 
+  - GROUP OPTIONS
+    - 00b - Copy the least-significant element (X)
+    - 01b - Copy the second element (from the right) (Y)
+    - 10b - Copy the third element (from the right) (Z)
+    - 11b - Copy the most significant element (W)
+  Examples
+  ; xmm1 = (1.5, 2.5, 3.5, 4.5) (W,Z,Y,X in x86 land)
+  SHUFPS xmm1, xmm1, 0xff ; Copy the most significant element to all positions
+  > (1.5, 1.5, 1.5, 1.5)
+  SHUFPS xmm1, xmm1, 0x39 ; Rotate right
+  > (4.5, 1.5, 2.5, 3.5)
+  */
+extern Instruction swizzle_vf(Register dst, Register src, u8 controlBytes);
+
+/*
+  Splats a single element in 'src' to all elements in 'dst'
+  For example (pseudocode):
+    xmm1 = (1.5, 2.5, 3.5, 4.5)
+    xmm2 = (1, 2, 3, 4)
+    splat_vf(xmm1, xmm2, XMM_ELEMENT::X);
+    xmm1 = (4, 4, 4, 4)
+  */
+extern Instruction splat_vf(Register dst, Register src, Register::VF_ELEMENT element);
+
+extern Instruction xor_vf(Register dst, Register src1, Register src2);
+
+extern Instruction sub_vf(Register dst, Register src1, Register src2);
+
+extern Instruction add_vf(Register dst, Register src1, Register src2);
+
+extern Instruction mul_vf(Register dst, Register src1, Register src2);
+
+extern Instruction max_vf(Register dst, Register src1, Register src2);
+
+extern Instruction min_vf(Register dst, Register src1, Register src2);
+
+extern Instruction div_vf(Register dst, Register src1, Register src2);
+
+extern Instruction sqrt_vf(Register dst, Register src);
+
+extern Instruction itof_vf(Register dst, Register src);
+
+extern Instruction ftoi_vf(Register dst, Register src);
+
+extern Instruction pw_sra(Register dst, Register src, u8 imm);
+
+extern Instruction pw_srl(Register dst, Register src, u8 imm);
+
+extern Instruction ph_srl(Register dst, Register src, u8 imm);
+
+extern Instruction pw_sll(Register dst, Register src, u8 imm);
+
+extern Instruction ph_sll(Register dst, Register src, u8 imm);
+
+extern Instruction parallel_add_byte(Register dst, Register src0, Register src1);
+
+extern Instruction parallel_bitwise_or(Register dst, Register src0, Register src1);
+
+extern Instruction parallel_bitwise_xor(Register dst, Register src0, Register src1);
+
+extern Instruction parallel_bitwise_and(Register dst, Register src0, Register src1);
+
+// Reminder - a word in MIPS = 32bits = a DWORD in x86
+// MIPS       || x86
+// -----------------------
+// byte       || byte
+// halfword   || word
+// word       || dword
+// doubleword || quadword
+
+// -- Unpack High Data Instructions
+extern Instruction pextub_swapped(Register dst, Register src0, Register src1);
+
+extern Instruction pextuh_swapped(Register dst, Register src0, Register src1);
+
+extern Instruction pextuw_swapped(Register dst, Register src0, Register src1);
+
+// -- Unpack Low Data Instructions
+extern Instruction pextlb_swapped(Register dst, Register src0, Register src1);
+
+extern Instruction pextlh_swapped(Register dst, Register src0, Register src1);
+
+extern Instruction pextlw_swapped(Register dst, Register src0, Register src1);
+
+// Equal to than comparison as 16 bytes (8 bits)
+extern Instruction parallel_compare_e_b(Register dst, Register src0, Register src1);
+
+// Equal to than comparison as 8 halfwords (16 bits)
+extern Instruction parallel_compare_e_h(Register dst, Register src0, Register src1);
+
+// Equal to than comparison as 4 words (32 bits)
+extern Instruction parallel_compare_e_w(Register dst, Register src0, Register src1);
+
+// Greater than comparison as 16 bytes (8 bits)
+extern Instruction parallel_compare_gt_b(Register dst, Register src0, Register src1);
+
+// Greater than comparison as
8 halfwords (16 bits) +extern Instruction parallel_compare_gt_h(Register dst, Register src0, Register src1); + +// Greater than comparison as 4 words (32 bits) +extern Instruction parallel_compare_gt_w(Register dst, Register src0, Register src1); + +extern Instruction vpunpcklqdq(Register dst, Register src0, Register src1); + +extern Instruction pcpyld_swapped(Register dst, Register src0, Register src1); + +extern Instruction pcpyud(Register dst, Register src0, Register src1); + +extern Instruction vpsubd(Register dst, Register src0, Register src1); + +extern Instruction vpsrldq(Register dst, Register src, u8 imm); + +extern Instruction vpslldq(Register dst, Register src, u8 imm); + +extern Instruction vpshuflw(Register dst, Register src, u8 imm); + +extern Instruction vpshufhw(Register dst, Register src, u8 imm); + +extern Instruction vpackuswb(Register dst, Register src0, Register src1); +}; // namespace IGen } // namespace emitter diff --git a/goalc/emitter/IGenARM64.cpp b/goalc/emitter/IGenARM64.cpp new file mode 100644 index 00000000000..d0934f2ecd8 --- /dev/null +++ b/goalc/emitter/IGenARM64.cpp @@ -0,0 +1,2749 @@ +#include "goalc/emitter/InstructionARM64.h" +#ifdef __aarch64__ + +#include "IGen.h" +#include "goalc/emitter/InstructionX86.h" + +namespace emitter { +namespace IGen { +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// MOVES +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +/*! + * Move data from src to dst. Moves all 64-bits of the GPR. + */ +Instruction mov_gpr64_gpr64(Register dst, Register src) { + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); + return instr; +} + +/*! + * Move a 64-bit constant into a register. + */ +Instruction mov_gpr64_u64(Register dst, uint64_t val) { + ASSERT(dst.is_gpr()); + bool rex_b = false; + auto dst_hw_id = dst.hw_id(); + if (dst_hw_id >= 8) { + dst_hw_id -= 8; + rex_b = true; + } + InstructionX86 instr(0xb8 + dst_hw_id); + instr.set(REX(true, false, false, rex_b)); + instr.set(Imm(8, val)); + return instr; +} + +/*! + * Move a 32-bit constant into a register. Zeros the upper 32 bits. + */ +Instruction mov_gpr64_u32(Register dst, uint64_t val) { + ASSERT(val <= UINT32_MAX); + ASSERT(dst.is_gpr()); + auto dst_hw_id = dst.hw_id(); + bool rex_b = false; + if (dst_hw_id >= 8) { + dst_hw_id -= 8; + rex_b = true; + } + + InstructionX86 instr(0xb8 + dst_hw_id); + if (rex_b) { + instr.set(REX(false, false, false, rex_b)); + } + instr.set(Imm(4, val)); + return instr; +} + +/*! + * Move a signed 32-bit constant into a register. Sign extends for the upper 32 bits. + * When possible prefer mov_gpr64_u32. (use this only for negative values...) + * This is always bigger than mov_gpr64_u32, but smaller than a mov_gpr_u64. + */ +Instruction mov_gpr64_s32(Register dst, int64_t val) { + ASSERT(val >= INT32_MIN && val <= INT32_MAX); + ASSERT(dst.is_gpr()); + InstructionX86 instr(0xc7); + instr.set_modrm_and_rex(0, dst.hw_id(), 3, true); + instr.set(Imm(4, val)); + return instr; +} + +/*! + * Move 32-bits of xmm to 32 bits of gpr (no sign extension). + */ +Instruction movd_gpr32_xmm32(Register dst, Register src) { + ASSERT(dst.is_gpr()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0x66); + instr.set_op2(0x0f); + instr.set_op3(0x7e); + instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +/*! 
+ * Move 32-bits of gpr to 32-bits of xmm (no sign extension) + */ +Instruction movd_xmm32_gpr32(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_gpr()); + InstructionX86 instr(0x66); + instr.set_op2(0x0f); + instr.set_op3(0x6e); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +/*! + * Move 64-bits of xmm to 64 bits of gpr (no sign extension). + */ +Instruction movq_gpr64_xmm64(Register dst, Register src) { + ASSERT(dst.is_gpr()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0x66); + instr.set_op2(0x0f); + instr.set_op3(0x7e); + instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); + instr.swap_op0_rex(); + return instr; +} + +/*! + * Move 64-bits of gpr to 64-bits of xmm (no sign extension) + */ +Instruction movq_xmm64_gpr64(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_gpr()); + InstructionX86 instr(0x66); + instr.set_op2(0x0f); + instr.set_op3(0x6e); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); + instr.swap_op0_rex(); + return instr; +} + +/*! + * Move 32-bits between xmm's + */ +Instruction mov_xmm32_xmm32(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +// todo - GPR64 -> XMM64 (zext) +// todo - XMM -> GPR64 + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// GOAL Loads and Stores +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * movsx dst, BYTE PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. + */ +Instruction load8s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0xf); + instr.set_op2(0xbe); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, + false); + return instr; +} + +Instruction store8_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x88); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id()); + if (value.id() > RBX) { + instr.add_rex(); + } + return instr; +} + +Instruction load8s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xbe); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction store8_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x88); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + if (value.id() 
> RBX) { + instr.add_rex(); + } + return instr; +} + +Instruction load8s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xbe); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction store8_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x88); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + if (value.id() > RBX) { + instr.add_rex(); + } + return instr; +} + +/*! + * movzx dst, BYTE PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. + */ +Instruction load8u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0xf); + instr.set_op2(0xb6); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, + false); + return instr; +} + +Instruction load8u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xb6); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction load8u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xb6); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +/*! + * movsx dst, WORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. 
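+ * (RSP can't be encoded as the SIB index register on x86-64, which is why it is excluded here.)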
+ */ +Instruction load16s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0xf); + instr.set_op2(0xbf); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, + false); + return instr; +} + +Instruction store16_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x66); + instr.set_op2(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id()); + instr.swap_op0_rex(); // why????? + return instr; +} + +Instruction store16_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x66); + instr.set_op2(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + instr.swap_op0_rex(); // why????? + return instr; +} + +Instruction store16_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x66); + instr.set_op2(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + instr.swap_op0_rex(); // why????? + return instr; +} + +Instruction load16s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xbf); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction load16s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xbf); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +/*! + * movzx dst, WORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. 
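+ * (movzx zero-fills the upper 48 bits of dst; use the load16s variants when sign extension is wanted.)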
+ */ +Instruction load16u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0xf); + instr.set_op2(0xb7); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, + false); + return instr; +} + +Instruction load16u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xb7); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction load16u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xb7); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +/*! + * movsxd dst, DWORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. + */ +Instruction load32s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x63); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true); + return instr; +} + +Instruction store32_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id()); + return instr; +} + +Instruction load32s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x63); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction store32_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + return instr; +} + +Instruction load32s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != 
RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x63); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction store32_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + return instr; +} + +/*! + * movzxd dst, DWORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. + */ +Instruction load32u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id()); + return instr; +} + +Instruction load32u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + return instr; +} + +Instruction load32u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + return instr; +} + +/*! + * mov dst, QWORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. 
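+ * (Emitted with REX.W, so all 64 bits are transferred and no extension is involved.)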
+ */ +Instruction load64_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true); + return instr; +} + +Instruction store64_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id(), true); + return instr; +} + +Instruction load64_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction store64_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction load64_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction store64_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction store_goal_vf(Register addr, Register value, Register off, s64 offset) { + if (offset == 0) { + return storevf_gpr64_plus_gpr64(value, addr, off); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return storevf_gpr64_plus_gpr64_plus_s8(value, addr, off, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return storevf_gpr64_plus_gpr64_plus_s32(value, addr, off, offset); + } + ASSERT(false); + return InstructionX86(0); +} + +Instruction store_goal_gpr(Register addr, Register value, Register off, int offset, int size) { + switch (size) { + case 1: + if (offset == 0) { + return store8_gpr64_gpr64_plus_gpr64(addr, off, value); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store8_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); + } else if 
(offset >= INT32_MIN && offset <= INT32_MAX) { + return store8_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); + } else { + ASSERT(false); + } + case 2: + if (offset == 0) { + return store16_gpr64_gpr64_plus_gpr64(addr, off, value); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store16_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store16_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); + } else { + ASSERT(false); + } + case 4: + if (offset == 0) { + return store32_gpr64_gpr64_plus_gpr64(addr, off, value); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store32_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store32_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); + } else { + ASSERT(false); + } + case 8: + if (offset == 0) { + return store64_gpr64_gpr64_plus_gpr64(addr, off, value); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store64_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store64_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); + } else { + ASSERT(false); + } + default: + ASSERT(false); + return InstructionX86(0); + } +} + +Instruction load_goal_xmm128(Register dst, Register addr, Register off, int offset) { + if (offset == 0) { + return loadvf_gpr64_plus_gpr64(dst, addr, off); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return loadvf_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return loadvf_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + +/*! + * Load memory at addr + offset, where addr is a GOAL pointer and off is the offset register. + * This will pick the appropriate fancy addressing mode instruction. 
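+ * (An offset of 0 uses the plain reg+reg form; otherwise the smallest displacement encoding that fits, s8 then s32, is used.)
+ * e.g. size 4, unsigned, offset 4 selects load32u_gpr64_gpr64_plus_gpr64_plus_s8 - a 32-bit mov with a disp8 address.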
+ */ +Instruction load_goal_gpr(Register dst, + Register addr, + Register off, + int offset, + int size, + bool sign_extend) { + switch (size) { + case 1: + if (offset == 0) { + if (sign_extend) { + return load8s_gpr64_gpr64_plus_gpr64(dst, addr, off); + } else { + return load8u_gpr64_gpr64_plus_gpr64(dst, addr, off); + } + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + if (sign_extend) { + return load8s_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } else { + return load8u_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + if (sign_extend) { + return load8s_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } else { + return load8u_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } + } else { + ASSERT(false); + } + case 2: + if (offset == 0) { + if (sign_extend) { + return load16s_gpr64_gpr64_plus_gpr64(dst, addr, off); + } else { + return load16u_gpr64_gpr64_plus_gpr64(dst, addr, off); + } + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + if (sign_extend) { + return load16s_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } else { + return load16u_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + if (sign_extend) { + return load16s_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } else { + return load16u_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } + } else { + ASSERT(false); + } + case 4: + if (offset == 0) { + if (sign_extend) { + return load32s_gpr64_gpr64_plus_gpr64(dst, addr, off); + } else { + return load32u_gpr64_gpr64_plus_gpr64(dst, addr, off); + } + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + if (sign_extend) { + return load32s_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } else { + return load32u_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + if (sign_extend) { + return load32s_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } else { + return load32u_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } + } else { + ASSERT(false); + } + case 8: + if (offset == 0) { + return load64_gpr64_gpr64_plus_gpr64(dst, addr, off); + + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return load64_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return load64_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + + } else { + ASSERT(false); + } + default: + ASSERT(false); + return InstructionX86(0); + } +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// LOADS n' STORES - XMM32 +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +Instruction store32_xmm32_gpr64_plus_gpr64(Register addr1, Register addr2, Register xmm_value) { + ASSERT(xmm_value.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x11); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(xmm_value.hw_id(), addr1.hw_id(), addr2.hw_id()); + + instr.swap_op0_rex(); + return instr; +} + +Instruction load32_xmm32_gpr64_plus_gpr64(Register xmm_dest, Register addr1, Register addr2) { + ASSERT(xmm_dest.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(xmm_dest.hw_id(), addr1.hw_id(), addr2.hw_id()); + + 
instr.swap_op0_rex(); + return instr; +} + +Instruction store32_xmm32_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register xmm_value, + s64 offset) { + ASSERT(xmm_value.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x11); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(xmm_value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + + instr.swap_op0_rex(); + return instr; +} + +Instruction load32_xmm32_gpr64_plus_gpr64_plus_s8(Register xmm_dest, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(xmm_dest.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(xmm_dest.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + + instr.swap_op0_rex(); + return instr; +} + +Instruction store32_xmm32_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register xmm_value, + s64 offset) { + ASSERT(xmm_value.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x11); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(xmm_value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + + instr.swap_op0_rex(); + return instr; +} + +Instruction lea_reg_plus_off32(Register dest, Register base, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(base.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x8d); + instr.set_modrm_rex_sib_for_reg_reg_disp(dest.hw_id(), 2, base.hw_id(), true); + instr.set(Imm(4, offset)); + return instr; +} + +Instruction lea_reg_plus_off8(Register dest, Register base, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(base.is_gpr()); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x8d); + instr.set_modrm_rex_sib_for_reg_reg_disp(dest.hw_id(), 1, base.hw_id(), true); + instr.set(Imm(1, offset)); + return instr; +} + +Instruction lea_reg_plus_off(Register dest, Register base, s64 offset) { + if (offset >= INT8_MIN && offset <= INT8_MAX) { + return lea_reg_plus_off8(dest, base, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return lea_reg_plus_off32(dest, base, offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + +Instruction store32_xmm32_gpr64_plus_s32(Register base, Register xmm_value, s64 offset) { + ASSERT(xmm_value.is_xmm()); + ASSERT(base.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x11); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 2, base.hw_id(), false); + instr.set(Imm(4, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction store32_xmm32_gpr64_plus_s8(Register base, Register xmm_value, s64 offset) { + ASSERT(xmm_value.is_xmm()); + ASSERT(base.is_gpr()); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x11); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 1, base.hw_id(), false); + instr.set(Imm(1, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction load32_xmm32_gpr64_plus_gpr64_plus_s32(Register xmm_dest, + Register addr1, + Register addr2, + s64 
offset) { + ASSERT(xmm_dest.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(xmm_dest.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + + instr.swap_op0_rex(); + return instr; +} + +Instruction load32_xmm32_gpr64_plus_s32(Register xmm_dest, Register base, s64 offset) { + ASSERT(xmm_dest.is_xmm()); + ASSERT(base.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 2, base.hw_id(), false); + instr.set(Imm(4, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction load32_xmm32_gpr64_plus_s8(Register xmm_dest, Register base, s64 offset) { + ASSERT(xmm_dest.is_xmm()); + ASSERT(base.is_gpr()); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 1, base.hw_id(), false); + instr.set(Imm(1, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction load_goal_xmm32(Register xmm_dest, Register addr, Register off, s64 offset) { + if (offset == 0) { + return load32_xmm32_gpr64_plus_gpr64(xmm_dest, addr, off); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return load32_xmm32_gpr64_plus_gpr64_plus_s8(xmm_dest, addr, off, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return load32_xmm32_gpr64_plus_gpr64_plus_s32(xmm_dest, addr, off, offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + +Instruction store_goal_xmm32(Register addr, Register xmm_value, Register off, s64 offset) { + if (offset == 0) { + return store32_xmm32_gpr64_plus_gpr64(addr, off, xmm_value); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store32_xmm32_gpr64_plus_gpr64_plus_s8(addr, off, xmm_value, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store32_xmm32_gpr64_plus_gpr64_plus_s32(addr, off, xmm_value, offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + +Instruction store_reg_offset_xmm32(Register base, Register xmm_value, s64 offset) { + ASSERT(base.is_gpr()); + ASSERT(xmm_value.is_xmm()); + if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store32_xmm32_gpr64_plus_s8(base, xmm_value, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store32_xmm32_gpr64_plus_s32(base, xmm_value, offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + +Instruction load_reg_offset_xmm32(Register xmm_dest, Register base, s64 offset) { + ASSERT(base.is_gpr()); + ASSERT(xmm_dest.is_xmm()); + if (offset >= INT8_MIN && offset <= INT8_MAX) { + return load32_xmm32_gpr64_plus_s8(xmm_dest, base, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return load32_xmm32_gpr64_plus_s32(xmm_dest, base, offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// LOADS n' STORES - XMM128 +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! 
+ * Store a 128-bit xmm into an address stored in a register, no offset + */ +Instruction store128_gpr64_xmm128(Register gpr_addr, Register xmm_value) { + ASSERT(gpr_addr.is_gpr()); + ASSERT(xmm_value.is_xmm()); + InstructionX86 instr(0x66); + // InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x7f); + instr.set_modrm_and_rex_for_reg_addr(xmm_value.hw_id(), gpr_addr.hw_id(), false); + instr.swap_op0_rex(); + return instr; +} + +Instruction store128_gpr64_xmm128_s32(Register gpr_addr, Register xmm_value, s64 offset) { + ASSERT(gpr_addr.is_gpr()); + ASSERT(xmm_value.is_xmm()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x66); + // InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x7f); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 2, gpr_addr.hw_id(), false); + instr.set(Imm(4, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction store128_gpr64_xmm128_s8(Register gpr_addr, Register xmm_value, s64 offset) { + ASSERT(gpr_addr.is_gpr()); + ASSERT(xmm_value.is_xmm()); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x66); + // InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x7f); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 1, gpr_addr.hw_id(), false); + instr.set(Imm(1, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction load128_xmm128_gpr64(Register xmm_dest, Register gpr_addr) { + ASSERT(gpr_addr.is_gpr()); + ASSERT(xmm_dest.is_xmm()); + InstructionX86 instr(0x66); + // InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x6f); + instr.set_modrm_and_rex_for_reg_addr(xmm_dest.hw_id(), gpr_addr.hw_id(), false); + instr.swap_op0_rex(); + return instr; +} + +Instruction load128_xmm128_gpr64_s32(Register xmm_dest, Register gpr_addr, s64 offset) { + ASSERT(gpr_addr.is_gpr()); + ASSERT(xmm_dest.is_xmm()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x66); + // InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x6f); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 2, gpr_addr.hw_id(), false); + instr.set(Imm(4, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction load128_xmm128_gpr64_s8(Register xmm_dest, Register gpr_addr, s64 offset) { + ASSERT(gpr_addr.is_gpr()); + ASSERT(xmm_dest.is_xmm()); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x66); + // InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x6f); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 1, gpr_addr.hw_id(), false); + instr.set(Imm(1, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction load128_xmm128_reg_offset(Register xmm_dest, Register base, s64 offset) { + if (offset == 0) { + return load128_xmm128_gpr64(xmm_dest, base); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return load128_xmm128_gpr64_s8(xmm_dest, base, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return load128_xmm128_gpr64_s32(xmm_dest, base, offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + +Instruction store128_xmm128_reg_offset(Register base, Register xmm_val, s64 offset) { + if (offset == 0) { + return store128_gpr64_xmm128(base, xmm_val); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store128_gpr64_xmm128_s8(base, xmm_val, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store128_gpr64_xmm128_s32(base, xmm_val, 
offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// RIP loads and stores +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +Instruction load64_rip_s32(Register dest, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; +} + +Instruction load32s_rip_s32(Register dest, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x63); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; +} + +Instruction load32u_rip_s32(Register dest, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, false); + return instr; +} + +Instruction load16u_rip_s32(Register dest, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xb7); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; +} + +Instruction load16s_rip_s32(Register dest, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xbf); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; +} + +Instruction load8u_rip_s32(Register dest, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xb6); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; +} + +Instruction load8s_rip_s32(Register dest, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xbe); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; +} + +Instruction static_load(Register dest, s64 offset, int size, bool sign_extend) { + switch (size) { + case 1: + if (sign_extend) { + return load8s_rip_s32(dest, offset); + } else { + return load8u_rip_s32(dest, offset); + } + break; + case 2: + if (sign_extend) { + return load16s_rip_s32(dest, offset); + } else { + return load16u_rip_s32(dest, offset); + } + break; + case 4: + if (sign_extend) { + return load32s_rip_s32(dest, offset); + } else { + return load32u_rip_s32(dest, offset); + } + break; + case 8: + return load64_rip_s32(dest, offset); + default: + ASSERT(false); + } +} + +Instruction store64_rip_s32(Register src, s64 offset) { + ASSERT(src.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, true); + return instr; +} + +Instruction store32_rip_s32(Register src, s64 offset) { + ASSERT(src.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, false); + return instr; +} + +Instruction store16_rip_s32(Register src, s64 offset) { + ASSERT(src.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x66); + instr.set_op2(0x89); + 
instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, false); + instr.swap_op0_rex(); + return instr; +} + +Instruction store8_rip_s32(Register src, s64 offset) { + ASSERT(src.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x88); + instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, false); + if (src.id() > RBX) { + instr.add_rex(); + } + return instr; +} + +Instruction static_store(Register value, s64 offset, int size) { + switch (size) { + case 1: + return store8_rip_s32(value, offset); + case 2: + return store16_rip_s32(value, offset); + case 4: + return store32_rip_s32(value, offset); + case 8: + return store64_rip_s32(value, offset); + default: + ASSERT(false); + } +} + +Instruction static_addr(Register dst, s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x8d); + instr.set_modrm_and_rex_for_rip_plus_s32(dst.hw_id(), offset, true); + return instr; +} + +Instruction static_load_xmm32(Register xmm_dest, s64 offset) { + ASSERT(xmm_dest.is_xmm()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_and_rex_for_rip_plus_s32(xmm_dest.hw_id(), offset, false); + + instr.swap_op0_rex(); + return instr; +} + +Instruction static_store_xmm32(Register xmm_value, s64 offset) { + ASSERT(xmm_value.is_xmm()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x11); + instr.set_modrm_and_rex_for_rip_plus_s32(xmm_value.hw_id(), offset, false); + + instr.swap_op0_rex(); + return instr; +} + +// TODO, special load/stores of 128 bit values. + +// TODO, consider specialized stack loads and stores? +Instruction load64_gpr64_plus_s32(Register dst_reg, int32_t offset, Register src_reg) { + ASSERT(dst_reg.is_gpr()); + ASSERT(src_reg.is_gpr()); + InstructionX86 instr(0x8b); + instr.set_modrm_rex_sib_for_reg_reg_disp(dst_reg.hw_id(), 2, src_reg.hw_id(), true); + instr.set_disp(Imm(4, offset)); + return instr; +} + +/*! + * Store 64-bits from gpr into memory located at 64-bit reg + 32-bit signed offset. + */ +Instruction store64_gpr64_plus_s32(Register addr, int32_t offset, Register value) { + ASSERT(addr.is_gpr()); + ASSERT(value.is_gpr()); + InstructionX86 instr(0x89); + instr.set_modrm_rex_sib_for_reg_reg_disp(value.hw_id(), 2, addr.hw_id(), true); + instr.set_disp(Imm(4, offset)); + return instr; +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// FUNCTION STUFF +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +/*! + * Function return. Pops the 64-bit return address (real) off the stack and jumps to it. + */ +Instruction ret() { + return InstructionARM64(0b11010110010111110000001111000000); +} + +/*! + * Instruction to push gpr (64-bits) onto the stack + */ +Instruction push_gpr64(Register reg) { + ASSERT(reg.is_gpr()); + if (reg.hw_id() >= 8) { + auto i = InstructionX86(0x50 + reg.hw_id() - 8); + i.set(REX(false, false, false, true)); + return i; + } + return InstructionX86(0x50 + reg.hw_id()); +} + +/*! + * Instruction to pop 64 bit gpr from the stack + */ +Instruction pop_gpr64(Register reg) { + ASSERT(reg.is_gpr()); + if (reg.hw_id() >= 8) { + auto i = InstructionX86(0x58 + reg.hw_id() - 8); + i.set(REX(false, false, false, true)); + return i; + } + return InstructionX86(0x58 + reg.hw_id()); +} + +/*! 
+ * Call a function stored in a 64-bit gpr + */ +Instruction call_r64(Register reg_) { + ASSERT(reg_.is_gpr()); + auto reg = reg_.hw_id(); + InstructionX86 instr(0xff); + if (reg >= 8) { + instr.set(REX(false, false, false, true)); + reg -= 8; + } + ASSERT(reg < 8); + ModRM mrm; + mrm.rm = reg; + mrm.reg_op = 2; + mrm.mod = 3; + instr.set(mrm); + return instr; +} + +/*! + * Jump to an x86-64 address stored in a 64-bit gpr. + */ +Instruction jmp_r64(Register reg_) { + ASSERT(reg_.is_gpr()); + auto reg = reg_.hw_id(); + InstructionX86 instr(0xff); + if (reg >= 8) { + instr.set(REX(false, false, false, true)); + reg -= 8; + } + ASSERT(reg < 8); + ModRM mrm; + mrm.rm = reg; + mrm.reg_op = 4; + mrm.mod = 3; + instr.set(mrm); + return instr; +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// INTEGER MATH +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +Instruction sub_gpr64_imm8s(Register reg, int64_t imm) { + ASSERT(reg.is_gpr()); + ASSERT(imm >= INT8_MIN && imm <= INT8_MAX); + // SUB r/m64, imm8 : REX.W + 83 /5 ib + InstructionX86 instr(0x83); + instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction sub_gpr64_imm32s(Register reg, int64_t imm) { + ASSERT(reg.is_gpr()); + ASSERT(imm >= INT32_MIN && imm <= INT32_MAX); + InstructionX86 instr(0x81); + instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); + instr.set(Imm(4, imm)); + return instr; +} + +Instruction add_gpr64_imm8s(Register reg, int64_t v) { + ASSERT(v >= INT8_MIN && v <= INT8_MAX); + InstructionX86 instr(0x83); + instr.set_modrm_and_rex(0, reg.hw_id(), 3, true); + instr.set(Imm(1, v)); + return instr; +} + +Instruction add_gpr64_imm32s(Register reg, int64_t v) { + ASSERT(v >= INT32_MIN && v <= INT32_MAX); + InstructionX86 instr(0x81); + instr.set_modrm_and_rex(0, reg.hw_id(), 3, true); + instr.set(Imm(4, v)); + return instr; +} + +Instruction add_gpr64_imm(Register reg, int64_t imm) { + if (imm >= INT8_MIN && imm <= INT8_MAX) { + return add_gpr64_imm8s(reg, imm); + } else if (imm >= INT32_MIN && imm <= INT32_MAX) { + return add_gpr64_imm32s(reg, imm); + } else { + throw std::runtime_error("Invalid `add` with reg[" + reg.print() + "]/imm[" + + std::to_string(imm) + "]"); + } +} + +Instruction sub_gpr64_imm(Register reg, int64_t imm) { + if (imm >= INT8_MIN && imm <= INT8_MAX) { + return sub_gpr64_imm8s(reg, imm); + } else if (imm >= INT32_MIN && imm <= INT32_MAX) { + return sub_gpr64_imm32s(reg, imm); + } else { + throw std::runtime_error("Invalid `sub` with reg[" + reg.print() + "]/imm[" + + std::to_string(imm) + "]"); + } +} + +Instruction add_gpr64_gpr64(Register dst, Register src) { + InstructionX86 instr(0x01); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); + return instr; +} + +Instruction sub_gpr64_gpr64(Register dst, Register src) { + InstructionX86 instr(0x29); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); + return instr; +} + +/*! + * Multiply gprs (32-bit, signed). + * (Note - probably worth doing imul on gpr64's to implement the EE's unsigned multiply) + */ +Instruction imul_gpr32_gpr32(Register dst, Register src) { + InstructionX86 instr(0xf); + instr.set_op2(0xaf); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + return instr; +} + +/*! + * Multiply gprs (64-bit, signed). + * DANGER - this treats all operands as 64-bit. This is not like the EE. 
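+ * (The EE's MULT takes 32-bit operands and produces a 64-bit HI/LO result; this imul consumes full 64-bit operands and keeps only the low 64 bits of the product.)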
+ */ +Instruction imul_gpr64_gpr64(Register dst, Register src) { + InstructionX86 instr(0xf); + instr.set_op2(0xaf); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); + return instr; +} + +/*! + * Divide (idiv, 32 bit) + */ +Instruction idiv_gpr32(Register reg) { + InstructionX86 instr(0xf7); + ASSERT(reg.is_gpr()); + instr.set_modrm_and_rex(7, reg.hw_id(), 3, false); + return instr; +} + +Instruction unsigned_div_gpr32(Register reg) { + InstructionX86 instr(0xf7); + ASSERT(reg.is_gpr()); + instr.set_modrm_and_rex(6, reg.hw_id(), 3, false); + return instr; +} + +/*! + * Convert doubleword to quadword for division. + */ +Instruction cdq() { + InstructionX86 instr(0x99); + return instr; +} + +/*! + * Move from gpr32 to gpr64, with sign extension. + * Needed for multiplication/divsion madness. + */ +Instruction movsx_r64_r32(Register dst, Register src) { + InstructionX86 instr(0x63); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); + return instr; +} + +/*! + * Compare gpr64. This sets the flags for the jumps. + * todo UNTESTED + */ +Instruction cmp_gpr64_gpr64(Register a, Register b) { + InstructionX86 instr(0x3b); + ASSERT(a.is_gpr()); + ASSERT(b.is_gpr()); + instr.set_modrm_and_rex(a.hw_id(), b.hw_id(), 3, true); + return instr; +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// BIT STUFF +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * Or of two gprs + */ +Instruction or_gpr64_gpr64(Register dst, Register src) { + InstructionX86 instr(0x0b); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); + return instr; +} + +/*! + * And of two gprs + */ +Instruction and_gpr64_gpr64(Register dst, Register src) { + InstructionX86 instr(0x23); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); + return instr; +} + +/*! + * Xor of two gprs + */ +Instruction xor_gpr64_gpr64(Register dst, Register src) { + InstructionX86 instr(0x33); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); + return instr; +} + +/*! + * Bitwise not a gpr + */ +Instruction not_gpr64(Register reg) { + InstructionX86 instr(0xf7); + ASSERT(reg.is_gpr()); + instr.set_modrm_and_rex(2, reg.hw_id(), 3, true); + return instr; +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// SHIFTS +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * Shift 64-bit gpr left by CL register + */ +Instruction shl_gpr64_cl(Register reg) { + ASSERT(reg.is_gpr()); + InstructionX86 instr(0xd3); + instr.set_modrm_and_rex(4, reg.hw_id(), 3, true); + return instr; +} + +/*! + * Shift 64-bit gpr right (logical) by CL register + */ +Instruction shr_gpr64_cl(Register reg) { + ASSERT(reg.is_gpr()); + InstructionX86 instr(0xd3); + instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); + return instr; +} + +/*! + * Shift 64-bit gpr right (arithmetic) by CL register + */ +Instruction sar_gpr64_cl(Register reg) { + ASSERT(reg.is_gpr()); + InstructionX86 instr(0xd3); + instr.set_modrm_and_rex(7, reg.hw_id(), 3, true); + return instr; +} + +/*! + * Shift 64-ptr left (logical) by the constant shift amount "sa". + */ +Instruction shl_gpr64_u8(Register reg, uint8_t sa) { + ASSERT(reg.is_gpr()); + InstructionX86 instr(0xc1); + instr.set_modrm_and_rex(4, reg.hw_id(), 3, true); + instr.set(Imm(1, sa)); + return instr; +} + +/*! + * Shift 64-ptr right (logical) by the constant shift amount "sa". 
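+ * (Hardware masks the count to the low 6 bits for 64-bit operands, so sa should be in 0-63.)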
+ */ +Instruction shr_gpr64_u8(Register reg, uint8_t sa) { + ASSERT(reg.is_gpr()); + InstructionX86 instr(0xc1); + instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); + instr.set(Imm(1, sa)); + return instr; +} + +/*! + * Shift 64-ptr right (arithmetic) by the constant shift amount "sa". + */ +Instruction sar_gpr64_u8(Register reg, uint8_t sa) { + ASSERT(reg.is_gpr()); + InstructionX86 instr(0xc1); + instr.set_modrm_and_rex(7, reg.hw_id(), 3, true); + instr.set(Imm(1, sa)); + return instr; +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// CONTROL FLOW +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * Jump, 32-bit constant offset. The offset is by default 0 and must be patched later. + */ +Instruction jmp_32() { + InstructionX86 instr(0xe9); + instr.set(Imm(4, 0)); + return instr; +} + +/*! + * Jump if equal. + */ +Instruction je_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x84); + instr.set(Imm(4, 0)); + return instr; +} + +/*! + * Jump not equal. + */ +Instruction jne_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x85); + instr.set(Imm(4, 0)); + return instr; +} + +/*! + * Jump less than or equal. + */ +Instruction jle_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x8e); + instr.set(Imm(4, 0)); + return instr; +} + +/*! + * Jump greater than or equal. + */ +Instruction jge_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x8d); + instr.set(Imm(4, 0)); + return instr; +} + +/*! + * Jump less than + */ +Instruction jl_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x8c); + instr.set(Imm(4, 0)); + return instr; +} + +/*! + * Jump greater than + */ +Instruction jg_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x8f); + instr.set(Imm(4, 0)); + return instr; +} + +/*! + * Jump below or equal + */ +Instruction jbe_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x86); + instr.set(Imm(4, 0)); + return instr; +} + +/*! + * Jump above or equal + */ +Instruction jae_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x83); + instr.set(Imm(4, 0)); + return instr; +} + +/*! + * Jump below + */ +Instruction jb_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x82); + instr.set(Imm(4, 0)); + return instr; +} + +/*! + * Jump above + */ +Instruction ja_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x87); + instr.set(Imm(4, 0)); + return instr; +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// FLOAT MATH +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * Compare two floats and set flag register for jump (ucomiss) + */ +Instruction cmp_flt_flt(Register a, Register b) { + ASSERT(a.is_xmm()); + ASSERT(b.is_xmm()); + InstructionX86 instr(0x0f); + instr.set_op2(0x2e); + instr.set_modrm_and_rex(a.hw_id(), b.hw_id(), 3, false); + return instr; +} + +Instruction sqrts_xmm(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x51); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +/*! + * Multiply two floats in xmm's + */ +Instruction mulss_xmm_xmm(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x59); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +/*! 
+ * Divide two floats in xmm's + */ +Instruction divss_xmm_xmm(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x5e); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +/*! + * Subtract two floats in xmm's + */ +Instruction subss_xmm_xmm(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x5c); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +/*! + * Add two floats in xmm's + */ +Instruction addss_xmm_xmm(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x58); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +/*! + * Floating point minimum. + */ +Instruction minss_xmm_xmm(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x5d); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +/*! + * Floating point maximum. + */ +Instruction maxss_xmm_xmm(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x5f); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +/*! + * Convert GPR int32 to XMM float (single precision) + */ +Instruction int32_to_float(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_gpr()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x2a); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +/*! + * Convert XMM float to GPR int32(single precision) (truncate) + */ +Instruction float_to_int32(Register dst, Register src) { + ASSERT(dst.is_gpr()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x2c); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +Instruction nop() { + // NOP + InstructionX86 instr(0x90); + return instr; +} + +// TODO - rsqrt / abs / sqrt + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// UTILITIES +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * A "null" instruction. This instruction does not generate any bytes + * but can be referred to by a label. Useful to insert in place of a real instruction + * if the real instruction has been optimized out. + */ +Instruction null() { + InstructionX86 i(0); + i.m_flags |= InstructionX86::kIsNull; + return i; +} + +///////////////////////////// +// AVX (VF - Vector Float) // +///////////////////////////// + +Instruction nop_vf() { + InstructionX86 instr(0xd9); // FNOP + instr.set_op2(0xd0); + return instr; +} + +Instruction wait_vf() { + InstructionX86 instr(0x9B); // FWAIT / WAIT + return instr; +} + +Instruction mov_vf_vf(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + + if (src.hw_id() >= 8 && dst.hw_id() < 8) { + // in this case, we can use the 0x29 encoding, which swaps src and dst, in order to use the + // 2 byte VEX prefix, where the 0x28 encoding would require an extra byte. 
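+  // (the 2-byte VEX prefix only carries the R extension bit, so the high register has to land in the reg field rather than r/m to stay in the short form)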
+ // compilers/assemblers seem to prefer 0x28, unless 0x29 would save you a byte. + InstructionX86 instr(0x29); + instr.set_vex_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, VEX3::LeadingBytes::P_0F, false); + return instr; + } else { + InstructionX86 instr(0x28); + instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, VEX3::LeadingBytes::P_0F, false); + return instr; + } +} + +Instruction loadvf_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x28); + instr.set_vex_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + VEX3::LeadingBytes::P_0F, false); + return instr; +} + +Instruction loadvf_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x28); + instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, VEX3::LeadingBytes::P_0F, false); + return instr; +} + +Instruction loadvf_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x28); + instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, VEX3::LeadingBytes::P_0F, false); + return instr; +} + +Instruction storevf_gpr64_plus_gpr64(Register value, Register addr1, Register addr2) { + ASSERT(value.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x29); + instr.set_vex_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + VEX3::LeadingBytes::P_0F, false); + return instr; +} + +Instruction storevf_gpr64_plus_gpr64_plus_s8(Register value, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(value.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x29); + instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, VEX3::LeadingBytes::P_0F, false); + return instr; +} + +Instruction storevf_gpr64_plus_gpr64_plus_s32(Register value, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(value.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x29); + instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, VEX3::LeadingBytes::P_0F, false); + return instr; +} + +Instruction loadvf_rip_plus_s32(Register dest, s64 offset) { + ASSERT(dest.is_xmm()); + ASSERT(offset >= INT32_MIN); + ASSERT(offset <= INT32_MAX); + InstructionX86 instr(0x28); + instr.set_vex_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset); + return instr; +} + +// TODO - rip 
relative loads and stores. + +Instruction blend_vf(Register dst, Register src1, Register src2, u8 mask) { + ASSERT(!(mask & 0b11110000)); + ASSERT(dst.is_xmm()); + ASSERT(src1.is_xmm()); + ASSERT(src2.is_xmm()); + InstructionX86 instr(0x0c); // VBLENDPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F_3A, src1.hw_id(), + false, VexPrefix::P_66); + instr.set(Imm(1, mask)); + return instr; +} + +Instruction shuffle_vf(Register dst, Register src, u8 dx, u8 dy, u8 dz, u8 dw) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + ASSERT(dx < 4); + ASSERT(dy < 4); + ASSERT(dz < 4); + ASSERT(dw < 4); + u8 imm = dx + (dy << 2) + (dz << 4) + (dw << 6); + return swizzle_vf(dst, src, imm); + + // SSE encoding version: + // InstructionX86 instr(0x0f); + // instr.set_op2(0xc6); + // instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + // instr.set(Imm(1, imm)); + // return instr; +} + +/* + Generic Swizzle (re-arrangment of packed FPs) operation, the control bytes are quite involved. + Here's a brief run-down: + - 8-bits / 4 groups of 2 bits + - Right-to-left, each group is used to determine which element in `src` gets copied into + `dst`'s element (W->X). + - GROUP OPTIONS + - 00b - Copy the least-significant element (X) + - 01b - Copy the second element (from the right) (Y) + - 10b - Copy the third element (from the right) (Z) + - 11b - Copy the most significant element (W) + Examples + ; xmm1 = (1.5, 2.5, 3.5, 4.5) (W,Z,Y,X in x86 land) + SHUFPS xmm1, xmm1, 0xff ; Copy the most significant element to all positions + > (1.5, 1.5, 1.5, 1.5) + SHUFPS xmm1, xmm1, 0x39 ; Rotate right + > (4.5, 1.5, 2.5, 3.5) + */ +Instruction swizzle_vf(Register dst, Register src, u8 controlBytes) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0xC6); // VSHUFPS + + // we use the AVX "VEX" encoding here. This is a three-operand form, + // but we just set both source + // to the same register. It seems like this is one byte longer but is faster maybe? 
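+  // (i.e. VSHUFPS dst, src, src, imm: every lane of dst is picked from src, so this is a
+  // pure swizzle of src regardless of dst's previous value.)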
+ instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, src.hw_id()); + instr.set(Imm(1, controlBytes)); + return instr; +} + +/* + Splats a single element in 'src' to all elements in 'dst' + For example (pseudocode): + xmm1 = (1.5, 2.5, 3.5, 4.5) + xmm2 = (1, 2, 3, 4) + splat_vf(xmm1, xmm2, XMM_ELEMENT::X); + xmm1 = (4, 4, 4, 4) + */ +Instruction splat_vf(Register dst, Register src, Register::VF_ELEMENT element) { + switch (element) { + case Register::VF_ELEMENT::X: // Least significant element + return swizzle_vf(dst, src, 0b00000000); + break; + case Register::VF_ELEMENT::Y: + return swizzle_vf(dst, src, 0b01010101); + break; + case Register::VF_ELEMENT::Z: + return swizzle_vf(dst, src, 0b10101010); + break; + case Register::VF_ELEMENT::W: // Most significant element + return swizzle_vf(dst, src, 0b11111111); + break; + default: + ASSERT(false); + return InstructionX86(0); + } +} + +Instruction xor_vf(Register dst, Register src1, Register src2) { + ASSERT(dst.is_xmm()); + ASSERT(src1.is_xmm()); + ASSERT(src2.is_xmm()); + InstructionX86 instr(0x57); // VXORPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); + return instr; +} + +Instruction sub_vf(Register dst, Register src1, Register src2) { + ASSERT(dst.is_xmm()); + ASSERT(src1.is_xmm()); + ASSERT(src2.is_xmm()); + InstructionX86 instr(0x5c); // VSUBPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); + return instr; +} + +Instruction add_vf(Register dst, Register src1, Register src2) { + ASSERT(dst.is_xmm()); + ASSERT(src1.is_xmm()); + ASSERT(src2.is_xmm()); + InstructionX86 instr(0x58); // VADDPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); + return instr; +} + +Instruction mul_vf(Register dst, Register src1, Register src2) { + ASSERT(dst.is_xmm()); + ASSERT(src1.is_xmm()); + ASSERT(src2.is_xmm()); + InstructionX86 instr(0x59); // VMULPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); + return instr; +} + +Instruction max_vf(Register dst, Register src1, Register src2) { + ASSERT(dst.is_xmm()); + ASSERT(src1.is_xmm()); + ASSERT(src2.is_xmm()); + InstructionX86 instr(0x5F); // VMAXPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); + return instr; +} + +Instruction min_vf(Register dst, Register src1, Register src2) { + ASSERT(dst.is_xmm()); + ASSERT(src1.is_xmm()); + ASSERT(src2.is_xmm()); + InstructionX86 instr(0x5D); // VMINPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); + return instr; +} + +Instruction div_vf(Register dst, Register src1, Register src2) { + ASSERT(dst.is_xmm()); + ASSERT(src1.is_xmm()); + ASSERT(src2.is_xmm()); + InstructionX86 instr(0x5E); // VDIVPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); + return instr; +} + +Instruction sqrt_vf(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0x51); // VSQRTPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0b0); + return instr; +} + +Instruction itof_vf(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0x5b); // VCVTDQ2PS + instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0); + return instr; +} + +Instruction ftoi_vf(Register dst, 
Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + // VEX.128.F3.0F.WIG 5B /r VCVTTPS2DQ xmm1, xmm2/m128 + InstructionX86 instr(0x5b); // VCVTTPS2DQ + instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false, + VexPrefix::P_F3); + return instr; +} + +Instruction pw_sra(Register dst, Register src, u8 imm) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + // VEX.128.66.0F.WIG 72 /4 ib VPSRAD xmm1, xmm2, imm8 + InstructionX86 instr(0x72); + instr.set_vex_modrm_and_rex(4, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction pw_srl(Register dst, Register src, u8 imm) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + // VEX.128.66.0F.WIG 72 /2 ib VPSRLD xmm1, xmm2, imm8 + InstructionX86 instr(0x72); + instr.set_vex_modrm_and_rex(2, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction ph_srl(Register dst, Register src, u8 imm) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + // VEX.128.66.0F.WIG 71 /2 ib VPSRLW + InstructionX86 instr(0x71); + instr.set_vex_modrm_and_rex(2, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction pw_sll(Register dst, Register src, u8 imm) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + // VEX.128.66.0F.WIG 72 /6 ib VPSLLD xmm1, xmm2, imm8 + InstructionX86 instr(0x72); + instr.set_vex_modrm_and_rex(6, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; +} +Instruction ph_sll(Register dst, Register src, u8 imm) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + // VEX.128.66.0F.WIG 71 /6 ib VPSLLW xmm1, xmm2, imm8 + InstructionX86 instr(0x71); + instr.set_vex_modrm_and_rex(6, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction parallel_add_byte(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG FC /r VPADDB xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0xFC); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction parallel_bitwise_or(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0xEB); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction parallel_bitwise_xor(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0xEF); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction parallel_bitwise_and(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0xDB); + instr.set_vex_modrm_and_rex(dst.hw_id(), 
src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +// Reminder - a word in MIPS = 32bits = a DWORD in x86 +// MIPS || x86 +// ----------------------- +// byte || byte +// halfword || word +// word || dword +// doubleword || quadword + +// -- Unpack High Data Instructions +Instruction pextub_swapped(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 68/r VPUNPCKHBW xmm1,xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x68); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction pextuh_swapped(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 69/r VPUNPCKHWD xmm1,xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x69); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction pextuw_swapped(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 6A/r VPUNPCKHDQ xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x6a); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +// -- Unpack Low Data Instructions +Instruction pextlb_swapped(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 60/r VPUNPCKLBW xmm1,xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x60); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction pextlh_swapped(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 61/r VPUNPCKLWD xmm1,xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x61); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction pextlw_swapped(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 62/r VPUNPCKLDQ xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x62); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +// Equal to than comparison as 16 bytes (8 bits) +Instruction parallel_compare_e_b(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 74 /r VPCMPEQB xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x74); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +// Equal to than comparison as 8 halfwords (16 bits) +Instruction parallel_compare_e_h(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 75 /r VPCMPEQW xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x75); + 
instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +// Equal to than comparison as 4 words (32 bits) +Instruction parallel_compare_e_w(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 76 /r VPCMPEQD xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x76); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +// Greater than comparison as 16 bytes (8 bits) +Instruction parallel_compare_gt_b(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 64 /r VPCMPGTB xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x64); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +// Greater than comparison as 8 halfwords (16 bits) +Instruction parallel_compare_gt_h(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 65 /r VPCMPGTW xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x65); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +// Greater than comparison as 4 words (32 bits) +Instruction parallel_compare_gt_w(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 66 /r VPCMPGTD xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x66); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction vpunpcklqdq(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 6C/r VPUNPCKLQDQ xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x6c); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction pcpyld_swapped(Register dst, Register src0, Register src1) { + return vpunpcklqdq(dst, src0, src1); +} + +Instruction pcpyud(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 6D/r VPUNPCKHQDQ xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x6d); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction vpsubd(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG FA /r VPSUBD xmm1, xmm2, xmm3/m128 + // reg, vec, r/m + InstructionX86 instr(0xfa); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction vpsrldq(Register dst, Register src, u8 imm) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + // VEX.128.66.0F.WIG 73 /3 ib VPSRLDQ xmm1, xmm2, imm8 + InstructionX86 instr(0x73); + instr.set_vex_modrm_and_rex(3, src.hw_id(), VEX3::LeadingBytes::P_0F, 
dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction vpslldq(Register dst, Register src, u8 imm) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + // VEX.128.66.0F.WIG 73 /7 ib VPSLLDQ xmm1, xmm2, imm8 + InstructionX86 instr(0x73); + instr.set_vex_modrm_and_rex(7, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction vpshuflw(Register dst, Register src, u8 imm) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + // VEX.128.F2.0F.WIG 70 /r ib VPSHUFLW xmm1, xmm2/m128, imm8 + InstructionX86 instr(0x70); + instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false, + VexPrefix::P_F2); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction vpshufhw(Register dst, Register src, u8 imm) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + // VEX.128.F3.0F.WIG 70 /r ib VPSHUFHW xmm1, xmm2/m128, imm8 + InstructionX86 instr(0x70); + instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false, + VexPrefix::P_F3); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction vpackuswb(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 67 /r VPACKUSWB xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + + InstructionX86 instr(0x67); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} +} // namespace IGen +} // namespace emitter + +#endif \ No newline at end of file diff --git a/goalc/emitter/IGenX86.cpp b/goalc/emitter/IGenX86.cpp new file mode 100644 index 00000000000..1cbcf46c0ab --- /dev/null +++ b/goalc/emitter/IGenX86.cpp @@ -0,0 +1,2748 @@ +#ifndef __aarch64__ + +#include "IGen.h" +#include "goalc/emitter/InstructionX86.h" + +namespace emitter { +namespace IGen { +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// MOVES +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +/*! + * Move data from src to dst. Moves all 64-bits of the GPR. + */ +Instruction mov_gpr64_gpr64(Register dst, Register src) { + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); + return instr; +} + +/*! + * Move a 64-bit constant into a register. + */ +Instruction mov_gpr64_u64(Register dst, uint64_t val) { + ASSERT(dst.is_gpr()); + bool rex_b = false; + auto dst_hw_id = dst.hw_id(); + if (dst_hw_id >= 8) { + dst_hw_id -= 8; + rex_b = true; + } + InstructionX86 instr(0xb8 + dst_hw_id); + instr.set(REX(true, false, false, rex_b)); + instr.set(Imm(8, val)); + return instr; +} + +/*! + * Move a 32-bit constant into a register. Zeros the upper 32 bits. + */ +Instruction mov_gpr64_u32(Register dst, uint64_t val) { + ASSERT(val <= UINT32_MAX); + ASSERT(dst.is_gpr()); + auto dst_hw_id = dst.hw_id(); + bool rex_b = false; + if (dst_hw_id >= 8) { + dst_hw_id -= 8; + rex_b = true; + } + + InstructionX86 instr(0xb8 + dst_hw_id); + if (rex_b) { + instr.set(REX(false, false, false, rex_b)); + } + instr.set(Imm(4, val)); + return instr; +} + +/*! + * Move a signed 32-bit constant into a register. Sign extends for the upper 32 bits. + * When possible prefer mov_gpr64_u32. (use this only for negative values...) + * This is always bigger than mov_gpr64_u32, but smaller than a mov_gpr_u64. 
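+ * (REX.W + C7 /0 imm32 is 7 bytes; B8+rd imm32 is 5-6 bytes and REX.W + B8+rd imm64 is 10.)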
+ */ +Instruction mov_gpr64_s32(Register dst, int64_t val) { + ASSERT(val >= INT32_MIN && val <= INT32_MAX); + ASSERT(dst.is_gpr()); + InstructionX86 instr(0xc7); + instr.set_modrm_and_rex(0, dst.hw_id(), 3, true); + instr.set(Imm(4, val)); + return instr; +} + +/*! + * Move 32-bits of xmm to 32 bits of gpr (no sign extension). + */ +Instruction movd_gpr32_xmm32(Register dst, Register src) { + ASSERT(dst.is_gpr()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0x66); + instr.set_op2(0x0f); + instr.set_op3(0x7e); + instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +/*! + * Move 32-bits of gpr to 32-bits of xmm (no sign extension) + */ +Instruction movd_xmm32_gpr32(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_gpr()); + InstructionX86 instr(0x66); + instr.set_op2(0x0f); + instr.set_op3(0x6e); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +/*! + * Move 64-bits of xmm to 64 bits of gpr (no sign extension). + */ +Instruction movq_gpr64_xmm64(Register dst, Register src) { + ASSERT(dst.is_gpr()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0x66); + instr.set_op2(0x0f); + instr.set_op3(0x7e); + instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); + instr.swap_op0_rex(); + return instr; +} + +/*! + * Move 64-bits of gpr to 64-bits of xmm (no sign extension) + */ +Instruction movq_xmm64_gpr64(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_gpr()); + InstructionX86 instr(0x66); + instr.set_op2(0x0f); + instr.set_op3(0x6e); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); + instr.swap_op0_rex(); + return instr; +} + +/*! + * Move 32-bits between xmm's + */ +Instruction mov_xmm32_xmm32(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +// todo - GPR64 -> XMM64 (zext) +// todo - XMM -> GPR64 + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// GOAL Loads and Stores +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * movsx dst, BYTE PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. 
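+ * (rsp can't be used because encoding 4 in the SIB index field means "no index".)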
+ */ +Instruction load8s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0xf); + instr.set_op2(0xbe); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, + false); + return instr; +} + +Instruction store8_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x88); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id()); + if (value.id() > RBX) { + instr.add_rex(); + } + return instr; +} + +Instruction load8s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xbe); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction store8_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x88); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + if (value.id() > RBX) { + instr.add_rex(); + } + return instr; +} + +Instruction load8s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xbe); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction store8_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x88); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + if (value.id() > RBX) { + instr.add_rex(); + } + return instr; +} + +/*! + * movzx dst, BYTE PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. 
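+ * (movzx writes the whole 64-bit dst, so the upper bits are cleared.)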
+ */ +Instruction load8u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0xf); + instr.set_op2(0xb6); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, + false); + return instr; +} + +Instruction load8u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xb6); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction load8u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xb6); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +/*! + * movsx dst, WORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. + */ +Instruction load16s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0xf); + instr.set_op2(0xbf); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, + false); + return instr; +} + +Instruction store16_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x66); + instr.set_op2(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id()); + instr.swap_op0_rex(); // why????? + return instr; +} + +Instruction store16_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x66); + instr.set_op2(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + instr.swap_op0_rex(); // why????? + return instr; +} + +Instruction store16_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x66); + instr.set_op2(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + instr.swap_op0_rex(); // why????? 
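+  // (presumably: REX has to sit immediately before the opcode, after the 0x66 legacy prefix,
+  // so the prefix byte is swapped in front of the REX byte.)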
+ return instr; +} + +Instruction load16s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xbf); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction load16s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xbf); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +/*! + * movzx dst, WORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. + */ +Instruction load16u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0xf); + instr.set_op2(0xb7); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, + false); + return instr; +} + +Instruction load16u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xb7); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction load16u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xb7); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +/*! + * movsxd dst, DWORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. 
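+ * (movsxd sign-extends the loaded 32 bits through bit 63 of dst.)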
+ */ +Instruction load32s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x63); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true); + return instr; +} + +Instruction store32_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id()); + return instr; +} + +Instruction load32s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x63); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction store32_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + return instr; +} + +Instruction load32s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x63); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction store32_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + return instr; +} + +/*! + * movzxd dst, DWORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. 
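+ * (there is no real movzx from a 32-bit source; a plain 32-bit mov already zero-extends,
+ * which is what this emits.)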
+ */ +Instruction load32u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id()); + return instr; +} + +Instruction load32u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + return instr; +} + +Instruction load32u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + return instr; +} + +/*! + * mov dst, QWORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. + */ +Instruction load64_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true); + return instr; +} + +Instruction store64_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id(), true); + return instr; +} + +Instruction load64_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction store64_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction load64_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= 
INT32_MAX); + InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction store64_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset) { + ASSERT(value.is_gpr()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; +} + +Instruction store_goal_vf(Register addr, Register value, Register off, s64 offset) { + if (offset == 0) { + return storevf_gpr64_plus_gpr64(value, addr, off); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return storevf_gpr64_plus_gpr64_plus_s8(value, addr, off, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return storevf_gpr64_plus_gpr64_plus_s32(value, addr, off, offset); + } + ASSERT(false); + return InstructionX86(0); +} + +Instruction store_goal_gpr(Register addr, Register value, Register off, int offset, int size) { + switch (size) { + case 1: + if (offset == 0) { + return store8_gpr64_gpr64_plus_gpr64(addr, off, value); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store8_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store8_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); + } else { + ASSERT(false); + } + case 2: + if (offset == 0) { + return store16_gpr64_gpr64_plus_gpr64(addr, off, value); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store16_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store16_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); + } else { + ASSERT(false); + } + case 4: + if (offset == 0) { + return store32_gpr64_gpr64_plus_gpr64(addr, off, value); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store32_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store32_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); + } else { + ASSERT(false); + } + case 8: + if (offset == 0) { + return store64_gpr64_gpr64_plus_gpr64(addr, off, value); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store64_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store64_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); + } else { + ASSERT(false); + } + default: + ASSERT(false); + return InstructionX86(0); + } +} + +Instruction load_goal_xmm128(Register dst, Register addr, Register off, int offset) { + if (offset == 0) { + return loadvf_gpr64_plus_gpr64(dst, addr, off); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return loadvf_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return loadvf_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + +/*! + * Load memory at addr + offset, where addr is a GOAL pointer and off is the offset register. + * This will pick the appropriate fancy addressing mode instruction. 
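+ * (offset 0 uses the plain [addr + off] form, small offsets the disp8 form, larger ones disp32.)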
+ */ +Instruction load_goal_gpr(Register dst, + Register addr, + Register off, + int offset, + int size, + bool sign_extend) { + switch (size) { + case 1: + if (offset == 0) { + if (sign_extend) { + return load8s_gpr64_gpr64_plus_gpr64(dst, addr, off); + } else { + return load8u_gpr64_gpr64_plus_gpr64(dst, addr, off); + } + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + if (sign_extend) { + return load8s_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } else { + return load8u_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + if (sign_extend) { + return load8s_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } else { + return load8u_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } + } else { + ASSERT(false); + } + case 2: + if (offset == 0) { + if (sign_extend) { + return load16s_gpr64_gpr64_plus_gpr64(dst, addr, off); + } else { + return load16u_gpr64_gpr64_plus_gpr64(dst, addr, off); + } + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + if (sign_extend) { + return load16s_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } else { + return load16u_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + if (sign_extend) { + return load16s_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } else { + return load16u_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } + } else { + ASSERT(false); + } + case 4: + if (offset == 0) { + if (sign_extend) { + return load32s_gpr64_gpr64_plus_gpr64(dst, addr, off); + } else { + return load32u_gpr64_gpr64_plus_gpr64(dst, addr, off); + } + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + if (sign_extend) { + return load32s_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } else { + return load32u_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + if (sign_extend) { + return load32s_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } else { + return load32u_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } + } else { + ASSERT(false); + } + case 8: + if (offset == 0) { + return load64_gpr64_gpr64_plus_gpr64(dst, addr, off); + + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return load64_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return load64_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + + } else { + ASSERT(false); + } + default: + ASSERT(false); + return InstructionX86(0); + } +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// LOADS n' STORES - XMM32 +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +Instruction store32_xmm32_gpr64_plus_gpr64(Register addr1, Register addr2, Register xmm_value) { + ASSERT(xmm_value.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x11); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(xmm_value.hw_id(), addr1.hw_id(), addr2.hw_id()); + + instr.swap_op0_rex(); + return instr; +} + +Instruction load32_xmm32_gpr64_plus_gpr64(Register xmm_dest, Register addr1, Register addr2) { + ASSERT(xmm_dest.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(xmm_dest.hw_id(), addr1.hw_id(), addr2.hw_id()); + + 
instr.swap_op0_rex(); + return instr; +} + +Instruction store32_xmm32_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register xmm_value, + s64 offset) { + ASSERT(xmm_value.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x11); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(xmm_value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + + instr.swap_op0_rex(); + return instr; +} + +Instruction load32_xmm32_gpr64_plus_gpr64_plus_s8(Register xmm_dest, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(xmm_dest.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(xmm_dest.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + + instr.swap_op0_rex(); + return instr; +} + +Instruction store32_xmm32_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register xmm_value, + s64 offset) { + ASSERT(xmm_value.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x11); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(xmm_value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + + instr.swap_op0_rex(); + return instr; +} + +Instruction lea_reg_plus_off32(Register dest, Register base, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(base.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x8d); + instr.set_modrm_rex_sib_for_reg_reg_disp(dest.hw_id(), 2, base.hw_id(), true); + instr.set(Imm(4, offset)); + return instr; +} + +Instruction lea_reg_plus_off8(Register dest, Register base, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(base.is_gpr()); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x8d); + instr.set_modrm_rex_sib_for_reg_reg_disp(dest.hw_id(), 1, base.hw_id(), true); + instr.set(Imm(1, offset)); + return instr; +} + +Instruction lea_reg_plus_off(Register dest, Register base, s64 offset) { + if (offset >= INT8_MIN && offset <= INT8_MAX) { + return lea_reg_plus_off8(dest, base, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return lea_reg_plus_off32(dest, base, offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + +Instruction store32_xmm32_gpr64_plus_s32(Register base, Register xmm_value, s64 offset) { + ASSERT(xmm_value.is_xmm()); + ASSERT(base.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x11); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 2, base.hw_id(), false); + instr.set(Imm(4, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction store32_xmm32_gpr64_plus_s8(Register base, Register xmm_value, s64 offset) { + ASSERT(xmm_value.is_xmm()); + ASSERT(base.is_gpr()); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x11); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 1, base.hw_id(), false); + instr.set(Imm(1, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction load32_xmm32_gpr64_plus_gpr64_plus_s32(Register xmm_dest, + Register addr1, + Register addr2, + s64 
offset) { + ASSERT(xmm_dest.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(xmm_dest.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + + instr.swap_op0_rex(); + return instr; +} + +Instruction load32_xmm32_gpr64_plus_s32(Register xmm_dest, Register base, s64 offset) { + ASSERT(xmm_dest.is_xmm()); + ASSERT(base.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 2, base.hw_id(), false); + instr.set(Imm(4, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction load32_xmm32_gpr64_plus_s8(Register xmm_dest, Register base, s64 offset) { + ASSERT(xmm_dest.is_xmm()); + ASSERT(base.is_gpr()); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 1, base.hw_id(), false); + instr.set(Imm(1, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction load_goal_xmm32(Register xmm_dest, Register addr, Register off, s64 offset) { + if (offset == 0) { + return load32_xmm32_gpr64_plus_gpr64(xmm_dest, addr, off); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return load32_xmm32_gpr64_plus_gpr64_plus_s8(xmm_dest, addr, off, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return load32_xmm32_gpr64_plus_gpr64_plus_s32(xmm_dest, addr, off, offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + +Instruction store_goal_xmm32(Register addr, Register xmm_value, Register off, s64 offset) { + if (offset == 0) { + return store32_xmm32_gpr64_plus_gpr64(addr, off, xmm_value); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store32_xmm32_gpr64_plus_gpr64_plus_s8(addr, off, xmm_value, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store32_xmm32_gpr64_plus_gpr64_plus_s32(addr, off, xmm_value, offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + +Instruction store_reg_offset_xmm32(Register base, Register xmm_value, s64 offset) { + ASSERT(base.is_gpr()); + ASSERT(xmm_value.is_xmm()); + if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store32_xmm32_gpr64_plus_s8(base, xmm_value, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store32_xmm32_gpr64_plus_s32(base, xmm_value, offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + +Instruction load_reg_offset_xmm32(Register xmm_dest, Register base, s64 offset) { + ASSERT(base.is_gpr()); + ASSERT(xmm_dest.is_xmm()); + if (offset >= INT8_MIN && offset <= INT8_MAX) { + return load32_xmm32_gpr64_plus_s8(xmm_dest, base, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return load32_xmm32_gpr64_plus_s32(xmm_dest, base, offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// LOADS n' STORES - XMM128 +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! 
+ * Store a 128-bit xmm into an address stored in a register, no offset + */ +Instruction store128_gpr64_xmm128(Register gpr_addr, Register xmm_value) { + ASSERT(gpr_addr.is_gpr()); + ASSERT(xmm_value.is_xmm()); + InstructionX86 instr(0x66); + // InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x7f); + instr.set_modrm_and_rex_for_reg_addr(xmm_value.hw_id(), gpr_addr.hw_id(), false); + instr.swap_op0_rex(); + return instr; +} + +Instruction store128_gpr64_xmm128_s32(Register gpr_addr, Register xmm_value, s64 offset) { + ASSERT(gpr_addr.is_gpr()); + ASSERT(xmm_value.is_xmm()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x66); + // InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x7f); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 2, gpr_addr.hw_id(), false); + instr.set(Imm(4, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction store128_gpr64_xmm128_s8(Register gpr_addr, Register xmm_value, s64 offset) { + ASSERT(gpr_addr.is_gpr()); + ASSERT(xmm_value.is_xmm()); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x66); + // InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x7f); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 1, gpr_addr.hw_id(), false); + instr.set(Imm(1, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction load128_xmm128_gpr64(Register xmm_dest, Register gpr_addr) { + ASSERT(gpr_addr.is_gpr()); + ASSERT(xmm_dest.is_xmm()); + InstructionX86 instr(0x66); + // InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x6f); + instr.set_modrm_and_rex_for_reg_addr(xmm_dest.hw_id(), gpr_addr.hw_id(), false); + instr.swap_op0_rex(); + return instr; +} + +Instruction load128_xmm128_gpr64_s32(Register xmm_dest, Register gpr_addr, s64 offset) { + ASSERT(gpr_addr.is_gpr()); + ASSERT(xmm_dest.is_xmm()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x66); + // InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x6f); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 2, gpr_addr.hw_id(), false); + instr.set(Imm(4, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction load128_xmm128_gpr64_s8(Register xmm_dest, Register gpr_addr, s64 offset) { + ASSERT(gpr_addr.is_gpr()); + ASSERT(xmm_dest.is_xmm()); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x66); + // InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x6f); + instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 1, gpr_addr.hw_id(), false); + instr.set(Imm(1, offset)); + instr.swap_op0_rex(); + return instr; +} + +Instruction load128_xmm128_reg_offset(Register xmm_dest, Register base, s64 offset) { + if (offset == 0) { + return load128_xmm128_gpr64(xmm_dest, base); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return load128_xmm128_gpr64_s8(xmm_dest, base, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return load128_xmm128_gpr64_s32(xmm_dest, base, offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + +Instruction store128_xmm128_reg_offset(Register base, Register xmm_val, s64 offset) { + if (offset == 0) { + return store128_gpr64_xmm128(base, xmm_val); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store128_gpr64_xmm128_s8(base, xmm_val, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store128_gpr64_xmm128_s32(base, xmm_val, 
offset); + } else { + ASSERT(false); + return InstructionX86(0); + } +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// RIP loads and stores +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +Instruction load64_rip_s32(Register dest, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; +} + +Instruction load32s_rip_s32(Register dest, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x63); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; +} + +Instruction load32u_rip_s32(Register dest, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x8b); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, false); + return instr; +} + +Instruction load16u_rip_s32(Register dest, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xb7); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; +} + +Instruction load16s_rip_s32(Register dest, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xbf); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; +} + +Instruction load8u_rip_s32(Register dest, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xb6); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; +} + +Instruction load8s_rip_s32(Register dest, s64 offset) { + ASSERT(dest.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0xf); + instr.set_op2(0xbe); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; +} + +Instruction static_load(Register dest, s64 offset, int size, bool sign_extend) { + switch (size) { + case 1: + if (sign_extend) { + return load8s_rip_s32(dest, offset); + } else { + return load8u_rip_s32(dest, offset); + } + break; + case 2: + if (sign_extend) { + return load16s_rip_s32(dest, offset); + } else { + return load16u_rip_s32(dest, offset); + } + break; + case 4: + if (sign_extend) { + return load32s_rip_s32(dest, offset); + } else { + return load32u_rip_s32(dest, offset); + } + break; + case 8: + return load64_rip_s32(dest, offset); + default: + ASSERT(false); + } +} + +Instruction store64_rip_s32(Register src, s64 offset) { + ASSERT(src.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, true); + return instr; +} + +Instruction store32_rip_s32(Register src, s64 offset) { + ASSERT(src.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x89); + instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, false); + return instr; +} + +Instruction store16_rip_s32(Register src, s64 offset) { + ASSERT(src.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x66); + instr.set_op2(0x89); + 
instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, false); + instr.swap_op0_rex(); + return instr; +} + +Instruction store8_rip_s32(Register src, s64 offset) { + ASSERT(src.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x88); + instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, false); + if (src.id() > RBX) { + instr.add_rex(); + } + return instr; +} + +Instruction static_store(Register value, s64 offset, int size) { + switch (size) { + case 1: + return store8_rip_s32(value, offset); + case 2: + return store16_rip_s32(value, offset); + case 4: + return store32_rip_s32(value, offset); + case 8: + return store64_rip_s32(value, offset); + default: + ASSERT(false); + } +} + +Instruction static_addr(Register dst, s64 offset) { + ASSERT(dst.is_gpr()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x8d); + instr.set_modrm_and_rex_for_rip_plus_s32(dst.hw_id(), offset, true); + return instr; +} + +Instruction static_load_xmm32(Register xmm_dest, s64 offset) { + ASSERT(xmm_dest.is_xmm()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_and_rex_for_rip_plus_s32(xmm_dest.hw_id(), offset, false); + + instr.swap_op0_rex(); + return instr; +} + +Instruction static_store_xmm32(Register xmm_value, s64 offset) { + ASSERT(xmm_value.is_xmm()); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x11); + instr.set_modrm_and_rex_for_rip_plus_s32(xmm_value.hw_id(), offset, false); + + instr.swap_op0_rex(); + return instr; +} + +// TODO, special load/stores of 128 bit values. + +// TODO, consider specialized stack loads and stores? +Instruction load64_gpr64_plus_s32(Register dst_reg, int32_t offset, Register src_reg) { + ASSERT(dst_reg.is_gpr()); + ASSERT(src_reg.is_gpr()); + InstructionX86 instr(0x8b); + instr.set_modrm_rex_sib_for_reg_reg_disp(dst_reg.hw_id(), 2, src_reg.hw_id(), true); + instr.set_disp(Imm(4, offset)); + return instr; +} + +/*! + * Store 64-bits from gpr into memory located at 64-bit reg + 32-bit signed offset. + */ +Instruction store64_gpr64_plus_s32(Register addr, int32_t offset, Register value) { + ASSERT(addr.is_gpr()); + ASSERT(value.is_gpr()); + InstructionX86 instr(0x89); + instr.set_modrm_rex_sib_for_reg_reg_disp(value.hw_id(), 2, addr.hw_id(), true); + instr.set_disp(Imm(4, offset)); + return instr; +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// FUNCTION STUFF +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +/*! + * Function return. Pops the 64-bit return address (real) off the stack and jumps to it. + */ +Instruction ret() { + return InstructionX86(0xc3); +} + +/*! + * Instruction to push gpr (64-bits) onto the stack + */ +Instruction push_gpr64(Register reg) { + ASSERT(reg.is_gpr()); + if (reg.hw_id() >= 8) { + auto i = InstructionX86(0x50 + reg.hw_id() - 8); + i.set(REX(false, false, false, true)); + return i; + } + return InstructionX86(0x50 + reg.hw_id()); +} + +/*! + * Instruction to pop 64 bit gpr from the stack + */ +Instruction pop_gpr64(Register reg) { + ASSERT(reg.is_gpr()); + if (reg.hw_id() >= 8) { + auto i = InstructionX86(0x58 + reg.hw_id() - 8); + i.set(REX(false, false, false, true)); + return i; + } + return InstructionX86(0x58 + reg.hw_id()); +} + +/*! 
+ * Call a function stored in a 64-bit gpr + */ +Instruction call_r64(Register reg_) { + ASSERT(reg_.is_gpr()); + auto reg = reg_.hw_id(); + InstructionX86 instr(0xff); + if (reg >= 8) { + instr.set(REX(false, false, false, true)); + reg -= 8; + } + ASSERT(reg < 8); + ModRM mrm; + mrm.rm = reg; + mrm.reg_op = 2; + mrm.mod = 3; + instr.set(mrm); + return instr; +} + +/*! + * Jump to an x86-64 address stored in a 64-bit gpr. + */ +Instruction jmp_r64(Register reg_) { + ASSERT(reg_.is_gpr()); + auto reg = reg_.hw_id(); + InstructionX86 instr(0xff); + if (reg >= 8) { + instr.set(REX(false, false, false, true)); + reg -= 8; + } + ASSERT(reg < 8); + ModRM mrm; + mrm.rm = reg; + mrm.reg_op = 4; + mrm.mod = 3; + instr.set(mrm); + return instr; +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// INTEGER MATH +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +Instruction sub_gpr64_imm8s(Register reg, int64_t imm) { + ASSERT(reg.is_gpr()); + ASSERT(imm >= INT8_MIN && imm <= INT8_MAX); + // SUB r/m64, imm8 : REX.W + 83 /5 ib + InstructionX86 instr(0x83); + instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction sub_gpr64_imm32s(Register reg, int64_t imm) { + ASSERT(reg.is_gpr()); + ASSERT(imm >= INT32_MIN && imm <= INT32_MAX); + InstructionX86 instr(0x81); + instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); + instr.set(Imm(4, imm)); + return instr; +} + +Instruction add_gpr64_imm8s(Register reg, int64_t v) { + ASSERT(v >= INT8_MIN && v <= INT8_MAX); + InstructionX86 instr(0x83); + instr.set_modrm_and_rex(0, reg.hw_id(), 3, true); + instr.set(Imm(1, v)); + return instr; +} + +Instruction add_gpr64_imm32s(Register reg, int64_t v) { + ASSERT(v >= INT32_MIN && v <= INT32_MAX); + InstructionX86 instr(0x81); + instr.set_modrm_and_rex(0, reg.hw_id(), 3, true); + instr.set(Imm(4, v)); + return instr; +} + +Instruction add_gpr64_imm(Register reg, int64_t imm) { + if (imm >= INT8_MIN && imm <= INT8_MAX) { + return add_gpr64_imm8s(reg, imm); + } else if (imm >= INT32_MIN && imm <= INT32_MAX) { + return add_gpr64_imm32s(reg, imm); + } else { + throw std::runtime_error("Invalid `add` with reg[" + reg.print() + "]/imm[" + + std::to_string(imm) + "]"); + } +} + +Instruction sub_gpr64_imm(Register reg, int64_t imm) { + if (imm >= INT8_MIN && imm <= INT8_MAX) { + return sub_gpr64_imm8s(reg, imm); + } else if (imm >= INT32_MIN && imm <= INT32_MAX) { + return sub_gpr64_imm32s(reg, imm); + } else { + throw std::runtime_error("Invalid `sub` with reg[" + reg.print() + "]/imm[" + + std::to_string(imm) + "]"); + } +} + +Instruction add_gpr64_gpr64(Register dst, Register src) { + InstructionX86 instr(0x01); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); + return instr; +} + +Instruction sub_gpr64_gpr64(Register dst, Register src) { + InstructionX86 instr(0x29); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); + return instr; +} + +/*! + * Multiply gprs (32-bit, signed). + * (Note - probably worth doing imul on gpr64's to implement the EE's unsigned multiply) + */ +Instruction imul_gpr32_gpr32(Register dst, Register src) { + InstructionX86 instr(0xf); + instr.set_op2(0xaf); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + return instr; +} + +/*! + * Multiply gprs (64-bit, signed). + * DANGER - this treats all operands as 64-bit. This is not like the EE. 
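+ * (For reference: the EE's mult/multu take 32-bit operands and write a 64-bit result to HI/LO,
+ * so a plain 64-bit imul does not reproduce that behavior by itself.)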
+ */ +Instruction imul_gpr64_gpr64(Register dst, Register src) { + InstructionX86 instr(0xf); + instr.set_op2(0xaf); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); + return instr; +} + +/*! + * Divide (idiv, 32 bit) + */ +Instruction idiv_gpr32(Register reg) { + InstructionX86 instr(0xf7); + ASSERT(reg.is_gpr()); + instr.set_modrm_and_rex(7, reg.hw_id(), 3, false); + return instr; +} + +Instruction unsigned_div_gpr32(Register reg) { + InstructionX86 instr(0xf7); + ASSERT(reg.is_gpr()); + instr.set_modrm_and_rex(6, reg.hw_id(), 3, false); + return instr; +} + +/*! + * Convert doubleword to quadword for division. + */ +Instruction cdq() { + InstructionX86 instr(0x99); + return instr; +} + +/*! + * Move from gpr32 to gpr64, with sign extension. + * Needed for multiplication/divsion madness. + */ +Instruction movsx_r64_r32(Register dst, Register src) { + InstructionX86 instr(0x63); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); + return instr; +} + +/*! + * Compare gpr64. This sets the flags for the jumps. + * todo UNTESTED + */ +Instruction cmp_gpr64_gpr64(Register a, Register b) { + InstructionX86 instr(0x3b); + ASSERT(a.is_gpr()); + ASSERT(b.is_gpr()); + instr.set_modrm_and_rex(a.hw_id(), b.hw_id(), 3, true); + return instr; +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// BIT STUFF +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * Or of two gprs + */ +Instruction or_gpr64_gpr64(Register dst, Register src) { + InstructionX86 instr(0x0b); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); + return instr; +} + +/*! + * And of two gprs + */ +Instruction and_gpr64_gpr64(Register dst, Register src) { + InstructionX86 instr(0x23); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); + return instr; +} + +/*! + * Xor of two gprs + */ +Instruction xor_gpr64_gpr64(Register dst, Register src) { + InstructionX86 instr(0x33); + ASSERT(dst.is_gpr()); + ASSERT(src.is_gpr()); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); + return instr; +} + +/*! + * Bitwise not a gpr + */ +Instruction not_gpr64(Register reg) { + InstructionX86 instr(0xf7); + ASSERT(reg.is_gpr()); + instr.set_modrm_and_rex(2, reg.hw_id(), 3, true); + return instr; +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// SHIFTS +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * Shift 64-bit gpr left by CL register + */ +Instruction shl_gpr64_cl(Register reg) { + ASSERT(reg.is_gpr()); + InstructionX86 instr(0xd3); + instr.set_modrm_and_rex(4, reg.hw_id(), 3, true); + return instr; +} + +/*! + * Shift 64-bit gpr right (logical) by CL register + */ +Instruction shr_gpr64_cl(Register reg) { + ASSERT(reg.is_gpr()); + InstructionX86 instr(0xd3); + instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); + return instr; +} + +/*! + * Shift 64-bit gpr right (arithmetic) by CL register + */ +Instruction sar_gpr64_cl(Register reg) { + ASSERT(reg.is_gpr()); + InstructionX86 instr(0xd3); + instr.set_modrm_and_rex(7, reg.hw_id(), 3, true); + return instr; +} + +/*! + * Shift 64-ptr left (logical) by the constant shift amount "sa". + */ +Instruction shl_gpr64_u8(Register reg, uint8_t sa) { + ASSERT(reg.is_gpr()); + InstructionX86 instr(0xc1); + instr.set_modrm_and_rex(4, reg.hw_id(), 3, true); + instr.set(Imm(1, sa)); + return instr; +} + +/*! + * Shift 64-ptr right (logical) by the constant shift amount "sa". 
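+ * (Encoded as REX.W + C1 /5 ib.)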
+ */ +Instruction shr_gpr64_u8(Register reg, uint8_t sa) { + ASSERT(reg.is_gpr()); + InstructionX86 instr(0xc1); + instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); + instr.set(Imm(1, sa)); + return instr; +} + +/*! + * Shift 64-ptr right (arithmetic) by the constant shift amount "sa". + */ +Instruction sar_gpr64_u8(Register reg, uint8_t sa) { + ASSERT(reg.is_gpr()); + InstructionX86 instr(0xc1); + instr.set_modrm_and_rex(7, reg.hw_id(), 3, true); + instr.set(Imm(1, sa)); + return instr; +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// CONTROL FLOW +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * Jump, 32-bit constant offset. The offset is by default 0 and must be patched later. + */ +Instruction jmp_32() { + InstructionX86 instr(0xe9); + instr.set(Imm(4, 0)); + return instr; +} + +/*! + * Jump if equal. + */ +Instruction je_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x84); + instr.set(Imm(4, 0)); + return instr; +} + +/*! + * Jump not equal. + */ +Instruction jne_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x85); + instr.set(Imm(4, 0)); + return instr; +} + +/*! + * Jump less than or equal. + */ +Instruction jle_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x8e); + instr.set(Imm(4, 0)); + return instr; +} + +/*! + * Jump greater than or equal. + */ +Instruction jge_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x8d); + instr.set(Imm(4, 0)); + return instr; +} + +/*! + * Jump less than + */ +Instruction jl_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x8c); + instr.set(Imm(4, 0)); + return instr; +} + +/*! + * Jump greater than + */ +Instruction jg_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x8f); + instr.set(Imm(4, 0)); + return instr; +} + +/*! + * Jump below or equal + */ +Instruction jbe_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x86); + instr.set(Imm(4, 0)); + return instr; +} + +/*! + * Jump above or equal + */ +Instruction jae_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x83); + instr.set(Imm(4, 0)); + return instr; +} + +/*! + * Jump below + */ +Instruction jb_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x82); + instr.set(Imm(4, 0)); + return instr; +} + +/*! + * Jump above + */ +Instruction ja_32() { + InstructionX86 instr(0x0f); + instr.set_op2(0x87); + instr.set(Imm(4, 0)); + return instr; +} + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// FLOAT MATH +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * Compare two floats and set flag register for jump (ucomiss) + */ +Instruction cmp_flt_flt(Register a, Register b) { + ASSERT(a.is_xmm()); + ASSERT(b.is_xmm()); + InstructionX86 instr(0x0f); + instr.set_op2(0x2e); + instr.set_modrm_and_rex(a.hw_id(), b.hw_id(), 3, false); + return instr; +} + +Instruction sqrts_xmm(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x51); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +/*! + * Multiply two floats in xmm's + */ +Instruction mulss_xmm_xmm(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x59); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +/*! 
+ * Divide two floats in xmm's + */ +Instruction divss_xmm_xmm(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x5e); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +/*! + * Subtract two floats in xmm's + */ +Instruction subss_xmm_xmm(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x5c); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +/*! + * Add two floats in xmm's + */ +Instruction addss_xmm_xmm(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x58); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +/*! + * Floating point minimum. + */ +Instruction minss_xmm_xmm(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x5d); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +/*! + * Floating point maximum. + */ +Instruction maxss_xmm_xmm(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x5f); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +/*! + * Convert GPR int32 to XMM float (single precision) + */ +Instruction int32_to_float(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_gpr()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x2a); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +/*! + * Convert XMM float to GPR int32(single precision) (truncate) + */ +Instruction float_to_int32(Register dst, Register src) { + ASSERT(dst.is_gpr()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x2c); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; +} + +Instruction nop() { + // NOP + InstructionX86 instr(0x90); + return instr; +} + +// TODO - rsqrt / abs / sqrt + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// UTILITIES +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +/*! + * A "null" instruction. This instruction does not generate any bytes + * but can be referred to by a label. Useful to insert in place of a real instruction + * if the real instruction has been optimized out. + */ +Instruction null() { + InstructionX86 i(0); + i.m_flags |= InstructionX86::kIsNull; + return i; +} + +///////////////////////////// +// AVX (VF - Vector Float) // +///////////////////////////// + +Instruction nop_vf() { + InstructionX86 instr(0xd9); // FNOP + instr.set_op2(0xd0); + return instr; +} + +Instruction wait_vf() { + InstructionX86 instr(0x9B); // FWAIT / WAIT + return instr; +} + +Instruction mov_vf_vf(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + + if (src.hw_id() >= 8 && dst.hw_id() < 8) { + // in this case, we can use the 0x29 encoding, which swaps src and dst, in order to use the + // 2 byte VEX prefix, where the 0x28 encoding would require an extra byte. 
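+    // (0x28 puts the destination in ModRM.reg while 0x29 puts the source there; the 2-byte VEX
+    // prefix can only extend the reg field, hence the swap when only src is xmm8-xmm15.)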
+ // compilers/assemblers seem to prefer 0x28, unless 0x29 would save you a byte. + InstructionX86 instr(0x29); + instr.set_vex_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, VEX3::LeadingBytes::P_0F, false); + return instr; + } else { + InstructionX86 instr(0x28); + instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, VEX3::LeadingBytes::P_0F, false); + return instr; + } +} + +Instruction loadvf_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + ASSERT(dst.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x28); + instr.set_vex_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + VEX3::LeadingBytes::P_0F, false); + return instr; +} + +Instruction loadvf_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x28); + instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, VEX3::LeadingBytes::P_0F, false); + return instr; +} + +Instruction loadvf_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(dst.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x28); + instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, VEX3::LeadingBytes::P_0F, false); + return instr; +} + +Instruction storevf_gpr64_plus_gpr64(Register value, Register addr1, Register addr2) { + ASSERT(value.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + InstructionX86 instr(0x29); + instr.set_vex_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + VEX3::LeadingBytes::P_0F, false); + return instr; +} + +Instruction storevf_gpr64_plus_gpr64_plus_s8(Register value, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(value.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); + InstructionX86 instr(0x29); + instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, VEX3::LeadingBytes::P_0F, false); + return instr; +} + +Instruction storevf_gpr64_plus_gpr64_plus_s32(Register value, + Register addr1, + Register addr2, + s64 offset) { + ASSERT(value.is_xmm()); + ASSERT(addr1.is_gpr()); + ASSERT(addr2.is_gpr()); + ASSERT(addr1 != addr2); + ASSERT(addr1 != RSP); + ASSERT(addr2 != RSP); + ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); + InstructionX86 instr(0x29); + instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, VEX3::LeadingBytes::P_0F, false); + return instr; +} + +Instruction loadvf_rip_plus_s32(Register dest, s64 offset) { + ASSERT(dest.is_xmm()); + ASSERT(offset >= INT32_MIN); + ASSERT(offset <= INT32_MAX); + InstructionX86 instr(0x28); + instr.set_vex_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset); + return instr; +} + +// TODO - rip 
relative loads and stores. + +Instruction blend_vf(Register dst, Register src1, Register src2, u8 mask) { + ASSERT(!(mask & 0b11110000)); + ASSERT(dst.is_xmm()); + ASSERT(src1.is_xmm()); + ASSERT(src2.is_xmm()); + InstructionX86 instr(0x0c); // VBLENDPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F_3A, src1.hw_id(), + false, VexPrefix::P_66); + instr.set(Imm(1, mask)); + return instr; +} + +Instruction shuffle_vf(Register dst, Register src, u8 dx, u8 dy, u8 dz, u8 dw) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + ASSERT(dx < 4); + ASSERT(dy < 4); + ASSERT(dz < 4); + ASSERT(dw < 4); + u8 imm = dx + (dy << 2) + (dz << 4) + (dw << 6); + return swizzle_vf(dst, src, imm); + + // SSE encoding version: + // InstructionX86 instr(0x0f); + // instr.set_op2(0xc6); + // instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + // instr.set(Imm(1, imm)); + // return instr; +} + +/* + Generic Swizzle (re-arrangment of packed FPs) operation, the control bytes are quite involved. + Here's a brief run-down: + - 8-bits / 4 groups of 2 bits + - Right-to-left, each group is used to determine which element in `src` gets copied into + `dst`'s element (W->X). + - GROUP OPTIONS + - 00b - Copy the least-significant element (X) + - 01b - Copy the second element (from the right) (Y) + - 10b - Copy the third element (from the right) (Z) + - 11b - Copy the most significant element (W) + Examples + ; xmm1 = (1.5, 2.5, 3.5, 4.5) (W,Z,Y,X in x86 land) + SHUFPS xmm1, xmm1, 0xff ; Copy the most significant element to all positions + > (1.5, 1.5, 1.5, 1.5) + SHUFPS xmm1, xmm1, 0x39 ; Rotate right + > (4.5, 1.5, 2.5, 3.5) + */ +Instruction swizzle_vf(Register dst, Register src, u8 controlBytes) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0xC6); // VSHUFPS + + // we use the AVX "VEX" encoding here. This is a three-operand form, + // but we just set both source + // to the same register. It seems like this is one byte longer but is faster maybe? 
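+  // VEX.128.0F.WIG C6 /r ib VSHUFPS xmm1, xmm2, xmm3/m128, imm8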
+ instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, src.hw_id()); + instr.set(Imm(1, controlBytes)); + return instr; +} + +/* + Splats a single element in 'src' to all elements in 'dst' + For example (pseudocode): + xmm1 = (1.5, 2.5, 3.5, 4.5) + xmm2 = (1, 2, 3, 4) + splat_vf(xmm1, xmm2, XMM_ELEMENT::X); + xmm1 = (4, 4, 4, 4) + */ +Instruction splat_vf(Register dst, Register src, Register::VF_ELEMENT element) { + switch (element) { + case Register::VF_ELEMENT::X: // Least significant element + return swizzle_vf(dst, src, 0b00000000); + break; + case Register::VF_ELEMENT::Y: + return swizzle_vf(dst, src, 0b01010101); + break; + case Register::VF_ELEMENT::Z: + return swizzle_vf(dst, src, 0b10101010); + break; + case Register::VF_ELEMENT::W: // Most significant element + return swizzle_vf(dst, src, 0b11111111); + break; + default: + ASSERT(false); + return InstructionX86(0); + } +} + +Instruction xor_vf(Register dst, Register src1, Register src2) { + ASSERT(dst.is_xmm()); + ASSERT(src1.is_xmm()); + ASSERT(src2.is_xmm()); + InstructionX86 instr(0x57); // VXORPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); + return instr; +} + +Instruction sub_vf(Register dst, Register src1, Register src2) { + ASSERT(dst.is_xmm()); + ASSERT(src1.is_xmm()); + ASSERT(src2.is_xmm()); + InstructionX86 instr(0x5c); // VSUBPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); + return instr; +} + +Instruction add_vf(Register dst, Register src1, Register src2) { + ASSERT(dst.is_xmm()); + ASSERT(src1.is_xmm()); + ASSERT(src2.is_xmm()); + InstructionX86 instr(0x58); // VADDPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); + return instr; +} + +Instruction mul_vf(Register dst, Register src1, Register src2) { + ASSERT(dst.is_xmm()); + ASSERT(src1.is_xmm()); + ASSERT(src2.is_xmm()); + InstructionX86 instr(0x59); // VMULPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); + return instr; +} + +Instruction max_vf(Register dst, Register src1, Register src2) { + ASSERT(dst.is_xmm()); + ASSERT(src1.is_xmm()); + ASSERT(src2.is_xmm()); + InstructionX86 instr(0x5F); // VMAXPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); + return instr; +} + +Instruction min_vf(Register dst, Register src1, Register src2) { + ASSERT(dst.is_xmm()); + ASSERT(src1.is_xmm()); + ASSERT(src2.is_xmm()); + InstructionX86 instr(0x5D); // VMINPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); + return instr; +} + +Instruction div_vf(Register dst, Register src1, Register src2) { + ASSERT(dst.is_xmm()); + ASSERT(src1.is_xmm()); + ASSERT(src2.is_xmm()); + InstructionX86 instr(0x5E); // VDIVPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); + return instr; +} + +Instruction sqrt_vf(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0x51); // VSQRTPS + instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0b0); + return instr; +} + +Instruction itof_vf(Register dst, Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + InstructionX86 instr(0x5b); // VCVTDQ2PS + instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0); + return instr; +} + +Instruction ftoi_vf(Register dst, 
Register src) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + // VEX.128.F3.0F.WIG 5B /r VCVTTPS2DQ xmm1, xmm2/m128 + InstructionX86 instr(0x5b); // VCVTTPS2DQ + instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false, + VexPrefix::P_F3); + return instr; +} + +Instruction pw_sra(Register dst, Register src, u8 imm) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + // VEX.128.66.0F.WIG 72 /4 ib VPSRAD xmm1, xmm2, imm8 + InstructionX86 instr(0x72); + instr.set_vex_modrm_and_rex(4, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction pw_srl(Register dst, Register src, u8 imm) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + // VEX.128.66.0F.WIG 72 /2 ib VPSRLD xmm1, xmm2, imm8 + InstructionX86 instr(0x72); + instr.set_vex_modrm_and_rex(2, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction ph_srl(Register dst, Register src, u8 imm) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + // VEX.128.66.0F.WIG 71 /2 ib VPSRLW + InstructionX86 instr(0x71); + instr.set_vex_modrm_and_rex(2, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction pw_sll(Register dst, Register src, u8 imm) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + // VEX.128.66.0F.WIG 72 /6 ib VPSLLD xmm1, xmm2, imm8 + InstructionX86 instr(0x72); + instr.set_vex_modrm_and_rex(6, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; +} +Instruction ph_sll(Register dst, Register src, u8 imm) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + // VEX.128.66.0F.WIG 71 /6 ib VPSLLW xmm1, xmm2, imm8 + InstructionX86 instr(0x71); + instr.set_vex_modrm_and_rex(6, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction parallel_add_byte(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG FC /r VPADDB xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0xFC); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction parallel_bitwise_or(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0xEB); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction parallel_bitwise_xor(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0xEF); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction parallel_bitwise_and(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0xDB); + instr.set_vex_modrm_and_rex(dst.hw_id(), 
src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +// Reminder - a word in MIPS = 32bits = a DWORD in x86 +// MIPS || x86 +// ----------------------- +// byte || byte +// halfword || word +// word || dword +// doubleword || quadword + +// -- Unpack High Data Instructions +Instruction pextub_swapped(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 68/r VPUNPCKHBW xmm1,xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x68); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction pextuh_swapped(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 69/r VPUNPCKHWD xmm1,xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x69); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction pextuw_swapped(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 6A/r VPUNPCKHDQ xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x6a); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +// -- Unpack Low Data Instructions +Instruction pextlb_swapped(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 60/r VPUNPCKLBW xmm1,xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x60); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction pextlh_swapped(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 61/r VPUNPCKLWD xmm1,xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x61); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction pextlw_swapped(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 62/r VPUNPCKLDQ xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x62); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +// Equal to than comparison as 16 bytes (8 bits) +Instruction parallel_compare_e_b(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 74 /r VPCMPEQB xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x74); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +// Equal to than comparison as 8 halfwords (16 bits) +Instruction parallel_compare_e_h(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 75 /r VPCMPEQW xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x75); + 
instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +// Equal to than comparison as 4 words (32 bits) +Instruction parallel_compare_e_w(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 76 /r VPCMPEQD xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x76); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +// Greater than comparison as 16 bytes (8 bits) +Instruction parallel_compare_gt_b(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 64 /r VPCMPGTB xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x64); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +// Greater than comparison as 8 halfwords (16 bits) +Instruction parallel_compare_gt_h(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 65 /r VPCMPGTW xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x65); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +// Greater than comparison as 4 words (32 bits) +Instruction parallel_compare_gt_w(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 66 /r VPCMPGTD xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x66); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction vpunpcklqdq(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 6C/r VPUNPCKLQDQ xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x6c); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction pcpyld_swapped(Register dst, Register src0, Register src1) { + return vpunpcklqdq(dst, src0, src1); +} + +Instruction pcpyud(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 6D/r VPUNPCKHQDQ xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + InstructionX86 instr(0x6d); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction vpsubd(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG FA /r VPSUBD xmm1, xmm2, xmm3/m128 + // reg, vec, r/m + InstructionX86 instr(0xfa); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} + +Instruction vpsrldq(Register dst, Register src, u8 imm) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + // VEX.128.66.0F.WIG 73 /3 ib VPSRLDQ xmm1, xmm2, imm8 + InstructionX86 instr(0x73); + instr.set_vex_modrm_and_rex(3, src.hw_id(), VEX3::LeadingBytes::P_0F, 
dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction vpslldq(Register dst, Register src, u8 imm) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + // VEX.128.66.0F.WIG 73 /7 ib VPSLLDQ xmm1, xmm2, imm8 + InstructionX86 instr(0x73); + instr.set_vex_modrm_and_rex(7, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction vpshuflw(Register dst, Register src, u8 imm) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + // VEX.128.F2.0F.WIG 70 /r ib VPSHUFLW xmm1, xmm2/m128, imm8 + InstructionX86 instr(0x70); + instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false, + VexPrefix::P_F2); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction vpshufhw(Register dst, Register src, u8 imm) { + ASSERT(dst.is_xmm()); + ASSERT(src.is_xmm()); + // VEX.128.F3.0F.WIG 70 /r ib VPSHUFHW xmm1, xmm2/m128, imm8 + InstructionX86 instr(0x70); + instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false, + VexPrefix::P_F3); + instr.set(Imm(1, imm)); + return instr; +} + +Instruction vpackuswb(Register dst, Register src0, Register src1) { + ASSERT(dst.is_xmm()); + ASSERT(src0.is_xmm()); + ASSERT(src1.is_xmm()); + // VEX.128.66.0F.WIG 67 /r VPACKUSWB xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + + InstructionX86 instr(0x67); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; +} +} // namespace IGen +} // namespace emitter + +#endif \ No newline at end of file diff --git a/goalc/emitter/Instruction.h b/goalc/emitter/Instruction.h index b2bd0357abd..4168b54d2f9 100644 --- a/goalc/emitter/Instruction.h +++ b/goalc/emitter/Instruction.h @@ -1,1020 +1,16 @@ #pragma once -#ifndef JAK_INSTRUCTION_H -#define JAK_INSTRUCTION_H - #include "common/common_types.h" -#include "common/util/Assert.h" namespace emitter { /*! - * The ModRM byte - */ -struct ModRM { - uint8_t mod; - uint8_t reg_op; - uint8_t rm; - - uint8_t operator()() const { return (mod << 6) | (reg_op << 3) | (rm << 0); } -}; - -/*! - * The SIB Byte - */ -struct SIB { - uint8_t scale, index, base; - - uint8_t operator()() const { return (scale << 6) | (index << 3) | (base << 0); } -}; - -/*! - * An Immediate (either imm or disp) - */ -struct Imm { - Imm() = default; - Imm(uint8_t sz, uint64_t v) : size(sz), value(v) {} - uint8_t size; - union { - uint64_t value; - uint8_t v_arr[8]; - }; -}; - -/*! - * The REX prefix byte - */ -struct REX { - explicit REX(bool w = false, bool r = false, bool x = false, bool b = false) - : W(w), R(r), X(x), B(b) {} - // W - 64-bit operands - // R - reg extension - // X - SIB i extnsion - // B - other extension - bool W, R, X, B; - uint8_t operator()() const { return (1 << 6) | (W << 3) | (R << 2) | (X << 1) | (B << 0); } -}; - -enum class VexPrefix : u8 { P_NONE = 0, P_66 = 1, P_F3 = 2, P_F2 = 3 }; - -/*! 
- * The "VEX" 3-byte format for AVX instructions - */ -struct VEX3 { - bool W, R, X, B; - enum class LeadingBytes : u8 { P_INVALID = 0, P_0F = 1, P_0F_38 = 2, P_0F_3A = 3 } leading_bytes; - u8 reg_id; - VexPrefix prefix; - bool L; - - u8 emit(u8 byte) const { - if (byte == 0) { - return 0b11000100; - } else if (byte == 1) { - u8 result = 0; - result |= ((!R) << 7); - result |= ((!X) << 6); - result |= ((!B) << 5); - result |= (0b11111 & u8(leading_bytes)); - return result; - } else if (byte == 2) { - u8 result = 0; - result |= (W << 7); // this may be inverted? - result |= ((~reg_id) & 0b1111) << 3; - result |= (L << 2); - result |= (u8(prefix) & 0b11); - return result; - } else { - ASSERT(false); - return -1; - } - } - - VEX3(bool w, - bool r, - bool x, - bool b, - LeadingBytes _leading_bytes, - u8 _reg_id = 0, - VexPrefix _prefix = VexPrefix::P_NONE, - bool l = false) - : W(w), - R(r), - X(x), - B(b), - leading_bytes(_leading_bytes), - reg_id(_reg_id), - prefix(_prefix), - L(l) {} -}; - -struct VEX2 { - bool R; - u8 reg_id; - VexPrefix prefix; - bool L; - - u8 emit(u8 byte) const { - if (byte == 0) { - return 0b11000101; - } else if (byte == 1) { - u8 result = 0; - result |= ((!R) << 7); - result |= ((~reg_id) & 0b1111) << 3; - result |= (L << 2); - result |= (u8(prefix) & 0b11); - return result; - } else { - ASSERT(false); - return -1; - } - } - - VEX2(bool r, u8 _reg_id = 0, VexPrefix _prefix = VexPrefix::P_NONE, bool l = false) - : R(r), reg_id(_reg_id), prefix(_prefix), L(l) {} -}; - -/*! - * A high-level description of an x86-64 opcode. It can emit itself. + * A high-level description of a opcode. It can emit itself. */ struct Instruction { - Instruction(uint8_t opcode) : op(opcode) {} - uint8_t op; - - enum Flags { - kOp2Set = (1 << 0), - kOp3Set = (1 << 1), - kIsNull = (1 << 2), - kSetRex = (1 << 3), - kSetModrm = (1 << 4), - kSetSib = (1 << 5), - kSetDispImm = (1 << 6), - kSetImm = (1 << 7), - }; - - u8 m_flags = 0; - - uint8_t op2; - - uint8_t op3; - - u8 n_vex = 0; - uint8_t vex[3] = {0, 0, 0}; - - // the rex byte - uint8_t m_rex = 0; - - // the modrm byte - uint8_t m_modrm = 0; - - // the sib byte - uint8_t m_sib = 0; - - // the displacement - Imm disp; - - // the immediate - Imm imm; - - /*! - * Move opcode byte 0 to before the rex prefix. - */ - void swap_op0_rex() { - if (!(m_flags & kSetRex)) - return; - auto temp = op; - op = m_rex; - m_rex = temp; - } - - void set(REX r) { - m_rex = r(); - m_flags |= kSetRex; - } - - void set(ModRM modrm) { - m_modrm = modrm(); - m_flags |= kSetModrm; - } - - void set(SIB sib) { - m_sib = sib(); - m_flags |= kSetSib; - } - - void set(VEX3 vex3) { - n_vex = 3; - for (int i = 0; i < n_vex; i++) { - vex[i] = vex3.emit(i); - } - } - - void set(VEX2 vex2) { - n_vex = 2; - for (int i = 0; i < n_vex; i++) { - vex[i] = vex2.emit(i); - } - } - - void set_disp(Imm i) { - disp = i; - m_flags |= kSetDispImm; - } - - void set(Imm i) { - imm = i; - m_flags |= kSetImm; - } - - void set_op2(uint8_t b) { - m_flags |= kOp2Set; - op2 = b; - } - - void set_op3(uint8_t b) { - m_flags |= kOp3Set; - op3 = b; - } - - int get_imm_size() const { - if (m_flags & kSetImm) { - return imm.size; - } else { - return 0; - } - } - - int get_disp_size() const { - if (m_flags & kSetDispImm) { - return disp.size; - } else { - return 0; - } - } - - /*! - * Set modrm and rex as needed for two regs. 
- */ - void set_modrm_and_rex(uint8_t reg, uint8_t rm, uint8_t mod, bool rex_w = false) { - bool rex_b = false, rex_r = false; - - if (rm >= 8) { - rm -= 8; - rex_b = true; - } - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = mod; - modrm.reg_op = reg; - modrm.rm = rm; - - set(modrm); - - if (rex_b || rex_w || rex_r) { - set(REX(rex_w, rex_r, false, rex_b)); - } - } - - void set_vex_modrm_and_rex(uint8_t reg, - uint8_t rm, - VEX3::LeadingBytes lb, - uint8_t vex_reg = 0, - bool rex_w = false, - VexPrefix prefix = VexPrefix::P_NONE) { - bool rex_b = false, rex_r = false; - - if (rm >= 8) { - rm -= 8; - rex_b = true; - } - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = 3; - modrm.reg_op = reg; - modrm.rm = rm; - - set(modrm); - if (rex_b || rex_w || lb != VEX3::LeadingBytes::P_0F) { - // need three byte version - set(VEX3(rex_w, rex_r, false, rex_b, lb, vex_reg, prefix)); - } else { - ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f - ASSERT(!rex_b); - ASSERT(!rex_w); - set(VEX2(rex_r, vex_reg, prefix)); - } - } - - /*! - * Set VEX prefix for REX as needed for two registers. - */ - void set_vex_modrm_and_rex(uint8_t reg, - uint8_t rm, - uint8_t mod, - VEX3::LeadingBytes lb, - bool rex_w = false) { - bool rex_b = false; - bool rex_r = false; - if (rm >= 8) { - rm -= 8; - rex_b = true; - } - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = mod; - modrm.reg_op = reg; - modrm.rm = rm; - set(modrm); - if (rex_b || rex_w || lb != VEX3::LeadingBytes::P_0F) { - // need three byte version - set(VEX3(rex_w, rex_r, false, rex_b, lb)); - } else { - // can get away with two byte version - ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f - ASSERT(!rex_b); - ASSERT(!rex_w); - set(VEX2(rex_r)); - } - } - - void set_modrm_and_rex_for_reg_plus_reg_plus_s8(uint8_t reg, - uint8_t addr1, - uint8_t addr2, - s8 offset, - bool rex_w) { - bool rex_b = false, rex_r = false, rex_x = false; - bool addr1_ext = false; - bool addr2_ext = false; - - if (addr1 >= 8) { - addr1 -= 8; - addr1_ext = true; - } - - if (addr2 >= 8) { - addr2 -= 8; - addr2_ext = true; - } - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = 1; // no disp - modrm.rm = 4; // sib! - modrm.reg_op = reg; - - SIB sib; - sib.scale = 0; - - Imm imm2(1, offset); - - // default addr1 in index - if (addr1 == 4) { - sib.index = addr2; - sib.base = addr1; - rex_x = addr2_ext; - rex_b = addr1_ext; - } else { - // addr1 in index - sib.index = addr1; - sib.base = addr2; - rex_x = addr1_ext; - rex_b = addr2_ext; - } - ASSERT(sib.index != 4); - - if (rex_b || rex_w || rex_r || rex_x) { - set(REX(rex_w, rex_r, rex_x, rex_b)); - } - - set(modrm); - set(sib); - set_disp(imm2); - } - - void set_vex_modrm_and_rex_for_reg_plus_reg_plus_s8(uint8_t reg, - uint8_t addr1, - uint8_t addr2, - s8 offset, - VEX3::LeadingBytes lb, - bool rex_w) { - bool rex_b = false, rex_r = false, rex_x = false; - bool addr1_ext = false; - bool addr2_ext = false; - - if (addr1 >= 8) { - addr1 -= 8; - addr1_ext = true; - } - - if (addr2 >= 8) { - addr2 -= 8; - addr2_ext = true; - } - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = 1; // no disp - modrm.rm = 4; // sib! 
- modrm.reg_op = reg; - - SIB sib; - sib.scale = 0; - - Imm imm2(1, offset); - - // default addr1 in index - if (addr1 == 4) { - sib.index = addr2; - sib.base = addr1; - rex_x = addr2_ext; - rex_b = addr1_ext; - } else { - // addr1 in index - sib.index = addr1; - sib.base = addr2; - rex_x = addr1_ext; - rex_b = addr2_ext; - } - ASSERT(sib.index != 4); - - if (rex_b || rex_w || rex_x || lb != VEX3::LeadingBytes::P_0F) { - // need three byte version - set(VEX3(rex_w, rex_r, rex_x, rex_b, lb)); - } else { - ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f - ASSERT(!rex_b); - ASSERT(!rex_w); - ASSERT(!rex_x); - set(VEX2(rex_r)); - } - - set(modrm); - set(sib); - set_disp(imm2); - } - - void set_modrm_and_rex_for_reg_plus_reg_plus_s32(uint8_t reg, - uint8_t addr1, - uint8_t addr2, - s32 offset, - bool rex_w) { - bool rex_b = false, rex_r = false, rex_x = false; - bool addr1_ext = false; - bool addr2_ext = false; - - if (addr1 >= 8) { - addr1 -= 8; - addr1_ext = true; - } - - if (addr2 >= 8) { - addr2 -= 8; - addr2_ext = true; - } - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = 2; // no disp - modrm.rm = 4; // sib! - modrm.reg_op = reg; - - SIB sib; - sib.scale = 0; - - Imm imm2(4, offset); - - // default addr1 in index - if (addr1 == 4) { - sib.index = addr2; - sib.base = addr1; - rex_x = addr2_ext; - rex_b = addr1_ext; - } else { - // addr1 in index - sib.index = addr1; - sib.base = addr2; - rex_x = addr1_ext; - rex_b = addr2_ext; - } - ASSERT(sib.index != 4); - - if (rex_b || rex_w || rex_r || rex_x) { - set(REX(rex_w, rex_r, rex_x, rex_b)); - } - - set(modrm); - set(sib); - set_disp(imm2); - } - - void set_vex_modrm_and_rex_for_reg_plus_reg_plus_s32(uint8_t reg, - uint8_t addr1, - uint8_t addr2, - s32 offset, - VEX3::LeadingBytes lb, - bool rex_w) { - bool rex_b = false, rex_r = false, rex_x = false; - bool addr1_ext = false; - bool addr2_ext = false; - - if (addr1 >= 8) { - addr1 -= 8; - addr1_ext = true; - } - - if (addr2 >= 8) { - addr2 -= 8; - addr2_ext = true; - } - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = 2; // no disp - modrm.rm = 4; // sib! - modrm.reg_op = reg; - - SIB sib; - sib.scale = 0; - - Imm imm2(4, offset); - - // default addr1 in index - if (addr1 == 4) { - sib.index = addr2; - sib.base = addr1; - rex_x = addr2_ext; - rex_b = addr1_ext; - } else { - // addr1 in index - sib.index = addr1; - sib.base = addr2; - rex_x = addr1_ext; - rex_b = addr2_ext; - } - ASSERT(sib.index != 4); - - if (rex_b || rex_w || rex_x || lb != VEX3::LeadingBytes::P_0F) { - // need three byte version - set(VEX3(rex_w, rex_r, rex_x, rex_b, lb)); - } else { - ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f - ASSERT(!rex_b); - ASSERT(!rex_w); - ASSERT(!rex_x); - set(VEX2(rex_r)); - } - - set(modrm); - set(sib); - set_disp(imm2); - } - - void set_modrm_and_rex_for_reg_plus_reg_addr(uint8_t reg, - uint8_t addr1, - uint8_t addr2, - bool rex_w = false, - bool rex_always = false) { - bool rex_b = false, rex_r = false, rex_x = false; - bool addr1_ext = false; - bool addr2_ext = false; - - if (addr1 >= 8) { - addr1 -= 8; - addr1_ext = true; - } - - if (addr2 >= 8) { - addr2 -= 8; - addr2_ext = true; - } - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = 0; // no disp - modrm.rm = 4; // sib! 
- modrm.reg_op = reg; - - SIB sib; - sib.scale = 0; - - if (addr1 == 5 && addr2 == 5) { - sib.index = addr1; - sib.base = addr2; - rex_x = addr1_ext; - rex_b = addr2_ext; - modrm.mod = 1; - set_disp(Imm(1, 0)); - - } else { - // default addr1 in index - bool flipped = (addr1 == 4) || (addr2 == 5); - - if (flipped) { - sib.index = addr2; - sib.base = addr1; - rex_x = addr2_ext; - rex_b = addr1_ext; - } else { - // addr1 in index - sib.index = addr1; - sib.base = addr2; - rex_x = addr1_ext; - rex_b = addr2_ext; - } - ASSERT(sib.base != 5); - ASSERT(sib.index != 4); - } - - if (rex_b || rex_w || rex_r || rex_x || rex_always) { - set(REX(rex_w, rex_r, rex_x, rex_b)); - } - - set(modrm); - set(sib); - } - - void set_vex_modrm_and_rex_for_reg_plus_reg_addr(uint8_t reg, - uint8_t addr1, - uint8_t addr2, - VEX3::LeadingBytes lb, - bool rex_w = false) { - bool rex_b = false, rex_r = false, rex_x = false; - bool addr1_ext = false; - bool addr2_ext = false; - - if (addr1 >= 8) { - addr1 -= 8; - addr1_ext = true; - } - - if (addr2 >= 8) { - addr2 -= 8; - addr2_ext = true; - } - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = 0; // no disp - modrm.rm = 4; // sib! - modrm.reg_op = reg; - - SIB sib; - sib.scale = 0; - - if (addr1 == 5 && addr2 == 5) { - sib.index = addr1; - sib.base = addr2; - rex_x = addr1_ext; - rex_b = addr2_ext; - modrm.mod = 1; - set_disp(Imm(1, 0)); - - } else { - // default addr1 in index - bool flipped = (addr1 == 4) || (addr2 == 5); - - if (flipped) { - sib.index = addr2; - sib.base = addr1; - rex_x = addr2_ext; - rex_b = addr1_ext; - } else { - // addr1 in index - sib.index = addr1; - sib.base = addr2; - rex_x = addr1_ext; - rex_b = addr2_ext; - } - ASSERT(sib.base != 5); - ASSERT(sib.index != 4); - } - - if (rex_b || rex_w || rex_x || lb != VEX3::LeadingBytes::P_0F) { - // need three byte version - set(VEX3(rex_w, rex_r, rex_x, rex_b, lb)); - } else { - ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f - ASSERT(!rex_b); - ASSERT(!rex_w); - ASSERT(!rex_x); - set(VEX2(rex_r)); - } - - set(modrm); - set(sib); - } - - /*! - * Set modrm and rex as needed for two regs for an addressing mode. - * Will set SIB if R12 or RSP indexing is used. 
- */ - void set_modrm_and_rex_for_reg_addr(uint8_t reg, uint8_t rm, bool rex_w = false) { - bool rex_b = false, rex_r = false; - - if (rm >= 8) { - rm -= 8; - rex_b = true; - } - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = 0; - modrm.reg_op = reg; - modrm.rm = rm; - - if (rm == 4) { - SIB sib; - sib.scale = 0; - sib.base = 4; - sib.index = 4; - - set(sib); - } - - if (rm == 5) { - modrm.mod = 1; // 1 byte imm - set_disp(Imm(1, 0)); - } - - set(modrm); - if (rex_b || rex_w || rex_r) { - set(REX(rex_w, rex_r, false, rex_b)); - } - } - - void set_modrm_and_rex_for_rip_plus_s32(uint8_t reg, s32 offset, bool rex_w = false) { - bool rex_r = false; - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = 0; - modrm.reg_op = reg; - modrm.rm = 5; // use the RIP addressing mode - set(modrm); - - if (rex_r || rex_w) { - set(REX(rex_w, rex_r, false, false)); - } - - set_disp(Imm(4, offset)); - } - - void add_rex() { - if (!(m_flags & kSetRex)) { - set(REX()); - } - } - - void set_vex_modrm_and_rex_for_rip_plus_s32(uint8_t reg, - s32 offset, - VEX3::LeadingBytes lb = VEX3::LeadingBytes::P_0F, - bool rex_w = false) { - bool rex_r = false; - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = 0; - modrm.reg_op = reg; - modrm.rm = 5; // use the RIP addressing mode - set(modrm); - - if (rex_w || lb != VEX3::LeadingBytes::P_0F) { - // need three byte version - set(VEX3(rex_w, rex_r, false, false, lb)); - } else { - ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f - ASSERT(!rex_w); - set(VEX2(rex_r)); - } - - set_disp(Imm(4, offset)); - } - - /*! - * Set up modrm and rex for the commonly used immediate displacement indexing mode. - */ - void set_modrm_rex_sib_for_reg_reg_disp(uint8_t reg, uint8_t mod, uint8_t rm, bool rex_w) { - ModRM modrm; - - bool rex_r = false; - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - modrm.reg_op = reg; - - modrm.mod = mod; - - modrm.rm = 4; // use sib - - SIB sib; - sib.scale = 0; - sib.index = 4; - bool rex_b = false; - if (rm >= 8) { - rex_b = true; - rm -= 8; - } - - sib.base = rm; - - set(modrm); - set(sib); - - if (rex_r || rex_w || rex_b) { - set(REX(rex_w, rex_r, false, rex_b)); - } - } - - /*! - * Get the position of the disp immediate relative to the start of the instruction - */ - int offset_of_disp() const { - if (m_flags & kIsNull) - return 0; - ASSERT(m_flags & kSetDispImm); - int offset = 0; - offset += n_vex; - if (m_flags & kSetRex) - offset++; - offset++; // opcode - if (m_flags & kOp2Set) - offset++; - if (m_flags & kOp3Set) - offset++; - if (m_flags & kSetModrm) - offset++; - if (m_flags & kSetSib) - offset++; - return offset; - } - - /*! - * Get the position of the imm immediate relative to the start of the instruction - */ - int offset_of_imm() const { - if (m_flags & kIsNull) - return 0; - ASSERT(m_flags & kSetImm); - int offset = 0; - offset += n_vex; - if (m_flags & kSetRex) - offset++; - offset++; // opcode - if (m_flags & kOp2Set) - offset++; - if (m_flags & kOp3Set) - offset++; - if (m_flags & kSetModrm) - offset++; - if (m_flags & kSetSib) - offset++; - if (m_flags & kSetDispImm) - offset += disp.size; - return offset; - } - /*! 
* Emit into a buffer and return how many bytes written (can be zero) */ - uint8_t emit(uint8_t* buffer) const { - if (m_flags & kIsNull) - return 0; - uint8_t count = 0; - - for (int i = 0; i < n_vex; i++) { - buffer[count++] = vex[i]; - } - - if (m_flags & kSetRex) { - buffer[count++] = m_rex; - } - - buffer[count++] = op; - - if (m_flags & kOp2Set) { - buffer[count++] = op2; - } - - if (m_flags & kOp3Set) { - buffer[count++] = op3; - } - - if (m_flags & kSetModrm) { - buffer[count++] = m_modrm; - } - - if (m_flags & kSetSib) { - buffer[count++] = m_sib; - } - - if (m_flags & kSetDispImm) { - for (int i = 0; i < disp.size; i++) { - buffer[count++] = disp.v_arr[i]; - } - } - - if (m_flags & kSetImm) { - for (int i = 0; i < imm.size; i++) { - buffer[count++] = imm.v_arr[i]; - } - } - return count; - } - - uint8_t length() const { - if (m_flags & kIsNull) - return 0; - uint8_t count = 0; - - count += n_vex; - - if (m_flags & kSetRex) { - count++; - } - - count++; - - if (m_flags & kOp2Set) { - count++; - } - - if (m_flags & kOp3Set) { - count++; - } - - if (m_flags & kSetModrm) { - count++; - } - - if (m_flags & kSetSib) { - count++; - } - - if (m_flags & kSetDispImm) { - for (int i = 0; i < disp.size; i++) { - count++; - } - } - - if (m_flags & kSetImm) { - for (int i = 0; i < imm.size; i++) { - count++; - } - } - return count; - } + virtual u8 emit(u8* buffer) const = 0; + virtual u8 length() const = 0; }; } // namespace emitter - -#endif // JAK_INSTRUCTION_H diff --git a/goalc/emitter/InstructionARM64.h b/goalc/emitter/InstructionARM64.h new file mode 100644 index 00000000000..a39d5bb67a3 --- /dev/null +++ b/goalc/emitter/InstructionARM64.h @@ -0,0 +1,29 @@ +#ifdef __aarch64__ + +#pragma once + +#include +#include "Instruction.h" + +namespace emitter { +struct InstructionARM64 : Instruction { + // The ARM instruction stream is a sequence of word-aligned words. Each ARM instruction is a single 32-bit word in that stream. + // The encoding of an ARM instruction is: + // TODO + // https://iitd-plos.github.io/col718/ref/arm-instructionset.pdf + u32 instruction_encoding; + + InstructionARM64(u32 encoding) : instruction_encoding(encoding) {} + + uint8_t emit(uint8_t* buffer) const override { + memcpy(buffer, &instruction_encoding, 4); + return 4; + } + + uint8_t length() const override { + return 4; + } + +}; +} // namespace emitter +#endif \ No newline at end of file diff --git a/goalc/emitter/InstructionX86.h b/goalc/emitter/InstructionX86.h new file mode 100644 index 00000000000..3eda421f318 --- /dev/null +++ b/goalc/emitter/InstructionX86.h @@ -0,0 +1,1011 @@ +#pragma once + +#include "Instruction.h" + +#include "common/util/Assert.h" + +namespace emitter { +/*! + * The ModRM byte + */ +struct ModRM { + uint8_t mod; + uint8_t reg_op; + uint8_t rm; + + uint8_t operator()() const { return (mod << 6) | (reg_op << 3) | (rm << 0); } +}; + +/*! + * The SIB Byte + */ +struct SIB { + uint8_t scale, index, base; + + uint8_t operator()() const { return (scale << 6) | (index << 3) | (base << 0); } +}; + +/*! + * An Immediate (either imm or disp) + */ +struct Imm { + Imm() = default; + Imm(uint8_t sz, uint64_t v) : size(sz), value(v) {} + uint8_t size; + union { + uint64_t value; + uint8_t v_arr[8]; + }; +}; + +/*! 
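Since every AArch64 instruction is one 32-bit word, the encoding TODO above amounts to packing bit fields into that word. As a rough sketch only (the field layout is the standard ADD-immediate format; the helper name is hypothetical and the u8/u16/u32 typedefs are assumed to come from the project's common types, none of this is part of the patch):

  // Hypothetical sketch: encode AArch64 "ADD Xd, Xn, #imm12" (64-bit, no shift).
  // Layout: sf=1 (bit 31) | 0 | 0 | 100010 (bits 28:23) | sh=0 | imm12 (21:10) | Rn (9:5) | Rd (4:0)
  u32 encode_add_imm64(u8 rd, u8 rn, u16 imm12) {
    ASSERT(imm12 < 4096);
    return 0x91000000 | (u32(imm12) << 10) | (u32(rn & 0x1f) << 5) | u32(rd & 0x1f);
  }
  // encode_add_imm64(0, 1, 4) == 0x91001020 ("add x0, x1, #4"); wrapping that value in
  // InstructionARM64 and calling emit() writes the little-endian bytes 20 10 00 91.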
+ * The REX prefix byte + */ +struct REX { + explicit REX(bool w = false, bool r = false, bool x = false, bool b = false) + : W(w), R(r), X(x), B(b) {} + // W - 64-bit operands + // R - reg extension + // X - SIB i extnsion + // B - other extension + bool W, R, X, B; + uint8_t operator()() const { return (1 << 6) | (W << 3) | (R << 2) | (X << 1) | (B << 0); } +}; + +enum class VexPrefix : u8 { P_NONE = 0, P_66 = 1, P_F3 = 2, P_F2 = 3 }; + +/*! + * The "VEX" 3-byte format for AVX instructions + */ +struct VEX3 { + bool W, R, X, B; + enum class LeadingBytes : u8 { P_INVALID = 0, P_0F = 1, P_0F_38 = 2, P_0F_3A = 3 } leading_bytes; + u8 reg_id; + VexPrefix prefix; + bool L; + + u8 emit(u8 byte) const { + if (byte == 0) { + return 0b11000100; + } else if (byte == 1) { + u8 result = 0; + result |= ((!R) << 7); + result |= ((!X) << 6); + result |= ((!B) << 5); + result |= (0b11111 & u8(leading_bytes)); + return result; + } else if (byte == 2) { + u8 result = 0; + result |= (W << 7); // this may be inverted? + result |= ((~reg_id) & 0b1111) << 3; + result |= (L << 2); + result |= (u8(prefix) & 0b11); + return result; + } else { + ASSERT(false); + return -1; + } + } + + VEX3(bool w, + bool r, + bool x, + bool b, + LeadingBytes _leading_bytes, + u8 _reg_id = 0, + VexPrefix _prefix = VexPrefix::P_NONE, + bool l = false) + : W(w), + R(r), + X(x), + B(b), + leading_bytes(_leading_bytes), + reg_id(_reg_id), + prefix(_prefix), + L(l) {} +}; + +struct VEX2 { + bool R; + u8 reg_id; + VexPrefix prefix; + bool L; + + u8 emit(u8 byte) const { + if (byte == 0) { + return 0b11000101; + } else if (byte == 1) { + u8 result = 0; + result |= ((!R) << 7); + result |= ((~reg_id) & 0b1111) << 3; + result |= (L << 2); + result |= (u8(prefix) & 0b11); + return result; + } else { + ASSERT(false); + return -1; + } + } + + VEX2(bool r, u8 _reg_id = 0, VexPrefix _prefix = VexPrefix::P_NONE, bool l = false) + : R(r), reg_id(_reg_id), prefix(_prefix), L(l) {} +}; + +struct InstructionX86 : Instruction { + enum Flags { + kOp2Set = (1 << 0), + kOp3Set = (1 << 1), + kIsNull = (1 << 2), + kSetRex = (1 << 3), + kSetModrm = (1 << 4), + kSetSib = (1 << 5), + kSetDispImm = (1 << 6), + kSetImm = (1 << 7), + }; + + InstructionX86(u8 opcode) : op(opcode) {} + + u8 op; + + u8 m_flags = 0; + + u8 op2; + + u8 op3; + + u8 n_vex = 0; + u8 vex[3] = {0, 0, 0}; + + // the rex byte + u8 m_rex = 0; + + // the modrm byte + u8 m_modrm = 0; + + // the sib byte + u8 m_sib = 0; + + // the displacement + Imm disp; + + // the immediate + Imm imm; + + /*! + * Move opcode byte 0 to before the rex prefix. 
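As a quick sanity check of the packing above (an example, not new behavior): REX is 0100WRXB, so W together with B gives 0x49, the prefix seen on instructions such as mov r8, rcx (49 89 c8).

  // Example: REX.W + REX.B packs to 0x49.
  REX rex(/*w*/ true, /*r*/ false, /*x*/ false, /*b*/ true);
  ASSERT(rex() == 0x49);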
+ */ + void swap_op0_rex() { + if (!(m_flags & kSetRex)) + return; + auto temp = op; + op = m_rex; + m_rex = temp; + } + + void set(REX r) { + m_rex = r(); + m_flags |= kSetRex; + } + + void set(ModRM modrm) { + m_modrm = modrm(); + m_flags |= kSetModrm; + } + + void set(SIB sib) { + m_sib = sib(); + m_flags |= kSetSib; + } + + void set(VEX3 vex3) { + n_vex = 3; + for (int i = 0; i < n_vex; i++) { + vex[i] = vex3.emit(i); + } + } + + void set(VEX2 vex2) { + n_vex = 2; + for (int i = 0; i < n_vex; i++) { + vex[i] = vex2.emit(i); + } + } + + void set_disp(Imm i) { + disp = i; + m_flags |= kSetDispImm; + } + + void set(Imm i) { + imm = i; + m_flags |= kSetImm; + } + + void set_op2(uint8_t b) { + m_flags |= kOp2Set; + op2 = b; + } + + void set_op3(uint8_t b) { + m_flags |= kOp3Set; + op3 = b; + } + + int get_imm_size() const { + if (m_flags & kSetImm) { + return imm.size; + } else { + return 0; + } + } + + int get_disp_size() const { + if (m_flags & kSetDispImm) { + return disp.size; + } else { + return 0; + } + } + + /*! + * Set modrm and rex as needed for two regs. + */ + void set_modrm_and_rex(uint8_t reg, uint8_t rm, uint8_t mod, bool rex_w = false) { + bool rex_b = false, rex_r = false; + + if (rm >= 8) { + rm -= 8; + rex_b = true; + } + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = mod; + modrm.reg_op = reg; + modrm.rm = rm; + + set(modrm); + + if (rex_b || rex_w || rex_r) { + set(REX(rex_w, rex_r, false, rex_b)); + } + } + + void set_vex_modrm_and_rex(uint8_t reg, + uint8_t rm, + VEX3::LeadingBytes lb, + uint8_t vex_reg = 0, + bool rex_w = false, + VexPrefix prefix = VexPrefix::P_NONE) { + bool rex_b = false, rex_r = false; + + if (rm >= 8) { + rm -= 8; + rex_b = true; + } + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 3; + modrm.reg_op = reg; + modrm.rm = rm; + + set(modrm); + if (rex_b || rex_w || lb != VEX3::LeadingBytes::P_0F) { + // need three byte version + set(VEX3(rex_w, rex_r, false, rex_b, lb, vex_reg, prefix)); + } else { + ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f + ASSERT(!rex_b); + ASSERT(!rex_w); + set(VEX2(rex_r, vex_reg, prefix)); + } + } + + /*! + * Set VEX prefix for REX as needed for two registers. + */ + void set_vex_modrm_and_rex(uint8_t reg, + uint8_t rm, + uint8_t mod, + VEX3::LeadingBytes lb, + bool rex_w = false) { + bool rex_b = false; + bool rex_r = false; + if (rm >= 8) { + rm -= 8; + rex_b = true; + } + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = mod; + modrm.reg_op = reg; + modrm.rm = rm; + set(modrm); + if (rex_b || rex_w || lb != VEX3::LeadingBytes::P_0F) { + // need three byte version + set(VEX3(rex_w, rex_r, false, rex_b, lb)); + } else { + // can get away with two byte version + ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f + ASSERT(!rex_b); + ASSERT(!rex_w); + set(VEX2(rex_r)); + } + } + + void set_modrm_and_rex_for_reg_plus_reg_plus_s8(uint8_t reg, + uint8_t addr1, + uint8_t addr2, + s8 offset, + bool rex_w) { + bool rex_b = false, rex_r = false, rex_x = false; + bool addr1_ext = false; + bool addr2_ext = false; + + if (addr1 >= 8) { + addr1 -= 8; + addr1_ext = true; + } + + if (addr2 >= 8) { + addr2 -= 8; + addr2_ext = true; + } + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 1; // no disp + modrm.rm = 4; // sib! 
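To make the flag plumbing concrete, here is a minimal sketch of how these helpers compose into a complete instruction (the emitter's register ids match the hardware encoding, so RAX=0 and RBX=3; emit() is defined further down in this file):

  // Sketch: assemble "mov rax, rbx", i.e. REX.W + 89 /r.
  InstructionX86 instr(0x89);  // MOV r/m64, r64
  instr.set_modrm_and_rex(/*reg*/ 3, /*rm*/ 0, /*mod*/ 3, /*rex_w*/ true);
  u8 buf[16];
  u8 n = instr.emit(buf);  // buf holds 48 89 d8, n == 3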
+ modrm.reg_op = reg; + + SIB sib; + sib.scale = 0; + + Imm imm2(1, offset); + + // default addr1 in index + if (addr1 == 4) { + sib.index = addr2; + sib.base = addr1; + rex_x = addr2_ext; + rex_b = addr1_ext; + } else { + // addr1 in index + sib.index = addr1; + sib.base = addr2; + rex_x = addr1_ext; + rex_b = addr2_ext; + } + ASSERT(sib.index != 4); + + if (rex_b || rex_w || rex_r || rex_x) { + set(REX(rex_w, rex_r, rex_x, rex_b)); + } + + set(modrm); + set(sib); + set_disp(imm2); + } + + void set_vex_modrm_and_rex_for_reg_plus_reg_plus_s8(uint8_t reg, + uint8_t addr1, + uint8_t addr2, + s8 offset, + VEX3::LeadingBytes lb, + bool rex_w) { + bool rex_b = false, rex_r = false, rex_x = false; + bool addr1_ext = false; + bool addr2_ext = false; + + if (addr1 >= 8) { + addr1 -= 8; + addr1_ext = true; + } + + if (addr2 >= 8) { + addr2 -= 8; + addr2_ext = true; + } + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 1; // no disp + modrm.rm = 4; // sib! + modrm.reg_op = reg; + + SIB sib; + sib.scale = 0; + + Imm imm2(1, offset); + + // default addr1 in index + if (addr1 == 4) { + sib.index = addr2; + sib.base = addr1; + rex_x = addr2_ext; + rex_b = addr1_ext; + } else { + // addr1 in index + sib.index = addr1; + sib.base = addr2; + rex_x = addr1_ext; + rex_b = addr2_ext; + } + ASSERT(sib.index != 4); + + if (rex_b || rex_w || rex_x || lb != VEX3::LeadingBytes::P_0F) { + // need three byte version + set(VEX3(rex_w, rex_r, rex_x, rex_b, lb)); + } else { + ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f + ASSERT(!rex_b); + ASSERT(!rex_w); + ASSERT(!rex_x); + set(VEX2(rex_r)); + } + + set(modrm); + set(sib); + set_disp(imm2); + } + + void set_modrm_and_rex_for_reg_plus_reg_plus_s32(uint8_t reg, + uint8_t addr1, + uint8_t addr2, + s32 offset, + bool rex_w) { + bool rex_b = false, rex_r = false, rex_x = false; + bool addr1_ext = false; + bool addr2_ext = false; + + if (addr1 >= 8) { + addr1 -= 8; + addr1_ext = true; + } + + if (addr2 >= 8) { + addr2 -= 8; + addr2_ext = true; + } + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 2; // no disp + modrm.rm = 4; // sib! + modrm.reg_op = reg; + + SIB sib; + sib.scale = 0; + + Imm imm2(4, offset); + + // default addr1 in index + if (addr1 == 4) { + sib.index = addr2; + sib.base = addr1; + rex_x = addr2_ext; + rex_b = addr1_ext; + } else { + // addr1 in index + sib.index = addr1; + sib.base = addr2; + rex_x = addr1_ext; + rex_b = addr2_ext; + } + ASSERT(sib.index != 4); + + if (rex_b || rex_w || rex_r || rex_x) { + set(REX(rex_w, rex_r, rex_x, rex_b)); + } + + set(modrm); + set(sib); + set_disp(imm2); + } + + void set_vex_modrm_and_rex_for_reg_plus_reg_plus_s32(uint8_t reg, + uint8_t addr1, + uint8_t addr2, + s32 offset, + VEX3::LeadingBytes lb, + bool rex_w) { + bool rex_b = false, rex_r = false, rex_x = false; + bool addr1_ext = false; + bool addr2_ext = false; + + if (addr1 >= 8) { + addr1 -= 8; + addr1_ext = true; + } + + if (addr2 >= 8) { + addr2 -= 8; + addr2_ext = true; + } + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 2; // no disp + modrm.rm = 4; // sib! 
+ modrm.reg_op = reg; + + SIB sib; + sib.scale = 0; + + Imm imm2(4, offset); + + // default addr1 in index + if (addr1 == 4) { + sib.index = addr2; + sib.base = addr1; + rex_x = addr2_ext; + rex_b = addr1_ext; + } else { + // addr1 in index + sib.index = addr1; + sib.base = addr2; + rex_x = addr1_ext; + rex_b = addr2_ext; + } + ASSERT(sib.index != 4); + + if (rex_b || rex_w || rex_x || lb != VEX3::LeadingBytes::P_0F) { + // need three byte version + set(VEX3(rex_w, rex_r, rex_x, rex_b, lb)); + } else { + ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f + ASSERT(!rex_b); + ASSERT(!rex_w); + ASSERT(!rex_x); + set(VEX2(rex_r)); + } + + set(modrm); + set(sib); + set_disp(imm2); + } + + void set_modrm_and_rex_for_reg_plus_reg_addr(uint8_t reg, + uint8_t addr1, + uint8_t addr2, + bool rex_w = false, + bool rex_always = false) { + bool rex_b = false, rex_r = false, rex_x = false; + bool addr1_ext = false; + bool addr2_ext = false; + + if (addr1 >= 8) { + addr1 -= 8; + addr1_ext = true; + } + + if (addr2 >= 8) { + addr2 -= 8; + addr2_ext = true; + } + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 0; // no disp + modrm.rm = 4; // sib! + modrm.reg_op = reg; + + SIB sib; + sib.scale = 0; + + if (addr1 == 5 && addr2 == 5) { + sib.index = addr1; + sib.base = addr2; + rex_x = addr1_ext; + rex_b = addr2_ext; + modrm.mod = 1; + set_disp(Imm(1, 0)); + + } else { + // default addr1 in index + bool flipped = (addr1 == 4) || (addr2 == 5); + + if (flipped) { + sib.index = addr2; + sib.base = addr1; + rex_x = addr2_ext; + rex_b = addr1_ext; + } else { + // addr1 in index + sib.index = addr1; + sib.base = addr2; + rex_x = addr1_ext; + rex_b = addr2_ext; + } + ASSERT(sib.base != 5); + ASSERT(sib.index != 4); + } + + if (rex_b || rex_w || rex_r || rex_x || rex_always) { + set(REX(rex_w, rex_r, rex_x, rex_b)); + } + + set(modrm); + set(sib); + } + + void set_vex_modrm_and_rex_for_reg_plus_reg_addr(uint8_t reg, + uint8_t addr1, + uint8_t addr2, + VEX3::LeadingBytes lb, + bool rex_w = false) { + bool rex_b = false, rex_r = false, rex_x = false; + bool addr1_ext = false; + bool addr2_ext = false; + + if (addr1 >= 8) { + addr1 -= 8; + addr1_ext = true; + } + + if (addr2 >= 8) { + addr2 -= 8; + addr2_ext = true; + } + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 0; // no disp + modrm.rm = 4; // sib! + modrm.reg_op = reg; + + SIB sib; + sib.scale = 0; + + if (addr1 == 5 && addr2 == 5) { + sib.index = addr1; + sib.base = addr2; + rex_x = addr1_ext; + rex_b = addr2_ext; + modrm.mod = 1; + set_disp(Imm(1, 0)); + + } else { + // default addr1 in index + bool flipped = (addr1 == 4) || (addr2 == 5); + + if (flipped) { + sib.index = addr2; + sib.base = addr1; + rex_x = addr2_ext; + rex_b = addr1_ext; + } else { + // addr1 in index + sib.index = addr1; + sib.base = addr2; + rex_x = addr1_ext; + rex_b = addr2_ext; + } + ASSERT(sib.base != 5); + ASSERT(sib.index != 4); + } + + if (rex_b || rex_w || rex_x || lb != VEX3::LeadingBytes::P_0F) { + // need three byte version + set(VEX3(rex_w, rex_r, rex_x, rex_b, lb)); + } else { + ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f + ASSERT(!rex_b); + ASSERT(!rex_w); + ASSERT(!rex_x); + set(VEX2(rex_r)); + } + + set(modrm); + set(sib); + } + + /*! + * Set modrm and rex as needed for two regs for an addressing mode. + * Will set SIB if R12 or RSP indexing is used. 
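The flip and the two asserts above come from SIB quirks: an index field of 4 (RSP) means "no index", and a base field of 5 with mod=0 means disp32 rather than RBP, which is also why the rbp+rbp case is forced to mod=1 with a zero disp8. A small sketch of the normal path (register ids RAX=0, RBX=3, RSI=6):

  // Sketch: assemble "mov rax, [rbx + rsi]" with the reg+reg addressing helper above.
  InstructionX86 instr(0x8b);  // MOV r64, r/m64
  instr.set_modrm_and_rex_for_reg_plus_reg_addr(/*reg*/ 0, /*addr1*/ 3, /*addr2*/ 6, /*rex_w*/ true);
  u8 buf[16];
  instr.emit(buf);  // 48 8b 04 1e: modrm 04 selects a SIB byte, SIB 1e is base=rsi, index=rbx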
+ */ + void set_modrm_and_rex_for_reg_addr(uint8_t reg, uint8_t rm, bool rex_w = false) { + bool rex_b = false, rex_r = false; + + if (rm >= 8) { + rm -= 8; + rex_b = true; + } + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 0; + modrm.reg_op = reg; + modrm.rm = rm; + + if (rm == 4) { + SIB sib; + sib.scale = 0; + sib.base = 4; + sib.index = 4; + + set(sib); + } + + if (rm == 5) { + modrm.mod = 1; // 1 byte imm + set_disp(Imm(1, 0)); + } + + set(modrm); + if (rex_b || rex_w || rex_r) { + set(REX(rex_w, rex_r, false, rex_b)); + } + } + + void set_modrm_and_rex_for_rip_plus_s32(uint8_t reg, s32 offset, bool rex_w = false) { + bool rex_r = false; + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 0; + modrm.reg_op = reg; + modrm.rm = 5; // use the RIP addressing mode + set(modrm); + + if (rex_r || rex_w) { + set(REX(rex_w, rex_r, false, false)); + } + + set_disp(Imm(4, offset)); + } + + void add_rex() { + if (!(m_flags & kSetRex)) { + set(REX()); + } + } + + void set_vex_modrm_and_rex_for_rip_plus_s32(uint8_t reg, + s32 offset, + VEX3::LeadingBytes lb = VEX3::LeadingBytes::P_0F, + bool rex_w = false) { + bool rex_r = false; + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 0; + modrm.reg_op = reg; + modrm.rm = 5; // use the RIP addressing mode + set(modrm); + + if (rex_w || lb != VEX3::LeadingBytes::P_0F) { + // need three byte version + set(VEX3(rex_w, rex_r, false, false, lb)); + } else { + ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f + ASSERT(!rex_w); + set(VEX2(rex_r)); + } + + set_disp(Imm(4, offset)); + } + + /*! + * Set up modrm and rex for the commonly used immediate displacement indexing mode. + */ + void set_modrm_rex_sib_for_reg_reg_disp(uint8_t reg, uint8_t mod, uint8_t rm, bool rex_w) { + ModRM modrm; + + bool rex_r = false; + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + modrm.reg_op = reg; + + modrm.mod = mod; + + modrm.rm = 4; // use sib + + SIB sib; + sib.scale = 0; + sib.index = 4; + bool rex_b = false; + if (rm >= 8) { + rex_b = true; + rm -= 8; + } + + sib.base = rm; + + set(modrm); + set(sib); + + if (rex_r || rex_w || rex_b) { + set(REX(rex_w, rex_r, false, rex_b)); + } + } + + /*! + * Get the position of the disp immediate relative to the start of the instruction + */ + int offset_of_disp() const { + if (m_flags & kIsNull) + return 0; + ASSERT(m_flags & kSetDispImm); + int offset = 0; + offset += n_vex; + if (m_flags & kSetRex) + offset++; + offset++; // opcode + if (m_flags & kOp2Set) + offset++; + if (m_flags & kOp3Set) + offset++; + if (m_flags & kSetModrm) + offset++; + if (m_flags & kSetSib) + offset++; + return offset; + } + + /*! 
+ * Get the position of the imm immediate relative to the start of the instruction + */ + int offset_of_imm() const { + if (m_flags & kIsNull) + return 0; + ASSERT(m_flags & kSetImm); + int offset = 0; + offset += n_vex; + if (m_flags & kSetRex) + offset++; + offset++; // opcode + if (m_flags & kOp2Set) + offset++; + if (m_flags & kOp3Set) + offset++; + if (m_flags & kSetModrm) + offset++; + if (m_flags & kSetSib) + offset++; + if (m_flags & kSetDispImm) + offset += disp.size; + return offset; + } + + uint8_t emit(uint8_t* buffer) const override { + if (m_flags & kIsNull) + return 0; + uint8_t count = 0; + + for (int i = 0; i < n_vex; i++) { + buffer[count++] = vex[i]; + } + + if (m_flags & kSetRex) { + buffer[count++] = m_rex; + } + + buffer[count++] = op; + + if (m_flags & kOp2Set) { + buffer[count++] = op2; + } + + if (m_flags & kOp3Set) { + buffer[count++] = op3; + } + + if (m_flags & kSetModrm) { + buffer[count++] = m_modrm; + } + + if (m_flags & kSetSib) { + buffer[count++] = m_sib; + } + + if (m_flags & kSetDispImm) { + for (int i = 0; i < disp.size; i++) { + buffer[count++] = disp.v_arr[i]; + } + } + + if (m_flags & kSetImm) { + for (int i = 0; i < imm.size; i++) { + buffer[count++] = imm.v_arr[i]; + } + } + return count; + } + + uint8_t length() const override { + if (m_flags & kIsNull) + return 0; + uint8_t count = 0; + + count += n_vex; + + if (m_flags & kSetRex) { + count++; + } + + count++; + + if (m_flags & kOp2Set) { + count++; + } + + if (m_flags & kOp3Set) { + count++; + } + + if (m_flags & kSetModrm) { + count++; + } + + if (m_flags & kSetSib) { + count++; + } + + if (m_flags & kSetDispImm) { + for (int i = 0; i < disp.size; i++) { + count++; + } + } + + if (m_flags & kSetImm) { + for (int i = 0; i < imm.size; i++) { + count++; + } + } + return count; + } +}; +} // namespace emitter diff --git a/goalc/emitter/ObjectGenerator.cpp b/goalc/emitter/ObjectGenerator.cpp index 64b6e764ecb..6735c6db738 100644 --- a/goalc/emitter/ObjectGenerator.cpp +++ b/goalc/emitter/ObjectGenerator.cpp @@ -386,6 +386,7 @@ void ObjectGenerator::handle_temp_static_ptr_links(int seg) { * m_jump_temp_links_by_seg patching after memory layout is done */ void ObjectGenerator::handle_temp_jump_links(int seg) { + #ifndef __aarch64__ for (const auto& link : m_jump_temp_links_by_seg.at(seg)) { // we need to compute three offsets, all relative to the start of data. // 1). the location of the patch (the immediate of the opcode) @@ -411,6 +412,10 @@ void ObjectGenerator::handle_temp_jump_links(int seg) { patch_data(seg, patch_location, dest_rip - source_rip); } + #else + // TODO - ARM64 + #endif + } /*! 
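For the ARM64 TODO in handle_temp_jump_links: x86 stores a 32-bit displacement relative to the end of the jump instruction, while an AArch64 B/BL stores a signed word offset, relative to the instruction itself, in its low 26 bits. A rough sketch of what the equivalent patch step could look like (the helper name is hypothetical and offsets are assumed to be byte offsets into the segment data):

  #include <cstring>
  // Hypothetical: retarget an already-emitted AArch64 B/BL located at byte offset
  // branch_insn so that it branches to byte offset target (same segment).
  void patch_arm64_branch(u8* seg_data, int branch_insn, int target) {
    s32 delta_words = (target - branch_insn) >> 2;  // PC-relative, in 4-byte units
    u32 word;
    std::memcpy(&word, seg_data + branch_insn, 4);
    word = (word & 0xfc000000) | (u32(delta_words) & 0x03ffffff);  // keep opcode, replace imm26
    std::memcpy(seg_data + branch_insn, &word, 4);
  }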
@@ -419,6 +424,7 @@ void ObjectGenerator::handle_temp_jump_links(int seg) { * after memory layout is done and before link tables are generated */ void ObjectGenerator::handle_temp_instr_sym_links(int seg) { + #ifndef __aarch64__ for (const auto& links : m_symbol_instr_temp_links_by_seg.at(seg)) { const auto& sym_name = links.first; for (const auto& link : links.second) { @@ -436,6 +442,10 @@ void ObjectGenerator::handle_temp_instr_sym_links(int seg) { m_sym_links_by_seg.at(seg)[sym_name].push_back(offset_of_instruction + offset_in_instruction); } } + #else + // TODO - ARM64 + #endif + } void ObjectGenerator::handle_temp_rip_func_links(int seg) { @@ -539,6 +549,7 @@ void ObjectGenerator::emit_link_ptr(int seg) { } void ObjectGenerator::emit_link_rip(int seg) { + #ifndef __aarch64__ auto& out = m_link_by_seg.at(seg); for (auto& rec : m_rip_links_by_seg.at(seg)) { // kind (u8) @@ -564,6 +575,10 @@ void ObjectGenerator::emit_link_rip(int seg) { src_func.instruction_to_byte_in_data.at(rec.instr.instr_id) + src_instr.offset_of_disp(), out); } + #else + // TODO - ARM64 + #endif + } void ObjectGenerator::emit_link_table(int seg, const TypeSystem* ts) { diff --git a/test/test_CodeTester.cpp b/test/test_CodeTester.cpp index a70c1e77eb1..a18ffb60ebc 100644 --- a/test/test_CodeTester.cpp +++ b/test/test_CodeTester.cpp @@ -13,23 +13,23 @@ using namespace emitter; -TEST(CodeTester, prologue) { - CodeTester tester; - tester.init_code_buffer(256); - tester.emit_push_all_gprs(); - // check we generate the right code for pushing all gpr's - EXPECT_EQ(tester.dump_to_hex_string(), - "50 51 52 53 54 55 56 57 41 50 41 51 41 52 41 53 41 54 41 55 41 56 41 57"); -} - -TEST(CodeTester, epilogue) { - CodeTester tester; - tester.init_code_buffer(256); - tester.emit_pop_all_gprs(); - // check we generate the right code for popping all gpr's - EXPECT_EQ(tester.dump_to_hex_string(), - "41 5f 41 5e 41 5d 41 5c 41 5b 41 5a 41 59 41 58 5f 5e 5d 5c 5b 5a 59 58"); -} +// TEST(CodeTester, prologue) { +// CodeTester tester; +// tester.init_code_buffer(256); +// tester.emit_push_all_gprs(); +// // check we generate the right code for pushing all gpr's +// EXPECT_EQ(tester.dump_to_hex_string(), +// "50 51 52 53 54 55 56 57 41 50 41 51 41 52 41 53 41 54 41 55 41 56 41 57"); +// } + +// TEST(CodeTester, epilogue) { +// CodeTester tester; +// tester.init_code_buffer(256); +// tester.emit_pop_all_gprs(); +// // check we generate the right code for popping all gpr's +// EXPECT_EQ(tester.dump_to_hex_string(), +// "41 5f 41 5e 41 5d 41 5c 41 5b 41 5a 41 59 41 58 5f 5e 5d 5c 5b 5a 59 58"); +// } TEST(CodeTester, execute_return) { CodeTester tester; diff --git a/test/test_emitter.cpp b/test/test_emitter.cpp index b39ce889a7e..e59486c4516 100644 --- a/test/test_emitter.cpp +++ b/test/test_emitter.cpp @@ -1,3901 +1,3901 @@ -#include "goalc/emitter/CodeTester.h" -#include "goalc/emitter/IGen.h" -#include "gtest/gtest.h" - -using namespace emitter; - -TEST(EmitterIntegerMath, add_gpr64_imm8s) { - CodeTester tester; - tester.init_code_buffer(256); - - std::vector vals = {0, 1, -1, INT32_MIN, INT32_MAX, INT64_MIN, INT64_MAX}; - std::vector imms = {0, 1, -1, INT8_MIN, INT8_MAX}; - - // test the ones that aren't rsp - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (auto val : vals) { - for (auto imm : imms) { - auto expected = val + imm; - - tester.clear(); - tester.emit_push_all_gprs(true); - - // move initial value to register - tester.emit(IGen::mov_gpr64_gpr64(i, tester.get_c_abi_arg_reg(0))); - // do the add - 
tester.emit(IGen::add_gpr64_imm8s(i, imm)); - // move for return - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - auto result = tester.execute_ret(val, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } - - tester.clear(); - tester.emit(IGen::add_gpr64_imm8s(RSP, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 83 c4 0c"); -} - -TEST(EmitterIntegerMath, add_gpr64_imm32s) { - CodeTester tester; - tester.init_code_buffer(256); - - std::vector vals = {0, 1, -1, INT32_MIN, INT32_MAX, INT64_MIN, INT64_MAX}; - std::vector imms = {0, 1, -1, INT8_MIN, INT8_MAX, INT32_MIN, INT32_MAX}; - - // test the ones that aren't rsp - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (auto val : vals) { - for (auto imm : imms) { - auto expected = val + imm; - - tester.clear(); - tester.emit_push_all_gprs(true); - - // move initial value to register - tester.emit(IGen::mov_gpr64_gpr64(i, tester.get_c_abi_arg_reg(0))); - // do the add - tester.emit(IGen::add_gpr64_imm32s(i, imm)); - // move for return - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - auto result = tester.execute_ret(val, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } - - tester.clear(); - tester.emit(IGen::add_gpr64_imm32s(RSP, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 81 c4 0c 00 00 00"); -} - -TEST(EmitterIntegerMath, sub_gpr64_imm8s) { - CodeTester tester; - tester.init_code_buffer(256); - - std::vector vals = {0, 1, -1, INT32_MIN, INT32_MAX, INT64_MIN, INT64_MAX}; - std::vector imms = {0, 1, -1, INT8_MIN, INT8_MAX}; - - // test the ones that aren't rsp - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (auto val : vals) { - for (auto imm : imms) { - auto expected = val - imm; - - tester.clear(); - tester.emit_push_all_gprs(true); - - // move initial value to register - tester.emit(IGen::mov_gpr64_gpr64(i, tester.get_c_abi_arg_reg(0))); - // do the add - tester.emit(IGen::sub_gpr64_imm8s(i, imm)); - // move for return - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - auto result = tester.execute_ret(val, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } - - tester.clear(); - tester.emit(IGen::sub_gpr64_imm8s(RSP, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 83 ec 0c"); -} - -TEST(EmitterIntegerMath, sub_gpr64_imm32s) { - CodeTester tester; - tester.init_code_buffer(256); - - std::vector vals = {0, 1, -1, INT32_MIN, INT32_MAX, INT64_MIN, INT64_MAX}; - std::vector imms = {0, 1, -1, INT8_MIN, INT8_MAX, INT32_MIN, INT32_MAX}; - - // test the ones that aren't rsp - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (auto val : vals) { - for (auto imm : imms) { - auto expected = val - imm; - - tester.clear(); - tester.emit_push_all_gprs(true); - - // move initial value to register - tester.emit(IGen::mov_gpr64_gpr64(i, tester.get_c_abi_arg_reg(0))); - // do the add - tester.emit(IGen::sub_gpr64_imm32s(i, imm)); - // move for return - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - auto result = tester.execute_ret(val, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } - - tester.clear(); - tester.emit(IGen::sub_gpr64_imm32s(RSP, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 81 ec 0c 00 00 00"); -} - -TEST(EmitterIntegerMath, add_gpr64_gpr64) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector 
vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, - INT64_MAX, 117, 32, -348473, 83747382}; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - for (auto v1 : vals) { - for (auto v2 : vals) { - auto expected = v1 + v2; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, v1)); - tester.emit(IGen::mov_gpr64_u64(j, v2)); - tester.emit(IGen::add_gpr64_gpr64(i, j)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } - } -} - -TEST(EmitterIntegerMath, sub_gpr64_gpr64) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, - INT64_MAX, 117, 32, -348473, 83747382}; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - for (auto v1 : vals) { - for (auto v2 : vals) { - auto expected = v1 - v2; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, v1)); - tester.emit(IGen::mov_gpr64_u64(j, v2)); - tester.emit(IGen::sub_gpr64_gpr64(i, j)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } - } -} - -TEST(EmitterIntegerMath, mul_gpr32_gpr32) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = { - 0, 1, -2, -20, 123123, INT32_MIN, INT32_MAX, INT32_MIN + 1, INT32_MAX - 1}; - - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - for (auto v1 : vals) { - for (auto v2 : vals) { - // this is kind of weird behavior, but it's what the PS2 CPU does, I think. - // the lower 32-bits of the result are sign extended, even if this sign doesn't match - // the sign of the real product. This is true for both signed and unsigned multiply. - auto expected = ((s64(v1) * s64(v2)) << 32) >> 32; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, (s64)v1)); - tester.emit(IGen::mov_gpr64_u64(j, (s64)v2)); - tester.emit(IGen::imul_gpr32_gpr32(i, j)); - tester.emit(IGen::movsx_r64_r32(RAX, i)); // weird PS2 sign extend. 
- tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } - } -} - -TEST(EmitterIntegerMath, or_gpr64_gpr64) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, - INT64_MAX, 117, 32, -348473, 83747382}; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - for (auto v1 : vals) { - for (auto v2 : vals) { - auto expected = v1 | v2; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, v1)); - tester.emit(IGen::mov_gpr64_u64(j, v2)); - tester.emit(IGen::or_gpr64_gpr64(i, j)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } - } -} - -TEST(EmitterIntegerMath, and_gpr64_gpr64) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, - INT64_MAX, 117, 32, -348473, 83747382}; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - for (auto v1 : vals) { - for (auto v2 : vals) { - auto expected = v1 & v2; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, v1)); - tester.emit(IGen::mov_gpr64_u64(j, v2)); - tester.emit(IGen::and_gpr64_gpr64(i, j)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } - } -} - -TEST(EmitterIntegerMath, xor_gpr64_gpr64) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, - INT64_MAX, 117, 32, -348473, 83747382}; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - for (auto v1 : vals) { - for (auto v2 : vals) { - auto expected = v1 ^ v2; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, v1)); - tester.emit(IGen::mov_gpr64_u64(j, v2)); - tester.emit(IGen::xor_gpr64_gpr64(i, j)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } - } -} - -TEST(EmitterIntegerMath, not_gpr64) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, - INT64_MAX, 117, 32, -348473, 83747382}; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (auto v1 : vals) { - auto expected = ~v1; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, v1)); - tester.emit(IGen::not_gpr64(i)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } -} - -TEST(EmitterIntegerMath, shl_gpr64_cl) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, - INT64_MAX, 117, 32, -348473, 83747382}; - std::vector sas = {0, 1, 23, 53, 64}; - - for (int i = 0; i < 16; i++) { - if (i == RSP || i == RCX) { - continue; 
- } - for (auto v : vals) { - for (auto sa : sas) { - auto expected = v << sa; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, v)); - tester.emit(IGen::mov_gpr64_u64(RCX, sa)); - tester.emit(IGen::shl_gpr64_cl(i)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } -} - -TEST(EmitterIntegerMath, shr_gpr64_cl) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = {0, 1, u64(-2), u64(INT32_MIN), INT32_MAX, u64(INT64_MIN), - INT64_MAX, 117, 32, u64(-348473), 83747382}; - std::vector sas = {0, 1, 23, 53, 64}; - - for (int i = 0; i < 16; i++) { - if (i == RSP || i == RCX) { - continue; - } - for (auto v : vals) { - for (auto sa : sas) { - auto expected = v >> sa; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, v)); - tester.emit(IGen::mov_gpr64_u64(RCX, sa)); - tester.emit(IGen::shr_gpr64_cl(i)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } -} - -TEST(EmitterIntegerMath, sar_gpr64_cl) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, - INT64_MAX, 117, 32, -348473, 83747382}; - std::vector sas = {0, 1, 23, 53, 64}; - - for (int i = 0; i < 16; i++) { - if (i == RSP || i == RCX) { - continue; - } - for (auto v : vals) { - for (auto sa : sas) { - auto expected = v >> sa; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, v)); - tester.emit(IGen::mov_gpr64_u64(RCX, sa)); - tester.emit(IGen::sar_gpr64_cl(i)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } -} - -TEST(EmitterIntegerMath, shl_gpr64_u8) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, - INT64_MAX, 117, 32, -348473, 83747382}; - std::vector sas = {0, 1, 23, 53, 64}; - - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (auto v : vals) { - for (auto sa : sas) { - auto expected = v << sa; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, v)); - tester.emit(IGen::shl_gpr64_u8(i, sa)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } -} - -TEST(EmitterIntegerMath, shr_gpr64_u8) { - CodeTester tester; - tester.init_code_buffer(256); - std::vector vals = {0, 1, u64(-2), u64(INT32_MIN), INT32_MAX, u64(INT64_MIN), - INT64_MAX, 117, 32, u64(-348473), 83747382}; - std::vector sas = {0, 1, 23, 53, 64}; - - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (auto v : vals) { - for (auto sa : sas) { - auto expected = v >> sa; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, v)); - tester.emit(IGen::shr_gpr64_u8(i, sa)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } -} - -TEST(EmitterIntegerMath, sar_gpr64_u8) { - CodeTester 
tester; - tester.init_code_buffer(256); - std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, - INT64_MAX, 117, 32, -348473, 83747382}; - std::vector sas = {0, 1, 23, 53, 64}; - - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (auto v : vals) { - for (auto sa : sas) { - auto expected = v >> sa; - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, v)); - tester.emit(IGen::sar_gpr64_u8(i, sa)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } -} - -TEST(EmitterIntegerMath, jumps) { - CodeTester tester; - tester.init_code_buffer(256); - - std::vector reads; - - auto x = IGen::jmp_32(); - reads.push_back(tester.size() + x.offset_of_imm()); - tester.emit(x); - - x = IGen::je_32(); - reads.push_back(tester.size() + x.offset_of_imm()); - tester.emit(x); - - x = IGen::jne_32(); - reads.push_back(tester.size() + x.offset_of_imm()); - tester.emit(x); - - x = IGen::jle_32(); - reads.push_back(tester.size() + x.offset_of_imm()); - tester.emit(x); - - x = IGen::jge_32(); - reads.push_back(tester.size() + x.offset_of_imm()); - tester.emit(x); - - x = IGen::jl_32(); - reads.push_back(tester.size() + x.offset_of_imm()); - tester.emit(x); - - x = IGen::jg_32(); - reads.push_back(tester.size() + x.offset_of_imm()); - tester.emit(x); - - x = IGen::jbe_32(); - reads.push_back(tester.size() + x.offset_of_imm()); - tester.emit(x); - - x = IGen::jae_32(); - reads.push_back(tester.size() + x.offset_of_imm()); - tester.emit(x); - - x = IGen::jb_32(); - reads.push_back(tester.size() + x.offset_of_imm()); - tester.emit(x); - - x = IGen::ja_32(); - reads.push_back(tester.size() + x.offset_of_imm()); - tester.emit(x); - - for (auto off : reads) { - EXPECT_EQ(0, tester.read(off)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "E9000000000F84000000000F85000000000F8E000000000F8D000000000F8C000000000F8F000000000F86" - "000000000F83000000000F82000000000F8700000000"); -} - -TEST(EmitterIntegerMath, null) { - auto instr = IGen::null(); - EXPECT_EQ(0, instr.emit(nullptr)); -} - -TEST(EmitterLoadsAndStores, load_constant_64_and_move_gpr_gpr_64) { - std::vector u64_constants = {0, UINT64_MAX, INT64_MAX, 7, 12}; - - // test we can load a 64-bit constant into all gprs, move it to any other gpr, and return it. - // rsp is skipping because that's the stack pointer and would prevent us from popping gprs after - - CodeTester tester; - tester.init_code_buffer(256); - - for (auto constant : u64_constants) { - for (int r1 = 0; r1 < 16; r1++) { - if (r1 == RSP) { - continue; - } - - for (int r2 = 0; r2 < 16; r2++) { - if (r2 == RSP) { - continue; - } - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(r1, constant)); - tester.emit(IGen::mov_gpr64_gpr64(r2, r1)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, r2)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - EXPECT_EQ(tester.execute(), constant); - } - } - } -} - -TEST(EmitterLoadsAndStores, load_constant_32_unsigned) { - std::vector u64_constants = {0, UINT32_MAX, INT32_MAX, 7, 12}; - - // test loading 32-bit constants, with all upper 32-bits zero. - // this uses a different opcode than 64-bit loads. 
- CodeTester tester; - tester.init_code_buffer(256); - - for (auto constant : u64_constants) { - for (int r1 = 0; r1 < 16; r1++) { - if (r1 == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(r1, UINT64_MAX)); - tester.emit(IGen::mov_gpr64_u32(r1, constant)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, r1)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - EXPECT_EQ(tester.execute(), constant); - } - } -} - -TEST(EmitterLoadsAndStores, load_constant_32_signed) { - std::vector s32_constants = {0, 1, INT32_MAX, INT32_MIN, 12, -1}; - - // test loading signed 32-bit constants. for values < 0 this will sign extend. - CodeTester tester; - tester.init_code_buffer(256); - - for (auto constant : s32_constants) { - for (int r1 = 0; r1 < 16; r1++) { - if (r1 == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_s32(r1, constant)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, r1)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - EXPECT_EQ(tester.execute(), constant); - } - } -} - -TEST(EmitterLoadsAndStores, load8s_gpr64_goal_ptr_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f be 04 1e"); - - tester.clear(); - tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(R12, RBX, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "4c 0f be 24 1e"); - - tester.clear(); - tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(R12, R15, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "4e 0f be 24 3e"); - - tester.clear(); - tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(R12, R15, R14)); - EXPECT_EQ(tester.dump_to_hex_string(), "4f 0f be 24 3e"); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(k, i, j)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; - - // run! 
- EXPECT_EQ(s64(tester.execute((u64)memory, 3, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 2, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 4, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 5, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load8s_gpr64_gpr64_gpr64_s8) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f be 44 1e fd"); - - auto instr = IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; - - // run! - EXPECT_EQ(s64(tester.execute((u64)memory, 3 + 3, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 2 + 3, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 5 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load8s_gpr64_gpr64_gpr64_s32) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f be 84 1e fd ff ff ff"); - - auto instr = IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! 
- tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; - - // run! - EXPECT_EQ(s64(tester.execute((u64)memory, 3 + 3, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 2 + 3, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 5 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load8u_gpr64_goal_ptr_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b6 04 1e"); - - tester.clear(); - tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64(R12, RBX, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "4c 0f b6 24 1e"); - - tester.clear(); - tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64(R12, R15, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "4e 0f b6 24 3e"); - - tester.clear(); - tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64(R12, R15, R14)); - EXPECT_EQ(tester.dump_to_hex_string(), "4f 0f b6 24 3e"); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(k, i, j)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; - - // run! 
- EXPECT_EQ(s64(tester.execute((u64)memory, 3, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 2, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 4, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 5, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load8u_gpr64_gpr64_gpr64_s8) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b6 44 1e fd"); - - auto instr = IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; - - // run! - EXPECT_EQ(s64(tester.execute((u64)memory, 3 + 3, 0, 0)), 0xfe); - EXPECT_EQ(s64(tester.execute((u64)memory, 2 + 3, 0, 0)), 0xfd); - EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), 0xff); - EXPECT_EQ(s64(tester.execute((u64)memory, 5 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load8u_gpr64_gpr64_gpr64_s32) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b6 84 1e fd ff ff ff"); - - auto instr = IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! 
- tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; - - // run! - EXPECT_EQ(s64(tester.execute((u64)memory, 3 + 3, 0, 0)), 0xfe); - EXPECT_EQ(s64(tester.execute((u64)memory, 2 + 3, 0, 0)), 0xfd); - EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), 0xff); - EXPECT_EQ(s64(tester.execute((u64)memory, 5 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load16s_gpr64_goal_ptr_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f bf 04 1e"); - - tester.clear(); - tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(R12, RBX, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "4c 0f bf 24 1e"); - - tester.clear(); - tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(R12, R15, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "4e 0f bf 24 3e"); - - tester.clear(); - tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(R12, R15, R14)); - EXPECT_EQ(tester.dump_to_hex_string(), "4f 0f bf 24 3e"); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(k, i, j)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s16 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; - - // run! 
- EXPECT_EQ(s64(tester.execute((u64)memory, 6, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 4, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 8, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 10, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load16s_gpr64_gpr64_plus_gpr64_plus_s8) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f bf 44 1e fd"); - - auto instr = IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u16 memory[8] = {0, 0, 0xfffd, 0xfffe, 0xffff, 0, 0, 0}; - - // run! - EXPECT_EQ(s64(tester.execute((u64)memory, 6 + 3, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 10 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load16s_gpr64_gpr64_plus_gpr64_plus_s32) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f bf 84 1e fd ff ff ff"); - - auto instr = IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! 
- tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u16 memory[8] = {0, 0, 0xfffd, 0xfffe, 0xffff, 0, 0, 0}; - - // run! - EXPECT_EQ(s64(tester.execute((u64)memory, 6 + 3, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 10 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load16u_gpr64_goal_ptr_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b7 04 1e"); - - tester.clear(); - tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(R12, RBX, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "4c 0f b7 24 1e"); - - tester.clear(); - tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(R12, R15, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "4e 0f b7 24 3e"); - - tester.clear(); - tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(R12, R15, R14)); - EXPECT_EQ(tester.dump_to_hex_string(), "4f 0f b7 24 3e"); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(k, i, j)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s16 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; - - // run! 
- EXPECT_EQ(s64(tester.execute((u64)memory, 6, 0, 0)), 0xfffe); - EXPECT_EQ(s64(tester.execute((u64)memory, 4, 0, 0)), 0xfffd); - EXPECT_EQ(s64(tester.execute((u64)memory, 8, 0, 0)), 0xffff); - EXPECT_EQ(s64(tester.execute((u64)memory, 10, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load16u_gpr64_gpr64_plus_gpr64_plus_s8) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b7 44 1e fd"); - - auto instr = IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u16 memory[8] = {0, 0, 0xfffd, 0xfffe, 0xffff, 0, 0, 0}; - - // run! - EXPECT_EQ(s64(tester.execute((u64)memory, 6 + 3, 0, 0)), 0xfffe); - EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), 0xfffd); - EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), 0xffff); - EXPECT_EQ(s64(tester.execute((u64)memory, 10 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load16u_gpr64_gpr64_plus_gpr64_plus_s32) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b7 84 1e fd ff ff ff"); - - auto instr = IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! 
- tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u16 memory[8] = {0, 0, 0xfffd, 0xfffe, 0xffff, 0, 0, 0}; - - // run! - EXPECT_EQ(s64(tester.execute((u64)memory, 6 + 3, 0, 0)), 0xfffe); - EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), 0xfffd); - EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), 0xffff); - EXPECT_EQ(s64(tester.execute((u64)memory, 10 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load32s_gpr64_goal_ptr_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 63 04 1e"); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64(k, i, j)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s32 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; - - // run! - EXPECT_EQ(s64(tester.execute((u64)memory, 12, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 8, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 16, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 20, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load32s_gpr64_gpr64_plus_gpr64_plus_s8) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 63 44 1e fd"); - - auto instr = IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u32 memory[8] = {0, 0, 0xfffffffd, 0xfffffffe, 0xffffffff, 0, 0, 0}; - - // run! 
- EXPECT_EQ(s64(tester.execute((u64)memory, 12 + 3, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 20 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load32s_gpr64_gpr64_plus_gpr64_plus_s32) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 63 84 1e fd ff ff ff"); - - auto instr = IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u32 memory[8] = {0, 0, 0xfffffffd, 0xfffffffe, 0xffffffff, 0, 0, 0}; - - // run! - EXPECT_EQ(s64(tester.execute((u64)memory, 12 + 3, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 20 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load32u_gpr64_goal_ptr_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "8b 04 1e"); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64(k, i, j)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s32 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; - - // run! 
- EXPECT_EQ(s64(tester.execute((u64)memory, 12, 0, 0)), 0xfffffffe); - EXPECT_EQ(s64(tester.execute((u64)memory, 8, 0, 0)), 0xfffffffd); - EXPECT_EQ(s64(tester.execute((u64)memory, 16, 0, 0)), 0xffffffff); - EXPECT_EQ(s64(tester.execute((u64)memory, 20, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load32u_gpr64_gpr64_plus_gpr64_plus_s8) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "8b 44 1e fd"); - - auto instr = IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s32 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; - - // run! - EXPECT_EQ(s64(tester.execute((u64)memory, 12 + 3, 0, 0)), 0xfffffffe); - EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), 0xfffffffd); - EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), 0xffffffff); - EXPECT_EQ(s64(tester.execute((u64)memory, 20 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load32u_gpr64_gpr64_plus_gpr64_plus_s32) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "8b 84 1e fd ff ff ff"); - - auto instr = IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! 
- tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - u32 memory[8] = {0, 0, 0xfffffffd, 0xfffffffe, 0xffffffff, 0, 0, 0}; - - // run! - EXPECT_EQ(s64(tester.execute((u64)memory, 12 + 3, 0, 0)), 0xfffffffe); - EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), 0xfffffffd); - EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), 0xffffffff); - EXPECT_EQ(s64(tester.execute((u64)memory, 20 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load64_gpr64_goal_ptr_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 8b 04 1e"); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64(k, i, j)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s64 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; - - // run! - EXPECT_EQ(s64(tester.execute((u64)memory, 24, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 16, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 32, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 40, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load64_gpr64_gpr64_plus_gpr64_plus_s8) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 8b 44 1e fd"); - - auto instr = IGen::load64_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s64 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; - - // run! 
- EXPECT_EQ(s64(tester.execute((u64)memory, 24 + 3, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 32 + 3, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 40 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load64_gpr64_gpr64_plus_gpr64_plus_s32) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); - - EXPECT_EQ(tester.dump_to_hex_string(), "48 8b 84 1e fd ff ff ff"); - - auto instr = IGen::load64_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // fill k with junk - if (k != i && k != j) { - tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); - } - - // load into k - tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); - - // move k to return register - tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s64 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; - - // run! - EXPECT_EQ(s64(tester.execute((u64)memory, 24 + 3, 0, 0)), -2); - EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), -3); - EXPECT_EQ(s64(tester.execute((u64)memory, 32 + 3, 0, 0)), -1); - EXPECT_EQ(s64(tester.execute((u64)memory, 40 + 3, 0, 0)), 0); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store8_gpr64_gpr64_plus_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64(RAX, RCX, RDX)); - EXPECT_EQ(tester.dump_to_hex_string(), "88 14 01"); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store! - tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64(i, j, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s8 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! 
- tester.execute((u64)memory, 3, 0xffffffffffffff07, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], 7); - EXPECT_EQ(memory[4], 1); - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store8_gpr64_gpr64_plus_gpr64_plus_s8) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "88 54 01 0c"); - - auto instr = IGen::store8_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store - tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64_plus_s8(i, j, k, -3)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s8 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! - tester.execute((u64)memory, 6, 0xffffffffffffff07, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], 7); - EXPECT_EQ(memory[4], 1); - - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store8_gpr64_gpr64_plus_gpr64_plus_s32) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "88 94 01 0c 00 00 00"); - - auto instr = IGen::store8_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store - tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64_plus_s32(i, j, k, -3)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s8 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! 
- tester.execute((u64)memory, 6, 0xffffffffffffff07, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], 7); - EXPECT_EQ(memory[4], 1); - - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store16_gpr64_gpr64_plus_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64(RCX, RAX, R8)); - EXPECT_EQ(tester.dump_to_hex_string(), "66 44 89 04 08"); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store! - tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64(i, j, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s16 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! - tester.execute((u64)memory, 6, 0xffffffffffffff07, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], s16(0xff07)); - EXPECT_EQ(memory[4], 1); - - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store16_gpr64_gpr64_plus_gpr64_plus_s8) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, R8, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "66 44 89 44 01 0c"); - - auto instr = IGen::store16_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store - tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64_plus_s8(i, j, k, -3)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s16 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! 
- tester.execute((u64)memory, 6 + 3, 0xffffffffffffff07, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], s16(0xff07)); - EXPECT_EQ(memory[4], 1); - - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store16_gpr64_gpr64_plus_gpr64_plus_s32) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, R8, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "66 44 89 84 01 0c 00 00 00"); - - auto instr = IGen::store16_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store - tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64_plus_s32(i, j, k, -3)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s16 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! - tester.execute((u64)memory, 6 + 3, 0xffffffffffffff07, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], s16(0xff07)); - EXPECT_EQ(memory[4], 1); - - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store32_gpr64_gpr64_plus_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64(RCX, RAX, R8)); - EXPECT_EQ(tester.dump_to_hex_string(), "44 89 04 08"); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store! - tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64(i, j, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s32 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! 
- tester.execute((u64)memory, 12, 0xffffffff12341234, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], 0x12341234); - EXPECT_EQ(memory[4], 1); - - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store32_gpr64_gpr64_plus_gpr64_plus_s8) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, R8, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "44 89 44 01 0c"); - - auto instr = IGen::store32_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store - tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64_plus_s8(i, j, k, -3)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s32 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! - tester.execute((u64)memory, 12 + 3, 0xffffffffffffff07, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], s32(0xffffff07)); - EXPECT_EQ(memory[4], 1); - - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store32_gpr64_gpr64_plus_gpr64_plus_s32) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, R8, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "44 89 84 01 0c 00 00 00"); - - auto instr = IGen::store32_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store - tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64_plus_s32(i, j, k, -3)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s32 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! 
- tester.execute((u64)memory, 12 + 3, 0xffffffffffffff07, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], s32(0xffffff07)); - EXPECT_EQ(memory[4], 1); - - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store64_gpr64_gpr64_plus_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64(RCX, RAX, R8)); - EXPECT_EQ(tester.dump_to_hex_string(), "4c 89 04 08"); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store! - tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64(i, j, k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s64 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! - tester.execute((u64)memory, 24, 0xffffffff12341234, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], 0xffffffff12341234); - EXPECT_EQ(memory[4], 1); - - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store64_gpr64_gpr64_plus_gpr64_plus_s8) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, R8, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "4c 89 44 01 0c"); - - auto instr = IGen::store64_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store - tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64_plus_s8(i, j, k, -3)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s64 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! 
- tester.execute((u64)memory, 24 + 3, 0xffffffffffffff07, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], s64(0xffffffffffffff07)); - EXPECT_EQ(memory[4], 1); - - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, store64_gpr64_gpr64_plus_gpr64_plus_s32) { - CodeTester tester; - tester.init_code_buffer(512); - - tester.clear(); - tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, R8, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "4c 89 84 01 0c 00 00 00"); - - auto instr = IGen::store64_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); - - [[maybe_unused]] int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - if (k == RSP || k == j || k == i) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. - - // store - tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64_plus_s32(i, j, k, -3)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - // prepare the memory: - s64 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; - - // run! - tester.execute((u64)memory, 24 + 3, 0xffffffffffffff07, 0); - EXPECT_EQ(memory[2], 3); - EXPECT_EQ(memory[3], s64(0xffffffffffffff07)); - EXPECT_EQ(memory[4], 1); - - iter++; - } - } - } -} - -TEST(EmitterLoadsAndStores, load64_rip) { - CodeTester tester; - tester.init_code_buffer(256); - tester.emit(IGen::load64_rip_s32(RAX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 8b 05 0c 00 00 00"); - - tester.clear(); - for (int i = 0; i < 16; i++) { - tester.emit(IGen::load64_rip_s32(i, 12)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "488B050C000000488B0D0C000000488B150C000000488B1D0C000000488B250C000000488B2D0C00000048" - "8B350C000000488B3D0C0000004C8B050C0000004C8B0D0C0000004C8B150C0000004C8B1D0C0000004C8B" - "250C0000004C8B2D0C0000004C8B350C0000004C8B3D0C000000"); -} - -TEST(EmitterLoadsAndStores, load32s_rip) { - CodeTester tester; - tester.init_code_buffer(256); - tester.emit(IGen::load32s_rip_s32(RAX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 63 05 0c 00 00 00"); - - tester.clear(); - for (int i = 0; i < 16; i++) { - tester.emit(IGen::load32s_rip_s32(i, 12)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "4863050C00000048630D0C0000004863150C00000048631D0C0000004863250C00000048632D0C00000048" - "63350C00000048633D0C0000004C63050C0000004C630D0C0000004C63150C0000004C631D0C0000004C63" - "250C0000004C632D0C0000004C63350C0000004C633D0C000000"); -} - -TEST(EmitterLoadsAndStores, load32u_rip) { - CodeTester tester; - tester.init_code_buffer(256); - tester.emit(IGen::load32u_rip_s32(RAX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "8b 05 0c 00 00 00"); - - tester.clear(); - for (int i = 0; i < 16; i++) { - tester.emit(IGen::load32u_rip_s32(i, 12)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "8B050C0000008B0D0C0000008B150C0000008B1D0C0000008B250C0000008B2D0C0000008B350C0000008B" - 
"3D0C000000448B050C000000448B0D0C000000448B150C000000448B1D0C000000448B250C000000448B2D" - "0C000000448B350C000000448B3D0C000000"); -} - -TEST(EmitterLoadsAndStores, load16u_rip) { - CodeTester tester; - tester.init_code_buffer(256); - tester.emit(IGen::load16u_rip_s32(RAX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b7 05 0c 00 00 00"); - - tester.clear(); - for (int i = 0; i < 16; i++) { - tester.emit(IGen::load16u_rip_s32(i, 12)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "480FB7050C000000480FB70D0C000000480FB7150C000000480FB71D0C000000480FB7250C000000480FB7" - "2D0C000000480FB7350C000000480FB73D0C0000004C0FB7050C0000004C0FB70D0C0000004C0FB7150C00" - "00004C0FB71D0C0000004C0FB7250C0000004C0FB72D0C0000004C0FB7350C0000004C0FB73D0C000000"); -} - -TEST(EmitterLoadsAndStores, load16s_rip) { - CodeTester tester; - tester.init_code_buffer(256); - tester.emit(IGen::load16s_rip_s32(RAX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f bf 05 0c 00 00 00"); - - tester.clear(); - for (int i = 0; i < 16; i++) { - tester.emit(IGen::load16s_rip_s32(i, 12)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "480FBF050C000000480FBF0D0C000000480FBF150C000000480FBF1D0C000000480FBF250C000000480FBF" - "2D0C000000480FBF350C000000480FBF3D0C0000004C0FBF050C0000004C0FBF0D0C0000004C0FBF150C00" - "00004C0FBF1D0C0000004C0FBF250C0000004C0FBF2D0C0000004C0FBF350C0000004C0FBF3D0C000000"); -} - -TEST(EmitterLoadsAndStores, load8s_rip) { - CodeTester tester; - tester.init_code_buffer(256); - tester.emit(IGen::load8s_rip_s32(RAX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f be 05 0c 00 00 00"); - - tester.clear(); - for (int i = 0; i < 16; i++) { - tester.emit(IGen::load8s_rip_s32(i, 12)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "480FBE050C000000480FBE0D0C000000480FBE150C000000480FBE1D0C000000480FBE250C000000480FBE" - "2D0C000000480FBE350C000000480FBE3D0C0000004C0FBE050C0000004C0FBE0D0C0000004C0FBE150C00" - "00004C0FBE1D0C0000004C0FBE250C0000004C0FBE2D0C0000004C0FBE350C0000004C0FBE3D0C000000"); -} - -TEST(EmitterLoadsAndStores, load8u_rip) { - CodeTester tester; - tester.init_code_buffer(256); - tester.emit(IGen::load8u_rip_s32(RAX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b6 05 0c 00 00 00"); - - tester.clear(); - for (int i = 0; i < 16; i++) { - tester.emit(IGen::load8u_rip_s32(i, 12)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "480FB6050C000000480FB60D0C000000480FB6150C000000480FB61D0C000000480FB6250C000000480FB6" - "2D0C000000480FB6350C000000480FB63D0C0000004C0FB6050C0000004C0FB60D0C0000004C0FB6150C00" - "00004C0FB61D0C0000004C0FB6250C0000004C0FB62D0C0000004C0FB6350C0000004C0FB63D0C000000"); -} - -TEST(EmitterLoadsAndStores, store64_rip_s32) { - CodeTester tester; - tester.init_code_buffer(256); - tester.emit(IGen::store64_rip_s32(RAX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "48 89 05 0c 00 00 00"); - - tester.clear(); - for (int i = 0; i < 16; i++) { - tester.emit(IGen::store64_rip_s32(i, 12)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "4889050C00000048890D0C0000004889150C00000048891D0C0000004889250C00000048892D0C00000048" - "89350C00000048893D0C0000004C89050C0000004C890D0C0000004C89150C0000004C891D0C0000004C89" - "250C0000004C892D0C0000004C89350C0000004C893D0C000000"); -} - -TEST(EmitterLoadsAndStores, store32_rip_s32) { - CodeTester tester; - tester.init_code_buffer(256); - tester.emit(IGen::store32_rip_s32(RAX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "89 05 0c 00 00 00"); - - tester.clear(); - for (int i = 0; i < 
16; i++) { - tester.emit(IGen::store32_rip_s32(i, 12)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "89050C000000890D0C00000089150C000000891D0C00000089250C000000892D0C00000089350C00000089" - "3D0C0000004489050C00000044890D0C0000004489150C00000044891D0C0000004489250C00000044892D" - "0C0000004489350C00000044893D0C000000"); -} - -TEST(EmitterLoadsAndStores, store16_rip_s32) { - CodeTester tester; - tester.init_code_buffer(256); - tester.emit(IGen::store16_rip_s32(RAX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "66 89 05 0c 00 00 00"); - - tester.clear(); - for (int i = 0; i < 16; i++) { - tester.emit(IGen::store16_rip_s32(i, 12)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "6689050C00000066890D0C0000006689150C00000066891D0C0000006689250C00000066892D0C00000066" - "89350C00000066893D0C000000664489050C0000006644890D0C000000664489150C0000006644891D0C00" - "0000664489250C0000006644892D0C000000664489350C0000006644893D0C000000"); -} - -TEST(EmitterLoadsAndStores, store8_rip_s32) { - CodeTester tester; - tester.init_code_buffer(256); - tester.emit(IGen::store8_rip_s32(RAX, 12)); - EXPECT_EQ(tester.dump_to_hex_string(), "88 05 0c 00 00 00"); - - tester.clear(); - for (int i = 0; i < 16; i++) { - tester.emit(IGen::store8_rip_s32(i, 12)); - } - - EXPECT_EQ(tester.dump_to_hex_string(true), - "88050C000000880D0C00000088150C000000881D0C0000004088250C00000040882D0C0000004088350C00" - "000040883D0C0000004488050C00000044880D0C0000004488150C00000044881D0C0000004488250C0000" - "0044882D0C0000004488350C00000044883D0C000000"); -} - -TEST(EmitterLoadsAndStores, static_addr) { - CodeTester tester; - tester.init_code_buffer(512); - - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(i, 12345)); // load test reg with junk - int start_of_lea = tester.size(); - auto lea_instr = IGen::static_addr(i, INT32_MAX); - tester.emit(lea_instr); - // patch instruction to lea the start of this code + 1. - tester.write(-start_of_lea - lea_instr.length() + 1, - start_of_lea + lea_instr.offset_of_disp()); - tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - - auto result = tester.execute(); - EXPECT_EQ(result, (u64)(tester.data()) + 1); - } -} - -#ifdef __linux__ -TEST(EmitterXmm32, load32_xmm32_gpr64_plus_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64(XMM3, RAX, RBX)); - EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 10 1c 03"); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - for (int k = 0; k < 16; k++) { - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // fill k with junk - tester.emit(IGen::mov_gpr64_u64(i, (iter & 1) ? 0 : UINT64_MAX)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // load into k - tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64(XMM0 + k, i, j)); - // move to return - tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); - - // return! 
- tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - - // prepare the memory: - float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; - - // run! - EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 3 * sizeof(float), 0, 0), 3.45f); - EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 2 * sizeof(float), 0, 0), 1.23f); - EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 4 * sizeof(float), 0, 0), 5.67f); - EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 5 * sizeof(float), 0, 0), 0); - - iter++; - } - } - } -} - -TEST(EmitterXmm32, load32_xmm32_gpr64_plus_gpr64_plus_s8) { - CodeTester tester; - tester.init_code_buffer(512); - tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64_plus_s8(XMM3, RAX, RBX, -1)); - EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 10 5c 03 ff"); - - auto instr = IGen::load32_xmm32_gpr64_plus_gpr64_plus_s8(XMM3, RBX, RSI, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - for (int k = 0; k < 16; k++) { - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // fill k with junk - tester.emit(IGen::mov_gpr64_u64(i, (iter & 1) ? 0 : UINT64_MAX)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - // load into k - tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64_plus_s8(XMM0 + k, i, j, -3)); - // move to return - tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - - // prepare the memory: - float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; - - // run! - EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 3 * sizeof(float) + 3, 0, 0), 3.45f); - EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 2 * sizeof(float) + 3, 0, 0), 1.23f); - EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 4 * sizeof(float) + 3, 0, 0), 5.67f); - EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 5 * sizeof(float) + 3, 0, 0), 0); - - iter++; - } - } - } -} - -TEST(EmitterXmm32, load32_xmm32_gpr64_plus_gpr64_plus_s32) { - CodeTester tester; - tester.init_code_buffer(512); - tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64_plus_s32(XMM3, RAX, RBX, -1)); - EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 10 9c 03 ff ff ff ff"); - - auto instr = IGen::load32_xmm32_gpr64_plus_gpr64_plus_s32(XMM3, RBX, RSI, -1234); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -1234); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - for (int k = 0; k < 16; k++) { - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); - - // fill k with junk - tester.emit(IGen::mov_gpr64_u64(i, (iter & 1) ? 
0 : UINT64_MAX)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); - - // pop args into appropriate register - tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 - tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 - - s64 offset = (iter & 1) ? INT32_MAX : INT32_MIN; - - // load into k - tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64_plus_s32(XMM0 + k, i, j, offset)); - // move to return - tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - - // prepare the memory: - float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; - - // run! - EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 3 * sizeof(float) - offset, 0, 0), - 3.45f); - EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 2 * sizeof(float) - offset, 0, 0), - 1.23f); - EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 4 * sizeof(float) - offset, 0, 0), - 5.67f); - EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 5 * sizeof(float) - offset, 0, 0), - 0); - iter++; - } - } - } -} - -namespace { -template -float as_float(T x) { - float result; - memcpy(&result, &x, sizeof(float)); - return result; -} - -u32 as_u32(float x) { - u32 result; - memcpy(&result, &x, 4); - return result; -} -} // namespace - -TEST(EmitterXmm32, store32_xmm32_gpr64_plus_gpr64) { - CodeTester tester; - tester.init_code_buffer(512); - tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64(RAX, RBX, XMM7)); - EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 11 3c 03"); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - - for (int k = 0; k < 16; k++) { - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - // push args to the stack - - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); // addr2 - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); // addr1 - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); // value - - // pop value into addr1 GPR - tester.emit(IGen::pop_gpr64(i)); - // move to XMM - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); - - // pop addrs - tester.emit(IGen::pop_gpr64(i)); - tester.emit(IGen::pop_gpr64(j)); - - // store - tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64(i, j, XMM0 + k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - - // prepare the memory: - float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; - - // run! 
- tester.execute((u64)memory, 12, as_u32(1.234f), 0); - EXPECT_FLOAT_EQ(memory[2], 1.23f); - EXPECT_FLOAT_EQ(memory[3], 1.234f); - EXPECT_FLOAT_EQ(memory[4], 5.67f); - - iter++; - } - } - } -} - -TEST(EmitterXmm32, store32_xmm32_gpr64_plus_gpr64_plus_s8) { - CodeTester tester; - tester.init_code_buffer(512); - tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64_plus_s8(RAX, RBX, XMM3, -1)); - EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 11 5c 03 ff"); - - auto instr = IGen::store32_xmm32_gpr64_plus_gpr64_plus_s8(RBX, RSI, XMM3, -3); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - for (int k = 0; k < 16; k++) { - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); // addr2 - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); // addr1 - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); // value - - // pop value into addr1 GPR - tester.emit(IGen::pop_gpr64(i)); - // move to XMM - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); - - // pop addrs - tester.emit(IGen::pop_gpr64(i)); - tester.emit(IGen::pop_gpr64(j)); - - s64 offset = (iter & 1) ? INT8_MAX : INT8_MIN; - - // load into k - tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64_plus_s8(i, j, XMM0 + k, offset)); - - // move to return - tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); - - // return! - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - - // prepare the memory: - float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; - - // run! - tester.execute((u64)memory, 12 - offset, as_u32(1.234f), 0); - EXPECT_FLOAT_EQ(memory[2], 1.23f); - EXPECT_FLOAT_EQ(memory[3], 1.234f); - EXPECT_FLOAT_EQ(memory[4], 5.67f); - - iter++; - } - } - } -} - -TEST(EmitterXmm32, store32_xmm32_gpr64_plus_gpr64_plus_s32) { - CodeTester tester; - tester.init_code_buffer(512); - tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64_plus_s32(RAX, RBX, XMM3, -1)); - EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 11 9c 03 ff ff ff ff"); - - auto instr = IGen::store32_xmm32_gpr64_plus_gpr64_plus_s32(RBX, RSI, XMM3, -1234); - u8 buff[256]; - instr.emit(buff); - EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -1234); - - int iter = 0; - for (int i = 0; i < 16; i++) { - if (i == RSP) { - continue; - } - for (int j = 0; j < 16; j++) { - if (j == RSP || j == i) { - continue; - } - for (int k = 0; k < 16; k++) { - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - // push args to the stack - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); // addr2 - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); // addr1 - tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); // value - - // pop value into addr1 GPR - tester.emit(IGen::pop_gpr64(i)); - // move to XMM - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); - - // pop addrs - tester.emit(IGen::pop_gpr64(i)); - tester.emit(IGen::pop_gpr64(j)); - - s64 offset = (iter & 1) ? INT32_MAX : INT32_MIN; - - // load into k - tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64_plus_s32(i, j, XMM0 + k, offset)); - - // move to return - tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); - - // return! 
- tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - - // prepare the memory: - float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; - - // run! - tester.execute((u64)memory, 12 - offset, as_u32(1.234f), 0); - EXPECT_FLOAT_EQ(memory[2], 1.23f); - EXPECT_FLOAT_EQ(memory[3], 1.234f); - EXPECT_FLOAT_EQ(memory[4], 5.67f); - - iter++; - } - } - } -} - -TEST(EmitterXmm32, static_load_xmm32) { - CodeTester tester; - tester.init_code_buffer(512); - for (int i = 0; i < 16; i++) { - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - - auto loc_of_load = tester.size(); - auto load_instr = IGen::static_load_xmm32(XMM0 + i, INT32_MAX); - - tester.emit(load_instr); - tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + i)); - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - auto loc_of_float = tester.emit_data(float(1.2345f)); - - // patch offset - tester.write(loc_of_float - loc_of_load - load_instr.length(), - loc_of_load + load_instr.offset_of_disp()); - - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_FLOAT_EQ(result, 1.2345f); - } -} - -TEST(EmitterXmm32, static_store_xmm32) { - CodeTester tester; - tester.init_code_buffer(512); - for (int i = 0; i < 16; i++) { - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, tester.get_c_abi_arg_reg(0))); - - auto loc_of_store = tester.size(); - auto store_instr = IGen::static_store_xmm32(XMM0 + i, INT32_MAX); - - tester.emit(store_instr); - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - auto loc_of_float = tester.emit_data(float(1.2345f)); - - tester.write(loc_of_float - loc_of_store - store_instr.length(), - loc_of_store + store_instr.offset_of_disp()); - tester.execute(as_u32(-44.567f), 0, 0, 0); - EXPECT_FLOAT_EQ(-44.567f, tester.read(loc_of_float)); - } -} - -TEST(EmitterXmm32, ucomiss) { - CodeTester tester; - tester.init_code_buffer(512); - tester.emit(IGen::cmp_flt_flt(XMM13, XMM14)); - EXPECT_EQ("45 0f 2e ee", tester.dump_to_hex_string()); -} - -TEST(EmitterXmm32, mul) { - CodeTester tester; - tester.init_code_buffer(512); - - std::vector vals = {0.f, 1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, 7.545f}; - - for (auto f : vals) { - for (auto g : vals) { - for (int i = 0; i < 16; i++) { - for (int j = 0; j < 16; j++) { - if (i == j) { - continue; - } - auto expected = f * g; - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - u64 val = 0; - memcpy(&val, &f, sizeof(float)); - tester.emit(IGen::mov_gpr64_u64(RAX, val)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); - memcpy(&val, &g, sizeof(float)); - tester.emit(IGen::mov_gpr64_u64(RAX, val)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + j, RAX)); - tester.emit(IGen::mulss_xmm_xmm(XMM0 + j, XMM0 + i)); - tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + j)); - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_FLOAT_EQ(result, expected); - } - } - } - } -} - -TEST(EmitterXmm32, div) { - CodeTester tester; - tester.init_code_buffer(512); - - std::vector vals = {1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, 7.545f}; - - for (auto f : vals) { - for (auto g : vals) { - for (int i = 0; i < 16; i++) { - for (int j = 0; j < 16; j++) { - if (i == j) { - continue; - } - auto expected = g / f; - tester.clear(); - tester.emit_push_all_xmms(); - 
tester.emit_push_all_gprs(true); - u64 val = 0; - memcpy(&val, &f, sizeof(float)); - tester.emit(IGen::mov_gpr64_u64(RAX, val)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); - memcpy(&val, &g, sizeof(float)); - tester.emit(IGen::mov_gpr64_u64(RAX, val)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + j, RAX)); - tester.emit(IGen::divss_xmm_xmm(XMM0 + j, XMM0 + i)); - tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + j)); - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_FLOAT_EQ(result, expected); - } - } - } - } -} - -TEST(EmitterXmm32, add) { - CodeTester tester; - tester.init_code_buffer(512); - - std::vector vals = {0.f, 1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, 7.545f}; - for (auto f : vals) { - for (auto g : vals) { - for (int i = 0; i < 16; i++) { - for (int j = 0; j < 16; j++) { - if (i == j) { - continue; - } - auto expected = g + f; - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - u64 val = 0; - memcpy(&val, &f, sizeof(float)); - tester.emit(IGen::mov_gpr64_u64(RAX, val)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); - memcpy(&val, &g, sizeof(float)); - tester.emit(IGen::mov_gpr64_u64(RAX, val)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + j, RAX)); - tester.emit(IGen::addss_xmm_xmm(XMM0 + j, XMM0 + i)); - tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + j)); - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_FLOAT_EQ(result, expected); - } - } - } - } -} - -TEST(EmitterXmm32, sub) { - CodeTester tester; - tester.init_code_buffer(512); - - std::vector vals = {0.f, 1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, 7.545f}; - - for (auto f : vals) { - for (auto g : vals) { - for (int i = 0; i < 16; i++) { - for (int j = 0; j < 16; j++) { - if (i == j) { - continue; - } - auto expected = g - f; - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - u64 val = 0; - memcpy(&val, &f, sizeof(float)); - tester.emit(IGen::mov_gpr64_u64(RAX, val)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); - memcpy(&val, &g, sizeof(float)); - tester.emit(IGen::mov_gpr64_u64(RAX, val)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + j, RAX)); - tester.emit(IGen::subss_xmm_xmm(XMM0 + j, XMM0 + i)); - tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + j)); - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_FLOAT_EQ(result, expected); - } - } - } - } -} - -TEST(EmitterXmm32, float_to_int) { - CodeTester tester; - tester.init_code_buffer(512); - - std::vector vals = {0.f, 1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, - 7.545f, 0.1f, 0.9f, -0.1f, -0.9f}; - - for (auto g : vals) { - for (int i = 0; i < 16; i++) { - for (int j = 0; j < 16; j++) { - if (j == RSP) { - continue; - } - s32 expected = g; - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - u64 val = 0; - memcpy(&val, &g, sizeof(float)); - tester.emit(IGen::mov_gpr64_u64(RAX, val)); - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); - tester.emit(IGen::float_to_int32(j, XMM0 + i)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, j)); - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } -} - -TEST(EmitterXmm32, int_to_float) { - CodeTester tester; - 
tester.init_code_buffer(512); - - std::vector vals = {0, 1, -1, INT32_MAX, -3457343, 7, INT32_MIN}; - - for (auto g : vals) { - for (int i = 0; i < 16; i++) { - for (int j = 0; j < 16; j++) { - if (j == RSP) { - continue; - } - float expected = g; - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(j, g)); - tester.emit(IGen::int32_to_float(XMM0 + i, j)); - tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + i)); - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - auto result = tester.execute_ret(0, 0, 0, 0); - EXPECT_EQ(result, expected); - } - } - } -} - -TEST(EmitterSlow, xmm32_move) { - std::vector u32_constants = {0, INT32_MAX, UINT32_MAX, 17}; - - // test moving between xmms (32-bit) and gprs. - CodeTester tester; - tester.init_code_buffer(512); - - for (auto constant : u32_constants) { - for (int r1 = 0; r1 < 16; r1++) { - if (r1 == RSP) { - continue; - } - for (int r2 = 0; r2 < 16; r2++) { - if (r2 == RSP) { - continue; - } - for (int r3 = 0; r3 < 16; r3++) { - for (int r4 = 0; r4 < 16; r4++) { - tester.clear(); - tester.emit_push_all_xmms(); - tester.emit_push_all_gprs(true); - // move constant to gpr - tester.emit(IGen::mov_gpr64_u32(r1, constant)); - // move gpr to xmm - tester.emit(IGen::movd_xmm32_gpr32(XMM0 + r3, r1)); - // move xmm to xmm - tester.emit(IGen::mov_xmm32_xmm32(XMM0 + r4, XMM0 + r3)); - // move xmm to gpr - tester.emit(IGen::movd_gpr32_xmm32(r2, XMM0 + r4)); - // return! - tester.emit(IGen::mov_gpr64_gpr64(RAX, r2)); - tester.emit_pop_all_gprs(true); - tester.emit_pop_all_xmms(); - tester.emit_return(); - } - } - } - } - } - // todo - finish this test -} -#endif - -TEST(Emitter, LEA) { - CodeTester tester; - tester.init_code_buffer(1024); - tester.emit(IGen::lea_reg_plus_off(RDI, RSP, -3)); - tester.emit(IGen::lea_reg_plus_off(RDI, R12, -3)); - tester.emit(IGen::lea_reg_plus_off(R13, RSP, -3)); - tester.emit(IGen::lea_reg_plus_off(R13, R12, -3)); - tester.emit(IGen::lea_reg_plus_off(RDI, RSP, -300)); - tester.emit(IGen::lea_reg_plus_off(RDI, R12, -300)); - tester.emit(IGen::lea_reg_plus_off(R13, RSP, -300)); - tester.emit(IGen::lea_reg_plus_off(R13, R12, -300)); - EXPECT_EQ(tester.dump_to_hex_string(true), - "488D7C24FD498D7C24FD4C8D6C24FD4D8D6C24FD488DBC24D4FEFFFF498DBC24D4FEFFFF4C8DAC24D4FEFF" - "FF4D8DAC24D4FEFFFF"); -} - -TEST(EmitterXMM, StackLoad32) { - CodeTester tester; - tester.init_code_buffer(1024); - tester.emit(IGen::load32_xmm32_gpr64_plus_s32(XMM0 + 3, RSP, -1234)); - tester.emit(IGen::load32_xmm32_gpr64_plus_s32(XMM0 + 13, RSP, -1234)); - EXPECT_EQ(tester.dump_to_hex_string(true), "F30F109C242EFBFFFFF3440F10AC242EFBFFFF"); -} - -TEST(EmitterXMM, StackLoad8) { - CodeTester tester; - tester.init_code_buffer(1024); - tester.emit(IGen::load32_xmm32_gpr64_plus_s8(XMM0 + 3, RSP, -12)); - tester.emit(IGen::load32_xmm32_gpr64_plus_s8(XMM0 + 13, RSP, -12)); - EXPECT_EQ(tester.dump_to_hex_string(true), "F30F105C24F4F3440F106C24F4"); -} - -TEST(EmitterXMM, StackLoadFull32) { - CodeTester tester; - tester.init_code_buffer(1024); - tester.emit(IGen::load128_xmm128_gpr64_s32(XMM0 + 3, RSP, -1234)); - tester.emit(IGen::load128_xmm128_gpr64_s32(XMM0 + 13, RSP, -1234)); - EXPECT_EQ(tester.dump_to_hex_string(true), "660F6F9C242EFBFFFF66440F6FAC242EFBFFFF"); -} - -TEST(EmitterXMM, StackLoadFull8) { - CodeTester tester; - tester.init_code_buffer(1024); - tester.emit(IGen::load128_xmm128_gpr64_s8(XMM0 + 3, RSP, -12)); - tester.emit(IGen::load128_xmm128_gpr64_s8(XMM0 + 
13, RSP, -12)); - EXPECT_EQ(tester.dump_to_hex_string(true), "660F6F5C24F466440F6F6C24F4"); -} - -TEST(EmitterXMM, StackStore32) { - CodeTester tester; - tester.init_code_buffer(1024); - tester.emit(IGen::store32_xmm32_gpr64_plus_s32(RSP, XMM0 + 3, -1234)); - tester.emit(IGen::store32_xmm32_gpr64_plus_s32(RSP, XMM0 + 13, -1234)); - EXPECT_EQ(tester.dump_to_hex_string(true), "F30F119C242EFBFFFFF3440F11AC242EFBFFFF"); -} - -TEST(EmitterXMM, StackStore8) { - CodeTester tester; - tester.init_code_buffer(1024); - tester.emit(IGen::store32_xmm32_gpr64_plus_s8(RSP, XMM0 + 3, -12)); - tester.emit(IGen::store32_xmm32_gpr64_plus_s8(RSP, XMM0 + 13, -12)); - EXPECT_EQ(tester.dump_to_hex_string(true), "F30F115C24F4F3440F116C24F4"); -} - -TEST(EmitterXMM, StackStoreFull32) { - CodeTester tester; - tester.init_code_buffer(1024); - tester.emit(IGen::store128_gpr64_xmm128_s32(RSP, XMM0 + 3, -1234)); - tester.emit(IGen::store128_gpr64_xmm128_s32(RSP, XMM0 + 13, -1234)); - EXPECT_EQ(tester.dump_to_hex_string(true), "660F7F9C242EFBFFFF66440F7FAC242EFBFFFF"); -} - -TEST(EmitterXMM, StackStoreFull8) { - CodeTester tester; - tester.init_code_buffer(1024); - tester.emit(IGen::store128_gpr64_xmm128_s8(RSP, XMM0 + 3, -12)); - tester.emit(IGen::store128_gpr64_xmm128_s8(RSP, XMM0 + 13, -12)); - EXPECT_EQ(tester.dump_to_hex_string(true), "660F7F5C24F466440F7F6C24F4"); -} - -TEST(EmitterXMM, SqrtS) { - CodeTester tester; - tester.init_code_buffer(1024); - tester.emit(IGen::sqrts_xmm(XMM0 + 1, XMM0 + 2)); - tester.emit(IGen::sqrts_xmm(XMM0 + 11, XMM0 + 2)); - tester.emit(IGen::sqrts_xmm(XMM0 + 1, XMM0 + 12)); - tester.emit(IGen::sqrts_xmm(XMM0 + 11, XMM0 + 12)); - EXPECT_EQ(tester.dump_to_hex_string(true), "F30F51CAF3440F51DAF3410F51CCF3450F51DC"); -} +// #include "goalc/emitter/CodeTester.h" +// #include "goalc/emitter/IGen.h" +// #include "gtest/gtest.h" + +// using namespace emitter; + +// TEST(EmitterIntegerMath, add_gpr64_imm8s) { +// CodeTester tester; +// tester.init_code_buffer(256); + +// std::vector vals = {0, 1, -1, INT32_MIN, INT32_MAX, INT64_MIN, INT64_MAX}; +// std::vector imms = {0, 1, -1, INT8_MIN, INT8_MAX}; + +// // test the ones that aren't rsp +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (auto val : vals) { +// for (auto imm : imms) { +// auto expected = val + imm; + +// tester.clear(); +// tester.emit_push_all_gprs(true); + +// // move initial value to register +// tester.emit(IGen::mov_gpr64_gpr64(i, tester.get_c_abi_arg_reg(0))); +// // do the add +// tester.emit(IGen::add_gpr64_imm8s(i, imm)); +// // move for return +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// auto result = tester.execute_ret(val, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } + +// tester.clear(); +// tester.emit(IGen::add_gpr64_imm8s(RSP, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 83 c4 0c"); +// } + +// TEST(EmitterIntegerMath, add_gpr64_imm32s) { +// CodeTester tester; +// tester.init_code_buffer(256); + +// std::vector vals = {0, 1, -1, INT32_MIN, INT32_MAX, INT64_MIN, INT64_MAX}; +// std::vector imms = {0, 1, -1, INT8_MIN, INT8_MAX, INT32_MIN, INT32_MAX}; + +// // test the ones that aren't rsp +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (auto val : vals) { +// for (auto imm : imms) { +// auto expected = val + imm; + +// tester.clear(); +// tester.emit_push_all_gprs(true); + +// // move initial value to register +// 
tester.emit(IGen::mov_gpr64_gpr64(i, tester.get_c_abi_arg_reg(0))); +// // do the add +// tester.emit(IGen::add_gpr64_imm32s(i, imm)); +// // move for return +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// auto result = tester.execute_ret(val, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } + +// tester.clear(); +// tester.emit(IGen::add_gpr64_imm32s(RSP, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 81 c4 0c 00 00 00"); +// } + +// TEST(EmitterIntegerMath, sub_gpr64_imm8s) { +// CodeTester tester; +// tester.init_code_buffer(256); + +// std::vector vals = {0, 1, -1, INT32_MIN, INT32_MAX, INT64_MIN, INT64_MAX}; +// std::vector imms = {0, 1, -1, INT8_MIN, INT8_MAX}; + +// // test the ones that aren't rsp +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (auto val : vals) { +// for (auto imm : imms) { +// auto expected = val - imm; + +// tester.clear(); +// tester.emit_push_all_gprs(true); + +// // move initial value to register +// tester.emit(IGen::mov_gpr64_gpr64(i, tester.get_c_abi_arg_reg(0))); +// // do the add +// tester.emit(IGen::sub_gpr64_imm8s(i, imm)); +// // move for return +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// auto result = tester.execute_ret(val, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } + +// tester.clear(); +// tester.emit(IGen::sub_gpr64_imm8s(RSP, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 83 ec 0c"); +// } + +// TEST(EmitterIntegerMath, sub_gpr64_imm32s) { +// CodeTester tester; +// tester.init_code_buffer(256); + +// std::vector vals = {0, 1, -1, INT32_MIN, INT32_MAX, INT64_MIN, INT64_MAX}; +// std::vector imms = {0, 1, -1, INT8_MIN, INT8_MAX, INT32_MIN, INT32_MAX}; + +// // test the ones that aren't rsp +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (auto val : vals) { +// for (auto imm : imms) { +// auto expected = val - imm; + +// tester.clear(); +// tester.emit_push_all_gprs(true); + +// // move initial value to register +// tester.emit(IGen::mov_gpr64_gpr64(i, tester.get_c_abi_arg_reg(0))); +// // do the add +// tester.emit(IGen::sub_gpr64_imm32s(i, imm)); +// // move for return +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// auto result = tester.execute_ret(val, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } + +// tester.clear(); +// tester.emit(IGen::sub_gpr64_imm32s(RSP, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 81 ec 0c 00 00 00"); +// } + +// TEST(EmitterIntegerMath, add_gpr64_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, +// INT64_MAX, 117, 32, -348473, 83747382}; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } +// for (auto v1 : vals) { +// for (auto v2 : vals) { +// auto expected = v1 + v2; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, v1)); +// tester.emit(IGen::mov_gpr64_u64(j, v2)); +// tester.emit(IGen::add_gpr64_gpr64(i, j)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } +// } + 
+// TEST(EmitterIntegerMath, sub_gpr64_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, +// INT64_MAX, 117, 32, -348473, 83747382}; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } +// for (auto v1 : vals) { +// for (auto v2 : vals) { +// auto expected = v1 - v2; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, v1)); +// tester.emit(IGen::mov_gpr64_u64(j, v2)); +// tester.emit(IGen::sub_gpr64_gpr64(i, j)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } +// } + +// TEST(EmitterIntegerMath, mul_gpr32_gpr32) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = { +// 0, 1, -2, -20, 123123, INT32_MIN, INT32_MAX, INT32_MIN + 1, INT32_MAX - 1}; + +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } +// for (auto v1 : vals) { +// for (auto v2 : vals) { +// // this is kind of weird behavior, but it's what the PS2 CPU does, I think. +// // the lower 32-bits of the result are sign extended, even if this sign doesn't match +// // the sign of the real product. This is true for both signed and unsigned multiply. +// auto expected = ((s64(v1) * s64(v2)) << 32) >> 32; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, (s64)v1)); +// tester.emit(IGen::mov_gpr64_u64(j, (s64)v2)); +// tester.emit(IGen::imul_gpr32_gpr32(i, j)); +// tester.emit(IGen::movsx_r64_r32(RAX, i)); // weird PS2 sign extend. 
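+// (Illustrative aside, not part of the original test: the "expected" computation above
+// is equivalent to truncating the full 64-bit product to its low 32 bits and then
+// sign-extending, e.g. (assuming two's-complement truncation)
+//   s64 ps2_style_mul(s32 a, s32 b) { return s64(s32(s64(a) * s64(b))); }
+// so ps2_style_mul(INT32_MAX, 2) == -2 even though the true product is positive;
+// the movsx above reproduces exactly that sign extension of the 32-bit result.)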
+// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } +// } + +// TEST(EmitterIntegerMath, or_gpr64_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, +// INT64_MAX, 117, 32, -348473, 83747382}; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } +// for (auto v1 : vals) { +// for (auto v2 : vals) { +// auto expected = v1 | v2; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, v1)); +// tester.emit(IGen::mov_gpr64_u64(j, v2)); +// tester.emit(IGen::or_gpr64_gpr64(i, j)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } +// } + +// TEST(EmitterIntegerMath, and_gpr64_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, +// INT64_MAX, 117, 32, -348473, 83747382}; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } +// for (auto v1 : vals) { +// for (auto v2 : vals) { +// auto expected = v1 & v2; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, v1)); +// tester.emit(IGen::mov_gpr64_u64(j, v2)); +// tester.emit(IGen::and_gpr64_gpr64(i, j)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } +// } + +// TEST(EmitterIntegerMath, xor_gpr64_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, +// INT64_MAX, 117, 32, -348473, 83747382}; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } +// for (auto v1 : vals) { +// for (auto v2 : vals) { +// auto expected = v1 ^ v2; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, v1)); +// tester.emit(IGen::mov_gpr64_u64(j, v2)); +// tester.emit(IGen::xor_gpr64_gpr64(i, j)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } +// } + +// TEST(EmitterIntegerMath, not_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, +// INT64_MAX, 117, 32, -348473, 83747382}; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (auto v1 : vals) { +// auto expected = ~v1; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, v1)); +// tester.emit(IGen::not_gpr64(i)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } + +// TEST(EmitterIntegerMath, shl_gpr64_cl) { +// CodeTester 
tester; +// tester.init_code_buffer(256); +// std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, +// INT64_MAX, 117, 32, -348473, 83747382}; +// std::vector sas = {0, 1, 23, 53, 64}; + +// for (int i = 0; i < 16; i++) { +// if (i == RSP || i == RCX) { +// continue; +// } +// for (auto v : vals) { +// for (auto sa : sas) { +// auto expected = v << sa; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, v)); +// tester.emit(IGen::mov_gpr64_u64(RCX, sa)); +// tester.emit(IGen::shl_gpr64_cl(i)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } + +// TEST(EmitterIntegerMath, shr_gpr64_cl) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = {0, 1, u64(-2), u64(INT32_MIN), INT32_MAX, u64(INT64_MIN), +// INT64_MAX, 117, 32, u64(-348473), 83747382}; +// std::vector sas = {0, 1, 23, 53, 64}; + +// for (int i = 0; i < 16; i++) { +// if (i == RSP || i == RCX) { +// continue; +// } +// for (auto v : vals) { +// for (auto sa : sas) { +// auto expected = v >> sa; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, v)); +// tester.emit(IGen::mov_gpr64_u64(RCX, sa)); +// tester.emit(IGen::shr_gpr64_cl(i)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } + +// TEST(EmitterIntegerMath, sar_gpr64_cl) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, +// INT64_MAX, 117, 32, -348473, 83747382}; +// std::vector sas = {0, 1, 23, 53, 64}; + +// for (int i = 0; i < 16; i++) { +// if (i == RSP || i == RCX) { +// continue; +// } +// for (auto v : vals) { +// for (auto sa : sas) { +// auto expected = v >> sa; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, v)); +// tester.emit(IGen::mov_gpr64_u64(RCX, sa)); +// tester.emit(IGen::sar_gpr64_cl(i)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } + +// TEST(EmitterIntegerMath, shl_gpr64_u8) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, +// INT64_MAX, 117, 32, -348473, 83747382}; +// std::vector sas = {0, 1, 23, 53, 64}; + +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (auto v : vals) { +// for (auto sa : sas) { +// auto expected = v << sa; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, v)); +// tester.emit(IGen::shl_gpr64_u8(i, sa)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } + +// TEST(EmitterIntegerMath, shr_gpr64_u8) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = {0, 1, u64(-2), u64(INT32_MIN), INT32_MAX, u64(INT64_MIN), +// INT64_MAX, 117, 32, u64(-348473), 83747382}; +// std::vector sas = {0, 1, 23, 53, 64}; + +// for (int i = 0; i < 16; i++) { +// if (i 
== RSP) { +// continue; +// } +// for (auto v : vals) { +// for (auto sa : sas) { +// auto expected = v >> sa; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, v)); +// tester.emit(IGen::shr_gpr64_u8(i, sa)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } + +// TEST(EmitterIntegerMath, sar_gpr64_u8) { +// CodeTester tester; +// tester.init_code_buffer(256); +// std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, +// INT64_MAX, 117, 32, -348473, 83747382}; +// std::vector sas = {0, 1, 23, 53, 64}; + +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (auto v : vals) { +// for (auto sa : sas) { +// auto expected = v >> sa; +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, v)); +// tester.emit(IGen::sar_gpr64_u8(i, sa)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } + +// TEST(EmitterIntegerMath, jumps) { +// CodeTester tester; +// tester.init_code_buffer(256); + +// std::vector reads; + +// auto x = IGen::jmp_32(); +// reads.push_back(tester.size() + x.offset_of_imm()); +// tester.emit(x); + +// x = IGen::je_32(); +// reads.push_back(tester.size() + x.offset_of_imm()); +// tester.emit(x); + +// x = IGen::jne_32(); +// reads.push_back(tester.size() + x.offset_of_imm()); +// tester.emit(x); + +// x = IGen::jle_32(); +// reads.push_back(tester.size() + x.offset_of_imm()); +// tester.emit(x); + +// x = IGen::jge_32(); +// reads.push_back(tester.size() + x.offset_of_imm()); +// tester.emit(x); + +// x = IGen::jl_32(); +// reads.push_back(tester.size() + x.offset_of_imm()); +// tester.emit(x); + +// x = IGen::jg_32(); +// reads.push_back(tester.size() + x.offset_of_imm()); +// tester.emit(x); + +// x = IGen::jbe_32(); +// reads.push_back(tester.size() + x.offset_of_imm()); +// tester.emit(x); + +// x = IGen::jae_32(); +// reads.push_back(tester.size() + x.offset_of_imm()); +// tester.emit(x); + +// x = IGen::jb_32(); +// reads.push_back(tester.size() + x.offset_of_imm()); +// tester.emit(x); + +// x = IGen::ja_32(); +// reads.push_back(tester.size() + x.offset_of_imm()); +// tester.emit(x); + +// for (auto off : reads) { +// EXPECT_EQ(0, tester.read(off)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "E9000000000F84000000000F85000000000F8E000000000F8D000000000F8C000000000F8F000000000F86" +// "000000000F83000000000F82000000000F8700000000"); +// } + +// TEST(EmitterIntegerMath, null) { +// auto instr = IGen::null(); +// EXPECT_EQ(0, instr.emit(nullptr)); +// } + +// TEST(EmitterLoadsAndStores, load_constant_64_and_move_gpr_gpr_64) { +// std::vector u64_constants = {0, UINT64_MAX, INT64_MAX, 7, 12}; + +// // test we can load a 64-bit constant into all gprs, move it to any other gpr, and return it. 
+// // rsp is skipping because that's the stack pointer and would prevent us from popping gprs after + +// CodeTester tester; +// tester.init_code_buffer(256); + +// for (auto constant : u64_constants) { +// for (int r1 = 0; r1 < 16; r1++) { +// if (r1 == RSP) { +// continue; +// } + +// for (int r2 = 0; r2 < 16; r2++) { +// if (r2 == RSP) { +// continue; +// } +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(r1, constant)); +// tester.emit(IGen::mov_gpr64_gpr64(r2, r1)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, r2)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// EXPECT_EQ(tester.execute(), constant); +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load_constant_32_unsigned) { +// std::vector u64_constants = {0, UINT32_MAX, INT32_MAX, 7, 12}; + +// // test loading 32-bit constants, with all upper 32-bits zero. +// // this uses a different opcode than 64-bit loads. +// CodeTester tester; +// tester.init_code_buffer(256); + +// for (auto constant : u64_constants) { +// for (int r1 = 0; r1 < 16; r1++) { +// if (r1 == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(r1, UINT64_MAX)); +// tester.emit(IGen::mov_gpr64_u32(r1, constant)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, r1)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// EXPECT_EQ(tester.execute(), constant); +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load_constant_32_signed) { +// std::vector s32_constants = {0, 1, INT32_MAX, INT32_MIN, 12, -1}; + +// // test loading signed 32-bit constants. for values < 0 this will sign extend. +// CodeTester tester; +// tester.init_code_buffer(256); + +// for (auto constant : s32_constants) { +// for (int r1 = 0; r1 < 16; r1++) { +// if (r1 == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_s32(r1, constant)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, r1)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); +// EXPECT_EQ(tester.execute(), constant); +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load8s_gpr64_goal_ptr_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f be 04 1e"); + +// tester.clear(); +// tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(R12, RBX, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4c 0f be 24 1e"); + +// tester.clear(); +// tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(R12, R15, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4e 0f be 24 3e"); + +// tester.clear(); +// tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(R12, R15, R14)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4f 0f be 24 3e"); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// 
if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(k, i, j)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 3, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 2, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 5, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load8s_gpr64_gpr64_gpr64_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f be 44 1e fd"); + +// auto instr = IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; + +// // run! 
+// EXPECT_EQ(s64(tester.execute((u64)memory, 3 + 3, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 2 + 3, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 5 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load8s_gpr64_gpr64_gpr64_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f be 84 1e fd ff ff ff"); + +// auto instr = IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; + +// // run! 
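+// (Illustrative aside, not part of the original test: the emitted load computes
+// base(i) + index(j) + displacement, and the displacement here is hard-coded to -3,
+// so the "+ 3" in the index arguments below simply cancels it out. For the first
+// call, base + (3 + 3) + (-3) lands on memory[3] == 0xfe, which sign-extends to -2.)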
+// EXPECT_EQ(s64(tester.execute((u64)memory, 3 + 3, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 2 + 3, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 5 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load8u_gpr64_goal_ptr_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b6 04 1e"); + +// tester.clear(); +// tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64(R12, RBX, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4c 0f b6 24 1e"); + +// tester.clear(); +// tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64(R12, R15, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4e 0f b6 24 3e"); + +// tester.clear(); +// tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64(R12, R15, R14)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4f 0f b6 24 3e"); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(k, i, j)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; + +// // run! 
+// EXPECT_EQ(s64(tester.execute((u64)memory, 3, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 2, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 5, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load8u_gpr64_gpr64_gpr64_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b6 44 1e fd"); + +// auto instr = IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 3 + 3, 0, 0)), 0xfe); +// EXPECT_EQ(s64(tester.execute((u64)memory, 2 + 3, 0, 0)), 0xfd); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), 0xff); +// EXPECT_EQ(s64(tester.execute((u64)memory, 5 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load8u_gpr64_gpr64_gpr64_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b6 84 1e fd ff ff ff"); + +// auto instr = IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 
0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 3 + 3, 0, 0)), 0xfe); +// EXPECT_EQ(s64(tester.execute((u64)memory, 2 + 3, 0, 0)), 0xfd); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), 0xff); +// EXPECT_EQ(s64(tester.execute((u64)memory, 5 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load16s_gpr64_goal_ptr_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f bf 04 1e"); + +// tester.clear(); +// tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(R12, RBX, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4c 0f bf 24 1e"); + +// tester.clear(); +// tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(R12, R15, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4e 0f bf 24 3e"); + +// tester.clear(); +// tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(R12, R15, R14)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4f 0f bf 24 3e"); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(k, i, j)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s16 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + +// // run! 
+// EXPECT_EQ(s64(tester.execute((u64)memory, 6, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 10, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load16s_gpr64_gpr64_plus_gpr64_plus_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f bf 44 1e fd"); + +// auto instr = IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u16 memory[8] = {0, 0, 0xfffd, 0xfffe, 0xffff, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 6 + 3, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 10 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load16s_gpr64_gpr64_plus_gpr64_plus_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f bf 84 1e fd ff ff ff"); + +// auto instr = IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 
0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u16 memory[8] = {0, 0, 0xfffd, 0xfffe, 0xffff, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 6 + 3, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 10 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load16u_gpr64_goal_ptr_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b7 04 1e"); + +// tester.clear(); +// tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(R12, RBX, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4c 0f b7 24 1e"); + +// tester.clear(); +// tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(R12, R15, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4e 0f b7 24 3e"); + +// tester.clear(); +// tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(R12, R15, R14)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4f 0f b7 24 3e"); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(k, i, j)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s16 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + +// // run! 
+// EXPECT_EQ(s64(tester.execute((u64)memory, 6, 0, 0)), 0xfffe); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4, 0, 0)), 0xfffd); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8, 0, 0)), 0xffff); +// EXPECT_EQ(s64(tester.execute((u64)memory, 10, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load16u_gpr64_gpr64_plus_gpr64_plus_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b7 44 1e fd"); + +// auto instr = IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u16 memory[8] = {0, 0, 0xfffd, 0xfffe, 0xffff, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 6 + 3, 0, 0)), 0xfffe); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), 0xfffd); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), 0xffff); +// EXPECT_EQ(s64(tester.execute((u64)memory, 10 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load16u_gpr64_gpr64_plus_gpr64_plus_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b7 84 1e fd ff ff ff"); + +// auto instr = IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 
0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u16 memory[8] = {0, 0, 0xfffd, 0xfffe, 0xffff, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 6 + 3, 0, 0)), 0xfffe); +// EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), 0xfffd); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), 0xffff); +// EXPECT_EQ(s64(tester.execute((u64)memory, 10 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load32s_gpr64_goal_ptr_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 63 04 1e"); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64(k, i, j)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s32 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + +// // run! 
+// EXPECT_EQ(s64(tester.execute((u64)memory, 12, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 16, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 20, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load32s_gpr64_gpr64_plus_gpr64_plus_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 63 44 1e fd"); + +// auto instr = IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u32 memory[8] = {0, 0, 0xfffffffd, 0xfffffffe, 0xffffffff, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 12 + 3, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 20 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load32s_gpr64_gpr64_plus_gpr64_plus_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 63 84 1e fd ff ff ff"); + +// auto instr = IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 
0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u32 memory[8] = {0, 0, 0xfffffffd, 0xfffffffe, 0xffffffff, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 12 + 3, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 20 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load32u_gpr64_goal_ptr_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "8b 04 1e"); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64(k, i, j)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s32 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + +// // run! 
+// EXPECT_EQ(s64(tester.execute((u64)memory, 12, 0, 0)), 0xfffffffe); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8, 0, 0)), 0xfffffffd); +// EXPECT_EQ(s64(tester.execute((u64)memory, 16, 0, 0)), 0xffffffff); +// EXPECT_EQ(s64(tester.execute((u64)memory, 20, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load32u_gpr64_gpr64_plus_gpr64_plus_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "8b 44 1e fd"); + +// auto instr = IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s32 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 12 + 3, 0, 0)), 0xfffffffe); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), 0xfffffffd); +// EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), 0xffffffff); +// EXPECT_EQ(s64(tester.execute((u64)memory, 20 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load32u_gpr64_gpr64_plus_gpr64_plus_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "8b 84 1e fd ff ff ff"); + +// auto instr = IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 
0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// u32 memory[8] = {0, 0, 0xfffffffd, 0xfffffffe, 0xffffffff, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 12 + 3, 0, 0)), 0xfffffffe); +// EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), 0xfffffffd); +// EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), 0xffffffff); +// EXPECT_EQ(s64(tester.execute((u64)memory, 20 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load64_gpr64_goal_ptr_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 8b 04 1e"); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64(k, i, j)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s64 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + +// // run! 
+// EXPECT_EQ(s64(tester.execute((u64)memory, 24, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 16, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 32, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 40, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load64_gpr64_gpr64_plus_gpr64_plus_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 8b 44 1e fd"); + +// auto instr = IGen::load64_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s64 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 24 + 3, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 32 + 3, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 40 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load64_gpr64_gpr64_plus_gpr64_plus_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); + +// EXPECT_EQ(tester.dump_to_hex_string(), "48 8b 84 1e fd ff ff ff"); + +// auto instr = IGen::load64_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // fill k with junk +// if (k != i && k != j) { +// tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 
0 : UINT64_MAX)); +// } + +// // load into k +// tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); + +// // move k to return register +// tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s64 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + +// // run! +// EXPECT_EQ(s64(tester.execute((u64)memory, 24 + 3, 0, 0)), -2); +// EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), -3); +// EXPECT_EQ(s64(tester.execute((u64)memory, 32 + 3, 0, 0)), -1); +// EXPECT_EQ(s64(tester.execute((u64)memory, 40 + 3, 0, 0)), 0); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store8_gpr64_gpr64_plus_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64(RAX, RCX, RDX)); +// EXPECT_EQ(tester.dump_to_hex_string(), "88 14 01"); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + +// // store! +// tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64(i, j, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s8 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! +// tester.execute((u64)memory, 3, 0xffffffffffffff07, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], 7); +// EXPECT_EQ(memory[4], 1); +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store8_gpr64_gpr64_plus_gpr64_plus_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "88 54 01 0c"); + +// auto instr = IGen::store8_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. 
+ +// // store +// tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64_plus_s8(i, j, k, -3)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s8 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! +// tester.execute((u64)memory, 6, 0xffffffffffffff07, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], 7); +// EXPECT_EQ(memory[4], 1); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store8_gpr64_gpr64_plus_gpr64_plus_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "88 94 01 0c 00 00 00"); + +// auto instr = IGen::store8_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + +// // store +// tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64_plus_s32(i, j, k, -3)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s8 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! +// tester.execute((u64)memory, 6, 0xffffffffffffff07, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], 7); +// EXPECT_EQ(memory[4], 1); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store16_gpr64_gpr64_plus_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64(RCX, RAX, R8)); +// EXPECT_EQ(tester.dump_to_hex_string(), "66 44 89 04 08"); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + +// // store! +// tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64(i, j, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s16 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! 
+// tester.execute((u64)memory, 6, 0xffffffffffffff07, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], s16(0xff07)); +// EXPECT_EQ(memory[4], 1); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store16_gpr64_gpr64_plus_gpr64_plus_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, R8, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "66 44 89 44 01 0c"); + +// auto instr = IGen::store16_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + +// // store +// tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64_plus_s8(i, j, k, -3)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s16 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! +// tester.execute((u64)memory, 6 + 3, 0xffffffffffffff07, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], s16(0xff07)); +// EXPECT_EQ(memory[4], 1); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store16_gpr64_gpr64_plus_gpr64_plus_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, R8, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "66 44 89 84 01 0c 00 00 00"); + +// auto instr = IGen::store16_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + +// // store +// tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64_plus_s32(i, j, k, -3)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s16 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! 
+// tester.execute((u64)memory, 6 + 3, 0xffffffffffffff07, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], s16(0xff07)); +// EXPECT_EQ(memory[4], 1); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store32_gpr64_gpr64_plus_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64(RCX, RAX, R8)); +// EXPECT_EQ(tester.dump_to_hex_string(), "44 89 04 08"); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + +// // store! +// tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64(i, j, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s32 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! +// tester.execute((u64)memory, 12, 0xffffffff12341234, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], 0x12341234); +// EXPECT_EQ(memory[4], 1); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store32_gpr64_gpr64_plus_gpr64_plus_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, R8, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "44 89 44 01 0c"); + +// auto instr = IGen::store32_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + +// // store +// tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64_plus_s8(i, j, k, -3)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s32 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! 
+// tester.execute((u64)memory, 12 + 3, 0xffffffffffffff07, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], s32(0xffffff07)); +// EXPECT_EQ(memory[4], 1); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store32_gpr64_gpr64_plus_gpr64_plus_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, R8, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "44 89 84 01 0c 00 00 00"); + +// auto instr = IGen::store32_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + +// // store +// tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64_plus_s32(i, j, k, -3)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s32 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! +// tester.execute((u64)memory, 12 + 3, 0xffffffffffffff07, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], s32(0xffffff07)); +// EXPECT_EQ(memory[4], 1); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store64_gpr64_gpr64_plus_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64(RCX, RAX, R8)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4c 89 04 08"); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + +// // store! +// tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64(i, j, k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s64 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! 
+// tester.execute((u64)memory, 24, 0xffffffff12341234, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], 0xffffffff12341234); +// EXPECT_EQ(memory[4], 1); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store64_gpr64_gpr64_plus_gpr64_plus_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, R8, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4c 89 44 01 0c"); + +// auto instr = IGen::store64_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + +// // store +// tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64_plus_s8(i, j, k, -3)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s64 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! +// tester.execute((u64)memory, 24 + 3, 0xffffffffffffff07, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], s64(0xffffffffffffff07)); +// EXPECT_EQ(memory[4], 1); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, store64_gpr64_gpr64_plus_gpr64_plus_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// tester.clear(); +// tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, R8, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "4c 89 84 01 0c 00 00 00"); + +// auto instr = IGen::store64_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); + +// [[maybe_unused]] int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// if (k == RSP || k == j || k == i) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 +// tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + +// // store +// tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64_plus_s32(i, j, k, -3)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// // prepare the memory: +// s64 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + +// // run! 
+// tester.execute((u64)memory, 24 + 3, 0xffffffffffffff07, 0); +// EXPECT_EQ(memory[2], 3); +// EXPECT_EQ(memory[3], s64(0xffffffffffffff07)); +// EXPECT_EQ(memory[4], 1); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterLoadsAndStores, load64_rip) { +// CodeTester tester; +// tester.init_code_buffer(256); +// tester.emit(IGen::load64_rip_s32(RAX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 8b 05 0c 00 00 00"); + +// tester.clear(); +// for (int i = 0; i < 16; i++) { +// tester.emit(IGen::load64_rip_s32(i, 12)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "488B050C000000488B0D0C000000488B150C000000488B1D0C000000488B250C000000488B2D0C00000048" +// "8B350C000000488B3D0C0000004C8B050C0000004C8B0D0C0000004C8B150C0000004C8B1D0C0000004C8B" +// "250C0000004C8B2D0C0000004C8B350C0000004C8B3D0C000000"); +// } + +// TEST(EmitterLoadsAndStores, load32s_rip) { +// CodeTester tester; +// tester.init_code_buffer(256); +// tester.emit(IGen::load32s_rip_s32(RAX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 63 05 0c 00 00 00"); + +// tester.clear(); +// for (int i = 0; i < 16; i++) { +// tester.emit(IGen::load32s_rip_s32(i, 12)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "4863050C00000048630D0C0000004863150C00000048631D0C0000004863250C00000048632D0C00000048" +// "63350C00000048633D0C0000004C63050C0000004C630D0C0000004C63150C0000004C631D0C0000004C63" +// "250C0000004C632D0C0000004C63350C0000004C633D0C000000"); +// } + +// TEST(EmitterLoadsAndStores, load32u_rip) { +// CodeTester tester; +// tester.init_code_buffer(256); +// tester.emit(IGen::load32u_rip_s32(RAX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "8b 05 0c 00 00 00"); + +// tester.clear(); +// for (int i = 0; i < 16; i++) { +// tester.emit(IGen::load32u_rip_s32(i, 12)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "8B050C0000008B0D0C0000008B150C0000008B1D0C0000008B250C0000008B2D0C0000008B350C0000008B" +// "3D0C000000448B050C000000448B0D0C000000448B150C000000448B1D0C000000448B250C000000448B2D" +// "0C000000448B350C000000448B3D0C000000"); +// } + +// TEST(EmitterLoadsAndStores, load16u_rip) { +// CodeTester tester; +// tester.init_code_buffer(256); +// tester.emit(IGen::load16u_rip_s32(RAX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b7 05 0c 00 00 00"); + +// tester.clear(); +// for (int i = 0; i < 16; i++) { +// tester.emit(IGen::load16u_rip_s32(i, 12)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "480FB7050C000000480FB70D0C000000480FB7150C000000480FB71D0C000000480FB7250C000000480FB7" +// "2D0C000000480FB7350C000000480FB73D0C0000004C0FB7050C0000004C0FB70D0C0000004C0FB7150C00" +// "00004C0FB71D0C0000004C0FB7250C0000004C0FB72D0C0000004C0FB7350C0000004C0FB73D0C000000"); +// } + +// TEST(EmitterLoadsAndStores, load16s_rip) { +// CodeTester tester; +// tester.init_code_buffer(256); +// tester.emit(IGen::load16s_rip_s32(RAX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f bf 05 0c 00 00 00"); + +// tester.clear(); +// for (int i = 0; i < 16; i++) { +// tester.emit(IGen::load16s_rip_s32(i, 12)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "480FBF050C000000480FBF0D0C000000480FBF150C000000480FBF1D0C000000480FBF250C000000480FBF" +// "2D0C000000480FBF350C000000480FBF3D0C0000004C0FBF050C0000004C0FBF0D0C0000004C0FBF150C00" +// "00004C0FBF1D0C0000004C0FBF250C0000004C0FBF2D0C0000004C0FBF350C0000004C0FBF3D0C000000"); +// } + +// TEST(EmitterLoadsAndStores, load8s_rip) { +// CodeTester tester; +// tester.init_code_buffer(256); +// 
tester.emit(IGen::load8s_rip_s32(RAX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f be 05 0c 00 00 00"); + +// tester.clear(); +// for (int i = 0; i < 16; i++) { +// tester.emit(IGen::load8s_rip_s32(i, 12)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "480FBE050C000000480FBE0D0C000000480FBE150C000000480FBE1D0C000000480FBE250C000000480FBE" +// "2D0C000000480FBE350C000000480FBE3D0C0000004C0FBE050C0000004C0FBE0D0C0000004C0FBE150C00" +// "00004C0FBE1D0C0000004C0FBE250C0000004C0FBE2D0C0000004C0FBE350C0000004C0FBE3D0C000000"); +// } + +// TEST(EmitterLoadsAndStores, load8u_rip) { +// CodeTester tester; +// tester.init_code_buffer(256); +// tester.emit(IGen::load8u_rip_s32(RAX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b6 05 0c 00 00 00"); + +// tester.clear(); +// for (int i = 0; i < 16; i++) { +// tester.emit(IGen::load8u_rip_s32(i, 12)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "480FB6050C000000480FB60D0C000000480FB6150C000000480FB61D0C000000480FB6250C000000480FB6" +// "2D0C000000480FB6350C000000480FB63D0C0000004C0FB6050C0000004C0FB60D0C0000004C0FB6150C00" +// "00004C0FB61D0C0000004C0FB6250C0000004C0FB62D0C0000004C0FB6350C0000004C0FB63D0C000000"); +// } + +// TEST(EmitterLoadsAndStores, store64_rip_s32) { +// CodeTester tester; +// tester.init_code_buffer(256); +// tester.emit(IGen::store64_rip_s32(RAX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "48 89 05 0c 00 00 00"); + +// tester.clear(); +// for (int i = 0; i < 16; i++) { +// tester.emit(IGen::store64_rip_s32(i, 12)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "4889050C00000048890D0C0000004889150C00000048891D0C0000004889250C00000048892D0C00000048" +// "89350C00000048893D0C0000004C89050C0000004C890D0C0000004C89150C0000004C891D0C0000004C89" +// "250C0000004C892D0C0000004C89350C0000004C893D0C000000"); +// } + +// TEST(EmitterLoadsAndStores, store32_rip_s32) { +// CodeTester tester; +// tester.init_code_buffer(256); +// tester.emit(IGen::store32_rip_s32(RAX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "89 05 0c 00 00 00"); + +// tester.clear(); +// for (int i = 0; i < 16; i++) { +// tester.emit(IGen::store32_rip_s32(i, 12)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "89050C000000890D0C00000089150C000000891D0C00000089250C000000892D0C00000089350C00000089" +// "3D0C0000004489050C00000044890D0C0000004489150C00000044891D0C0000004489250C00000044892D" +// "0C0000004489350C00000044893D0C000000"); +// } + +// TEST(EmitterLoadsAndStores, store16_rip_s32) { +// CodeTester tester; +// tester.init_code_buffer(256); +// tester.emit(IGen::store16_rip_s32(RAX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "66 89 05 0c 00 00 00"); + +// tester.clear(); +// for (int i = 0; i < 16; i++) { +// tester.emit(IGen::store16_rip_s32(i, 12)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "6689050C00000066890D0C0000006689150C00000066891D0C0000006689250C00000066892D0C00000066" +// "89350C00000066893D0C000000664489050C0000006644890D0C000000664489150C0000006644891D0C00" +// "0000664489250C0000006644892D0C000000664489350C0000006644893D0C000000"); +// } + +// TEST(EmitterLoadsAndStores, store8_rip_s32) { +// CodeTester tester; +// tester.init_code_buffer(256); +// tester.emit(IGen::store8_rip_s32(RAX, 12)); +// EXPECT_EQ(tester.dump_to_hex_string(), "88 05 0c 00 00 00"); + +// tester.clear(); +// for (int i = 0; i < 16; i++) { +// tester.emit(IGen::store8_rip_s32(i, 12)); +// } + +// EXPECT_EQ(tester.dump_to_hex_string(true), +// 
"88050C000000880D0C00000088150C000000881D0C0000004088250C00000040882D0C0000004088350C00" +// "000040883D0C0000004488050C00000044880D0C0000004488150C00000044881D0C0000004488250C0000" +// "0044882D0C0000004488350C00000044883D0C000000"); +// } + +// TEST(EmitterLoadsAndStores, static_addr) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// tester.clear(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(i, 12345)); // load test reg with junk +// int start_of_lea = tester.size(); +// auto lea_instr = IGen::static_addr(i, INT32_MAX); +// tester.emit(lea_instr); +// // patch instruction to lea the start of this code + 1. +// tester.write(-start_of_lea - lea_instr.length() + 1, +// start_of_lea + lea_instr.offset_of_disp()); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_return(); + +// auto result = tester.execute(); +// EXPECT_EQ(result, (u64)(tester.data()) + 1); +// } +// } + +// #ifdef __linux__ +// TEST(EmitterXmm32, load32_xmm32_gpr64_plus_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); +// tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64(XMM3, RAX, RBX)); +// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 10 1c 03"); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } +// for (int k = 0; k < 16; k++) { +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // fill k with junk +// tester.emit(IGen::mov_gpr64_u64(i, (iter & 1) ? 0 : UINT64_MAX)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // load into k +// tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64(XMM0 + k, i, j)); +// // move to return +// tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); + +// // prepare the memory: +// float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; + +// // run! 
+// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 3 * sizeof(float), 0, 0), 3.45f); +// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 2 * sizeof(float), 0, 0), 1.23f); +// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 4 * sizeof(float), 0, 0), 5.67f); +// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 5 * sizeof(float), 0, 0), 0); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterXmm32, load32_xmm32_gpr64_plus_gpr64_plus_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); +// tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64_plus_s8(XMM3, RAX, RBX, -1)); +// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 10 5c 03 ff"); + +// auto instr = IGen::load32_xmm32_gpr64_plus_gpr64_plus_s8(XMM3, RBX, RSI, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } +// for (int k = 0; k < 16; k++) { +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // fill k with junk +// tester.emit(IGen::mov_gpr64_u64(i, (iter & 1) ? 0 : UINT64_MAX)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// // load into k +// tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64_plus_s8(XMM0 + k, i, j, -3)); +// // move to return +// tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); + +// // prepare the memory: +// float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; + +// // run! +// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 3 * sizeof(float) + 3, 0, 0), 3.45f); +// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 2 * sizeof(float) + 3, 0, 0), 1.23f); +// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 4 * sizeof(float) + 3, 0, 0), 5.67f); +// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 5 * sizeof(float) + 3, 0, 0), 0); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterXmm32, load32_xmm32_gpr64_plus_gpr64_plus_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); +// tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64_plus_s32(XMM3, RAX, RBX, -1)); +// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 10 9c 03 ff ff ff ff"); + +// auto instr = IGen::load32_xmm32_gpr64_plus_gpr64_plus_s32(XMM3, RBX, RSI, -1234); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -1234); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } +// for (int k = 0; k < 16; k++) { +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + +// // fill k with junk +// tester.emit(IGen::mov_gpr64_u64(i, (iter & 1) ? 
0 : UINT64_MAX)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); + +// // pop args into appropriate register +// tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 +// tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + +// s64 offset = (iter & 1) ? INT32_MAX : INT32_MIN; + +// // load into k +// tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64_plus_s32(XMM0 + k, i, j, offset)); +// // move to return +// tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); + +// // prepare the memory: +// float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; + +// // run! +// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 3 * sizeof(float) - offset, 0, 0), +// 3.45f); +// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 2 * sizeof(float) - offset, 0, 0), +// 1.23f); +// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 4 * sizeof(float) - offset, 0, 0), +// 5.67f); +// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 5 * sizeof(float) - offset, 0, 0), +// 0); +// iter++; +// } +// } +// } +// } + +// namespace { +// template +// float as_float(T x) { +// float result; +// memcpy(&result, &x, sizeof(float)); +// return result; +// } + +// u32 as_u32(float x) { +// u32 result; +// memcpy(&result, &x, 4); +// return result; +// } +// } // namespace + +// TEST(EmitterXmm32, store32_xmm32_gpr64_plus_gpr64) { +// CodeTester tester; +// tester.init_code_buffer(512); +// tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64(RAX, RBX, XMM7)); +// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 11 3c 03"); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } + +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } + +// for (int k = 0; k < 16; k++) { +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// // push args to the stack + +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); // addr2 +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); // addr1 +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); // value + +// // pop value into addr1 GPR +// tester.emit(IGen::pop_gpr64(i)); +// // move to XMM +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); + +// // pop addrs +// tester.emit(IGen::pop_gpr64(i)); +// tester.emit(IGen::pop_gpr64(j)); + +// // store +// tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64(i, j, XMM0 + k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); + +// // prepare the memory: +// float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; + +// // run! 
+// tester.execute((u64)memory, 12, as_u32(1.234f), 0); +// EXPECT_FLOAT_EQ(memory[2], 1.23f); +// EXPECT_FLOAT_EQ(memory[3], 1.234f); +// EXPECT_FLOAT_EQ(memory[4], 5.67f); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterXmm32, store32_xmm32_gpr64_plus_gpr64_plus_s8) { +// CodeTester tester; +// tester.init_code_buffer(512); +// tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64_plus_s8(RAX, RBX, XMM3, -1)); +// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 11 5c 03 ff"); + +// auto instr = IGen::store32_xmm32_gpr64_plus_gpr64_plus_s8(RBX, RSI, XMM3, -3); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } +// for (int k = 0; k < 16; k++) { +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); // addr2 +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); // addr1 +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); // value + +// // pop value into addr1 GPR +// tester.emit(IGen::pop_gpr64(i)); +// // move to XMM +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); + +// // pop addrs +// tester.emit(IGen::pop_gpr64(i)); +// tester.emit(IGen::pop_gpr64(j)); + +// s64 offset = (iter & 1) ? INT8_MAX : INT8_MIN; + +// // load into k +// tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64_plus_s8(i, j, XMM0 + k, offset)); + +// // move to return +// tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); + +// // prepare the memory: +// float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; + +// // run! +// tester.execute((u64)memory, 12 - offset, as_u32(1.234f), 0); +// EXPECT_FLOAT_EQ(memory[2], 1.23f); +// EXPECT_FLOAT_EQ(memory[3], 1.234f); +// EXPECT_FLOAT_EQ(memory[4], 5.67f); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterXmm32, store32_xmm32_gpr64_plus_gpr64_plus_s32) { +// CodeTester tester; +// tester.init_code_buffer(512); +// tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64_plus_s32(RAX, RBX, XMM3, -1)); +// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 11 9c 03 ff ff ff ff"); + +// auto instr = IGen::store32_xmm32_gpr64_plus_gpr64_plus_s32(RBX, RSI, XMM3, -1234); +// u8 buff[256]; +// instr.emit(buff); +// EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -1234); + +// int iter = 0; +// for (int i = 0; i < 16; i++) { +// if (i == RSP) { +// continue; +// } +// for (int j = 0; j < 16; j++) { +// if (j == RSP || j == i) { +// continue; +// } +// for (int k = 0; k < 16; k++) { +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// // push args to the stack +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); // addr2 +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); // addr1 +// tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); // value + +// // pop value into addr1 GPR +// tester.emit(IGen::pop_gpr64(i)); +// // move to XMM +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); + +// // pop addrs +// tester.emit(IGen::pop_gpr64(i)); +// tester.emit(IGen::pop_gpr64(j)); + +// s64 offset = (iter & 1) ? 
INT32_MAX : INT32_MIN; + +// // load into k +// tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64_plus_s32(i, j, XMM0 + k, offset)); + +// // move to return +// tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); + +// // return! +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); + +// // prepare the memory: +// float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; + +// // run! +// tester.execute((u64)memory, 12 - offset, as_u32(1.234f), 0); +// EXPECT_FLOAT_EQ(memory[2], 1.23f); +// EXPECT_FLOAT_EQ(memory[3], 1.234f); +// EXPECT_FLOAT_EQ(memory[4], 5.67f); + +// iter++; +// } +// } +// } +// } + +// TEST(EmitterXmm32, static_load_xmm32) { +// CodeTester tester; +// tester.init_code_buffer(512); +// for (int i = 0; i < 16; i++) { +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); + +// auto loc_of_load = tester.size(); +// auto load_instr = IGen::static_load_xmm32(XMM0 + i, INT32_MAX); + +// tester.emit(load_instr); +// tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); +// auto loc_of_float = tester.emit_data(float(1.2345f)); + +// // patch offset +// tester.write(loc_of_float - loc_of_load - load_instr.length(), +// loc_of_load + load_instr.offset_of_disp()); + +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_FLOAT_EQ(result, 1.2345f); +// } +// } + +// TEST(EmitterXmm32, static_store_xmm32) { +// CodeTester tester; +// tester.init_code_buffer(512); +// for (int i = 0; i < 16; i++) { +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, tester.get_c_abi_arg_reg(0))); + +// auto loc_of_store = tester.size(); +// auto store_instr = IGen::static_store_xmm32(XMM0 + i, INT32_MAX); + +// tester.emit(store_instr); +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); +// auto loc_of_float = tester.emit_data(float(1.2345f)); + +// tester.write(loc_of_float - loc_of_store - store_instr.length(), +// loc_of_store + store_instr.offset_of_disp()); +// tester.execute(as_u32(-44.567f), 0, 0, 0); +// EXPECT_FLOAT_EQ(-44.567f, tester.read(loc_of_float)); +// } +// } + +// TEST(EmitterXmm32, ucomiss) { +// CodeTester tester; +// tester.init_code_buffer(512); +// tester.emit(IGen::cmp_flt_flt(XMM13, XMM14)); +// EXPECT_EQ("45 0f 2e ee", tester.dump_to_hex_string()); +// } + +// TEST(EmitterXmm32, mul) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// std::vector vals = {0.f, 1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, 7.545f}; + +// for (auto f : vals) { +// for (auto g : vals) { +// for (int i = 0; i < 16; i++) { +// for (int j = 0; j < 16; j++) { +// if (i == j) { +// continue; +// } +// auto expected = f * g; +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// u64 val = 0; +// memcpy(&val, &f, sizeof(float)); +// tester.emit(IGen::mov_gpr64_u64(RAX, val)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); +// memcpy(&val, &g, sizeof(float)); +// tester.emit(IGen::mov_gpr64_u64(RAX, val)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + j, RAX)); +// tester.emit(IGen::mulss_xmm_xmm(XMM0 + j, XMM0 + i)); +// tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + j)); +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_FLOAT_EQ(result, 
expected); +// } +// } +// } +// } +// } + +// TEST(EmitterXmm32, div) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// std::vector vals = {1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, 7.545f}; + +// for (auto f : vals) { +// for (auto g : vals) { +// for (int i = 0; i < 16; i++) { +// for (int j = 0; j < 16; j++) { +// if (i == j) { +// continue; +// } +// auto expected = g / f; +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// u64 val = 0; +// memcpy(&val, &f, sizeof(float)); +// tester.emit(IGen::mov_gpr64_u64(RAX, val)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); +// memcpy(&val, &g, sizeof(float)); +// tester.emit(IGen::mov_gpr64_u64(RAX, val)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + j, RAX)); +// tester.emit(IGen::divss_xmm_xmm(XMM0 + j, XMM0 + i)); +// tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + j)); +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_FLOAT_EQ(result, expected); +// } +// } +// } +// } +// } + +// TEST(EmitterXmm32, add) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// std::vector vals = {0.f, 1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, 7.545f}; +// for (auto f : vals) { +// for (auto g : vals) { +// for (int i = 0; i < 16; i++) { +// for (int j = 0; j < 16; j++) { +// if (i == j) { +// continue; +// } +// auto expected = g + f; +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// u64 val = 0; +// memcpy(&val, &f, sizeof(float)); +// tester.emit(IGen::mov_gpr64_u64(RAX, val)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); +// memcpy(&val, &g, sizeof(float)); +// tester.emit(IGen::mov_gpr64_u64(RAX, val)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + j, RAX)); +// tester.emit(IGen::addss_xmm_xmm(XMM0 + j, XMM0 + i)); +// tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + j)); +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_FLOAT_EQ(result, expected); +// } +// } +// } +// } +// } + +// TEST(EmitterXmm32, sub) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// std::vector vals = {0.f, 1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, 7.545f}; + +// for (auto f : vals) { +// for (auto g : vals) { +// for (int i = 0; i < 16; i++) { +// for (int j = 0; j < 16; j++) { +// if (i == j) { +// continue; +// } +// auto expected = g - f; +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// u64 val = 0; +// memcpy(&val, &f, sizeof(float)); +// tester.emit(IGen::mov_gpr64_u64(RAX, val)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); +// memcpy(&val, &g, sizeof(float)); +// tester.emit(IGen::mov_gpr64_u64(RAX, val)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + j, RAX)); +// tester.emit(IGen::subss_xmm_xmm(XMM0 + j, XMM0 + i)); +// tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + j)); +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_FLOAT_EQ(result, expected); +// } +// } +// } +// } +// } + +// TEST(EmitterXmm32, float_to_int) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// std::vector vals = {0.f, 1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, +// 7.545f, 0.1f, 0.9f, -0.1f, -0.9f}; + +// for (auto g : vals) { +// for (int i = 0; i < 16; i++) { +// for (int j = 0; 
j < 16; j++) { +// if (j == RSP) { +// continue; +// } +// s32 expected = g; +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// u64 val = 0; +// memcpy(&val, &g, sizeof(float)); +// tester.emit(IGen::mov_gpr64_u64(RAX, val)); +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); +// tester.emit(IGen::float_to_int32(j, XMM0 + i)); +// tester.emit(IGen::mov_gpr64_gpr64(RAX, j)); +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } + +// TEST(EmitterXmm32, int_to_float) { +// CodeTester tester; +// tester.init_code_buffer(512); + +// std::vector vals = {0, 1, -1, INT32_MAX, -3457343, 7, INT32_MIN}; + +// for (auto g : vals) { +// for (int i = 0; i < 16; i++) { +// for (int j = 0; j < 16; j++) { +// if (j == RSP) { +// continue; +// } +// float expected = g; +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// tester.emit(IGen::mov_gpr64_u64(j, g)); +// tester.emit(IGen::int32_to_float(XMM0 + i, j)); +// tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + i)); +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); +// auto result = tester.execute_ret(0, 0, 0, 0); +// EXPECT_EQ(result, expected); +// } +// } +// } +// } + +// TEST(EmitterSlow, xmm32_move) { +// std::vector u32_constants = {0, INT32_MAX, UINT32_MAX, 17}; + +// // test moving between xmms (32-bit) and gprs. +// CodeTester tester; +// tester.init_code_buffer(512); + +// for (auto constant : u32_constants) { +// for (int r1 = 0; r1 < 16; r1++) { +// if (r1 == RSP) { +// continue; +// } +// for (int r2 = 0; r2 < 16; r2++) { +// if (r2 == RSP) { +// continue; +// } +// for (int r3 = 0; r3 < 16; r3++) { +// for (int r4 = 0; r4 < 16; r4++) { +// tester.clear(); +// tester.emit_push_all_xmms(); +// tester.emit_push_all_gprs(true); +// // move constant to gpr +// tester.emit(IGen::mov_gpr64_u32(r1, constant)); +// // move gpr to xmm +// tester.emit(IGen::movd_xmm32_gpr32(XMM0 + r3, r1)); +// // move xmm to xmm +// tester.emit(IGen::mov_xmm32_xmm32(XMM0 + r4, XMM0 + r3)); +// // move xmm to gpr +// tester.emit(IGen::movd_gpr32_xmm32(r2, XMM0 + r4)); +// // return! 
+// tester.emit(IGen::mov_gpr64_gpr64(RAX, r2)); +// tester.emit_pop_all_gprs(true); +// tester.emit_pop_all_xmms(); +// tester.emit_return(); +// } +// } +// } +// } +// } +// // todo - finish this test +// } +// #endif + +// TEST(Emitter, LEA) { +// CodeTester tester; +// tester.init_code_buffer(1024); +// tester.emit(IGen::lea_reg_plus_off(RDI, RSP, -3)); +// tester.emit(IGen::lea_reg_plus_off(RDI, R12, -3)); +// tester.emit(IGen::lea_reg_plus_off(R13, RSP, -3)); +// tester.emit(IGen::lea_reg_plus_off(R13, R12, -3)); +// tester.emit(IGen::lea_reg_plus_off(RDI, RSP, -300)); +// tester.emit(IGen::lea_reg_plus_off(RDI, R12, -300)); +// tester.emit(IGen::lea_reg_plus_off(R13, RSP, -300)); +// tester.emit(IGen::lea_reg_plus_off(R13, R12, -300)); +// EXPECT_EQ(tester.dump_to_hex_string(true), +// "488D7C24FD498D7C24FD4C8D6C24FD4D8D6C24FD488DBC24D4FEFFFF498DBC24D4FEFFFF4C8DAC24D4FEFF" +// "FF4D8DAC24D4FEFFFF"); +// } + +// TEST(EmitterXMM, StackLoad32) { +// CodeTester tester; +// tester.init_code_buffer(1024); +// tester.emit(IGen::load32_xmm32_gpr64_plus_s32(XMM0 + 3, RSP, -1234)); +// tester.emit(IGen::load32_xmm32_gpr64_plus_s32(XMM0 + 13, RSP, -1234)); +// EXPECT_EQ(tester.dump_to_hex_string(true), "F30F109C242EFBFFFFF3440F10AC242EFBFFFF"); +// } + +// TEST(EmitterXMM, StackLoad8) { +// CodeTester tester; +// tester.init_code_buffer(1024); +// tester.emit(IGen::load32_xmm32_gpr64_plus_s8(XMM0 + 3, RSP, -12)); +// tester.emit(IGen::load32_xmm32_gpr64_plus_s8(XMM0 + 13, RSP, -12)); +// EXPECT_EQ(tester.dump_to_hex_string(true), "F30F105C24F4F3440F106C24F4"); +// } + +// TEST(EmitterXMM, StackLoadFull32) { +// CodeTester tester; +// tester.init_code_buffer(1024); +// tester.emit(IGen::load128_xmm128_gpr64_s32(XMM0 + 3, RSP, -1234)); +// tester.emit(IGen::load128_xmm128_gpr64_s32(XMM0 + 13, RSP, -1234)); +// EXPECT_EQ(tester.dump_to_hex_string(true), "660F6F9C242EFBFFFF66440F6FAC242EFBFFFF"); +// } + +// TEST(EmitterXMM, StackLoadFull8) { +// CodeTester tester; +// tester.init_code_buffer(1024); +// tester.emit(IGen::load128_xmm128_gpr64_s8(XMM0 + 3, RSP, -12)); +// tester.emit(IGen::load128_xmm128_gpr64_s8(XMM0 + 13, RSP, -12)); +// EXPECT_EQ(tester.dump_to_hex_string(true), "660F6F5C24F466440F6F6C24F4"); +// } + +// TEST(EmitterXMM, StackStore32) { +// CodeTester tester; +// tester.init_code_buffer(1024); +// tester.emit(IGen::store32_xmm32_gpr64_plus_s32(RSP, XMM0 + 3, -1234)); +// tester.emit(IGen::store32_xmm32_gpr64_plus_s32(RSP, XMM0 + 13, -1234)); +// EXPECT_EQ(tester.dump_to_hex_string(true), "F30F119C242EFBFFFFF3440F11AC242EFBFFFF"); +// } + +// TEST(EmitterXMM, StackStore8) { +// CodeTester tester; +// tester.init_code_buffer(1024); +// tester.emit(IGen::store32_xmm32_gpr64_plus_s8(RSP, XMM0 + 3, -12)); +// tester.emit(IGen::store32_xmm32_gpr64_plus_s8(RSP, XMM0 + 13, -12)); +// EXPECT_EQ(tester.dump_to_hex_string(true), "F30F115C24F4F3440F116C24F4"); +// } + +// TEST(EmitterXMM, StackStoreFull32) { +// CodeTester tester; +// tester.init_code_buffer(1024); +// tester.emit(IGen::store128_gpr64_xmm128_s32(RSP, XMM0 + 3, -1234)); +// tester.emit(IGen::store128_gpr64_xmm128_s32(RSP, XMM0 + 13, -1234)); +// EXPECT_EQ(tester.dump_to_hex_string(true), "660F7F9C242EFBFFFF66440F7FAC242EFBFFFF"); +// } + +// TEST(EmitterXMM, StackStoreFull8) { +// CodeTester tester; +// tester.init_code_buffer(1024); +// tester.emit(IGen::store128_gpr64_xmm128_s8(RSP, XMM0 + 3, -12)); +// tester.emit(IGen::store128_gpr64_xmm128_s8(RSP, XMM0 + 13, -12)); +// EXPECT_EQ(tester.dump_to_hex_string(true), 
"660F7F5C24F466440F7F6C24F4"); +// } + +// TEST(EmitterXMM, SqrtS) { +// CodeTester tester; +// tester.init_code_buffer(1024); +// tester.emit(IGen::sqrts_xmm(XMM0 + 1, XMM0 + 2)); +// tester.emit(IGen::sqrts_xmm(XMM0 + 11, XMM0 + 2)); +// tester.emit(IGen::sqrts_xmm(XMM0 + 1, XMM0 + 12)); +// tester.emit(IGen::sqrts_xmm(XMM0 + 11, XMM0 + 12)); +// EXPECT_EQ(tester.dump_to_hex_string(true), "F30F51CAF3440F51DAF3410F51CCF3450F51DC"); +// } From e57d43080d3100b03bceaa6bfbf3bf8bd7bd444b Mon Sep 17 00:00:00 2001 From: Tyler Wilding Date: Tue, 31 Oct 2023 23:54:56 -0300 Subject: [PATCH 05/12] arm-wip: figured out a minimally invasive instruction pattern, stub out arm instructions --- goalc/compiler/IR.cpp | 8 +- goalc/debugger/disassemble.h | 4 +- goalc/emitter/CodeTester.cpp | 2 +- goalc/emitter/IGenARM64.cpp | 1951 +++--------------------------- goalc/emitter/Instruction.h | 1046 +++++++++++++++- goalc/emitter/InstructionARM64.h | 29 - goalc/emitter/InstructionX86.h | 1011 ---------------- 7 files changed, 1247 insertions(+), 2804 deletions(-) delete mode 100644 goalc/emitter/InstructionARM64.h delete mode 100644 goalc/emitter/InstructionX86.h diff --git a/goalc/compiler/IR.cpp b/goalc/compiler/IR.cpp index 124e3c20fa6..3e9f3cc4c8e 100644 --- a/goalc/compiler/IR.cpp +++ b/goalc/compiler/IR.cpp @@ -862,8 +862,8 @@ RegAllocInstr IR_ConditionalBranch::to_rai() { void IR_ConditionalBranch::do_codegen(emitter::ObjectGenerator* gen, const AllocationResult& allocs, emitter::IR_Record irec) { - Instruction jump_instr; #ifndef __aarch64__ + Instruction jump_instr; jump_instr = InstructionX86(0); ASSERT(m_resolved); switch (condition.kind) { @@ -905,9 +905,6 @@ void IR_ConditionalBranch::do_codegen(emitter::ObjectGenerator* gen, default: ASSERT(false); } - #else - // TODO - ARM64 - #endif if (condition.is_float) { gen->add_instr( @@ -921,6 +918,9 @@ void IR_ConditionalBranch::do_codegen(emitter::ObjectGenerator* gen, auto jump_rec = gen->add_instr(jump_instr, irec); gen->link_instruction_jump(jump_rec, gen->get_future_ir_record_in_same_func(irec, label.idx)); + #else + // TODO - ARM64 + #endif } ///////////////////// diff --git a/goalc/debugger/disassemble.h b/goalc/debugger/disassemble.h index d95d6ef121f..3985b09b300 100644 --- a/goalc/debugger/disassemble.h +++ b/goalc/debugger/disassemble.h @@ -22,10 +22,10 @@ struct InstructionInfo { int ir_idx = -1; int offset = -1; - InstructionInfo(const emitter::Instruction& _instruction, Kind _kind) + InstructionInfo(const emitter::Instruction _instruction, Kind _kind) : instruction(_instruction), kind(_kind) {} - InstructionInfo(const emitter::Instruction& _instruction, Kind _kind, int _ir_idx) + InstructionInfo(const emitter::Instruction _instruction, Kind _kind, int _ir_idx) : instruction(_instruction), kind(_kind), ir_idx(_ir_idx) {} }; diff --git a/goalc/emitter/CodeTester.cpp b/goalc/emitter/CodeTester.cpp index 2f4aa81b335..7d104d74a40 100644 --- a/goalc/emitter/CodeTester.cpp +++ b/goalc/emitter/CodeTester.cpp @@ -50,7 +50,7 @@ std::string CodeTester::dump_to_hex_string(bool nospace) { /*! * Add an instruction to the buffer. 
*/ -void CodeTester::emit(const Instruction& instr) { +void CodeTester::emit(const emitter::Instruction& instr) { code_buffer_size += instr.emit(code_buffer + code_buffer_size); ASSERT(code_buffer_size <= code_buffer_capacity); } diff --git a/goalc/emitter/IGenARM64.cpp b/goalc/emitter/IGenARM64.cpp index d0934f2ecd8..2363ac6123c 100644 --- a/goalc/emitter/IGenARM64.cpp +++ b/goalc/emitter/IGenARM64.cpp @@ -1,8 +1,7 @@ -#include "goalc/emitter/InstructionARM64.h" +#include #ifdef __aarch64__ #include "IGen.h" -#include "goalc/emitter/InstructionX86.h" namespace emitter { namespace IGen { @@ -13,49 +12,21 @@ namespace IGen { * Move data from src to dst. Moves all 64-bits of the GPR. */ Instruction mov_gpr64_gpr64(Register dst, Register src) { - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - InstructionX86 instr(0x89); - instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); - return instr; + return Instruction(0b0); } /*! * Move a 64-bit constant into a register. */ Instruction mov_gpr64_u64(Register dst, uint64_t val) { - ASSERT(dst.is_gpr()); - bool rex_b = false; - auto dst_hw_id = dst.hw_id(); - if (dst_hw_id >= 8) { - dst_hw_id -= 8; - rex_b = true; - } - InstructionX86 instr(0xb8 + dst_hw_id); - instr.set(REX(true, false, false, rex_b)); - instr.set(Imm(8, val)); - return instr; + return Instruction(0b0); } /*! * Move a 32-bit constant into a register. Zeros the upper 32 bits. */ Instruction mov_gpr64_u32(Register dst, uint64_t val) { - ASSERT(val <= UINT32_MAX); - ASSERT(dst.is_gpr()); - auto dst_hw_id = dst.hw_id(); - bool rex_b = false; - if (dst_hw_id >= 8) { - dst_hw_id -= 8; - rex_b = true; - } - - InstructionX86 instr(0xb8 + dst_hw_id); - if (rex_b) { - instr.set(REX(false, false, false, rex_b)); - } - instr.set(Imm(4, val)); - return instr; + return Instruction(0b0); } /*! @@ -64,82 +35,42 @@ Instruction mov_gpr64_u32(Register dst, uint64_t val) { * This is always bigger than mov_gpr64_u32, but smaller than a mov_gpr_u64. */ Instruction mov_gpr64_s32(Register dst, int64_t val) { - ASSERT(val >= INT32_MIN && val <= INT32_MAX); - ASSERT(dst.is_gpr()); - InstructionX86 instr(0xc7); - instr.set_modrm_and_rex(0, dst.hw_id(), 3, true); - instr.set(Imm(4, val)); - return instr; + return Instruction(0b0); } /*! * Move 32-bits of xmm to 32 bits of gpr (no sign extension). */ Instruction movd_gpr32_xmm32(Register dst, Register src) { - ASSERT(dst.is_gpr()); - ASSERT(src.is_xmm()); - InstructionX86 instr(0x66); - instr.set_op2(0x0f); - instr.set_op3(0x7e); - instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } /*! * Move 32-bits of gpr to 32-bits of xmm (no sign extension) */ Instruction movd_xmm32_gpr32(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_gpr()); - InstructionX86 instr(0x66); - instr.set_op2(0x0f); - instr.set_op3(0x6e); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } /*! * Move 64-bits of xmm to 64 bits of gpr (no sign extension). */ Instruction movq_gpr64_xmm64(Register dst, Register src) { - ASSERT(dst.is_gpr()); - ASSERT(src.is_xmm()); - InstructionX86 instr(0x66); - instr.set_op2(0x0f); - instr.set_op3(0x7e); - instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } /*! 
* Move 64-bits of gpr to 64-bits of xmm (no sign extension) */ Instruction movq_xmm64_gpr64(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_gpr()); - InstructionX86 instr(0x66); - instr.set_op2(0x0f); - instr.set_op3(0x6e); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } /*! * Move 32-bits between xmm's */ Instruction mov_xmm32_xmm32(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x10); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } // todo - GPR64 -> XMM64 (zext) @@ -155,108 +86,39 @@ Instruction mov_xmm32_xmm32(Register dst, Register src) { * Cannot use rsp. */ Instruction load8s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - InstructionX86 instr(0xf); - instr.set_op2(0xbe); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, - false); - return instr; + return Instruction(0b0); } Instruction store8_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - InstructionX86 instr(0x88); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id()); - if (value.id() > RBX) { - instr.add_rex(); - } - return instr; + return Instruction(0b0); } Instruction load8s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, Register addr1, Register addr2, s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - InstructionX86 instr(0xf); - instr.set_op2(0xbe); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; + return Instruction(0b0); } Instruction store8_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, Register addr2, Register value, s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - InstructionX86 instr(0x88); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - if (value.id() > RBX) { - instr.add_rex(); - } - return instr; + return Instruction(0b0); } Instruction load8s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, Register addr1, Register addr2, s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0xf); - instr.set_op2(0xbe); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; + return Instruction(0b0); } Instruction store8_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, Register addr2, Register value, s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); 
- ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0x88); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - if (value.id() > RBX) { - instr.add_rex(); - } - return instr; + return Instruction(0b0); } /*! @@ -265,53 +127,21 @@ Instruction store8_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, * Cannot use rsp. */ Instruction load8u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - InstructionX86 instr(0xf); - instr.set_op2(0xb6); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, - false); - return instr; + return Instruction(0b0); } Instruction load8u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, Register addr1, Register addr2, s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - InstructionX86 instr(0xf); - instr.set_op2(0xb6); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; + return Instruction(0b0); } Instruction load8u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, Register addr1, Register addr2, s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0xf); - instr.set_op2(0xb6); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; + return Instruction(0b0); } /*! @@ -320,105 +150,39 @@ Instruction load8u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, * Cannot use rsp. */ Instruction load16s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - InstructionX86 instr(0xf); - instr.set_op2(0xbf); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, - false); - return instr; + return Instruction(0b0); } Instruction store16_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - InstructionX86 instr(0x66); - instr.set_op2(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id()); - instr.swap_op0_rex(); // why????? - return instr; + return Instruction(0b0); } Instruction store16_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, Register addr2, Register value, s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - InstructionX86 instr(0x66); - instr.set_op2(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - instr.swap_op0_rex(); // why????? 
- return instr; + return Instruction(0b0); } Instruction store16_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, Register addr2, Register value, s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0x66); - instr.set_op2(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - instr.swap_op0_rex(); // why????? - return instr; + return Instruction(0b0); } Instruction load16s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, Register addr1, Register addr2, s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - InstructionX86 instr(0xf); - instr.set_op2(0xbf); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; + return Instruction(0b0); } Instruction load16s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, Register addr1, Register addr2, s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0xf); - instr.set_op2(0xbf); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; + return Instruction(0b0); } /*! @@ -427,53 +191,21 @@ Instruction load16s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, * Cannot use rsp. */ Instruction load16u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - InstructionX86 instr(0xf); - instr.set_op2(0xb7); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, - false); - return instr; + return Instruction(0b0); } Instruction load16u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, Register addr1, Register addr2, s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - InstructionX86 instr(0xf); - instr.set_op2(0xb7); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; + return Instruction(0b0); } Instruction load16u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, Register addr1, Register addr2, s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0xf); - instr.set_op2(0xb7); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; + return Instruction(0b0); } /*! @@ -482,95 +214,39 @@ Instruction load16u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, * Cannot use rsp. 
*/ Instruction load32s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - InstructionX86 instr(0x63); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true); - return instr; + return Instruction(0b0); } Instruction store32_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - InstructionX86 instr(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id()); - return instr; + return Instruction(0b0); } Instruction load32s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, Register addr1, Register addr2, s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - InstructionX86 instr(0x63); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; + return Instruction(0b0); } Instruction store32_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, Register addr2, Register value, s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - InstructionX86 instr(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - return instr; + return Instruction(0b0); } Instruction load32s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, Register addr1, Register addr2, s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0x63); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; + return Instruction(0b0); } Instruction store32_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, Register addr2, Register value, s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - return instr; + return Instruction(0b0); } /*! @@ -579,49 +255,21 @@ Instruction store32_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, * Cannot use rsp. 
*/ Instruction load32u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - InstructionX86 instr(0x8b); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id()); - return instr; + return Instruction(0b0); } Instruction load32u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, Register addr1, Register addr2, s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - InstructionX86 instr(0x8b); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - return instr; + return Instruction(0b0); } Instruction load32u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, Register addr1, Register addr2, s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0x8b); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - return instr; + return Instruction(0b0); } /*! @@ -630,168 +278,51 @@ Instruction load32u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, * Cannot use rsp. */ Instruction load64_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - InstructionX86 instr(0x8b); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true); - return instr; + return Instruction(0b0); } Instruction store64_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - InstructionX86 instr(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id(), true); - return instr; + return Instruction(0b0); } Instruction load64_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, Register addr1, Register addr2, s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - InstructionX86 instr(0x8b); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; + return Instruction(0b0); } Instruction store64_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, Register addr2, Register value, s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - InstructionX86 instr(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; + return Instruction(0b0); } Instruction load64_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, Register addr1, Register addr2, s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - 
ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0x8b); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; + return Instruction(0b0); } Instruction store64_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, Register addr2, Register value, s64 offset) { - ASSERT(value.is_gpr()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0x89); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, true); - return instr; + return Instruction(0b0); } Instruction store_goal_vf(Register addr, Register value, Register off, s64 offset) { - if (offset == 0) { - return storevf_gpr64_plus_gpr64(value, addr, off); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return storevf_gpr64_plus_gpr64_plus_s8(value, addr, off, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return storevf_gpr64_plus_gpr64_plus_s32(value, addr, off, offset); - } - ASSERT(false); - return InstructionX86(0); + return Instruction(0b0); } Instruction store_goal_gpr(Register addr, Register value, Register off, int offset, int size) { - switch (size) { - case 1: - if (offset == 0) { - return store8_gpr64_gpr64_plus_gpr64(addr, off, value); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return store8_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return store8_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); - } else { - ASSERT(false); - } - case 2: - if (offset == 0) { - return store16_gpr64_gpr64_plus_gpr64(addr, off, value); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return store16_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return store16_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); - } else { - ASSERT(false); - } - case 4: - if (offset == 0) { - return store32_gpr64_gpr64_plus_gpr64(addr, off, value); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return store32_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return store32_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); - } else { - ASSERT(false); - } - case 8: - if (offset == 0) { - return store64_gpr64_gpr64_plus_gpr64(addr, off, value); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return store64_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return store64_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); - } else { - ASSERT(false); - } - default: - ASSERT(false); - return InstructionX86(0); - } + return Instruction(0b0); } Instruction load_goal_xmm128(Register dst, Register addr, Register off, int offset) { - if (offset == 0) { - return loadvf_gpr64_plus_gpr64(dst, addr, off); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return loadvf_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return loadvf_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - } else { - ASSERT(false); - return InstructionX86(0); - } + return 
Instruction(0b0); } /*! @@ -804,332 +335,90 @@ Instruction load_goal_gpr(Register dst, int offset, int size, bool sign_extend) { - switch (size) { - case 1: - if (offset == 0) { - if (sign_extend) { - return load8s_gpr64_gpr64_plus_gpr64(dst, addr, off); - } else { - return load8u_gpr64_gpr64_plus_gpr64(dst, addr, off); - } - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - if (sign_extend) { - return load8s_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - } else { - return load8u_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - } - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - if (sign_extend) { - return load8s_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - } else { - return load8u_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - } - } else { - ASSERT(false); - } - case 2: - if (offset == 0) { - if (sign_extend) { - return load16s_gpr64_gpr64_plus_gpr64(dst, addr, off); - } else { - return load16u_gpr64_gpr64_plus_gpr64(dst, addr, off); - } - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - if (sign_extend) { - return load16s_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - } else { - return load16u_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - } - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - if (sign_extend) { - return load16s_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - } else { - return load16u_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - } - } else { - ASSERT(false); - } - case 4: - if (offset == 0) { - if (sign_extend) { - return load32s_gpr64_gpr64_plus_gpr64(dst, addr, off); - } else { - return load32u_gpr64_gpr64_plus_gpr64(dst, addr, off); - } - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - if (sign_extend) { - return load32s_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - } else { - return load32u_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - } - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - if (sign_extend) { - return load32s_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - } else { - return load32u_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - } - } else { - ASSERT(false); - } - case 8: - if (offset == 0) { - return load64_gpr64_gpr64_plus_gpr64(dst, addr, off); - - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return load64_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); - - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return load64_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); - - } else { - ASSERT(false); - } - default: - ASSERT(false); - return InstructionX86(0); - } + return Instruction(0b0); } //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; // LOADS n' STORES - XMM32 //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Instruction store32_xmm32_gpr64_plus_gpr64(Register addr1, Register addr2, Register xmm_value) { - ASSERT(xmm_value.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - - InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x11); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(xmm_value.hw_id(), addr1.hw_id(), addr2.hw_id()); - - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } Instruction load32_xmm32_gpr64_plus_gpr64(Register xmm_dest, Register addr1, Register addr2) { - ASSERT(xmm_dest.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - - InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x10); - instr.set_modrm_and_rex_for_reg_plus_reg_addr(xmm_dest.hw_id(), 
addr1.hw_id(), addr2.hw_id()); - - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } Instruction store32_xmm32_gpr64_plus_gpr64_plus_s8(Register addr1, Register addr2, Register xmm_value, s64 offset) { - ASSERT(xmm_value.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - - InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x11); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(xmm_value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } Instruction load32_xmm32_gpr64_plus_gpr64_plus_s8(Register xmm_dest, Register addr1, Register addr2, s64 offset) { - ASSERT(xmm_dest.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - - InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x10); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(xmm_dest.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } Instruction store32_xmm32_gpr64_plus_gpr64_plus_s32(Register addr1, Register addr2, Register xmm_value, s64 offset) { - ASSERT(xmm_value.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - - InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x11); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(xmm_value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } Instruction lea_reg_plus_off32(Register dest, Register base, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(base.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0x8d); - instr.set_modrm_rex_sib_for_reg_reg_disp(dest.hw_id(), 2, base.hw_id(), true); - instr.set(Imm(4, offset)); - return instr; + return Instruction(0b0); } Instruction lea_reg_plus_off8(Register dest, Register base, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(base.is_gpr()); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - InstructionX86 instr(0x8d); - instr.set_modrm_rex_sib_for_reg_reg_disp(dest.hw_id(), 1, base.hw_id(), true); - instr.set(Imm(1, offset)); - return instr; + return Instruction(0b0); } Instruction lea_reg_plus_off(Register dest, Register base, s64 offset) { - if (offset >= INT8_MIN && offset <= INT8_MAX) { - return lea_reg_plus_off8(dest, base, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return lea_reg_plus_off32(dest, base, offset); - } else { - ASSERT(false); - return InstructionX86(0); - } + return Instruction(0b0); } Instruction store32_xmm32_gpr64_plus_s32(Register base, Register xmm_value, s64 offset) { - ASSERT(xmm_value.is_xmm()); - ASSERT(base.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x11); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 2, base.hw_id(), false); - instr.set(Imm(4, offset)); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } Instruction store32_xmm32_gpr64_plus_s8(Register base, Register xmm_value, s64 offset) { - ASSERT(xmm_value.is_xmm()); - ASSERT(base.is_gpr()); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x11); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 1, 
base.hw_id(), false); - instr.set(Imm(1, offset)); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } Instruction load32_xmm32_gpr64_plus_gpr64_plus_s32(Register xmm_dest, Register addr1, Register addr2, s64 offset) { - ASSERT(xmm_dest.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - - InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x10); - instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(xmm_dest.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, false); - - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } Instruction load32_xmm32_gpr64_plus_s32(Register xmm_dest, Register base, s64 offset) { - ASSERT(xmm_dest.is_xmm()); - ASSERT(base.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x10); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 2, base.hw_id(), false); - instr.set(Imm(4, offset)); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } Instruction load32_xmm32_gpr64_plus_s8(Register xmm_dest, Register base, s64 offset) { - ASSERT(xmm_dest.is_xmm()); - ASSERT(base.is_gpr()); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x10); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 1, base.hw_id(), false); - instr.set(Imm(1, offset)); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } Instruction load_goal_xmm32(Register xmm_dest, Register addr, Register off, s64 offset) { - if (offset == 0) { - return load32_xmm32_gpr64_plus_gpr64(xmm_dest, addr, off); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return load32_xmm32_gpr64_plus_gpr64_plus_s8(xmm_dest, addr, off, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return load32_xmm32_gpr64_plus_gpr64_plus_s32(xmm_dest, addr, off, offset); - } else { - ASSERT(false); - return InstructionX86(0); - } + return Instruction(0b0); } Instruction store_goal_xmm32(Register addr, Register xmm_value, Register off, s64 offset) { - if (offset == 0) { - return store32_xmm32_gpr64_plus_gpr64(addr, off, xmm_value); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return store32_xmm32_gpr64_plus_gpr64_plus_s8(addr, off, xmm_value, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return store32_xmm32_gpr64_plus_gpr64_plus_s32(addr, off, xmm_value, offset); - } else { - ASSERT(false); - return InstructionX86(0); - } + return Instruction(0b0); } Instruction store_reg_offset_xmm32(Register base, Register xmm_value, s64 offset) { - ASSERT(base.is_gpr()); - ASSERT(xmm_value.is_xmm()); - if (offset >= INT8_MIN && offset <= INT8_MAX) { - return store32_xmm32_gpr64_plus_s8(base, xmm_value, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return store32_xmm32_gpr64_plus_s32(base, xmm_value, offset); - } else { - ASSERT(false); - return InstructionX86(0); - } + return Instruction(0b0); } Instruction load_reg_offset_xmm32(Register xmm_dest, Register base, s64 offset) { - ASSERT(base.is_gpr()); - ASSERT(xmm_dest.is_xmm()); - if (offset >= INT8_MIN && offset <= INT8_MAX) { - return load32_xmm32_gpr64_plus_s8(xmm_dest, base, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return load32_xmm32_gpr64_plus_s32(xmm_dest, base, offset); - } else { - ASSERT(false); - return InstructionX86(0); - } + return Instruction(0b0); } 
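For illustration only, and not part of the patch itself: the return Instruction(0b0) placeholders above suggest the ARM64 side of Instruction will eventually carry a single fixed-width 32-bit A64 instruction word. Under that assumption, and assuming Register::hw_id() maps onto the A64 register number, a stub such as mov_gpr64_gpr64 might later be filled in along the lines of the hypothetical sketch below, which encodes the 64-bit register move as the A64 alias MOV Xd, Xm (ORR Xd, XZR, Xm). The constructor usage and register mapping here are assumptions, not the author's implementation.

    // Hypothetical sketch: encode MOV Xd, Xm as ORR Xd, XZR, Xm (64-bit).
    // A64 ORR (shifted register): 0xAA000000 | Rm << 16 | imm6 << 10 | Rn << 5 | Rd,
    // with Rn = XZR (register 31) and imm6 = 0.
    Instruction mov_gpr64_gpr64(Register dst, Register src) {
      ASSERT(dst.is_gpr());
      ASSERT(src.is_gpr());
      u32 word = 0xAA0003E0 | (u32(src.hw_id()) << 16) | u32(dst.hw_id());
      return Instruction(word);  // assumes Instruction stores the raw 32-bit word
    }

Keeping every body down to a single return of a computed word would preserve the minimally invasive pattern this commit describes: the IGen signatures shared with the x86 backend stay untouched and only the returned encoding differs per architecture.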
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1140,109 +429,35 @@ Instruction load_reg_offset_xmm32(Register xmm_dest, Register base, s64 offset) * Store a 128-bit xmm into an address stored in a register, no offset */ Instruction store128_gpr64_xmm128(Register gpr_addr, Register xmm_value) { - ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_value.is_xmm()); - InstructionX86 instr(0x66); - // InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x7f); - instr.set_modrm_and_rex_for_reg_addr(xmm_value.hw_id(), gpr_addr.hw_id(), false); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } Instruction store128_gpr64_xmm128_s32(Register gpr_addr, Register xmm_value, s64 offset) { - ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_value.is_xmm()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0x66); - // InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x7f); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 2, gpr_addr.hw_id(), false); - instr.set(Imm(4, offset)); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } Instruction store128_gpr64_xmm128_s8(Register gpr_addr, Register xmm_value, s64 offset) { - ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_value.is_xmm()); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - InstructionX86 instr(0x66); - // InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x7f); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), 1, gpr_addr.hw_id(), false); - instr.set(Imm(1, offset)); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } Instruction load128_xmm128_gpr64(Register xmm_dest, Register gpr_addr) { - ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_dest.is_xmm()); - InstructionX86 instr(0x66); - // InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x6f); - instr.set_modrm_and_rex_for_reg_addr(xmm_dest.hw_id(), gpr_addr.hw_id(), false); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } Instruction load128_xmm128_gpr64_s32(Register xmm_dest, Register gpr_addr, s64 offset) { - ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_dest.is_xmm()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0x66); - // InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x6f); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 2, gpr_addr.hw_id(), false); - instr.set(Imm(4, offset)); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } Instruction load128_xmm128_gpr64_s8(Register xmm_dest, Register gpr_addr, s64 offset) { - ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_dest.is_xmm()); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - InstructionX86 instr(0x66); - // InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x6f); - instr.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), 1, gpr_addr.hw_id(), false); - instr.set(Imm(1, offset)); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } Instruction load128_xmm128_reg_offset(Register xmm_dest, Register base, s64 offset) { - if (offset == 0) { - return load128_xmm128_gpr64(xmm_dest, base); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return load128_xmm128_gpr64_s8(xmm_dest, base, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return load128_xmm128_gpr64_s32(xmm_dest, base, offset); - } else { - ASSERT(false); - return InstructionX86(0); - } + return Instruction(0b0); } Instruction store128_xmm128_reg_offset(Register base, Register xmm_val, s64 
offset) { - if (offset == 0) { - return store128_gpr64_xmm128(base, xmm_val); - } else if (offset >= INT8_MIN && offset <= INT8_MAX) { - return store128_gpr64_xmm128_s8(base, xmm_val, offset); - } else if (offset >= INT32_MIN && offset <= INT32_MAX) { - return store128_gpr64_xmm128_s32(base, xmm_val, offset); - } else { - ASSERT(false); - return InstructionX86(0); - } + return Instruction(0b0); } //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1250,207 +465,81 @@ Instruction store128_xmm128_reg_offset(Register base, Register xmm_val, s64 offs //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Instruction load64_rip_s32(Register dest, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0x8b); - instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); - return instr; + return Instruction(0b0); } Instruction load32s_rip_s32(Register dest, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0x63); - instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); - return instr; + return Instruction(0b0); } Instruction load32u_rip_s32(Register dest, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0x8b); - instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, false); - return instr; + return Instruction(0b0); } Instruction load16u_rip_s32(Register dest, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0xf); - instr.set_op2(0xb7); - instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); - return instr; + return Instruction(0b0); } Instruction load16s_rip_s32(Register dest, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0xf); - instr.set_op2(0xbf); - instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); - return instr; + return Instruction(0b0); } Instruction load8u_rip_s32(Register dest, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0xf); - instr.set_op2(0xb6); - instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); - return instr; + return Instruction(0b0); } Instruction load8s_rip_s32(Register dest, s64 offset) { - ASSERT(dest.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0xf); - instr.set_op2(0xbe); - instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); - return instr; + return Instruction(0b0); } Instruction static_load(Register dest, s64 offset, int size, bool sign_extend) { - switch (size) { - case 1: - if (sign_extend) { - return load8s_rip_s32(dest, offset); - } else { - return load8u_rip_s32(dest, offset); - } - break; - case 2: - if (sign_extend) { - return load16s_rip_s32(dest, offset); - } else { - return load16u_rip_s32(dest, offset); - } - break; - case 4: - if (sign_extend) { - return load32s_rip_s32(dest, offset); - } else { - return load32u_rip_s32(dest, offset); - } - break; - case 8: - return load64_rip_s32(dest, offset); - default: - ASSERT(false); - } + return Instruction(0b0); } Instruction store64_rip_s32(Register src, s64 offset) { - ASSERT(src.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0x89); - instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, true); - 
return instr; + return Instruction(0b0); } Instruction store32_rip_s32(Register src, s64 offset) { - ASSERT(src.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0x89); - instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, false); - return instr; + return Instruction(0b0); } Instruction store16_rip_s32(Register src, s64 offset) { - ASSERT(src.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0x66); - instr.set_op2(0x89); - instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, false); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } Instruction store8_rip_s32(Register src, s64 offset) { - ASSERT(src.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0x88); - instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, false); - if (src.id() > RBX) { - instr.add_rex(); - } - return instr; + return Instruction(0b0); } Instruction static_store(Register value, s64 offset, int size) { - switch (size) { - case 1: - return store8_rip_s32(value, offset); - case 2: - return store16_rip_s32(value, offset); - case 4: - return store32_rip_s32(value, offset); - case 8: - return store64_rip_s32(value, offset); - default: - ASSERT(false); - } + return Instruction(0b0); } Instruction static_addr(Register dst, s64 offset) { - ASSERT(dst.is_gpr()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0x8d); - instr.set_modrm_and_rex_for_rip_plus_s32(dst.hw_id(), offset, true); - return instr; + return Instruction(0b0); } Instruction static_load_xmm32(Register xmm_dest, s64 offset) { - ASSERT(xmm_dest.is_xmm()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - - InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x10); - instr.set_modrm_and_rex_for_rip_plus_s32(xmm_dest.hw_id(), offset, false); - - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } Instruction static_store_xmm32(Register xmm_value, s64 offset) { - ASSERT(xmm_value.is_xmm()); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - - InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x11); - instr.set_modrm_and_rex_for_rip_plus_s32(xmm_value.hw_id(), offset, false); - - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } // TODO, special load/stores of 128 bit values. // TODO, consider specialized stack loads and stores? Instruction load64_gpr64_plus_s32(Register dst_reg, int32_t offset, Register src_reg) { - ASSERT(dst_reg.is_gpr()); - ASSERT(src_reg.is_gpr()); - InstructionX86 instr(0x8b); - instr.set_modrm_rex_sib_for_reg_reg_disp(dst_reg.hw_id(), 2, src_reg.hw_id(), true); - instr.set_disp(Imm(4, offset)); - return instr; + return Instruction(0b0); } /*! * Store 64-bits from gpr into memory located at 64-bit reg + 32-bit signed offset. 
*/ Instruction store64_gpr64_plus_s32(Register addr, int32_t offset, Register value) { - ASSERT(addr.is_gpr()); - ASSERT(value.is_gpr()); - InstructionX86 instr(0x89); - instr.set_modrm_rex_sib_for_reg_reg_disp(value.hw_id(), 2, addr.hw_id(), true); - instr.set_disp(Imm(4, offset)); - return instr; + return Instruction(0b0); } //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1467,142 +556,63 @@ Instruction ret() { * Instruction to push gpr (64-bits) onto the stack */ Instruction push_gpr64(Register reg) { - ASSERT(reg.is_gpr()); - if (reg.hw_id() >= 8) { - auto i = InstructionX86(0x50 + reg.hw_id() - 8); - i.set(REX(false, false, false, true)); - return i; - } - return InstructionX86(0x50 + reg.hw_id()); + return Instruction(0b0); } /*! * Instruction to pop 64 bit gpr from the stack */ Instruction pop_gpr64(Register reg) { - ASSERT(reg.is_gpr()); - if (reg.hw_id() >= 8) { - auto i = InstructionX86(0x58 + reg.hw_id() - 8); - i.set(REX(false, false, false, true)); - return i; - } - return InstructionX86(0x58 + reg.hw_id()); + return Instruction(0b0); } /*! * Call a function stored in a 64-bit gpr */ Instruction call_r64(Register reg_) { - ASSERT(reg_.is_gpr()); - auto reg = reg_.hw_id(); - InstructionX86 instr(0xff); - if (reg >= 8) { - instr.set(REX(false, false, false, true)); - reg -= 8; - } - ASSERT(reg < 8); - ModRM mrm; - mrm.rm = reg; - mrm.reg_op = 2; - mrm.mod = 3; - instr.set(mrm); - return instr; + return Instruction(0b0); } /*! * Jump to an x86-64 address stored in a 64-bit gpr. */ Instruction jmp_r64(Register reg_) { - ASSERT(reg_.is_gpr()); - auto reg = reg_.hw_id(); - InstructionX86 instr(0xff); - if (reg >= 8) { - instr.set(REX(false, false, false, true)); - reg -= 8; - } - ASSERT(reg < 8); - ModRM mrm; - mrm.rm = reg; - mrm.reg_op = 4; - mrm.mod = 3; - instr.set(mrm); - return instr; + return Instruction(0b0); } //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; // INTEGER MATH //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Instruction sub_gpr64_imm8s(Register reg, int64_t imm) { - ASSERT(reg.is_gpr()); - ASSERT(imm >= INT8_MIN && imm <= INT8_MAX); - // SUB r/m64, imm8 : REX.W + 83 /5 ib - InstructionX86 instr(0x83); - instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); - instr.set(Imm(1, imm)); - return instr; + return Instruction(0b0); } Instruction sub_gpr64_imm32s(Register reg, int64_t imm) { - ASSERT(reg.is_gpr()); - ASSERT(imm >= INT32_MIN && imm <= INT32_MAX); - InstructionX86 instr(0x81); - instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); - instr.set(Imm(4, imm)); - return instr; + return Instruction(0b0); } Instruction add_gpr64_imm8s(Register reg, int64_t v) { - ASSERT(v >= INT8_MIN && v <= INT8_MAX); - InstructionX86 instr(0x83); - instr.set_modrm_and_rex(0, reg.hw_id(), 3, true); - instr.set(Imm(1, v)); - return instr; + return Instruction(0b0); } Instruction add_gpr64_imm32s(Register reg, int64_t v) { - ASSERT(v >= INT32_MIN && v <= INT32_MAX); - InstructionX86 instr(0x81); - instr.set_modrm_and_rex(0, reg.hw_id(), 3, true); - instr.set(Imm(4, v)); - return instr; + return Instruction(0b0); } Instruction add_gpr64_imm(Register reg, int64_t imm) { - if (imm >= INT8_MIN && imm <= INT8_MAX) { - return add_gpr64_imm8s(reg, imm); - } else if (imm >= INT32_MIN && imm <= INT32_MAX) { - return add_gpr64_imm32s(reg, imm); - } else { - throw std::runtime_error("Invalid `add` with reg[" + reg.print() + "]/imm[" + - std::to_string(imm) + "]"); - } + return Instruction(0b0); } Instruction sub_gpr64_imm(Register reg, int64_t imm) { - if (imm >= INT8_MIN && imm <= INT8_MAX) { - return 
sub_gpr64_imm8s(reg, imm); - } else if (imm >= INT32_MIN && imm <= INT32_MAX) { - return sub_gpr64_imm32s(reg, imm); - } else { - throw std::runtime_error("Invalid `sub` with reg[" + reg.print() + "]/imm[" + - std::to_string(imm) + "]"); - } + return Instruction(0b0); } Instruction add_gpr64_gpr64(Register dst, Register src) { - InstructionX86 instr(0x01); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); - return instr; + return Instruction(0b0); } Instruction sub_gpr64_gpr64(Register dst, Register src) { - InstructionX86 instr(0x29); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); - return instr; + return Instruction(0b0); } /*! @@ -1610,12 +620,7 @@ Instruction sub_gpr64_gpr64(Register dst, Register src) { * (Note - probably worth doing imul on gpr64's to implement the EE's unsigned multiply) */ Instruction imul_gpr32_gpr32(Register dst, Register src) { - InstructionX86 instr(0xf); - instr.set_op2(0xaf); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - return instr; + return Instruction(0b0); } /*! @@ -1623,37 +628,25 @@ Instruction imul_gpr32_gpr32(Register dst, Register src) { * DANGER - this treats all operands as 64-bit. This is not like the EE. */ Instruction imul_gpr64_gpr64(Register dst, Register src) { - InstructionX86 instr(0xf); - instr.set_op2(0xaf); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); - return instr; + return Instruction(0b0); } /*! * Divide (idiv, 32 bit) */ Instruction idiv_gpr32(Register reg) { - InstructionX86 instr(0xf7); - ASSERT(reg.is_gpr()); - instr.set_modrm_and_rex(7, reg.hw_id(), 3, false); - return instr; + return Instruction(0b0); } Instruction unsigned_div_gpr32(Register reg) { - InstructionX86 instr(0xf7); - ASSERT(reg.is_gpr()); - instr.set_modrm_and_rex(6, reg.hw_id(), 3, false); - return instr; + return Instruction(0b0); } /*! * Convert doubleword to quadword for division. */ Instruction cdq() { - InstructionX86 instr(0x99); - return instr; + return Instruction(0b0); } /*! @@ -1661,11 +654,7 @@ Instruction cdq() { * Needed for multiplication/divsion madness. */ Instruction movsx_r64_r32(Register dst, Register src) { - InstructionX86 instr(0x63); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); - return instr; + return Instruction(0b0); } /*! @@ -1673,11 +662,7 @@ Instruction movsx_r64_r32(Register dst, Register src) { * todo UNTESTED */ Instruction cmp_gpr64_gpr64(Register a, Register b) { - InstructionX86 instr(0x3b); - ASSERT(a.is_gpr()); - ASSERT(b.is_gpr()); - instr.set_modrm_and_rex(a.hw_id(), b.hw_id(), 3, true); - return instr; + return Instruction(0b0); } //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1688,43 +673,28 @@ Instruction cmp_gpr64_gpr64(Register a, Register b) { * Or of two gprs */ Instruction or_gpr64_gpr64(Register dst, Register src) { - InstructionX86 instr(0x0b); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); - return instr; + return Instruction(0b0); } /*! * And of two gprs */ Instruction and_gpr64_gpr64(Register dst, Register src) { - InstructionX86 instr(0x23); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); - return instr; + return Instruction(0b0); } /*! 
* Xor of two gprs */ Instruction xor_gpr64_gpr64(Register dst, Register src) { - InstructionX86 instr(0x33); - ASSERT(dst.is_gpr()); - ASSERT(src.is_gpr()); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); - return instr; + return Instruction(0b0); } /*! * Bitwise not a gpr */ Instruction not_gpr64(Register reg) { - InstructionX86 instr(0xf7); - ASSERT(reg.is_gpr()); - instr.set_modrm_and_rex(2, reg.hw_id(), 3, true); - return instr; + return Instruction(0b0); } //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1735,63 +705,42 @@ Instruction not_gpr64(Register reg) { * Shift 64-bit gpr left by CL register */ Instruction shl_gpr64_cl(Register reg) { - ASSERT(reg.is_gpr()); - InstructionX86 instr(0xd3); - instr.set_modrm_and_rex(4, reg.hw_id(), 3, true); - return instr; + return Instruction(0b0); } /*! * Shift 64-bit gpr right (logical) by CL register */ Instruction shr_gpr64_cl(Register reg) { - ASSERT(reg.is_gpr()); - InstructionX86 instr(0xd3); - instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); - return instr; + return Instruction(0b0); } /*! * Shift 64-bit gpr right (arithmetic) by CL register */ Instruction sar_gpr64_cl(Register reg) { - ASSERT(reg.is_gpr()); - InstructionX86 instr(0xd3); - instr.set_modrm_and_rex(7, reg.hw_id(), 3, true); - return instr; + return Instruction(0b0); } /*! * Shift 64-ptr left (logical) by the constant shift amount "sa". */ Instruction shl_gpr64_u8(Register reg, uint8_t sa) { - ASSERT(reg.is_gpr()); - InstructionX86 instr(0xc1); - instr.set_modrm_and_rex(4, reg.hw_id(), 3, true); - instr.set(Imm(1, sa)); - return instr; + return Instruction(0b0); } /*! * Shift 64-ptr right (logical) by the constant shift amount "sa". */ Instruction shr_gpr64_u8(Register reg, uint8_t sa) { - ASSERT(reg.is_gpr()); - InstructionX86 instr(0xc1); - instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); - instr.set(Imm(1, sa)); - return instr; + return Instruction(0b0); } /*! * Shift 64-ptr right (arithmetic) by the constant shift amount "sa". */ Instruction sar_gpr64_u8(Register reg, uint8_t sa) { - ASSERT(reg.is_gpr()); - InstructionX86 instr(0xc1); - instr.set_modrm_and_rex(7, reg.hw_id(), 3, true); - instr.set(Imm(1, sa)); - return instr; + return Instruction(0b0); } //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1802,109 +751,77 @@ Instruction sar_gpr64_u8(Register reg, uint8_t sa) { * Jump, 32-bit constant offset. The offset is by default 0 and must be patched later. */ Instruction jmp_32() { - InstructionX86 instr(0xe9); - instr.set(Imm(4, 0)); - return instr; + return Instruction(0b0); } /*! * Jump if equal. */ Instruction je_32() { - InstructionX86 instr(0x0f); - instr.set_op2(0x84); - instr.set(Imm(4, 0)); - return instr; + return Instruction(0b0); } /*! * Jump not equal. */ Instruction jne_32() { - InstructionX86 instr(0x0f); - instr.set_op2(0x85); - instr.set(Imm(4, 0)); - return instr; + return Instruction(0b0); } /*! * Jump less than or equal. */ Instruction jle_32() { - InstructionX86 instr(0x0f); - instr.set_op2(0x8e); - instr.set(Imm(4, 0)); - return instr; + return Instruction(0b0); } /*! * Jump greater than or equal. */ Instruction jge_32() { - InstructionX86 instr(0x0f); - instr.set_op2(0x8d); - instr.set(Imm(4, 0)); - return instr; + return Instruction(0b0); } /*! * Jump less than */ Instruction jl_32() { - InstructionX86 instr(0x0f); - instr.set_op2(0x8c); - instr.set(Imm(4, 0)); - return instr; + return Instruction(0b0); } /*! 
* Jump greater than */ Instruction jg_32() { - InstructionX86 instr(0x0f); - instr.set_op2(0x8f); - instr.set(Imm(4, 0)); - return instr; + return Instruction(0b0); } /*! * Jump below or equal */ Instruction jbe_32() { - InstructionX86 instr(0x0f); - instr.set_op2(0x86); - instr.set(Imm(4, 0)); - return instr; + return Instruction(0b0); } /*! * Jump above or equal */ Instruction jae_32() { - InstructionX86 instr(0x0f); - instr.set_op2(0x83); - instr.set(Imm(4, 0)); - return instr; + return Instruction(0b0); } /*! * Jump below */ Instruction jb_32() { - InstructionX86 instr(0x0f); - instr.set_op2(0x82); - instr.set(Imm(4, 0)); - return instr; + return Instruction(0b0); } /*! * Jump above */ Instruction ja_32() { - InstructionX86 instr(0x0f); - instr.set_op2(0x87); - instr.set(Imm(4, 0)); - return instr; + return Instruction(0b0); } //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1915,141 +832,71 @@ Instruction ja_32() { * Compare two floats and set flag register for jump (ucomiss) */ Instruction cmp_flt_flt(Register a, Register b) { - ASSERT(a.is_xmm()); - ASSERT(b.is_xmm()); - InstructionX86 instr(0x0f); - instr.set_op2(0x2e); - instr.set_modrm_and_rex(a.hw_id(), b.hw_id(), 3, false); - return instr; + return Instruction(0b0); } Instruction sqrts_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x51); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } /*! * Multiply two floats in xmm's */ Instruction mulss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x59); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } /*! * Divide two floats in xmm's */ Instruction divss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x5e); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } /*! * Subtract two floats in xmm's */ Instruction subss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x5c); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } /*! * Add two floats in xmm's */ Instruction addss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x58); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } /*! * Floating point minimum. */ Instruction minss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x5d); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } /*! * Floating point maximum. 
*/ Instruction maxss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x5f); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } /*! * Convert GPR int32 to XMM float (single precision) */ Instruction int32_to_float(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_gpr()); - InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x2a); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } /*! * Convert XMM float to GPR int32(single precision) (truncate) */ Instruction float_to_int32(Register dst, Register src) { - ASSERT(dst.is_gpr()); - ASSERT(src.is_xmm()); - InstructionX86 instr(0xf3); - instr.set_op2(0x0f); - instr.set_op3(0x2c); - instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - instr.swap_op0_rex(); - return instr; + return Instruction(0b0); } Instruction nop() { - // NOP - InstructionX86 instr(0x90); - return instr; + return Instruction(0b0); } // TODO - rsqrt / abs / sqrt @@ -2059,14 +906,12 @@ Instruction nop() { //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; /*! - * A "null" instruction. This instruction does not generate any bytes + * A "null" instruction. This Instruction does not generate any bytes * but can be referred to by a label. Useful to insert in place of a real instruction - * if the real instruction has been optimized out. + * if the real Instruction has been optimized out. */ Instruction null() { - InstructionX86 i(0); - i.m_flags |= InstructionX86::kIsNull; - return i; + return Instruction(0b0); } ///////////////////////////// @@ -2074,167 +919,65 @@ Instruction null() { ///////////////////////////// Instruction nop_vf() { - InstructionX86 instr(0xd9); // FNOP - instr.set_op2(0xd0); - return instr; + return Instruction(0b0); } Instruction wait_vf() { - InstructionX86 instr(0x9B); // FWAIT / WAIT - return instr; + return Instruction(0b0); } Instruction mov_vf_vf(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - - if (src.hw_id() >= 8 && dst.hw_id() < 8) { - // in this case, we can use the 0x29 encoding, which swaps src and dst, in order to use the - // 2 byte VEX prefix, where the 0x28 encoding would require an extra byte. - // compilers/assemblers seem to prefer 0x28, unless 0x29 would save you a byte. 
- InstructionX86 instr(0x29); - instr.set_vex_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, VEX3::LeadingBytes::P_0F, false); - return instr; - } else { - InstructionX86 instr(0x28); - instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, VEX3::LeadingBytes::P_0F, false); - return instr; - } + return Instruction(0b0); } Instruction loadvf_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - InstructionX86 instr(0x28); - instr.set_vex_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - VEX3::LeadingBytes::P_0F, false); - return instr; + return Instruction(0b0); } Instruction loadvf_gpr64_plus_gpr64_plus_s8(Register dst, Register addr1, Register addr2, s64 offset) { - ASSERT(dst.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - InstructionX86 instr(0x28); - instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, VEX3::LeadingBytes::P_0F, false); - return instr; + return Instruction(0b0); } Instruction loadvf_gpr64_plus_gpr64_plus_s32(Register dst, Register addr1, Register addr2, s64 offset) { - ASSERT(dst.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0x28); - instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, VEX3::LeadingBytes::P_0F, false); - return instr; + return Instruction(0b0); } Instruction storevf_gpr64_plus_gpr64(Register value, Register addr1, Register addr2) { - ASSERT(value.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - InstructionX86 instr(0x29); - instr.set_vex_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - VEX3::LeadingBytes::P_0F, false); - return instr; + return Instruction(0b0); } Instruction storevf_gpr64_plus_gpr64_plus_s8(Register value, Register addr1, Register addr2, s64 offset) { - ASSERT(value.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); - InstructionX86 instr(0x29); - instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, VEX3::LeadingBytes::P_0F, false); - return instr; + return Instruction(0b0); } Instruction storevf_gpr64_plus_gpr64_plus_s32(Register value, Register addr1, Register addr2, s64 offset) { - ASSERT(value.is_xmm()); - ASSERT(addr1.is_gpr()); - ASSERT(addr2.is_gpr()); - ASSERT(addr1 != addr2); - ASSERT(addr1 != RSP); - ASSERT(addr2 != RSP); - ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); - InstructionX86 instr(0x29); - instr.set_vex_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), - offset, VEX3::LeadingBytes::P_0F, false); - return instr; + return Instruction(0b0); } Instruction loadvf_rip_plus_s32(Register dest, s64 offset) { - ASSERT(dest.is_xmm()); - ASSERT(offset >= INT32_MIN); - ASSERT(offset <= INT32_MAX); - InstructionX86 instr(0x28); - 
instr.set_vex_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset); - return instr; + return Instruction(0b0); } // TODO - rip relative loads and stores. Instruction blend_vf(Register dst, Register src1, Register src2, u8 mask) { - ASSERT(!(mask & 0b11110000)); - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); - InstructionX86 instr(0x0c); // VBLENDPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F_3A, src1.hw_id(), - false, VexPrefix::P_66); - instr.set(Imm(1, mask)); - return instr; + return Instruction(0b0); } Instruction shuffle_vf(Register dst, Register src, u8 dx, u8 dy, u8 dz, u8 dw) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - ASSERT(dx < 4); - ASSERT(dy < 4); - ASSERT(dz < 4); - ASSERT(dw < 4); - u8 imm = dx + (dy << 2) + (dz << 4) + (dw << 6); - return swizzle_vf(dst, src, imm); - - // SSE encoding version: - // InstructionX86 instr(0x0f); - // instr.set_op2(0xc6); - // instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); - // instr.set(Imm(1, imm)); - // return instr; + return Instruction(0b0); } /* @@ -2256,16 +999,7 @@ Instruction shuffle_vf(Register dst, Register src, u8 dx, u8 dy, u8 dz, u8 dw) { > (4.5, 1.5, 2.5, 3.5) */ Instruction swizzle_vf(Register dst, Register src, u8 controlBytes) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - InstructionX86 instr(0xC6); // VSHUFPS - - // we use the AVX "VEX" encoding here. This is a three-operand form, - // but we just set both source - // to the same register. It seems like this is one byte longer but is faster maybe? - instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, src.hw_id()); - instr.set(Imm(1, controlBytes)); - return instr; + return Instruction(0b0); } /* @@ -2277,214 +1011,82 @@ Instruction swizzle_vf(Register dst, Register src, u8 controlBytes) { xmm1 = (4, 4, 4, 4) */ Instruction splat_vf(Register dst, Register src, Register::VF_ELEMENT element) { - switch (element) { - case Register::VF_ELEMENT::X: // Least significant element - return swizzle_vf(dst, src, 0b00000000); - break; - case Register::VF_ELEMENT::Y: - return swizzle_vf(dst, src, 0b01010101); - break; - case Register::VF_ELEMENT::Z: - return swizzle_vf(dst, src, 0b10101010); - break; - case Register::VF_ELEMENT::W: // Most significant element - return swizzle_vf(dst, src, 0b11111111); - break; - default: - ASSERT(false); - return InstructionX86(0); - } + return Instruction(0b0); } Instruction xor_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); - InstructionX86 instr(0x57); // VXORPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); - return instr; + return Instruction(0b0); } Instruction sub_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); - InstructionX86 instr(0x5c); // VSUBPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); - return instr; + return Instruction(0b0); } Instruction add_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); - InstructionX86 instr(0x58); // VADDPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); - return instr; + return Instruction(0b0); } Instruction mul_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - 
ASSERT(src2.is_xmm()); - InstructionX86 instr(0x59); // VMULPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); - return instr; + return Instruction(0b0); } Instruction max_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); - InstructionX86 instr(0x5F); // VMAXPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); - return instr; + return Instruction(0b0); } Instruction min_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); - InstructionX86 instr(0x5D); // VMINPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); - return instr; + return Instruction(0b0); } Instruction div_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); - InstructionX86 instr(0x5E); // VDIVPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); - return instr; + return Instruction(0b0); } Instruction sqrt_vf(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - InstructionX86 instr(0x51); // VSQRTPS - instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0b0); - return instr; + return Instruction(0b0); } Instruction itof_vf(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - InstructionX86 instr(0x5b); // VCVTDQ2PS - instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0); - return instr; + return Instruction(0b0); } Instruction ftoi_vf(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.F3.0F.WIG 5B /r VCVTTPS2DQ xmm1, xmm2/m128 - InstructionX86 instr(0x5b); // VCVTTPS2DQ - instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false, - VexPrefix::P_F3); - return instr; + return Instruction(0b0); } Instruction pw_sra(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.66.0F.WIG 72 /4 ib VPSRAD xmm1, xmm2, imm8 - InstructionX86 instr(0x72); - instr.set_vex_modrm_and_rex(4, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, - VexPrefix::P_66); - instr.set(Imm(1, imm)); - return instr; + return Instruction(0b0); } Instruction pw_srl(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.66.0F.WIG 72 /2 ib VPSRLD xmm1, xmm2, imm8 - InstructionX86 instr(0x72); - instr.set_vex_modrm_and_rex(2, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, - VexPrefix::P_66); - instr.set(Imm(1, imm)); - return instr; + return Instruction(0b0); } Instruction ph_srl(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.66.0F.WIG 71 /2 ib VPSRLW - InstructionX86 instr(0x71); - instr.set_vex_modrm_and_rex(2, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, - VexPrefix::P_66); - instr.set(Imm(1, imm)); - return instr; + return Instruction(0b0); } Instruction pw_sll(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.66.0F.WIG 72 /6 ib VPSLLD xmm1, xmm2, imm8 - InstructionX86 instr(0x72); - instr.set_vex_modrm_and_rex(6, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, - VexPrefix::P_66); - instr.set(Imm(1, imm)); - return instr; + return Instruction(0b0); } 
Instruction ph_sll(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.66.0F.WIG 71 /6 ib VPSLLW xmm1, xmm2, imm8 - InstructionX86 instr(0x71); - instr.set_vex_modrm_and_rex(6, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, - VexPrefix::P_66); - instr.set(Imm(1, imm)); - return instr; + return Instruction(0b0); } Instruction parallel_add_byte(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG FC /r VPADDB xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - InstructionX86 instr(0xFC); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; + return Instruction(0b0); } Instruction parallel_bitwise_or(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - InstructionX86 instr(0xEB); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; + return Instruction(0b0); } Instruction parallel_bitwise_xor(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - InstructionX86 instr(0xEF); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; + return Instruction(0b0); } Instruction parallel_bitwise_and(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - InstructionX86 instr(0xDB); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; + return Instruction(0b0); } // Reminder - a word in MIPS = 32bits = a DWORD in x86 @@ -2497,251 +1099,94 @@ Instruction parallel_bitwise_and(Register dst, Register src0, Register src1) { // -- Unpack High Data Instructions Instruction pextub_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 68/r VPUNPCKHBW xmm1,xmm2, xmm3/m128 - // reg, vex, r/m - InstructionX86 instr(0x68); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; + return Instruction(0b0); } Instruction pextuh_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 69/r VPUNPCKHWD xmm1,xmm2, xmm3/m128 - // reg, vex, r/m - InstructionX86 instr(0x69); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; + return Instruction(0b0); } Instruction pextuw_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 6A/r VPUNPCKHDQ xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - InstructionX86 instr(0x6a); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; + return Instruction(0b0); } 
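// --------------------------------------------------------------------------
// A minimal sketch (not part of this patch) of what one of the placeholder
// `return Instruction(0b0);` stubs above could eventually look like once real
// AArch64 encodings are filled in. It assumes Register::hw_id() maps onto the
// architectural register numbers X0-X30; the base value 0x8B000000 is the
// 64-bit "ADD Xd, Xn, Xm" (shifted register, shift amount 0) encoding.
// The function name is hypothetical and only illustrates the pattern of
// building a 32-bit encoding and handing it to InstructionARM64.
Instruction add_gpr64_gpr64_arm64_sketch(Register dst, Register src) {
  u32 encoding = 0x8B000000;               // ADD (shifted register), 64-bit variant
  encoding |= (src.hw_id() & 0x1F) << 16;  // Rm field: the source register
  encoding |= (dst.hw_id() & 0x1F) << 5;   // Rn field: dst is also an input (dst = dst + src)
  encoding |= (dst.hw_id() & 0x1F);        // Rd field: the destination register
  return Instruction(encoding);            // emitted later as a single 4-byte word
}
// --------------------------------------------------------------------------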
// -- Unpack Low Data Instructions Instruction pextlb_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 60/r VPUNPCKLBW xmm1,xmm2, xmm3/m128 - // reg, vex, r/m - InstructionX86 instr(0x60); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; + return Instruction(0b0); } Instruction pextlh_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 61/r VPUNPCKLWD xmm1,xmm2, xmm3/m128 - // reg, vex, r/m - InstructionX86 instr(0x61); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; + return Instruction(0b0); } Instruction pextlw_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 62/r VPUNPCKLDQ xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - InstructionX86 instr(0x62); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; + return Instruction(0b0); } // Equal to than comparison as 16 bytes (8 bits) Instruction parallel_compare_e_b(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 74 /r VPCMPEQB xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - InstructionX86 instr(0x74); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; + return Instruction(0b0); } // Equal to than comparison as 8 halfwords (16 bits) Instruction parallel_compare_e_h(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 75 /r VPCMPEQW xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - InstructionX86 instr(0x75); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; + return Instruction(0b0); } // Equal to than comparison as 4 words (32 bits) Instruction parallel_compare_e_w(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 76 /r VPCMPEQD xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - InstructionX86 instr(0x76); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; + return Instruction(0b0); } // Greater than comparison as 16 bytes (8 bits) Instruction parallel_compare_gt_b(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 64 /r VPCMPGTB xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - InstructionX86 instr(0x64); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; + return Instruction(0b0); } // Greater than comparison as 8 halfwords (16 bits) Instruction parallel_compare_gt_h(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 65 /r VPCMPGTW xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - InstructionX86 instr(0x65); - 
instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; + return Instruction(0b0); } // Greater than comparison as 4 words (32 bits) Instruction parallel_compare_gt_w(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 66 /r VPCMPGTD xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - InstructionX86 instr(0x66); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; + return Instruction(0b0); } Instruction vpunpcklqdq(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 6C/r VPUNPCKLQDQ xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - InstructionX86 instr(0x6c); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; + return Instruction(0b0); } Instruction pcpyld_swapped(Register dst, Register src0, Register src1) { - return vpunpcklqdq(dst, src0, src1); + return Instruction(0b0); } Instruction pcpyud(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 6D/r VPUNPCKHQDQ xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - InstructionX86 instr(0x6d); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; + return Instruction(0b0); } Instruction vpsubd(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG FA /r VPSUBD xmm1, xmm2, xmm3/m128 - // reg, vec, r/m - InstructionX86 instr(0xfa); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; + return Instruction(0b0); } Instruction vpsrldq(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.66.0F.WIG 73 /3 ib VPSRLDQ xmm1, xmm2, imm8 - InstructionX86 instr(0x73); - instr.set_vex_modrm_and_rex(3, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, - VexPrefix::P_66); - instr.set(Imm(1, imm)); - return instr; + return Instruction(0b0); } Instruction vpslldq(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.66.0F.WIG 73 /7 ib VPSLLDQ xmm1, xmm2, imm8 - InstructionX86 instr(0x73); - instr.set_vex_modrm_and_rex(7, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, - VexPrefix::P_66); - instr.set(Imm(1, imm)); - return instr; + return Instruction(0b0); } Instruction vpshuflw(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.F2.0F.WIG 70 /r ib VPSHUFLW xmm1, xmm2/m128, imm8 - InstructionX86 instr(0x70); - instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false, - VexPrefix::P_F2); - instr.set(Imm(1, imm)); - return instr; + return Instruction(0b0); } Instruction vpshufhw(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); - // VEX.128.F3.0F.WIG 70 /r ib VPSHUFHW xmm1, xmm2/m128, imm8 - InstructionX86 instr(0x70); - instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false, - VexPrefix::P_F3); - instr.set(Imm(1, imm)); - return instr; + return 
Instruction(0b0); } Instruction vpackuswb(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); - // VEX.128.66.0F.WIG 67 /r VPACKUSWB xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - - InstructionX86 instr(0x67); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; + return Instruction(0b0); } } // namespace IGen } // namespace emitter diff --git a/goalc/emitter/Instruction.h b/goalc/emitter/Instruction.h index 4168b54d2f9..3a32a353395 100644 --- a/goalc/emitter/Instruction.h +++ b/goalc/emitter/Instruction.h @@ -1,16 +1,1054 @@ #pragma once +#include #include "common/common_types.h" +#include "common/util/Assert.h" namespace emitter { /*! * A high-level description of a opcode. It can emit itself. */ -struct Instruction { +template +struct InstructionImpl { + /*! + * Emit into a buffer and return how many bytes written (can be zero) + */ + u8 emit(u8* buffer) const { + return static_cast(this)->emit(buffer); + } + + u8 length() const { + return static_cast(this)->length(); + } +}; + +struct InstructionARM64 : InstructionImpl { + // The ARM instruction stream is a sequence of word-aligned words. Each ARM instruction is a single 32-bit word in that stream. + // The encoding of an ARM instruction is: + // TODO + // https://iitd-plos.github.io/col718/ref/arm-instructionset.pdf + u32 instruction_encoding; + + InstructionARM64(u32 encoding) : instruction_encoding(encoding) {} + + uint8_t emit(uint8_t* buffer) const { + memcpy(buffer, &instruction_encoding, 4); + return 4; + } + + uint8_t length() const { + return 4; + } +}; + +/*! + * The ModRM byte + */ +struct ModRM { + uint8_t mod; + uint8_t reg_op; + uint8_t rm; + + uint8_t operator()() const { return (mod << 6) | (reg_op << 3) | (rm << 0); } +}; + +/*! + * The SIB Byte + */ +struct SIB { + uint8_t scale, index, base; + + uint8_t operator()() const { return (scale << 6) | (index << 3) | (base << 0); } +}; + +/*! + * An Immediate (either imm or disp) + */ +struct Imm { + Imm() = default; + Imm(uint8_t sz, uint64_t v) : size(sz), value(v) {} + uint8_t size; + union { + uint64_t value; + uint8_t v_arr[8]; + }; +}; + +/*! + * The REX prefix byte + */ +struct REX { + explicit REX(bool w = false, bool r = false, bool x = false, bool b = false) + : W(w), R(r), X(x), B(b) {} + // W - 64-bit operands + // R - reg extension + // X - SIB i extnsion + // B - other extension + bool W, R, X, B; + uint8_t operator()() const { return (1 << 6) | (W << 3) | (R << 2) | (X << 1) | (B << 0); } +}; + +enum class VexPrefix : u8 { P_NONE = 0, P_66 = 1, P_F3 = 2, P_F2 = 3 }; + +/*! + * The "VEX" 3-byte format for AVX instructions + */ +struct VEX3 { + bool W, R, X, B; + enum class LeadingBytes : u8 { P_INVALID = 0, P_0F = 1, P_0F_38 = 2, P_0F_3A = 3 } leading_bytes; + u8 reg_id; + VexPrefix prefix; + bool L; + + u8 emit(u8 byte) const { + if (byte == 0) { + return 0b11000100; + } else if (byte == 1) { + u8 result = 0; + result |= ((!R) << 7); + result |= ((!X) << 6); + result |= ((!B) << 5); + result |= (0b11111 & u8(leading_bytes)); + return result; + } else if (byte == 2) { + u8 result = 0; + result |= (W << 7); // this may be inverted? 
+ result |= ((~reg_id) & 0b1111) << 3; + result |= (L << 2); + result |= (u8(prefix) & 0b11); + return result; + } else { + ASSERT(false); + return -1; + } + } + + VEX3(bool w, + bool r, + bool x, + bool b, + LeadingBytes _leading_bytes, + u8 _reg_id = 0, + VexPrefix _prefix = VexPrefix::P_NONE, + bool l = false) + : W(w), + R(r), + X(x), + B(b), + leading_bytes(_leading_bytes), + reg_id(_reg_id), + prefix(_prefix), + L(l) {} +}; + +struct VEX2 { + bool R; + u8 reg_id; + VexPrefix prefix; + bool L; + + u8 emit(u8 byte) const { + if (byte == 0) { + return 0b11000101; + } else if (byte == 1) { + u8 result = 0; + result |= ((!R) << 7); + result |= ((~reg_id) & 0b1111) << 3; + result |= (L << 2); + result |= (u8(prefix) & 0b11); + return result; + } else { + ASSERT(false); + return -1; + } + } + + VEX2(bool r, u8 _reg_id = 0, VexPrefix _prefix = VexPrefix::P_NONE, bool l = false) + : R(r), reg_id(_reg_id), prefix(_prefix), L(l) {} +}; + +struct InstructionX86 : InstructionImpl { + enum Flags { + kOp2Set = (1 << 0), + kOp3Set = (1 << 1), + kIsNull = (1 << 2), + kSetRex = (1 << 3), + kSetModrm = (1 << 4), + kSetSib = (1 << 5), + kSetDispImm = (1 << 6), + kSetImm = (1 << 7), + }; + + InstructionX86(u8 opcode) : op(opcode) {} + + u8 op; + + u8 m_flags = 0; + + u8 op2; + + u8 op3; + + u8 n_vex = 0; + u8 vex[3] = {0, 0, 0}; + + // the rex byte + u8 m_rex = 0; + + // the modrm byte + u8 m_modrm = 0; + + // the sib byte + u8 m_sib = 0; + + // the displacement + Imm disp; + + // the immediate + Imm imm; + + /*! + * Move opcode byte 0 to before the rex prefix. + */ + void swap_op0_rex() { + if (!(m_flags & kSetRex)) + return; + auto temp = op; + op = m_rex; + m_rex = temp; + } + + void set(REX r) { + m_rex = r(); + m_flags |= kSetRex; + } + + void set(ModRM modrm) { + m_modrm = modrm(); + m_flags |= kSetModrm; + } + + void set(SIB sib) { + m_sib = sib(); + m_flags |= kSetSib; + } + + void set(VEX3 vex3) { + n_vex = 3; + for (int i = 0; i < n_vex; i++) { + vex[i] = vex3.emit(i); + } + } + + void set(VEX2 vex2) { + n_vex = 2; + for (int i = 0; i < n_vex; i++) { + vex[i] = vex2.emit(i); + } + } + + void set_disp(Imm i) { + disp = i; + m_flags |= kSetDispImm; + } + + void set(Imm i) { + imm = i; + m_flags |= kSetImm; + } + + void set_op2(uint8_t b) { + m_flags |= kOp2Set; + op2 = b; + } + + void set_op3(uint8_t b) { + m_flags |= kOp3Set; + op3 = b; + } + + int get_imm_size() const { + if (m_flags & kSetImm) { + return imm.size; + } else { + return 0; + } + } + + int get_disp_size() const { + if (m_flags & kSetDispImm) { + return disp.size; + } else { + return 0; + } + } + + /*! + * Set modrm and rex as needed for two regs. 
+ */ + void set_modrm_and_rex(uint8_t reg, uint8_t rm, uint8_t mod, bool rex_w = false) { + bool rex_b = false, rex_r = false; + + if (rm >= 8) { + rm -= 8; + rex_b = true; + } + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = mod; + modrm.reg_op = reg; + modrm.rm = rm; + + set(modrm); + + if (rex_b || rex_w || rex_r) { + set(REX(rex_w, rex_r, false, rex_b)); + } + } + + void set_vex_modrm_and_rex(uint8_t reg, + uint8_t rm, + VEX3::LeadingBytes lb, + uint8_t vex_reg = 0, + bool rex_w = false, + VexPrefix prefix = VexPrefix::P_NONE) { + bool rex_b = false, rex_r = false; + + if (rm >= 8) { + rm -= 8; + rex_b = true; + } + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 3; + modrm.reg_op = reg; + modrm.rm = rm; + + set(modrm); + if (rex_b || rex_w || lb != VEX3::LeadingBytes::P_0F) { + // need three byte version + set(VEX3(rex_w, rex_r, false, rex_b, lb, vex_reg, prefix)); + } else { + ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f + ASSERT(!rex_b); + ASSERT(!rex_w); + set(VEX2(rex_r, vex_reg, prefix)); + } + } + /*! - * Emit into a buffer and return how many bytes written (can be zero) + * Set VEX prefix for REX as needed for two registers. */ - virtual u8 emit(u8* buffer) const = 0; - virtual u8 length() const = 0; + void set_vex_modrm_and_rex(uint8_t reg, + uint8_t rm, + uint8_t mod, + VEX3::LeadingBytes lb, + bool rex_w = false) { + bool rex_b = false; + bool rex_r = false; + if (rm >= 8) { + rm -= 8; + rex_b = true; + } + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = mod; + modrm.reg_op = reg; + modrm.rm = rm; + set(modrm); + if (rex_b || rex_w || lb != VEX3::LeadingBytes::P_0F) { + // need three byte version + set(VEX3(rex_w, rex_r, false, rex_b, lb)); + } else { + // can get away with two byte version + ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f + ASSERT(!rex_b); + ASSERT(!rex_w); + set(VEX2(rex_r)); + } + } + + void set_modrm_and_rex_for_reg_plus_reg_plus_s8(uint8_t reg, + uint8_t addr1, + uint8_t addr2, + s8 offset, + bool rex_w) { + bool rex_b = false, rex_r = false, rex_x = false; + bool addr1_ext = false; + bool addr2_ext = false; + + if (addr1 >= 8) { + addr1 -= 8; + addr1_ext = true; + } + + if (addr2 >= 8) { + addr2 -= 8; + addr2_ext = true; + } + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 1; // no disp + modrm.rm = 4; // sib! + modrm.reg_op = reg; + + SIB sib; + sib.scale = 0; + + Imm imm2(1, offset); + + // default addr1 in index + if (addr1 == 4) { + sib.index = addr2; + sib.base = addr1; + rex_x = addr2_ext; + rex_b = addr1_ext; + } else { + // addr1 in index + sib.index = addr1; + sib.base = addr2; + rex_x = addr1_ext; + rex_b = addr2_ext; + } + ASSERT(sib.index != 4); + + if (rex_b || rex_w || rex_r || rex_x) { + set(REX(rex_w, rex_r, rex_x, rex_b)); + } + + set(modrm); + set(sib); + set_disp(imm2); + } + + void set_vex_modrm_and_rex_for_reg_plus_reg_plus_s8(uint8_t reg, + uint8_t addr1, + uint8_t addr2, + s8 offset, + VEX3::LeadingBytes lb, + bool rex_w) { + bool rex_b = false, rex_r = false, rex_x = false; + bool addr1_ext = false; + bool addr2_ext = false; + + if (addr1 >= 8) { + addr1 -= 8; + addr1_ext = true; + } + + if (addr2 >= 8) { + addr2 -= 8; + addr2_ext = true; + } + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 1; // no disp + modrm.rm = 4; // sib! 
+ modrm.reg_op = reg; + + SIB sib; + sib.scale = 0; + + Imm imm2(1, offset); + + // default addr1 in index + if (addr1 == 4) { + sib.index = addr2; + sib.base = addr1; + rex_x = addr2_ext; + rex_b = addr1_ext; + } else { + // addr1 in index + sib.index = addr1; + sib.base = addr2; + rex_x = addr1_ext; + rex_b = addr2_ext; + } + ASSERT(sib.index != 4); + + if (rex_b || rex_w || rex_x || lb != VEX3::LeadingBytes::P_0F) { + // need three byte version + set(VEX3(rex_w, rex_r, rex_x, rex_b, lb)); + } else { + ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f + ASSERT(!rex_b); + ASSERT(!rex_w); + ASSERT(!rex_x); + set(VEX2(rex_r)); + } + + set(modrm); + set(sib); + set_disp(imm2); + } + + void set_modrm_and_rex_for_reg_plus_reg_plus_s32(uint8_t reg, + uint8_t addr1, + uint8_t addr2, + s32 offset, + bool rex_w) { + bool rex_b = false, rex_r = false, rex_x = false; + bool addr1_ext = false; + bool addr2_ext = false; + + if (addr1 >= 8) { + addr1 -= 8; + addr1_ext = true; + } + + if (addr2 >= 8) { + addr2 -= 8; + addr2_ext = true; + } + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 2; // no disp + modrm.rm = 4; // sib! + modrm.reg_op = reg; + + SIB sib; + sib.scale = 0; + + Imm imm2(4, offset); + + // default addr1 in index + if (addr1 == 4) { + sib.index = addr2; + sib.base = addr1; + rex_x = addr2_ext; + rex_b = addr1_ext; + } else { + // addr1 in index + sib.index = addr1; + sib.base = addr2; + rex_x = addr1_ext; + rex_b = addr2_ext; + } + ASSERT(sib.index != 4); + + if (rex_b || rex_w || rex_r || rex_x) { + set(REX(rex_w, rex_r, rex_x, rex_b)); + } + + set(modrm); + set(sib); + set_disp(imm2); + } + + void set_vex_modrm_and_rex_for_reg_plus_reg_plus_s32(uint8_t reg, + uint8_t addr1, + uint8_t addr2, + s32 offset, + VEX3::LeadingBytes lb, + bool rex_w) { + bool rex_b = false, rex_r = false, rex_x = false; + bool addr1_ext = false; + bool addr2_ext = false; + + if (addr1 >= 8) { + addr1 -= 8; + addr1_ext = true; + } + + if (addr2 >= 8) { + addr2 -= 8; + addr2_ext = true; + } + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 2; // no disp + modrm.rm = 4; // sib! + modrm.reg_op = reg; + + SIB sib; + sib.scale = 0; + + Imm imm2(4, offset); + + // default addr1 in index + if (addr1 == 4) { + sib.index = addr2; + sib.base = addr1; + rex_x = addr2_ext; + rex_b = addr1_ext; + } else { + // addr1 in index + sib.index = addr1; + sib.base = addr2; + rex_x = addr1_ext; + rex_b = addr2_ext; + } + ASSERT(sib.index != 4); + + if (rex_b || rex_w || rex_x || lb != VEX3::LeadingBytes::P_0F) { + // need three byte version + set(VEX3(rex_w, rex_r, rex_x, rex_b, lb)); + } else { + ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f + ASSERT(!rex_b); + ASSERT(!rex_w); + ASSERT(!rex_x); + set(VEX2(rex_r)); + } + + set(modrm); + set(sib); + set_disp(imm2); + } + + void set_modrm_and_rex_for_reg_plus_reg_addr(uint8_t reg, + uint8_t addr1, + uint8_t addr2, + bool rex_w = false, + bool rex_always = false) { + bool rex_b = false, rex_r = false, rex_x = false; + bool addr1_ext = false; + bool addr2_ext = false; + + if (addr1 >= 8) { + addr1 -= 8; + addr1_ext = true; + } + + if (addr2 >= 8) { + addr2 -= 8; + addr2_ext = true; + } + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 0; // no disp + modrm.rm = 4; // sib! 
+ modrm.reg_op = reg; + + SIB sib; + sib.scale = 0; + + if (addr1 == 5 && addr2 == 5) { + sib.index = addr1; + sib.base = addr2; + rex_x = addr1_ext; + rex_b = addr2_ext; + modrm.mod = 1; + set_disp(Imm(1, 0)); + + } else { + // default addr1 in index + bool flipped = (addr1 == 4) || (addr2 == 5); + + if (flipped) { + sib.index = addr2; + sib.base = addr1; + rex_x = addr2_ext; + rex_b = addr1_ext; + } else { + // addr1 in index + sib.index = addr1; + sib.base = addr2; + rex_x = addr1_ext; + rex_b = addr2_ext; + } + ASSERT(sib.base != 5); + ASSERT(sib.index != 4); + } + + if (rex_b || rex_w || rex_r || rex_x || rex_always) { + set(REX(rex_w, rex_r, rex_x, rex_b)); + } + + set(modrm); + set(sib); + } + + void set_vex_modrm_and_rex_for_reg_plus_reg_addr(uint8_t reg, + uint8_t addr1, + uint8_t addr2, + VEX3::LeadingBytes lb, + bool rex_w = false) { + bool rex_b = false, rex_r = false, rex_x = false; + bool addr1_ext = false; + bool addr2_ext = false; + + if (addr1 >= 8) { + addr1 -= 8; + addr1_ext = true; + } + + if (addr2 >= 8) { + addr2 -= 8; + addr2_ext = true; + } + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 0; // no disp + modrm.rm = 4; // sib! + modrm.reg_op = reg; + + SIB sib; + sib.scale = 0; + + if (addr1 == 5 && addr2 == 5) { + sib.index = addr1; + sib.base = addr2; + rex_x = addr1_ext; + rex_b = addr2_ext; + modrm.mod = 1; + set_disp(Imm(1, 0)); + + } else { + // default addr1 in index + bool flipped = (addr1 == 4) || (addr2 == 5); + + if (flipped) { + sib.index = addr2; + sib.base = addr1; + rex_x = addr2_ext; + rex_b = addr1_ext; + } else { + // addr1 in index + sib.index = addr1; + sib.base = addr2; + rex_x = addr1_ext; + rex_b = addr2_ext; + } + ASSERT(sib.base != 5); + ASSERT(sib.index != 4); + } + + if (rex_b || rex_w || rex_x || lb != VEX3::LeadingBytes::P_0F) { + // need three byte version + set(VEX3(rex_w, rex_r, rex_x, rex_b, lb)); + } else { + ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f + ASSERT(!rex_b); + ASSERT(!rex_w); + ASSERT(!rex_x); + set(VEX2(rex_r)); + } + + set(modrm); + set(sib); + } + + /*! + * Set modrm and rex as needed for two regs for an addressing mode. + * Will set SIB if R12 or RSP indexing is used. 
+ */ + void set_modrm_and_rex_for_reg_addr(uint8_t reg, uint8_t rm, bool rex_w = false) { + bool rex_b = false, rex_r = false; + + if (rm >= 8) { + rm -= 8; + rex_b = true; + } + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 0; + modrm.reg_op = reg; + modrm.rm = rm; + + if (rm == 4) { + SIB sib; + sib.scale = 0; + sib.base = 4; + sib.index = 4; + + set(sib); + } + + if (rm == 5) { + modrm.mod = 1; // 1 byte imm + set_disp(Imm(1, 0)); + } + + set(modrm); + if (rex_b || rex_w || rex_r) { + set(REX(rex_w, rex_r, false, rex_b)); + } + } + + void set_modrm_and_rex_for_rip_plus_s32(uint8_t reg, s32 offset, bool rex_w = false) { + bool rex_r = false; + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 0; + modrm.reg_op = reg; + modrm.rm = 5; // use the RIP addressing mode + set(modrm); + + if (rex_r || rex_w) { + set(REX(rex_w, rex_r, false, false)); + } + + set_disp(Imm(4, offset)); + } + + void add_rex() { + if (!(m_flags & kSetRex)) { + set(REX()); + } + } + + void set_vex_modrm_and_rex_for_rip_plus_s32(uint8_t reg, + s32 offset, + VEX3::LeadingBytes lb = VEX3::LeadingBytes::P_0F, + bool rex_w = false) { + bool rex_r = false; + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 0; + modrm.reg_op = reg; + modrm.rm = 5; // use the RIP addressing mode + set(modrm); + + if (rex_w || lb != VEX3::LeadingBytes::P_0F) { + // need three byte version + set(VEX3(rex_w, rex_r, false, false, lb)); + } else { + ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f + ASSERT(!rex_w); + set(VEX2(rex_r)); + } + + set_disp(Imm(4, offset)); + } + + /*! + * Set up modrm and rex for the commonly used immediate displacement indexing mode. + */ + void set_modrm_rex_sib_for_reg_reg_disp(uint8_t reg, uint8_t mod, uint8_t rm, bool rex_w) { + ModRM modrm; + + bool rex_r = false; + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + modrm.reg_op = reg; + + modrm.mod = mod; + + modrm.rm = 4; // use sib + + SIB sib; + sib.scale = 0; + sib.index = 4; + bool rex_b = false; + if (rm >= 8) { + rex_b = true; + rm -= 8; + } + + sib.base = rm; + + set(modrm); + set(sib); + + if (rex_r || rex_w || rex_b) { + set(REX(rex_w, rex_r, false, rex_b)); + } + } + + /*! + * Get the position of the disp immediate relative to the start of the instruction + */ + int offset_of_disp() const { + if (m_flags & kIsNull) + return 0; + ASSERT(m_flags & kSetDispImm); + int offset = 0; + offset += n_vex; + if (m_flags & kSetRex) + offset++; + offset++; // opcode + if (m_flags & kOp2Set) + offset++; + if (m_flags & kOp3Set) + offset++; + if (m_flags & kSetModrm) + offset++; + if (m_flags & kSetSib) + offset++; + return offset; + } + + /*! 
+ * Get the position of the imm immediate relative to the start of the instruction + */ + int offset_of_imm() const { + if (m_flags & kIsNull) + return 0; + ASSERT(m_flags & kSetImm); + int offset = 0; + offset += n_vex; + if (m_flags & kSetRex) + offset++; + offset++; // opcode + if (m_flags & kOp2Set) + offset++; + if (m_flags & kOp3Set) + offset++; + if (m_flags & kSetModrm) + offset++; + if (m_flags & kSetSib) + offset++; + if (m_flags & kSetDispImm) + offset += disp.size; + return offset; + } + + uint8_t emit(uint8_t* buffer) const { + if (m_flags & kIsNull) + return 0; + uint8_t count = 0; + + for (int i = 0; i < n_vex; i++) { + buffer[count++] = vex[i]; + } + + if (m_flags & kSetRex) { + buffer[count++] = m_rex; + } + + buffer[count++] = op; + + if (m_flags & kOp2Set) { + buffer[count++] = op2; + } + + if (m_flags & kOp3Set) { + buffer[count++] = op3; + } + + if (m_flags & kSetModrm) { + buffer[count++] = m_modrm; + } + + if (m_flags & kSetSib) { + buffer[count++] = m_sib; + } + + if (m_flags & kSetDispImm) { + for (int i = 0; i < disp.size; i++) { + buffer[count++] = disp.v_arr[i]; + } + } + + if (m_flags & kSetImm) { + for (int i = 0; i < imm.size; i++) { + buffer[count++] = imm.v_arr[i]; + } + } + return count; + } + + uint8_t length() const { + if (m_flags & kIsNull) + return 0; + uint8_t count = 0; + + count += n_vex; + + if (m_flags & kSetRex) { + count++; + } + + count++; + + if (m_flags & kOp2Set) { + count++; + } + + if (m_flags & kOp3Set) { + count++; + } + + if (m_flags & kSetModrm) { + count++; + } + + if (m_flags & kSetSib) { + count++; + } + + if (m_flags & kSetDispImm) { + for (int i = 0; i < disp.size; i++) { + count++; + } + } + + if (m_flags & kSetImm) { + for (int i = 0; i < imm.size; i++) { + count++; + } + } + return count; + } }; + +#ifdef __aarch64__ +using Instruction = InstructionARM64; +#else +using Instruction = InstructionX86; +#endif + } // namespace emitter diff --git a/goalc/emitter/InstructionARM64.h b/goalc/emitter/InstructionARM64.h deleted file mode 100644 index a39d5bb67a3..00000000000 --- a/goalc/emitter/InstructionARM64.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifdef __aarch64__ - -#pragma once - -#include -#include "Instruction.h" - -namespace emitter { -struct InstructionARM64 : Instruction { - // The ARM instruction stream is a sequence of word-aligned words. Each ARM instruction is a single 32-bit word in that stream. - // The encoding of an ARM instruction is: - // TODO - // https://iitd-plos.github.io/col718/ref/arm-instructionset.pdf - u32 instruction_encoding; - - InstructionARM64(u32 encoding) : instruction_encoding(encoding) {} - - uint8_t emit(uint8_t* buffer) const override { - memcpy(buffer, &instruction_encoding, 4); - return 4; - } - - uint8_t length() const override { - return 4; - } - -}; -} // namespace emitter -#endif \ No newline at end of file diff --git a/goalc/emitter/InstructionX86.h b/goalc/emitter/InstructionX86.h deleted file mode 100644 index 3eda421f318..00000000000 --- a/goalc/emitter/InstructionX86.h +++ /dev/null @@ -1,1011 +0,0 @@ -#pragma once - -#include "Instruction.h" - -#include "common/util/Assert.h" - -namespace emitter { -/*! - * The ModRM byte - */ -struct ModRM { - uint8_t mod; - uint8_t reg_op; - uint8_t rm; - - uint8_t operator()() const { return (mod << 6) | (reg_op << 3) | (rm << 0); } -}; - -/*! - * The SIB Byte - */ -struct SIB { - uint8_t scale, index, base; - - uint8_t operator()() const { return (scale << 6) | (index << 3) | (base << 0); } -}; - -/*! 
- * An Immediate (either imm or disp) - */ -struct Imm { - Imm() = default; - Imm(uint8_t sz, uint64_t v) : size(sz), value(v) {} - uint8_t size; - union { - uint64_t value; - uint8_t v_arr[8]; - }; -}; - -/*! - * The REX prefix byte - */ -struct REX { - explicit REX(bool w = false, bool r = false, bool x = false, bool b = false) - : W(w), R(r), X(x), B(b) {} - // W - 64-bit operands - // R - reg extension - // X - SIB i extnsion - // B - other extension - bool W, R, X, B; - uint8_t operator()() const { return (1 << 6) | (W << 3) | (R << 2) | (X << 1) | (B << 0); } -}; - -enum class VexPrefix : u8 { P_NONE = 0, P_66 = 1, P_F3 = 2, P_F2 = 3 }; - -/*! - * The "VEX" 3-byte format for AVX instructions - */ -struct VEX3 { - bool W, R, X, B; - enum class LeadingBytes : u8 { P_INVALID = 0, P_0F = 1, P_0F_38 = 2, P_0F_3A = 3 } leading_bytes; - u8 reg_id; - VexPrefix prefix; - bool L; - - u8 emit(u8 byte) const { - if (byte == 0) { - return 0b11000100; - } else if (byte == 1) { - u8 result = 0; - result |= ((!R) << 7); - result |= ((!X) << 6); - result |= ((!B) << 5); - result |= (0b11111 & u8(leading_bytes)); - return result; - } else if (byte == 2) { - u8 result = 0; - result |= (W << 7); // this may be inverted? - result |= ((~reg_id) & 0b1111) << 3; - result |= (L << 2); - result |= (u8(prefix) & 0b11); - return result; - } else { - ASSERT(false); - return -1; - } - } - - VEX3(bool w, - bool r, - bool x, - bool b, - LeadingBytes _leading_bytes, - u8 _reg_id = 0, - VexPrefix _prefix = VexPrefix::P_NONE, - bool l = false) - : W(w), - R(r), - X(x), - B(b), - leading_bytes(_leading_bytes), - reg_id(_reg_id), - prefix(_prefix), - L(l) {} -}; - -struct VEX2 { - bool R; - u8 reg_id; - VexPrefix prefix; - bool L; - - u8 emit(u8 byte) const { - if (byte == 0) { - return 0b11000101; - } else if (byte == 1) { - u8 result = 0; - result |= ((!R) << 7); - result |= ((~reg_id) & 0b1111) << 3; - result |= (L << 2); - result |= (u8(prefix) & 0b11); - return result; - } else { - ASSERT(false); - return -1; - } - } - - VEX2(bool r, u8 _reg_id = 0, VexPrefix _prefix = VexPrefix::P_NONE, bool l = false) - : R(r), reg_id(_reg_id), prefix(_prefix), L(l) {} -}; - -struct InstructionX86 : Instruction { - enum Flags { - kOp2Set = (1 << 0), - kOp3Set = (1 << 1), - kIsNull = (1 << 2), - kSetRex = (1 << 3), - kSetModrm = (1 << 4), - kSetSib = (1 << 5), - kSetDispImm = (1 << 6), - kSetImm = (1 << 7), - }; - - InstructionX86(u8 opcode) : op(opcode) {} - - u8 op; - - u8 m_flags = 0; - - u8 op2; - - u8 op3; - - u8 n_vex = 0; - u8 vex[3] = {0, 0, 0}; - - // the rex byte - u8 m_rex = 0; - - // the modrm byte - u8 m_modrm = 0; - - // the sib byte - u8 m_sib = 0; - - // the displacement - Imm disp; - - // the immediate - Imm imm; - - /*! - * Move opcode byte 0 to before the rex prefix. 
- */ - void swap_op0_rex() { - if (!(m_flags & kSetRex)) - return; - auto temp = op; - op = m_rex; - m_rex = temp; - } - - void set(REX r) { - m_rex = r(); - m_flags |= kSetRex; - } - - void set(ModRM modrm) { - m_modrm = modrm(); - m_flags |= kSetModrm; - } - - void set(SIB sib) { - m_sib = sib(); - m_flags |= kSetSib; - } - - void set(VEX3 vex3) { - n_vex = 3; - for (int i = 0; i < n_vex; i++) { - vex[i] = vex3.emit(i); - } - } - - void set(VEX2 vex2) { - n_vex = 2; - for (int i = 0; i < n_vex; i++) { - vex[i] = vex2.emit(i); - } - } - - void set_disp(Imm i) { - disp = i; - m_flags |= kSetDispImm; - } - - void set(Imm i) { - imm = i; - m_flags |= kSetImm; - } - - void set_op2(uint8_t b) { - m_flags |= kOp2Set; - op2 = b; - } - - void set_op3(uint8_t b) { - m_flags |= kOp3Set; - op3 = b; - } - - int get_imm_size() const { - if (m_flags & kSetImm) { - return imm.size; - } else { - return 0; - } - } - - int get_disp_size() const { - if (m_flags & kSetDispImm) { - return disp.size; - } else { - return 0; - } - } - - /*! - * Set modrm and rex as needed for two regs. - */ - void set_modrm_and_rex(uint8_t reg, uint8_t rm, uint8_t mod, bool rex_w = false) { - bool rex_b = false, rex_r = false; - - if (rm >= 8) { - rm -= 8; - rex_b = true; - } - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = mod; - modrm.reg_op = reg; - modrm.rm = rm; - - set(modrm); - - if (rex_b || rex_w || rex_r) { - set(REX(rex_w, rex_r, false, rex_b)); - } - } - - void set_vex_modrm_and_rex(uint8_t reg, - uint8_t rm, - VEX3::LeadingBytes lb, - uint8_t vex_reg = 0, - bool rex_w = false, - VexPrefix prefix = VexPrefix::P_NONE) { - bool rex_b = false, rex_r = false; - - if (rm >= 8) { - rm -= 8; - rex_b = true; - } - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = 3; - modrm.reg_op = reg; - modrm.rm = rm; - - set(modrm); - if (rex_b || rex_w || lb != VEX3::LeadingBytes::P_0F) { - // need three byte version - set(VEX3(rex_w, rex_r, false, rex_b, lb, vex_reg, prefix)); - } else { - ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f - ASSERT(!rex_b); - ASSERT(!rex_w); - set(VEX2(rex_r, vex_reg, prefix)); - } - } - - /*! - * Set VEX prefix for REX as needed for two registers. - */ - void set_vex_modrm_and_rex(uint8_t reg, - uint8_t rm, - uint8_t mod, - VEX3::LeadingBytes lb, - bool rex_w = false) { - bool rex_b = false; - bool rex_r = false; - if (rm >= 8) { - rm -= 8; - rex_b = true; - } - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = mod; - modrm.reg_op = reg; - modrm.rm = rm; - set(modrm); - if (rex_b || rex_w || lb != VEX3::LeadingBytes::P_0F) { - // need three byte version - set(VEX3(rex_w, rex_r, false, rex_b, lb)); - } else { - // can get away with two byte version - ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f - ASSERT(!rex_b); - ASSERT(!rex_w); - set(VEX2(rex_r)); - } - } - - void set_modrm_and_rex_for_reg_plus_reg_plus_s8(uint8_t reg, - uint8_t addr1, - uint8_t addr2, - s8 offset, - bool rex_w) { - bool rex_b = false, rex_r = false, rex_x = false; - bool addr1_ext = false; - bool addr2_ext = false; - - if (addr1 >= 8) { - addr1 -= 8; - addr1_ext = true; - } - - if (addr2 >= 8) { - addr2 -= 8; - addr2_ext = true; - } - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = 1; // no disp - modrm.rm = 4; // sib! 
- modrm.reg_op = reg; - - SIB sib; - sib.scale = 0; - - Imm imm2(1, offset); - - // default addr1 in index - if (addr1 == 4) { - sib.index = addr2; - sib.base = addr1; - rex_x = addr2_ext; - rex_b = addr1_ext; - } else { - // addr1 in index - sib.index = addr1; - sib.base = addr2; - rex_x = addr1_ext; - rex_b = addr2_ext; - } - ASSERT(sib.index != 4); - - if (rex_b || rex_w || rex_r || rex_x) { - set(REX(rex_w, rex_r, rex_x, rex_b)); - } - - set(modrm); - set(sib); - set_disp(imm2); - } - - void set_vex_modrm_and_rex_for_reg_plus_reg_plus_s8(uint8_t reg, - uint8_t addr1, - uint8_t addr2, - s8 offset, - VEX3::LeadingBytes lb, - bool rex_w) { - bool rex_b = false, rex_r = false, rex_x = false; - bool addr1_ext = false; - bool addr2_ext = false; - - if (addr1 >= 8) { - addr1 -= 8; - addr1_ext = true; - } - - if (addr2 >= 8) { - addr2 -= 8; - addr2_ext = true; - } - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = 1; // no disp - modrm.rm = 4; // sib! - modrm.reg_op = reg; - - SIB sib; - sib.scale = 0; - - Imm imm2(1, offset); - - // default addr1 in index - if (addr1 == 4) { - sib.index = addr2; - sib.base = addr1; - rex_x = addr2_ext; - rex_b = addr1_ext; - } else { - // addr1 in index - sib.index = addr1; - sib.base = addr2; - rex_x = addr1_ext; - rex_b = addr2_ext; - } - ASSERT(sib.index != 4); - - if (rex_b || rex_w || rex_x || lb != VEX3::LeadingBytes::P_0F) { - // need three byte version - set(VEX3(rex_w, rex_r, rex_x, rex_b, lb)); - } else { - ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f - ASSERT(!rex_b); - ASSERT(!rex_w); - ASSERT(!rex_x); - set(VEX2(rex_r)); - } - - set(modrm); - set(sib); - set_disp(imm2); - } - - void set_modrm_and_rex_for_reg_plus_reg_plus_s32(uint8_t reg, - uint8_t addr1, - uint8_t addr2, - s32 offset, - bool rex_w) { - bool rex_b = false, rex_r = false, rex_x = false; - bool addr1_ext = false; - bool addr2_ext = false; - - if (addr1 >= 8) { - addr1 -= 8; - addr1_ext = true; - } - - if (addr2 >= 8) { - addr2 -= 8; - addr2_ext = true; - } - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = 2; // no disp - modrm.rm = 4; // sib! - modrm.reg_op = reg; - - SIB sib; - sib.scale = 0; - - Imm imm2(4, offset); - - // default addr1 in index - if (addr1 == 4) { - sib.index = addr2; - sib.base = addr1; - rex_x = addr2_ext; - rex_b = addr1_ext; - } else { - // addr1 in index - sib.index = addr1; - sib.base = addr2; - rex_x = addr1_ext; - rex_b = addr2_ext; - } - ASSERT(sib.index != 4); - - if (rex_b || rex_w || rex_r || rex_x) { - set(REX(rex_w, rex_r, rex_x, rex_b)); - } - - set(modrm); - set(sib); - set_disp(imm2); - } - - void set_vex_modrm_and_rex_for_reg_plus_reg_plus_s32(uint8_t reg, - uint8_t addr1, - uint8_t addr2, - s32 offset, - VEX3::LeadingBytes lb, - bool rex_w) { - bool rex_b = false, rex_r = false, rex_x = false; - bool addr1_ext = false; - bool addr2_ext = false; - - if (addr1 >= 8) { - addr1 -= 8; - addr1_ext = true; - } - - if (addr2 >= 8) { - addr2 -= 8; - addr2_ext = true; - } - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = 2; // no disp - modrm.rm = 4; // sib! 
- modrm.reg_op = reg; - - SIB sib; - sib.scale = 0; - - Imm imm2(4, offset); - - // default addr1 in index - if (addr1 == 4) { - sib.index = addr2; - sib.base = addr1; - rex_x = addr2_ext; - rex_b = addr1_ext; - } else { - // addr1 in index - sib.index = addr1; - sib.base = addr2; - rex_x = addr1_ext; - rex_b = addr2_ext; - } - ASSERT(sib.index != 4); - - if (rex_b || rex_w || rex_x || lb != VEX3::LeadingBytes::P_0F) { - // need three byte version - set(VEX3(rex_w, rex_r, rex_x, rex_b, lb)); - } else { - ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f - ASSERT(!rex_b); - ASSERT(!rex_w); - ASSERT(!rex_x); - set(VEX2(rex_r)); - } - - set(modrm); - set(sib); - set_disp(imm2); - } - - void set_modrm_and_rex_for_reg_plus_reg_addr(uint8_t reg, - uint8_t addr1, - uint8_t addr2, - bool rex_w = false, - bool rex_always = false) { - bool rex_b = false, rex_r = false, rex_x = false; - bool addr1_ext = false; - bool addr2_ext = false; - - if (addr1 >= 8) { - addr1 -= 8; - addr1_ext = true; - } - - if (addr2 >= 8) { - addr2 -= 8; - addr2_ext = true; - } - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = 0; // no disp - modrm.rm = 4; // sib! - modrm.reg_op = reg; - - SIB sib; - sib.scale = 0; - - if (addr1 == 5 && addr2 == 5) { - sib.index = addr1; - sib.base = addr2; - rex_x = addr1_ext; - rex_b = addr2_ext; - modrm.mod = 1; - set_disp(Imm(1, 0)); - - } else { - // default addr1 in index - bool flipped = (addr1 == 4) || (addr2 == 5); - - if (flipped) { - sib.index = addr2; - sib.base = addr1; - rex_x = addr2_ext; - rex_b = addr1_ext; - } else { - // addr1 in index - sib.index = addr1; - sib.base = addr2; - rex_x = addr1_ext; - rex_b = addr2_ext; - } - ASSERT(sib.base != 5); - ASSERT(sib.index != 4); - } - - if (rex_b || rex_w || rex_r || rex_x || rex_always) { - set(REX(rex_w, rex_r, rex_x, rex_b)); - } - - set(modrm); - set(sib); - } - - void set_vex_modrm_and_rex_for_reg_plus_reg_addr(uint8_t reg, - uint8_t addr1, - uint8_t addr2, - VEX3::LeadingBytes lb, - bool rex_w = false) { - bool rex_b = false, rex_r = false, rex_x = false; - bool addr1_ext = false; - bool addr2_ext = false; - - if (addr1 >= 8) { - addr1 -= 8; - addr1_ext = true; - } - - if (addr2 >= 8) { - addr2 -= 8; - addr2_ext = true; - } - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = 0; // no disp - modrm.rm = 4; // sib! - modrm.reg_op = reg; - - SIB sib; - sib.scale = 0; - - if (addr1 == 5 && addr2 == 5) { - sib.index = addr1; - sib.base = addr2; - rex_x = addr1_ext; - rex_b = addr2_ext; - modrm.mod = 1; - set_disp(Imm(1, 0)); - - } else { - // default addr1 in index - bool flipped = (addr1 == 4) || (addr2 == 5); - - if (flipped) { - sib.index = addr2; - sib.base = addr1; - rex_x = addr2_ext; - rex_b = addr1_ext; - } else { - // addr1 in index - sib.index = addr1; - sib.base = addr2; - rex_x = addr1_ext; - rex_b = addr2_ext; - } - ASSERT(sib.base != 5); - ASSERT(sib.index != 4); - } - - if (rex_b || rex_w || rex_x || lb != VEX3::LeadingBytes::P_0F) { - // need three byte version - set(VEX3(rex_w, rex_r, rex_x, rex_b, lb)); - } else { - ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f - ASSERT(!rex_b); - ASSERT(!rex_w); - ASSERT(!rex_x); - set(VEX2(rex_r)); - } - - set(modrm); - set(sib); - } - - /*! - * Set modrm and rex as needed for two regs for an addressing mode. - * Will set SIB if R12 or RSP indexing is used. 
- */ - void set_modrm_and_rex_for_reg_addr(uint8_t reg, uint8_t rm, bool rex_w = false) { - bool rex_b = false, rex_r = false; - - if (rm >= 8) { - rm -= 8; - rex_b = true; - } - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = 0; - modrm.reg_op = reg; - modrm.rm = rm; - - if (rm == 4) { - SIB sib; - sib.scale = 0; - sib.base = 4; - sib.index = 4; - - set(sib); - } - - if (rm == 5) { - modrm.mod = 1; // 1 byte imm - set_disp(Imm(1, 0)); - } - - set(modrm); - if (rex_b || rex_w || rex_r) { - set(REX(rex_w, rex_r, false, rex_b)); - } - } - - void set_modrm_and_rex_for_rip_plus_s32(uint8_t reg, s32 offset, bool rex_w = false) { - bool rex_r = false; - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = 0; - modrm.reg_op = reg; - modrm.rm = 5; // use the RIP addressing mode - set(modrm); - - if (rex_r || rex_w) { - set(REX(rex_w, rex_r, false, false)); - } - - set_disp(Imm(4, offset)); - } - - void add_rex() { - if (!(m_flags & kSetRex)) { - set(REX()); - } - } - - void set_vex_modrm_and_rex_for_rip_plus_s32(uint8_t reg, - s32 offset, - VEX3::LeadingBytes lb = VEX3::LeadingBytes::P_0F, - bool rex_w = false) { - bool rex_r = false; - - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - - ModRM modrm; - modrm.mod = 0; - modrm.reg_op = reg; - modrm.rm = 5; // use the RIP addressing mode - set(modrm); - - if (rex_w || lb != VEX3::LeadingBytes::P_0F) { - // need three byte version - set(VEX3(rex_w, rex_r, false, false, lb)); - } else { - ASSERT(lb == VEX3::LeadingBytes::P_0F); // vex2 implies 0x0f - ASSERT(!rex_w); - set(VEX2(rex_r)); - } - - set_disp(Imm(4, offset)); - } - - /*! - * Set up modrm and rex for the commonly used immediate displacement indexing mode. - */ - void set_modrm_rex_sib_for_reg_reg_disp(uint8_t reg, uint8_t mod, uint8_t rm, bool rex_w) { - ModRM modrm; - - bool rex_r = false; - if (reg >= 8) { - reg -= 8; - rex_r = true; - } - modrm.reg_op = reg; - - modrm.mod = mod; - - modrm.rm = 4; // use sib - - SIB sib; - sib.scale = 0; - sib.index = 4; - bool rex_b = false; - if (rm >= 8) { - rex_b = true; - rm -= 8; - } - - sib.base = rm; - - set(modrm); - set(sib); - - if (rex_r || rex_w || rex_b) { - set(REX(rex_w, rex_r, false, rex_b)); - } - } - - /*! - * Get the position of the disp immediate relative to the start of the instruction - */ - int offset_of_disp() const { - if (m_flags & kIsNull) - return 0; - ASSERT(m_flags & kSetDispImm); - int offset = 0; - offset += n_vex; - if (m_flags & kSetRex) - offset++; - offset++; // opcode - if (m_flags & kOp2Set) - offset++; - if (m_flags & kOp3Set) - offset++; - if (m_flags & kSetModrm) - offset++; - if (m_flags & kSetSib) - offset++; - return offset; - } - - /*! 
- * Get the position of the imm immediate relative to the start of the instruction - */ - int offset_of_imm() const { - if (m_flags & kIsNull) - return 0; - ASSERT(m_flags & kSetImm); - int offset = 0; - offset += n_vex; - if (m_flags & kSetRex) - offset++; - offset++; // opcode - if (m_flags & kOp2Set) - offset++; - if (m_flags & kOp3Set) - offset++; - if (m_flags & kSetModrm) - offset++; - if (m_flags & kSetSib) - offset++; - if (m_flags & kSetDispImm) - offset += disp.size; - return offset; - } - - uint8_t emit(uint8_t* buffer) const override { - if (m_flags & kIsNull) - return 0; - uint8_t count = 0; - - for (int i = 0; i < n_vex; i++) { - buffer[count++] = vex[i]; - } - - if (m_flags & kSetRex) { - buffer[count++] = m_rex; - } - - buffer[count++] = op; - - if (m_flags & kOp2Set) { - buffer[count++] = op2; - } - - if (m_flags & kOp3Set) { - buffer[count++] = op3; - } - - if (m_flags & kSetModrm) { - buffer[count++] = m_modrm; - } - - if (m_flags & kSetSib) { - buffer[count++] = m_sib; - } - - if (m_flags & kSetDispImm) { - for (int i = 0; i < disp.size; i++) { - buffer[count++] = disp.v_arr[i]; - } - } - - if (m_flags & kSetImm) { - for (int i = 0; i < imm.size; i++) { - buffer[count++] = imm.v_arr[i]; - } - } - return count; - } - - uint8_t length() const override { - if (m_flags & kIsNull) - return 0; - uint8_t count = 0; - - count += n_vex; - - if (m_flags & kSetRex) { - count++; - } - - count++; - - if (m_flags & kOp2Set) { - count++; - } - - if (m_flags & kOp3Set) { - count++; - } - - if (m_flags & kSetModrm) { - count++; - } - - if (m_flags & kSetSib) { - count++; - } - - if (m_flags & kSetDispImm) { - for (int i = 0; i < disp.size; i++) { - count++; - } - } - - if (m_flags & kSetImm) { - for (int i = 0; i < imm.size; i++) { - count++; - } - } - return count; - } -}; -} // namespace emitter From 5581536c14ca21703ee8fc22c8d9f1af5be9a2de Mon Sep 17 00:00:00 2001 From: Tyler Wilding Date: Wed, 3 Jan 2024 17:43:32 -0500 Subject: [PATCH 06/12] goalc: start the process of splitting up architecture impl --- goalc/compiler/CodeGenerator.cpp | 18 +- goalc/compiler/IR.cpp | 10 +- goalc/compiler/compilation/Function.cpp | 4 +- goalc/emitter/CodeTester.cpp | 14 + goalc/emitter/IGenARM64.cpp | 284 ++---------- goalc/emitter/IGenX86.cpp | 581 +++++++----------------- goalc/emitter/Instruction.h | 43 +- goalc/emitter/ObjectGenerator.cpp | 27 +- goalc/emitter/Register.h | 19 +- test/test_CodeTester.cpp | 36 +- test/test_emitter.cpp | 12 +- 11 files changed, 300 insertions(+), 748 deletions(-) diff --git a/goalc/compiler/CodeGenerator.cpp b/goalc/compiler/CodeGenerator.cpp index a39eb250283..51d7db76fe9 100644 --- a/goalc/compiler/CodeGenerator.cpp +++ b/goalc/compiler/CodeGenerator.cpp @@ -88,7 +88,7 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) { // count how many xmm's we have to backup int n_xmm_backups = 0; for (auto& saved_reg : allocs.used_saved_regs) { - if (saved_reg.is_xmm()) { + if (saved_reg.is_128bit_simd()) { n_xmm_backups++; } } @@ -105,7 +105,7 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) { // back up xmms int i = 0; for (auto& saved_reg : allocs.used_saved_regs) { - if (saved_reg.is_xmm()) { + if (saved_reg.is_128bit_simd()) { int offset = i * XMM_SIZE; m_gen.add_instr_no_ir(f_rec, IGen::store128_xmm128_reg_offset(RSP, saved_reg, offset), InstructionInfo::Kind::PROLOGUE); @@ -116,7 +116,7 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) { } else { // back up xmms (currently not aligned) for (auto& saved_reg : 
allocs.used_saved_regs) { - if (saved_reg.is_xmm()) { + if (saved_reg.is_128bit_simd()) { m_gen.add_instr_no_ir(f_rec, IGen::sub_gpr64_imm8s(RSP, XMM_SIZE), InstructionInfo::Kind::PROLOGUE); m_gen.add_instr_no_ir(f_rec, IGen::store128_gpr64_xmm128(RSP, saved_reg), @@ -183,12 +183,12 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) { m_gen.add_instr(IGen::load64_gpr64_plus_s32( op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE, RSP), i_rec); - } else if (op.reg.is_xmm() && op.reg_class == RegClass::FLOAT) { + } else if (op.reg.is_128bit_simd() && op.reg_class == RegClass::FLOAT) { // load xmm32 off of the stack m_gen.add_instr(IGen::load_reg_offset_xmm32( op.reg, RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE), i_rec); - } else if (op.reg.is_xmm() && + } else if (op.reg.is_128bit_simd() && (op.reg_class == RegClass::VECTOR_FLOAT || op.reg_class == RegClass::INT_128)) { m_gen.add_instr(IGen::load128_xmm128_reg_offset( op.reg, RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE), @@ -210,12 +210,12 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) { m_gen.add_instr(IGen::store64_gpr64_plus_s32( RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE, op.reg), i_rec); - } else if (op.reg.is_xmm() && op.reg_class == RegClass::FLOAT) { + } else if (op.reg.is_128bit_simd() && op.reg_class == RegClass::FLOAT) { // store xmm32 on the stack m_gen.add_instr(IGen::store_reg_offset_xmm32( RSP, op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE), i_rec); - } else if (op.reg.is_xmm() && + } else if (op.reg.is_128bit_simd() && (op.reg_class == RegClass::VECTOR_FLOAT || op.reg_class == RegClass::INT_128)) { m_gen.add_instr(IGen::store128_xmm128_reg_offset( RSP, op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE), @@ -254,7 +254,7 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) { int j = n_xmm_backups; for (int i = int(allocs.used_saved_regs.size()); i-- > 0;) { auto& saved_reg = allocs.used_saved_regs.at(i); - if (saved_reg.is_xmm()) { + if (saved_reg.is_128bit_simd()) { j--; int offset = j * XMM_SIZE; m_gen.add_instr_no_ir(f_rec, IGen::load128_xmm128_reg_offset(saved_reg, RSP, offset), @@ -268,7 +268,7 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) { } else { for (int i = int(allocs.used_saved_regs.size()); i-- > 0;) { auto& saved_reg = allocs.used_saved_regs.at(i); - if (saved_reg.is_xmm()) { + if (saved_reg.is_128bit_simd()) { m_gen.add_instr_no_ir(f_rec, IGen::load128_xmm128_gpr64(saved_reg, RSP), InstructionInfo::Kind::EPILOGUE); m_gen.add_instr_no_ir(f_rec, IGen::add_gpr64_imm8s(RSP, XMM_SIZE), diff --git a/goalc/compiler/IR.cpp b/goalc/compiler/IR.cpp index 3e9f3cc4c8e..37ec8ee8168 100644 --- a/goalc/compiler/IR.cpp +++ b/goalc/compiler/IR.cpp @@ -240,7 +240,7 @@ void IR_LoadSymbolPointer::do_codegen(emitter::ObjectGenerator* gen, auto dest_reg = get_reg(m_dest, allocs, irec); if (m_name == "#f") { static_assert(false_symbol_offset() == 0, "false symbol location"); - if (dest_reg.is_xmm()) { + if (dest_reg.is_128bit_simd()) { gen->add_instr(IGen::movq_xmm64_gpr64(dest_reg, gRegInfo.get_st_reg()), irec); } else { gen->add_instr(IGen::mov_gpr64_gpr64(dest_reg, gRegInfo.get_st_reg()), irec); @@ -862,7 +862,7 @@ RegAllocInstr IR_ConditionalBranch::to_rai() { void IR_ConditionalBranch::do_codegen(emitter::ObjectGenerator* gen, const AllocationResult& allocs, emitter::IR_Record irec) { - #ifndef __aarch64__ +#ifndef __aarch64__ Instruction jump_instr; jump_instr = InstructionX86(0); ASSERT(m_resolved); @@ -918,9 
+918,9 @@ void IR_ConditionalBranch::do_codegen(emitter::ObjectGenerator* gen, auto jump_rec = gen->add_instr(jump_instr, irec); gen->link_instruction_jump(jump_rec, gen->get_future_ir_record_in_same_func(irec, label.idx)); - #else - // TODO - ARM64 - #endif +#else +// TODO - ARM64 +#endif } ///////////////////// diff --git a/goalc/compiler/compilation/Function.cpp b/goalc/compiler/compilation/Function.cpp index 1fb519a7fb5..b781f5bc333 100644 --- a/goalc/compiler/compilation/Function.cpp +++ b/goalc/compiler/compilation/Function.cpp @@ -591,7 +591,7 @@ Val* Compiler::compile_real_function_call(const goos::Object& form, auto cc = get_function_calling_convention(function->type(), m_ts); RegClass ret_reg_class = RegClass::GPR_64; - if (cc.return_reg && cc.return_reg->is_xmm()) { + if (cc.return_reg && cc.return_reg->is_128bit_simd()) { ret_reg_class = RegClass::INT_128; } @@ -625,7 +625,7 @@ Val* Compiler::compile_real_function_call(const goos::Object& form, const auto& arg = args.at(i); auto reg = cc.arg_regs.at(i); arg_outs.push_back( - env->make_ireg(arg->type(), reg.is_xmm() ? RegClass::INT_128 : RegClass::GPR_64)); + env->make_ireg(arg->type(), reg.is_128bit_simd() ? RegClass::INT_128 : RegClass::GPR_64)); arg_outs.back()->mark_as_settable(); env->emit_ir(form, arg_outs.back(), arg); } diff --git a/goalc/emitter/CodeTester.cpp b/goalc/emitter/CodeTester.cpp index 7d104d74a40..f2dd8d9cf91 100644 --- a/goalc/emitter/CodeTester.cpp +++ b/goalc/emitter/CodeTester.cpp @@ -67,11 +67,18 @@ void CodeTester::emit_return() { * Pops RSP always, which is weird, but doesn't cause issues. */ void CodeTester::emit_pop_all_gprs(bool exclude_rax) { +#ifndef __aarch64__ for (int i = 16; i-- > 0;) { if (i != RAX || !exclude_rax) { emit(IGen::pop_gpr64(i)); } } +#else + // TODO find uses for excluding RAX + for (int i = 0; i < 32; i++) { + emit(IGen::pop_gpr64(i)); + } +#endif } /*! @@ -79,11 +86,18 @@ void CodeTester::emit_pop_all_gprs(bool exclude_rax) { * Pushes RSP always, which is weird, but doesn't cause issues. */ void CodeTester::emit_push_all_gprs(bool exclude_rax) { +#ifndef __aarch64__ for (int i = 0; i < 16; i++) { if (i != RAX || !exclude_rax) { emit(IGen::push_gpr64(i)); } } +#else + // TODO find uses for excluding RAX + for (int i = 0; i < 32; i++) { + emit(IGen::push_gpr64(i)); + } +#endif } /*! diff --git a/goalc/emitter/IGenARM64.cpp b/goalc/emitter/IGenARM64.cpp index 2363ac6123c..03e75e5be0a 100644 --- a/goalc/emitter/IGenARM64.cpp +++ b/goalc/emitter/IGenARM64.cpp @@ -1,74 +1,51 @@ -#include + +#include "goalc/emitter/Instruction.h" #ifdef __aarch64__ +#include #include "IGen.h" +// https://armconverter.com/?code=ret +// https://developer.arm.com/documentation/ddi0487/latest + namespace emitter { namespace IGen { //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; // MOVES //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -/*! - * Move data from src to dst. Moves all 64-bits of the GPR. - */ + Instruction mov_gpr64_gpr64(Register dst, Register src) { return Instruction(0b0); } -/*! - * Move a 64-bit constant into a register. - */ Instruction mov_gpr64_u64(Register dst, uint64_t val) { return Instruction(0b0); } -/*! - * Move a 32-bit constant into a register. Zeros the upper 32 bits. - */ Instruction mov_gpr64_u32(Register dst, uint64_t val) { return Instruction(0b0); } -/*! - * Move a signed 32-bit constant into a register. Sign extends for the upper 32 bits. - * When possible prefer mov_gpr64_u32. (use this only for negative values...) 
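As a sketch of where these stubs are headed (not something this patch commits to), a 64-bit register-to-register move on AArch64 is the ORR alias, so mov_gpr64_gpr64 could eventually be filled in like the following, assuming the aarch64 Instruction alias keeps wrapping a raw 32-bit encoding and hw_id() maps to the architectural register number:

  Instruction mov_gpr64_gpr64(Register dst, Register src) {
    ASSERT(dst.is_gpr());
    ASSERT(src.is_gpr());
    // MOV Xd, Xm is an alias of ORR Xd, XZR, Xm
    u32 encoding = 0xAA0003E0;
    encoding |= (u32(src.hw_id()) & 0x1f) << 16;  // Rm
    encoding |= (u32(dst.hw_id()) & 0x1f);        // Rd
    return Instruction(encoding);
  }
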
- * This is always bigger than mov_gpr64_u32, but smaller than a mov_gpr_u64. - */ Instruction mov_gpr64_s32(Register dst, int64_t val) { return Instruction(0b0); } -/*! - * Move 32-bits of xmm to 32 bits of gpr (no sign extension). - */ Instruction movd_gpr32_xmm32(Register dst, Register src) { return Instruction(0b0); } -/*! - * Move 32-bits of gpr to 32-bits of xmm (no sign extension) - */ Instruction movd_xmm32_gpr32(Register dst, Register src) { return Instruction(0b0); } -/*! - * Move 64-bits of xmm to 64 bits of gpr (no sign extension). - */ Instruction movq_gpr64_xmm64(Register dst, Register src) { return Instruction(0b0); } -/*! - * Move 64-bits of gpr to 64-bits of xmm (no sign extension) - */ Instruction movq_xmm64_gpr64(Register dst, Register src) { return Instruction(0b0); } -/*! - * Move 32-bits between xmm's - */ Instruction mov_xmm32_xmm32(Register dst, Register src) { return Instruction(0b0); } @@ -80,11 +57,6 @@ Instruction mov_xmm32_xmm32(Register dst, Register src) { // GOAL Loads and Stores //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -/*! - * movsx dst, BYTE PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. - */ Instruction load8s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { return Instruction(0b0); } @@ -121,11 +93,6 @@ Instruction store8_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, return Instruction(0b0); } -/*! - * movzx dst, BYTE PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. - */ Instruction load8u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { return Instruction(0b0); } @@ -144,11 +111,6 @@ Instruction load8u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, return Instruction(0b0); } -/*! - * movsx dst, WORD PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. - */ Instruction load16s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { return Instruction(0b0); } @@ -185,11 +147,6 @@ Instruction load16s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, return Instruction(0b0); } -/*! - * movzx dst, WORD PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. - */ Instruction load16u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { return Instruction(0b0); } @@ -208,11 +165,6 @@ Instruction load16u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, return Instruction(0b0); } -/*! - * movsxd dst, DWORD PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. - */ Instruction load32s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { return Instruction(0b0); } @@ -249,11 +201,6 @@ Instruction store32_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, return Instruction(0b0); } -/*! - * movzxd dst, DWORD PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. - */ Instruction load32u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { return Instruction(0b0); } @@ -272,11 +219,6 @@ Instruction load32u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, return Instruction(0b0); } -/*! - * mov dst, QWORD PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. 
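The register-plus-register loads above have direct AArch64 counterparts (LDRB/LDRSB, LDRH/LDRSH, LDRSW, and LDR with a register offset). A hedged sketch for the 64-bit case, under the same assumptions as the other stubs (raw 32-bit encodings, hw_id() equal to the architectural register number):

  Instruction load64_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) {
    ASSERT(dst.is_gpr());
    ASSERT(addr1.is_gpr());
    ASSERT(addr2.is_gpr());
    // LDR Xt, [Xn, Xm] - register-offset load, no extend/shift
    u32 encoding = 0xF8606800;
    encoding |= (u32(addr2.hw_id()) & 0x1f) << 16;  // Rm
    encoding |= (u32(addr1.hw_id()) & 0x1f) << 5;   // Rn
    encoding |= (u32(dst.hw_id()) & 0x1f);          // Rt
    return Instruction(encoding);
  }
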
- */ Instruction load64_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { return Instruction(0b0); } @@ -325,10 +267,6 @@ Instruction load_goal_xmm128(Register dst, Register addr, Register off, int offs return Instruction(0b0); } -/*! - * Load memory at addr + offset, where addr is a GOAL pointer and off is the offset register. - * This will pick the appropriate fancy addressing mode instruction. - */ Instruction load_goal_gpr(Register dst, Register addr, Register off, @@ -422,13 +360,20 @@ Instruction load_reg_offset_xmm32(Register xmm_dest, Register base, s64 offset) } //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -// LOADS n' STORES - XMM128 +// LOADS n' STORES - SIMD (128-bit, QWORDS) //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -/*! - * Store a 128-bit xmm into an address stored in a register, no offset - */ -Instruction store128_gpr64_xmm128(Register gpr_addr, Register xmm_value) { +Instruction store128_gpr64_simd_reg(Register gpr_addr, Register simd_reg) { + // STR Qs, [Xd] + // ASSERT(gpr_addr.is_gpr()); + // ASSERT(simd_reg.is_128bit_simd()); + // return Instruction(0b11111001); + // InstructionX86 instr(0x66); + // instr.set_op2(0x0f); + // instr.set_op3(0x7f); + // instr.set_modrm_and_rex_for_reg_addr(xmm_value.hw_id(), gpr_addr.hw_id(), false); + // instr.swap_op0_rex(); + // return instr; return Instruction(0b0); } @@ -535,9 +480,6 @@ Instruction load64_gpr64_plus_s32(Register dst_reg, int32_t offset, Register src return Instruction(0b0); } -/*! - * Store 64-bits from gpr into memory located at 64-bit reg + 32-bit signed offset. - */ Instruction store64_gpr64_plus_s32(Register addr, int32_t offset, Register value) { return Instruction(0b0); } @@ -545,37 +487,30 @@ Instruction store64_gpr64_plus_s32(Register addr, int32_t offset, Register value //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; // FUNCTION STUFF //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -/*! - * Function return. Pops the 64-bit return address (real) off the stack and jumps to it. - */ + Instruction ret() { - return InstructionARM64(0b11010110010111110000001111000000); + // pg. 1850 + return Instruction(0b11010110010111110000001111000000); } -/*! - * Instruction to push gpr (64-bits) onto the stack - */ Instruction push_gpr64(Register reg) { - return Instruction(0b0); + // pg. 1998 + ASSERT(reg.is_gpr()); + // TODO - is hw_id needed? + return Instruction(0b11111000001); // TODO - finish } -/*! - * Instruction to pop 64 bit gpr from the stack - */ Instruction pop_gpr64(Register reg) { - return Instruction(0b0); + // pg. 1998 + ASSERT(reg.is_gpr()); + // TODO - is hw_id needed? + return Instruction(0b11111000011); // TODO - finish } -/*! - * Call a function stored in a 64-bit gpr - */ Instruction call_r64(Register reg_) { return Instruction(0b0); } -/*! - * Jump to an x86-64 address stored in a 64-bit gpr. - */ Instruction jmp_r64(Register reg_) { return Instruction(0b0); } @@ -615,25 +550,14 @@ Instruction sub_gpr64_gpr64(Register dst, Register src) { return Instruction(0b0); } -/*! - * Multiply gprs (32-bit, signed). - * (Note - probably worth doing imul on gpr64's to implement the EE's unsigned multiply) - */ Instruction imul_gpr32_gpr32(Register dst, Register src) { return Instruction(0b0); } -/*! - * Multiply gprs (64-bit, signed). - * DANGER - this treats all operands as 64-bit. This is not like the EE. - */ Instruction imul_gpr64_gpr64(Register dst, Register src) { return Instruction(0b0); } -/*! 
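For the push_gpr64 / pop_gpr64 stubs above: AArch64 has no single-register push or pop, and SP must stay 16-byte aligned, so the usual substitute is a pre-indexed store and a post-indexed load that each burn a full 16-byte slot (STP/LDP pairs would halve that). A sketch, assuming hw_id() maps to X0-X30:

  Instruction push_gpr64(Register reg) {
    ASSERT(reg.is_gpr());
    // STR Xt, [SP, #-16]!   (0xF81F0FE0 when Xt = X0)
    return Instruction(0xF81F0FE0 | (u32(reg.hw_id()) & 0x1f));
  }

  Instruction pop_gpr64(Register reg) {
    ASSERT(reg.is_gpr());
    // LDR Xt, [SP], #16     (0xF84107E0 when Xt = X0)
    return Instruction(0xF84107E0 | (u32(reg.hw_id()) & 0x1f));
  }
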
- * Divide (idiv, 32 bit) - */ Instruction idiv_gpr32(Register reg) { return Instruction(0b0); } @@ -642,25 +566,14 @@ Instruction unsigned_div_gpr32(Register reg) { return Instruction(0b0); } -/*! - * Convert doubleword to quadword for division. - */ Instruction cdq() { return Instruction(0b0); } -/*! - * Move from gpr32 to gpr64, with sign extension. - * Needed for multiplication/divsion madness. - */ Instruction movsx_r64_r32(Register dst, Register src) { return Instruction(0b0); } -/*! - * Compare gpr64. This sets the flags for the jumps. - * todo UNTESTED - */ Instruction cmp_gpr64_gpr64(Register a, Register b) { return Instruction(0b0); } @@ -669,30 +582,18 @@ Instruction cmp_gpr64_gpr64(Register a, Register b) { // BIT STUFF //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -/*! - * Or of two gprs - */ Instruction or_gpr64_gpr64(Register dst, Register src) { return Instruction(0b0); } -/*! - * And of two gprs - */ Instruction and_gpr64_gpr64(Register dst, Register src) { return Instruction(0b0); } -/*! - * Xor of two gprs - */ Instruction xor_gpr64_gpr64(Register dst, Register src) { return Instruction(0b0); } -/*! - * Bitwise not a gpr - */ Instruction not_gpr64(Register reg) { return Instruction(0b0); } @@ -701,44 +602,26 @@ Instruction not_gpr64(Register reg) { // SHIFTS //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -/*! - * Shift 64-bit gpr left by CL register - */ Instruction shl_gpr64_cl(Register reg) { return Instruction(0b0); } -/*! - * Shift 64-bit gpr right (logical) by CL register - */ Instruction shr_gpr64_cl(Register reg) { return Instruction(0b0); } -/*! - * Shift 64-bit gpr right (arithmetic) by CL register - */ Instruction sar_gpr64_cl(Register reg) { return Instruction(0b0); } -/*! - * Shift 64-ptr left (logical) by the constant shift amount "sa". - */ Instruction shl_gpr64_u8(Register reg, uint8_t sa) { return Instruction(0b0); } -/*! - * Shift 64-ptr right (logical) by the constant shift amount "sa". - */ Instruction shr_gpr64_u8(Register reg, uint8_t sa) { return Instruction(0b0); } -/*! - * Shift 64-ptr right (arithmetic) by the constant shift amount "sa". - */ Instruction sar_gpr64_u8(Register reg, uint8_t sa) { return Instruction(0b0); } @@ -747,79 +630,46 @@ Instruction sar_gpr64_u8(Register reg, uint8_t sa) { // CONTROL FLOW //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -/*! - * Jump, 32-bit constant offset. The offset is by default 0 and must be patched later. - */ Instruction jmp_32() { return Instruction(0b0); } -/*! - * Jump if equal. - */ Instruction je_32() { return Instruction(0b0); } -/*! - * Jump not equal. - */ Instruction jne_32() { return Instruction(0b0); } -/*! - * Jump less than or equal. - */ Instruction jle_32() { return Instruction(0b0); } -/*! - * Jump greater than or equal. - */ Instruction jge_32() { return Instruction(0b0); } -/*! - * Jump less than - */ Instruction jl_32() { return Instruction(0b0); } -/*! - * Jump greater than - */ Instruction jg_32() { return Instruction(0b0); } -/*! - * Jump below or equal - */ Instruction jbe_32() { return Instruction(0b0); } -/*! - * Jump above or equal - */ Instruction jae_32() { return Instruction(0b0); } -/*! - * Jump below - */ Instruction jb_32() { return Instruction(0b0); } -/*! - * Jump above - */ Instruction ja_32() { return Instruction(0b0); } @@ -828,9 +678,6 @@ Instruction ja_32() { // FLOAT MATH //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -/*! 
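The compare-then-jump scheme carries over to AArch64: CMP is an alias of SUBS XZR, Xn, Xm, and the jcc_32 family would become B.cond instructions whose branch offset is a 19-bit word count rather than an x86 rel32, so the later jump-patching step would need to scale and mask instead of writing a 4-byte immediate. A sketch of the compare, under the same encoding assumptions as the other stubs:

  Instruction cmp_gpr64_gpr64(Register a, Register b) {
    ASSERT(a.is_gpr());
    ASSERT(b.is_gpr());
    // CMP Xa, Xb is an alias of SUBS XZR, Xa, Xb (shifted register, shift amount 0)
    u32 encoding = 0xEB00001F;
    encoding |= (u32(b.hw_id()) & 0x1f) << 16;  // Rm
    encoding |= (u32(a.hw_id()) & 0x1f) << 5;   // Rn
    return Instruction(encoding);
  }
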
- * Compare two floats and set flag register for jump (ucomiss) - */ Instruction cmp_flt_flt(Register a, Register b) { return Instruction(0b0); } @@ -839,58 +686,34 @@ Instruction sqrts_xmm(Register dst, Register src) { return Instruction(0b0); } -/*! - * Multiply two floats in xmm's - */ Instruction mulss_xmm_xmm(Register dst, Register src) { return Instruction(0b0); } -/*! - * Divide two floats in xmm's - */ Instruction divss_xmm_xmm(Register dst, Register src) { return Instruction(0b0); } -/*! - * Subtract two floats in xmm's - */ Instruction subss_xmm_xmm(Register dst, Register src) { return Instruction(0b0); } -/*! - * Add two floats in xmm's - */ Instruction addss_xmm_xmm(Register dst, Register src) { return Instruction(0b0); } -/*! - * Floating point minimum. - */ Instruction minss_xmm_xmm(Register dst, Register src) { return Instruction(0b0); } -/*! - * Floating point maximum. - */ Instruction maxss_xmm_xmm(Register dst, Register src) { return Instruction(0b0); } -/*! - * Convert GPR int32 to XMM float (single precision) - */ Instruction int32_to_float(Register dst, Register src) { return Instruction(0b0); } -/*! - * Convert XMM float to GPR int32(single precision) (truncate) - */ Instruction float_to_int32(Register dst, Register src) { return Instruction(0b0); } @@ -905,11 +728,6 @@ Instruction nop() { // UTILITIES //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -/*! - * A "null" instruction. This Instruction does not generate any bytes - * but can be referred to by a label. Useful to insert in place of a real instruction - * if the real Instruction has been optimized out. - */ Instruction null() { return Instruction(0b0); } @@ -980,36 +798,10 @@ Instruction shuffle_vf(Register dst, Register src, u8 dx, u8 dy, u8 dz, u8 dw) { return Instruction(0b0); } -/* - Generic Swizzle (re-arrangment of packed FPs) operation, the control bytes are quite involved. - Here's a brief run-down: - - 8-bits / 4 groups of 2 bits - - Right-to-left, each group is used to determine which element in `src` gets copied into - `dst`'s element (W->X). 
- - GROUP OPTIONS - - 00b - Copy the least-significant element (X) - - 01b - Copy the second element (from the right) (Y) - - 10b - Copy the third element (from the right) (Z) - - 11b - Copy the most significant element (W) - Examples - ; xmm1 = (1.5, 2.5, 3.5, 4.5) (W,Z,Y,X in x86 land) - SHUFPS xmm1, xmm1, 0xff ; Copy the most significant element to all positions - > (1.5, 1.5, 1.5, 1.5) - SHUFPS xmm1, xmm1, 0x39 ; Rotate right - > (4.5, 1.5, 2.5, 3.5) - */ Instruction swizzle_vf(Register dst, Register src, u8 controlBytes) { return Instruction(0b0); } -/* - Splats a single element in 'src' to all elements in 'dst' - For example (pseudocode): - xmm1 = (1.5, 2.5, 3.5, 4.5) - xmm2 = (1, 2, 3, 4) - splat_vf(xmm1, xmm2, XMM_ELEMENT::X); - xmm1 = (4, 4, 4, 4) - */ Instruction splat_vf(Register dst, Register src, Register::VF_ELEMENT element) { return Instruction(0b0); } @@ -1089,15 +881,6 @@ Instruction parallel_bitwise_and(Register dst, Register src0, Register src1) { return Instruction(0b0); } -// Reminder - a word in MIPS = 32bits = a DWORD in x86 -// MIPS || x86 -// ----------------------- -// byte || byte -// halfword || word -// word || dword -// doubleword || quadword - -// -- Unpack High Data Instructions Instruction pextub_swapped(Register dst, Register src0, Register src1) { return Instruction(0b0); } @@ -1110,7 +893,6 @@ Instruction pextuw_swapped(Register dst, Register src0, Register src1) { return Instruction(0b0); } -// -- Unpack Low Data Instructions Instruction pextlb_swapped(Register dst, Register src0, Register src1) { return Instruction(0b0); } @@ -1123,32 +905,26 @@ Instruction pextlw_swapped(Register dst, Register src0, Register src1) { return Instruction(0b0); } -// Equal to than comparison as 16 bytes (8 bits) Instruction parallel_compare_e_b(Register dst, Register src0, Register src1) { return Instruction(0b0); } -// Equal to than comparison as 8 halfwords (16 bits) Instruction parallel_compare_e_h(Register dst, Register src0, Register src1) { return Instruction(0b0); } -// Equal to than comparison as 4 words (32 bits) Instruction parallel_compare_e_w(Register dst, Register src0, Register src1) { return Instruction(0b0); } -// Greater than comparison as 16 bytes (8 bits) Instruction parallel_compare_gt_b(Register dst, Register src0, Register src1) { return Instruction(0b0); } -// Greater than comparison as 8 halfwords (16 bits) Instruction parallel_compare_gt_h(Register dst, Register src0, Register src1) { return Instruction(0b0); } -// Greater than comparison as 4 words (32 bits) Instruction parallel_compare_gt_w(Register dst, Register src0, Register src1) { return Instruction(0b0); } diff --git a/goalc/emitter/IGenX86.cpp b/goalc/emitter/IGenX86.cpp index 1cbcf46c0ab..024be6d8b19 100644 --- a/goalc/emitter/IGenX86.cpp +++ b/goalc/emitter/IGenX86.cpp @@ -1,16 +1,13 @@ #ifndef __aarch64__ #include "IGen.h" -#include "goalc/emitter/InstructionX86.h" namespace emitter { namespace IGen { //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; // MOVES //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -/*! - * Move data from src to dst. Moves all 64-bits of the GPR. - */ + Instruction mov_gpr64_gpr64(Register dst, Register src) { ASSERT(dst.is_gpr()); ASSERT(src.is_gpr()); @@ -19,9 +16,6 @@ Instruction mov_gpr64_gpr64(Register dst, Register src) { return instr; } -/*! - * Move a 64-bit constant into a register. 
- */ Instruction mov_gpr64_u64(Register dst, uint64_t val) { ASSERT(dst.is_gpr()); bool rex_b = false; @@ -36,9 +30,6 @@ Instruction mov_gpr64_u64(Register dst, uint64_t val) { return instr; } -/*! - * Move a 32-bit constant into a register. Zeros the upper 32 bits. - */ Instruction mov_gpr64_u32(Register dst, uint64_t val) { ASSERT(val <= UINT32_MAX); ASSERT(dst.is_gpr()); @@ -57,11 +48,6 @@ Instruction mov_gpr64_u32(Register dst, uint64_t val) { return instr; } -/*! - * Move a signed 32-bit constant into a register. Sign extends for the upper 32 bits. - * When possible prefer mov_gpr64_u32. (use this only for negative values...) - * This is always bigger than mov_gpr64_u32, but smaller than a mov_gpr_u64. - */ Instruction mov_gpr64_s32(Register dst, int64_t val) { ASSERT(val >= INT32_MIN && val <= INT32_MAX); ASSERT(dst.is_gpr()); @@ -71,12 +57,9 @@ Instruction mov_gpr64_s32(Register dst, int64_t val) { return instr; } -/*! - * Move 32-bits of xmm to 32 bits of gpr (no sign extension). - */ Instruction movd_gpr32_xmm32(Register dst, Register src) { ASSERT(dst.is_gpr()); - ASSERT(src.is_xmm()); + ASSERT(src.is_128bit_simd()); InstructionX86 instr(0x66); instr.set_op2(0x0f); instr.set_op3(0x7e); @@ -85,11 +68,8 @@ Instruction movd_gpr32_xmm32(Register dst, Register src) { return instr; } -/*! - * Move 32-bits of gpr to 32-bits of xmm (no sign extension) - */ Instruction movd_xmm32_gpr32(Register dst, Register src) { - ASSERT(dst.is_xmm()); + ASSERT(dst.is_128bit_simd()); ASSERT(src.is_gpr()); InstructionX86 instr(0x66); instr.set_op2(0x0f); @@ -99,12 +79,9 @@ Instruction movd_xmm32_gpr32(Register dst, Register src) { return instr; } -/*! - * Move 64-bits of xmm to 64 bits of gpr (no sign extension). - */ Instruction movq_gpr64_xmm64(Register dst, Register src) { ASSERT(dst.is_gpr()); - ASSERT(src.is_xmm()); + ASSERT(src.is_128bit_simd()); InstructionX86 instr(0x66); instr.set_op2(0x0f); instr.set_op3(0x7e); @@ -113,11 +90,8 @@ Instruction movq_gpr64_xmm64(Register dst, Register src) { return instr; } -/*! - * Move 64-bits of gpr to 64-bits of xmm (no sign extension) - */ Instruction movq_xmm64_gpr64(Register dst, Register src) { - ASSERT(dst.is_xmm()); + ASSERT(dst.is_128bit_simd()); ASSERT(src.is_gpr()); InstructionX86 instr(0x66); instr.set_op2(0x0f); @@ -127,12 +101,9 @@ Instruction movq_xmm64_gpr64(Register dst, Register src) { return instr; } -/*! - * Move 32-bits between xmm's - */ Instruction mov_xmm32_xmm32(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); InstructionX86 instr(0xf3); instr.set_op2(0x0f); instr.set_op3(0x10); @@ -148,11 +119,6 @@ Instruction mov_xmm32_xmm32(Register dst, Register src) { // GOAL Loads and Stores //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -/*! - * movsx dst, BYTE PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. - */ Instruction load8s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { ASSERT(dst.is_gpr()); ASSERT(addr1.is_gpr()); @@ -258,11 +224,6 @@ Instruction store8_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, return instr; } -/*! - * movzx dst, BYTE PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. - */ Instruction load8u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { ASSERT(dst.is_gpr()); ASSERT(addr1.is_gpr()); @@ -313,11 +274,6 @@ Instruction load8u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, return instr; } -/*! 
- * movsx dst, WORD PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. - */ Instruction load16s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { ASSERT(dst.is_gpr()); ASSERT(addr1.is_gpr()); @@ -420,11 +376,6 @@ Instruction load16s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, return instr; } -/*! - * movzx dst, WORD PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. - */ Instruction load16u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { ASSERT(dst.is_gpr()); ASSERT(addr1.is_gpr()); @@ -475,11 +426,6 @@ Instruction load16u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, return instr; } -/*! - * movsxd dst, DWORD PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. - */ Instruction load32s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { ASSERT(dst.is_gpr()); ASSERT(addr1.is_gpr()); @@ -572,11 +518,6 @@ Instruction store32_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, return instr; } -/*! - * movzxd dst, DWORD PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. - */ Instruction load32u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { ASSERT(dst.is_gpr()); ASSERT(addr1.is_gpr()); @@ -623,11 +564,6 @@ Instruction load32u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, return instr; } -/*! - * mov dst, QWORD PTR [addr1 + addr2] - * addr1 and addr2 have to be different registers. - * Cannot use rsp. - */ Instruction load64_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { ASSERT(dst.is_gpr()); ASSERT(addr1.is_gpr()); @@ -793,10 +729,6 @@ Instruction load_goal_xmm128(Register dst, Register addr, Register off, int offs } } -/*! - * Load memory at addr + offset, where addr is a GOAL pointer and off is the offset register. - * This will pick the appropriate fancy addressing mode instruction. 
- */ Instruction load_goal_gpr(Register dst, Register addr, Register off, @@ -893,7 +825,7 @@ Instruction load_goal_gpr(Register dst, // LOADS n' STORES - XMM32 //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Instruction store32_xmm32_gpr64_plus_gpr64(Register addr1, Register addr2, Register xmm_value) { - ASSERT(xmm_value.is_xmm()); + ASSERT(xmm_value.is_128bit_simd()); ASSERT(addr1.is_gpr()); ASSERT(addr2.is_gpr()); @@ -907,7 +839,7 @@ Instruction store32_xmm32_gpr64_plus_gpr64(Register addr1, Register addr2, Regis } Instruction load32_xmm32_gpr64_plus_gpr64(Register xmm_dest, Register addr1, Register addr2) { - ASSERT(xmm_dest.is_xmm()); + ASSERT(xmm_dest.is_128bit_simd()); ASSERT(addr1.is_gpr()); ASSERT(addr2.is_gpr()); @@ -924,7 +856,7 @@ Instruction store32_xmm32_gpr64_plus_gpr64_plus_s8(Register addr1, Register addr2, Register xmm_value, s64 offset) { - ASSERT(xmm_value.is_xmm()); + ASSERT(xmm_value.is_128bit_simd()); ASSERT(addr1.is_gpr()); ASSERT(addr2.is_gpr()); ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); @@ -943,7 +875,7 @@ Instruction load32_xmm32_gpr64_plus_gpr64_plus_s8(Register xmm_dest, Register addr1, Register addr2, s64 offset) { - ASSERT(xmm_dest.is_xmm()); + ASSERT(xmm_dest.is_128bit_simd()); ASSERT(addr1.is_gpr()); ASSERT(addr2.is_gpr()); ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); @@ -962,7 +894,7 @@ Instruction store32_xmm32_gpr64_plus_gpr64_plus_s32(Register addr1, Register addr2, Register xmm_value, s64 offset) { - ASSERT(xmm_value.is_xmm()); + ASSERT(xmm_value.is_128bit_simd()); ASSERT(addr1.is_gpr()); ASSERT(addr2.is_gpr()); ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); @@ -1009,7 +941,7 @@ Instruction lea_reg_plus_off(Register dest, Register base, s64 offset) { } Instruction store32_xmm32_gpr64_plus_s32(Register base, Register xmm_value, s64 offset) { - ASSERT(xmm_value.is_xmm()); + ASSERT(xmm_value.is_128bit_simd()); ASSERT(base.is_gpr()); ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); InstructionX86 instr(0xf3); @@ -1022,7 +954,7 @@ Instruction store32_xmm32_gpr64_plus_s32(Register base, Register xmm_value, s64 } Instruction store32_xmm32_gpr64_plus_s8(Register base, Register xmm_value, s64 offset) { - ASSERT(xmm_value.is_xmm()); + ASSERT(xmm_value.is_128bit_simd()); ASSERT(base.is_gpr()); ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); InstructionX86 instr(0xf3); @@ -1038,7 +970,7 @@ Instruction load32_xmm32_gpr64_plus_gpr64_plus_s32(Register xmm_dest, Register addr1, Register addr2, s64 offset) { - ASSERT(xmm_dest.is_xmm()); + ASSERT(xmm_dest.is_128bit_simd()); ASSERT(addr1.is_gpr()); ASSERT(addr2.is_gpr()); ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); @@ -1054,7 +986,7 @@ Instruction load32_xmm32_gpr64_plus_gpr64_plus_s32(Register xmm_dest, } Instruction load32_xmm32_gpr64_plus_s32(Register xmm_dest, Register base, s64 offset) { - ASSERT(xmm_dest.is_xmm()); + ASSERT(xmm_dest.is_128bit_simd()); ASSERT(base.is_gpr()); ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); InstructionX86 instr(0xf3); @@ -1067,7 +999,7 @@ Instruction load32_xmm32_gpr64_plus_s32(Register xmm_dest, Register base, s64 of } Instruction load32_xmm32_gpr64_plus_s8(Register xmm_dest, Register base, s64 offset) { - ASSERT(xmm_dest.is_xmm()); + ASSERT(xmm_dest.is_128bit_simd()); ASSERT(base.is_gpr()); ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); InstructionX86 instr(0xf3); @@ -1107,7 +1039,7 @@ Instruction store_goal_xmm32(Register addr, Register xmm_value, Register off, s6 Instruction store_reg_offset_xmm32(Register base, Register xmm_value, s64 offset) { 
ASSERT(base.is_gpr()); - ASSERT(xmm_value.is_xmm()); + ASSERT(xmm_value.is_128bit_simd()); if (offset >= INT8_MIN && offset <= INT8_MAX) { return store32_xmm32_gpr64_plus_s8(base, xmm_value, offset); } else if (offset >= INT32_MIN && offset <= INT32_MAX) { @@ -1120,7 +1052,7 @@ Instruction store_reg_offset_xmm32(Register base, Register xmm_value, s64 offset Instruction load_reg_offset_xmm32(Register xmm_dest, Register base, s64 offset) { ASSERT(base.is_gpr()); - ASSERT(xmm_dest.is_xmm()); + ASSERT(xmm_dest.is_128bit_simd()); if (offset >= INT8_MIN && offset <= INT8_MAX) { return load32_xmm32_gpr64_plus_s8(xmm_dest, base, offset); } else if (offset >= INT32_MIN && offset <= INT32_MAX) { @@ -1135,12 +1067,9 @@ Instruction load_reg_offset_xmm32(Register xmm_dest, Register base, s64 offset) // LOADS n' STORES - XMM128 //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -/*! - * Store a 128-bit xmm into an address stored in a register, no offset - */ Instruction store128_gpr64_xmm128(Register gpr_addr, Register xmm_value) { ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_value.is_xmm()); + ASSERT(xmm_value.is_128bit_simd()); InstructionX86 instr(0x66); // InstructionX86 instr(0xf3); instr.set_op2(0x0f); @@ -1152,7 +1081,7 @@ Instruction store128_gpr64_xmm128(Register gpr_addr, Register xmm_value) { Instruction store128_gpr64_xmm128_s32(Register gpr_addr, Register xmm_value, s64 offset) { ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_value.is_xmm()); + ASSERT(xmm_value.is_128bit_simd()); ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); InstructionX86 instr(0x66); // InstructionX86 instr(0xf3); @@ -1166,7 +1095,7 @@ Instruction store128_gpr64_xmm128_s32(Register gpr_addr, Register xmm_value, s64 Instruction store128_gpr64_xmm128_s8(Register gpr_addr, Register xmm_value, s64 offset) { ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_value.is_xmm()); + ASSERT(xmm_value.is_128bit_simd()); ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); InstructionX86 instr(0x66); // InstructionX86 instr(0xf3); @@ -1180,7 +1109,7 @@ Instruction store128_gpr64_xmm128_s8(Register gpr_addr, Register xmm_value, s64 Instruction load128_xmm128_gpr64(Register xmm_dest, Register gpr_addr) { ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_dest.is_xmm()); + ASSERT(xmm_dest.is_128bit_simd()); InstructionX86 instr(0x66); // InstructionX86 instr(0xf3); instr.set_op2(0x0f); @@ -1192,7 +1121,7 @@ Instruction load128_xmm128_gpr64(Register xmm_dest, Register gpr_addr) { Instruction load128_xmm128_gpr64_s32(Register xmm_dest, Register gpr_addr, s64 offset) { ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_dest.is_xmm()); + ASSERT(xmm_dest.is_128bit_simd()); ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); InstructionX86 instr(0x66); // InstructionX86 instr(0xf3); @@ -1206,7 +1135,7 @@ Instruction load128_xmm128_gpr64_s32(Register xmm_dest, Register gpr_addr, s64 o Instruction load128_xmm128_gpr64_s8(Register xmm_dest, Register gpr_addr, s64 offset) { ASSERT(gpr_addr.is_gpr()); - ASSERT(xmm_dest.is_xmm()); + ASSERT(xmm_dest.is_128bit_simd()); ASSERT(offset >= INT8_MIN && offset <= INT8_MAX); InstructionX86 instr(0x66); // InstructionX86 instr(0xf3); @@ -1403,7 +1332,7 @@ Instruction static_addr(Register dst, s64 offset) { } Instruction static_load_xmm32(Register xmm_dest, s64 offset) { - ASSERT(xmm_dest.is_xmm()); + ASSERT(xmm_dest.is_128bit_simd()); ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); InstructionX86 instr(0xf3); @@ -1416,7 +1345,7 @@ Instruction static_load_xmm32(Register xmm_dest, s64 offset) { } Instruction static_store_xmm32(Register xmm_value, s64 offset) { 
- ASSERT(xmm_value.is_xmm()); + ASSERT(xmm_value.is_128bit_simd()); ASSERT(offset >= INT32_MIN && offset <= INT32_MAX); InstructionX86 instr(0xf3); @@ -1440,9 +1369,6 @@ Instruction load64_gpr64_plus_s32(Register dst_reg, int32_t offset, Register src return instr; } -/*! - * Store 64-bits from gpr into memory located at 64-bit reg + 32-bit signed offset. - */ Instruction store64_gpr64_plus_s32(Register addr, int32_t offset, Register value) { ASSERT(addr.is_gpr()); ASSERT(value.is_gpr()); @@ -1455,16 +1381,11 @@ Instruction store64_gpr64_plus_s32(Register addr, int32_t offset, Register value //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; // FUNCTION STUFF //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -/*! - * Function return. Pops the 64-bit return address (real) off the stack and jumps to it. - */ + Instruction ret() { return InstructionX86(0xc3); } -/*! - * Instruction to push gpr (64-bits) onto the stack - */ Instruction push_gpr64(Register reg) { ASSERT(reg.is_gpr()); if (reg.hw_id() >= 8) { @@ -1475,9 +1396,6 @@ Instruction push_gpr64(Register reg) { return InstructionX86(0x50 + reg.hw_id()); } -/*! - * Instruction to pop 64 bit gpr from the stack - */ Instruction pop_gpr64(Register reg) { ASSERT(reg.is_gpr()); if (reg.hw_id() >= 8) { @@ -1488,9 +1406,6 @@ Instruction pop_gpr64(Register reg) { return InstructionX86(0x58 + reg.hw_id()); } -/*! - * Call a function stored in a 64-bit gpr - */ Instruction call_r64(Register reg_) { ASSERT(reg_.is_gpr()); auto reg = reg_.hw_id(); @@ -1508,9 +1423,6 @@ Instruction call_r64(Register reg_) { return instr; } -/*! - * Jump to an x86-64 address stored in a 64-bit gpr. - */ Instruction jmp_r64(Register reg_) { ASSERT(reg_.is_gpr()); auto reg = reg_.hw_id(); @@ -1604,10 +1516,6 @@ Instruction sub_gpr64_gpr64(Register dst, Register src) { return instr; } -/*! - * Multiply gprs (32-bit, signed). - * (Note - probably worth doing imul on gpr64's to implement the EE's unsigned multiply) - */ Instruction imul_gpr32_gpr32(Register dst, Register src) { InstructionX86 instr(0xf); instr.set_op2(0xaf); @@ -1617,10 +1525,6 @@ Instruction imul_gpr32_gpr32(Register dst, Register src) { return instr; } -/*! - * Multiply gprs (64-bit, signed). - * DANGER - this treats all operands as 64-bit. This is not like the EE. - */ Instruction imul_gpr64_gpr64(Register dst, Register src) { InstructionX86 instr(0xf); instr.set_op2(0xaf); @@ -1630,9 +1534,6 @@ Instruction imul_gpr64_gpr64(Register dst, Register src) { return instr; } -/*! - * Divide (idiv, 32 bit) - */ Instruction idiv_gpr32(Register reg) { InstructionX86 instr(0xf7); ASSERT(reg.is_gpr()); @@ -1647,18 +1548,11 @@ Instruction unsigned_div_gpr32(Register reg) { return instr; } -/*! - * Convert doubleword to quadword for division. - */ Instruction cdq() { InstructionX86 instr(0x99); return instr; } -/*! - * Move from gpr32 to gpr64, with sign extension. - * Needed for multiplication/divsion madness. - */ Instruction movsx_r64_r32(Register dst, Register src) { InstructionX86 instr(0x63); ASSERT(dst.is_gpr()); @@ -1667,10 +1561,6 @@ Instruction movsx_r64_r32(Register dst, Register src) { return instr; } -/*! - * Compare gpr64. This sets the flags for the jumps. - * todo UNTESTED - */ Instruction cmp_gpr64_gpr64(Register a, Register b) { InstructionX86 instr(0x3b); ASSERT(a.is_gpr()); @@ -1683,9 +1573,6 @@ Instruction cmp_gpr64_gpr64(Register a, Register b) { // BIT STUFF //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -/*! 
- * Or of two gprs - */ Instruction or_gpr64_gpr64(Register dst, Register src) { InstructionX86 instr(0x0b); ASSERT(dst.is_gpr()); @@ -1694,9 +1581,6 @@ Instruction or_gpr64_gpr64(Register dst, Register src) { return instr; } -/*! - * And of two gprs - */ Instruction and_gpr64_gpr64(Register dst, Register src) { InstructionX86 instr(0x23); ASSERT(dst.is_gpr()); @@ -1705,9 +1589,6 @@ Instruction and_gpr64_gpr64(Register dst, Register src) { return instr; } -/*! - * Xor of two gprs - */ Instruction xor_gpr64_gpr64(Register dst, Register src) { InstructionX86 instr(0x33); ASSERT(dst.is_gpr()); @@ -1716,9 +1597,6 @@ Instruction xor_gpr64_gpr64(Register dst, Register src) { return instr; } -/*! - * Bitwise not a gpr - */ Instruction not_gpr64(Register reg) { InstructionX86 instr(0xf7); ASSERT(reg.is_gpr()); @@ -1730,9 +1608,6 @@ Instruction not_gpr64(Register reg) { // SHIFTS //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -/*! - * Shift 64-bit gpr left by CL register - */ Instruction shl_gpr64_cl(Register reg) { ASSERT(reg.is_gpr()); InstructionX86 instr(0xd3); @@ -1740,9 +1615,6 @@ Instruction shl_gpr64_cl(Register reg) { return instr; } -/*! - * Shift 64-bit gpr right (logical) by CL register - */ Instruction shr_gpr64_cl(Register reg) { ASSERT(reg.is_gpr()); InstructionX86 instr(0xd3); @@ -1750,9 +1622,6 @@ Instruction shr_gpr64_cl(Register reg) { return instr; } -/*! - * Shift 64-bit gpr right (arithmetic) by CL register - */ Instruction sar_gpr64_cl(Register reg) { ASSERT(reg.is_gpr()); InstructionX86 instr(0xd3); @@ -1760,9 +1629,6 @@ Instruction sar_gpr64_cl(Register reg) { return instr; } -/*! - * Shift 64-ptr left (logical) by the constant shift amount "sa". - */ Instruction shl_gpr64_u8(Register reg, uint8_t sa) { ASSERT(reg.is_gpr()); InstructionX86 instr(0xc1); @@ -1771,9 +1637,6 @@ Instruction shl_gpr64_u8(Register reg, uint8_t sa) { return instr; } -/*! - * Shift 64-ptr right (logical) by the constant shift amount "sa". - */ Instruction shr_gpr64_u8(Register reg, uint8_t sa) { ASSERT(reg.is_gpr()); InstructionX86 instr(0xc1); @@ -1782,9 +1645,6 @@ Instruction shr_gpr64_u8(Register reg, uint8_t sa) { return instr; } -/*! - * Shift 64-ptr right (arithmetic) by the constant shift amount "sa". - */ Instruction sar_gpr64_u8(Register reg, uint8_t sa) { ASSERT(reg.is_gpr()); InstructionX86 instr(0xc1); @@ -1797,18 +1657,12 @@ Instruction sar_gpr64_u8(Register reg, uint8_t sa) { // CONTROL FLOW //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -/*! - * Jump, 32-bit constant offset. The offset is by default 0 and must be patched later. - */ Instruction jmp_32() { InstructionX86 instr(0xe9); instr.set(Imm(4, 0)); return instr; } -/*! - * Jump if equal. - */ Instruction je_32() { InstructionX86 instr(0x0f); instr.set_op2(0x84); @@ -1816,9 +1670,6 @@ Instruction je_32() { return instr; } -/*! - * Jump not equal. - */ Instruction jne_32() { InstructionX86 instr(0x0f); instr.set_op2(0x85); @@ -1826,9 +1677,6 @@ Instruction jne_32() { return instr; } -/*! - * Jump less than or equal. - */ Instruction jle_32() { InstructionX86 instr(0x0f); instr.set_op2(0x8e); @@ -1836,9 +1684,6 @@ Instruction jle_32() { return instr; } -/*! - * Jump greater than or equal. - */ Instruction jge_32() { InstructionX86 instr(0x0f); instr.set_op2(0x8d); @@ -1846,9 +1691,6 @@ Instruction jge_32() { return instr; } -/*! - * Jump less than - */ Instruction jl_32() { InstructionX86 instr(0x0f); instr.set_op2(0x8c); @@ -1856,9 +1698,6 @@ Instruction jl_32() { return instr; } -/*! 
- * Jump greater than - */ Instruction jg_32() { InstructionX86 instr(0x0f); instr.set_op2(0x8f); @@ -1866,9 +1705,6 @@ Instruction jg_32() { return instr; } -/*! - * Jump below or equal - */ Instruction jbe_32() { InstructionX86 instr(0x0f); instr.set_op2(0x86); @@ -1876,9 +1712,6 @@ Instruction jbe_32() { return instr; } -/*! - * Jump above or equal - */ Instruction jae_32() { InstructionX86 instr(0x0f); instr.set_op2(0x83); @@ -1886,9 +1719,6 @@ Instruction jae_32() { return instr; } -/*! - * Jump below - */ Instruction jb_32() { InstructionX86 instr(0x0f); instr.set_op2(0x82); @@ -1896,9 +1726,6 @@ Instruction jb_32() { return instr; } -/*! - * Jump above - */ Instruction ja_32() { InstructionX86 instr(0x0f); instr.set_op2(0x87); @@ -1910,12 +1737,9 @@ Instruction ja_32() { // FLOAT MATH //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -/*! - * Compare two floats and set flag register for jump (ucomiss) - */ Instruction cmp_flt_flt(Register a, Register b) { - ASSERT(a.is_xmm()); - ASSERT(b.is_xmm()); + ASSERT(a.is_128bit_simd()); + ASSERT(b.is_128bit_simd()); InstructionX86 instr(0x0f); instr.set_op2(0x2e); instr.set_modrm_and_rex(a.hw_id(), b.hw_id(), 3, false); @@ -1923,8 +1747,8 @@ Instruction cmp_flt_flt(Register a, Register b) { } Instruction sqrts_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); InstructionX86 instr(0xf3); instr.set_op2(0x0f); instr.set_op3(0x51); @@ -1933,12 +1757,9 @@ Instruction sqrts_xmm(Register dst, Register src) { return instr; } -/*! - * Multiply two floats in xmm's - */ Instruction mulss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); InstructionX86 instr(0xf3); instr.set_op2(0x0f); instr.set_op3(0x59); @@ -1947,12 +1768,9 @@ Instruction mulss_xmm_xmm(Register dst, Register src) { return instr; } -/*! - * Divide two floats in xmm's - */ Instruction divss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); InstructionX86 instr(0xf3); instr.set_op2(0x0f); instr.set_op3(0x5e); @@ -1961,12 +1779,9 @@ Instruction divss_xmm_xmm(Register dst, Register src) { return instr; } -/*! - * Subtract two floats in xmm's - */ Instruction subss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); InstructionX86 instr(0xf3); instr.set_op2(0x0f); instr.set_op3(0x5c); @@ -1975,12 +1790,9 @@ Instruction subss_xmm_xmm(Register dst, Register src) { return instr; } -/*! - * Add two floats in xmm's - */ Instruction addss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); InstructionX86 instr(0xf3); instr.set_op2(0x0f); instr.set_op3(0x58); @@ -1989,12 +1801,9 @@ Instruction addss_xmm_xmm(Register dst, Register src) { return instr; } -/*! - * Floating point minimum. - */ Instruction minss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); InstructionX86 instr(0xf3); instr.set_op2(0x0f); instr.set_op3(0x5d); @@ -2003,12 +1812,9 @@ Instruction minss_xmm_xmm(Register dst, Register src) { return instr; } -/*! - * Floating point maximum. 
- */ Instruction maxss_xmm_xmm(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); InstructionX86 instr(0xf3); instr.set_op2(0x0f); instr.set_op3(0x5f); @@ -2017,11 +1823,8 @@ Instruction maxss_xmm_xmm(Register dst, Register src) { return instr; } -/*! - * Convert GPR int32 to XMM float (single precision) - */ Instruction int32_to_float(Register dst, Register src) { - ASSERT(dst.is_xmm()); + ASSERT(dst.is_128bit_simd()); ASSERT(src.is_gpr()); InstructionX86 instr(0xf3); instr.set_op2(0x0f); @@ -2031,12 +1834,9 @@ Instruction int32_to_float(Register dst, Register src) { return instr; } -/*! - * Convert XMM float to GPR int32(single precision) (truncate) - */ Instruction float_to_int32(Register dst, Register src) { ASSERT(dst.is_gpr()); - ASSERT(src.is_xmm()); + ASSERT(src.is_128bit_simd()); InstructionX86 instr(0xf3); instr.set_op2(0x0f); instr.set_op3(0x2c); @@ -2057,11 +1857,6 @@ Instruction nop() { // UTILITIES //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -/*! - * A "null" instruction. This instruction does not generate any bytes - * but can be referred to by a label. Useful to insert in place of a real instruction - * if the real instruction has been optimized out. - */ Instruction null() { InstructionX86 i(0); i.m_flags |= InstructionX86::kIsNull; @@ -2084,8 +1879,8 @@ Instruction wait_vf() { } Instruction mov_vf_vf(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); if (src.hw_id() >= 8 && dst.hw_id() < 8) { // in this case, we can use the 0x29 encoding, which swaps src and dst, in order to use the @@ -2102,7 +1897,7 @@ Instruction mov_vf_vf(Register dst, Register src) { } Instruction loadvf_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { - ASSERT(dst.is_xmm()); + ASSERT(dst.is_128bit_simd()); ASSERT(addr1.is_gpr()); ASSERT(addr2.is_gpr()); ASSERT(addr1 != addr2); @@ -2118,7 +1913,7 @@ Instruction loadvf_gpr64_plus_gpr64_plus_s8(Register dst, Register addr1, Register addr2, s64 offset) { - ASSERT(dst.is_xmm()); + ASSERT(dst.is_128bit_simd()); ASSERT(addr1.is_gpr()); ASSERT(addr2.is_gpr()); ASSERT(addr1 != addr2); @@ -2135,7 +1930,7 @@ Instruction loadvf_gpr64_plus_gpr64_plus_s32(Register dst, Register addr1, Register addr2, s64 offset) { - ASSERT(dst.is_xmm()); + ASSERT(dst.is_128bit_simd()); ASSERT(addr1.is_gpr()); ASSERT(addr2.is_gpr()); ASSERT(addr1 != addr2); @@ -2149,7 +1944,7 @@ Instruction loadvf_gpr64_plus_gpr64_plus_s32(Register dst, } Instruction storevf_gpr64_plus_gpr64(Register value, Register addr1, Register addr2) { - ASSERT(value.is_xmm()); + ASSERT(value.is_128bit_simd()); ASSERT(addr1.is_gpr()); ASSERT(addr2.is_gpr()); ASSERT(addr1 != addr2); @@ -2165,7 +1960,7 @@ Instruction storevf_gpr64_plus_gpr64_plus_s8(Register value, Register addr1, Register addr2, s64 offset) { - ASSERT(value.is_xmm()); + ASSERT(value.is_128bit_simd()); ASSERT(addr1.is_gpr()); ASSERT(addr2.is_gpr()); ASSERT(addr1 != addr2); @@ -2182,7 +1977,7 @@ Instruction storevf_gpr64_plus_gpr64_plus_s32(Register value, Register addr1, Register addr2, s64 offset) { - ASSERT(value.is_xmm()); + ASSERT(value.is_128bit_simd()); ASSERT(addr1.is_gpr()); ASSERT(addr2.is_gpr()); ASSERT(addr1 != addr2); @@ -2196,7 +1991,7 @@ Instruction storevf_gpr64_plus_gpr64_plus_s32(Register value, } Instruction loadvf_rip_plus_s32(Register dest, s64 offset) { - ASSERT(dest.is_xmm()); + ASSERT(dest.is_128bit_simd()); ASSERT(offset >= 
INT32_MIN); ASSERT(offset <= INT32_MAX); InstructionX86 instr(0x28); @@ -2208,9 +2003,9 @@ Instruction loadvf_rip_plus_s32(Register dest, s64 offset) { Instruction blend_vf(Register dst, Register src1, Register src2, u8 mask) { ASSERT(!(mask & 0b11110000)); - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + ASSERT(src2.is_128bit_simd()); InstructionX86 instr(0x0c); // VBLENDPS instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F_3A, src1.hw_id(), false, VexPrefix::P_66); @@ -2219,8 +2014,8 @@ Instruction blend_vf(Register dst, Register src1, Register src2, u8 mask) { } Instruction shuffle_vf(Register dst, Register src, u8 dx, u8 dy, u8 dz, u8 dw) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); ASSERT(dx < 4); ASSERT(dy < 4); ASSERT(dz < 4); @@ -2236,27 +2031,9 @@ Instruction shuffle_vf(Register dst, Register src, u8 dx, u8 dy, u8 dz, u8 dw) { // return instr; } -/* - Generic Swizzle (re-arrangment of packed FPs) operation, the control bytes are quite involved. - Here's a brief run-down: - - 8-bits / 4 groups of 2 bits - - Right-to-left, each group is used to determine which element in `src` gets copied into - `dst`'s element (W->X). - - GROUP OPTIONS - - 00b - Copy the least-significant element (X) - - 01b - Copy the second element (from the right) (Y) - - 10b - Copy the third element (from the right) (Z) - - 11b - Copy the most significant element (W) - Examples - ; xmm1 = (1.5, 2.5, 3.5, 4.5) (W,Z,Y,X in x86 land) - SHUFPS xmm1, xmm1, 0xff ; Copy the most significant element to all positions - > (1.5, 1.5, 1.5, 1.5) - SHUFPS xmm1, xmm1, 0x39 ; Rotate right - > (4.5, 1.5, 2.5, 3.5) - */ Instruction swizzle_vf(Register dst, Register src, u8 controlBytes) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); InstructionX86 instr(0xC6); // VSHUFPS // we use the AVX "VEX" encoding here. 
This is a three-operand form, @@ -2267,14 +2044,6 @@ Instruction swizzle_vf(Register dst, Register src, u8 controlBytes) { return instr; } -/* - Splats a single element in 'src' to all elements in 'dst' - For example (pseudocode): - xmm1 = (1.5, 2.5, 3.5, 4.5) - xmm2 = (1, 2, 3, 4) - splat_vf(xmm1, xmm2, XMM_ELEMENT::X); - xmm1 = (4, 4, 4, 4) - */ Instruction splat_vf(Register dst, Register src, Register::VF_ELEMENT element) { switch (element) { case Register::VF_ELEMENT::X: // Least significant element @@ -2296,87 +2065,87 @@ Instruction splat_vf(Register dst, Register src, Register::VF_ELEMENT element) { } Instruction xor_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + ASSERT(src2.is_128bit_simd()); InstructionX86 instr(0x57); // VXORPS instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); return instr; } Instruction sub_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + ASSERT(src2.is_128bit_simd()); InstructionX86 instr(0x5c); // VSUBPS instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); return instr; } Instruction add_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + ASSERT(src2.is_128bit_simd()); InstructionX86 instr(0x58); // VADDPS instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); return instr; } Instruction mul_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + ASSERT(src2.is_128bit_simd()); InstructionX86 instr(0x59); // VMULPS instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); return instr; } Instruction max_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + ASSERT(src2.is_128bit_simd()); InstructionX86 instr(0x5F); // VMAXPS instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); return instr; } Instruction min_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + ASSERT(src2.is_128bit_simd()); InstructionX86 instr(0x5D); // VMINPS instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); return instr; } Instruction div_vf(Register dst, Register src1, Register src2) { - ASSERT(dst.is_xmm()); - ASSERT(src1.is_xmm()); - ASSERT(src2.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); + ASSERT(src2.is_128bit_simd()); InstructionX86 instr(0x5E); // VDIVPS instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id()); return instr; } Instruction sqrt_vf(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); InstructionX86 instr(0x51); // VSQRTPS 
instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0b0); return instr; } Instruction itof_vf(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); InstructionX86 instr(0x5b); // VCVTDQ2PS instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0); return instr; } Instruction ftoi_vf(Register dst, Register src) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); // VEX.128.F3.0F.WIG 5B /r VCVTTPS2DQ xmm1, xmm2/m128 InstructionX86 instr(0x5b); // VCVTTPS2DQ instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false, @@ -2385,8 +2154,8 @@ Instruction ftoi_vf(Register dst, Register src) { } Instruction pw_sra(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); // VEX.128.66.0F.WIG 72 /4 ib VPSRAD xmm1, xmm2, imm8 InstructionX86 instr(0x72); instr.set_vex_modrm_and_rex(4, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, @@ -2396,8 +2165,8 @@ Instruction pw_sra(Register dst, Register src, u8 imm) { } Instruction pw_srl(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); // VEX.128.66.0F.WIG 72 /2 ib VPSRLD xmm1, xmm2, imm8 InstructionX86 instr(0x72); instr.set_vex_modrm_and_rex(2, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, @@ -2407,8 +2176,8 @@ Instruction pw_srl(Register dst, Register src, u8 imm) { } Instruction ph_srl(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); // VEX.128.66.0F.WIG 71 /2 ib VPSRLW InstructionX86 instr(0x71); instr.set_vex_modrm_and_rex(2, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, @@ -2418,8 +2187,8 @@ Instruction ph_srl(Register dst, Register src, u8 imm) { } Instruction pw_sll(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); // VEX.128.66.0F.WIG 72 /6 ib VPSLLD xmm1, xmm2, imm8 InstructionX86 instr(0x72); instr.set_vex_modrm_and_rex(6, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, @@ -2428,8 +2197,8 @@ Instruction pw_sll(Register dst, Register src, u8 imm) { return instr; } Instruction ph_sll(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); // VEX.128.66.0F.WIG 71 /6 ib VPSLLW xmm1, xmm2, imm8 InstructionX86 instr(0x71); instr.set_vex_modrm_and_rex(6, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, @@ -2439,9 +2208,9 @@ Instruction ph_sll(Register dst, Register src, u8 imm) { } Instruction parallel_add_byte(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); // VEX.128.66.0F.WIG FC /r VPADDB xmm1, xmm2, xmm3/m128 // reg, vex, r/m InstructionX86 instr(0xFC); @@ -2451,9 +2220,9 @@ Instruction parallel_add_byte(Register dst, Register src0, Register src1) { } Instruction parallel_bitwise_or(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); + 
ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); // VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 // reg, vex, r/m InstructionX86 instr(0xEB); @@ -2463,9 +2232,9 @@ Instruction parallel_bitwise_or(Register dst, Register src0, Register src1) { } Instruction parallel_bitwise_xor(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); // VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 // reg, vex, r/m InstructionX86 instr(0xEF); @@ -2475,9 +2244,9 @@ Instruction parallel_bitwise_xor(Register dst, Register src0, Register src1) { } Instruction parallel_bitwise_and(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); // VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 // reg, vex, r/m InstructionX86 instr(0xDB); @@ -2486,19 +2255,10 @@ Instruction parallel_bitwise_and(Register dst, Register src0, Register src1) { return instr; } -// Reminder - a word in MIPS = 32bits = a DWORD in x86 -// MIPS || x86 -// ----------------------- -// byte || byte -// halfword || word -// word || dword -// doubleword || quadword - -// -- Unpack High Data Instructions Instruction pextub_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); // VEX.128.66.0F.WIG 68/r VPUNPCKHBW xmm1,xmm2, xmm3/m128 // reg, vex, r/m InstructionX86 instr(0x68); @@ -2508,9 +2268,9 @@ Instruction pextub_swapped(Register dst, Register src0, Register src1) { } Instruction pextuh_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); // VEX.128.66.0F.WIG 69/r VPUNPCKHWD xmm1,xmm2, xmm3/m128 // reg, vex, r/m InstructionX86 instr(0x69); @@ -2520,9 +2280,9 @@ Instruction pextuh_swapped(Register dst, Register src0, Register src1) { } Instruction pextuw_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); // VEX.128.66.0F.WIG 6A/r VPUNPCKHDQ xmm1, xmm2, xmm3/m128 // reg, vex, r/m InstructionX86 instr(0x6a); @@ -2531,11 +2291,10 @@ Instruction pextuw_swapped(Register dst, Register src0, Register src1) { return instr; } -// -- Unpack Low Data Instructions Instruction pextlb_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); // VEX.128.66.0F.WIG 60/r VPUNPCKLBW xmm1,xmm2, xmm3/m128 // reg, vex, r/m InstructionX86 instr(0x60); @@ -2545,9 +2304,9 @@ Instruction pextlb_swapped(Register dst, Register src0, Register src1) { } Instruction pextlh_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); // VEX.128.66.0F.WIG 61/r 
VPUNPCKLWD xmm1,xmm2, xmm3/m128 // reg, vex, r/m InstructionX86 instr(0x61); @@ -2557,9 +2316,9 @@ Instruction pextlh_swapped(Register dst, Register src0, Register src1) { } Instruction pextlw_swapped(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); // VEX.128.66.0F.WIG 62/r VPUNPCKLDQ xmm1, xmm2, xmm3/m128 // reg, vex, r/m InstructionX86 instr(0x62); @@ -2568,11 +2327,10 @@ Instruction pextlw_swapped(Register dst, Register src0, Register src1) { return instr; } -// Equal to than comparison as 16 bytes (8 bits) Instruction parallel_compare_e_b(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); // VEX.128.66.0F.WIG 74 /r VPCMPEQB xmm1, xmm2, xmm3/m128 // reg, vex, r/m InstructionX86 instr(0x74); @@ -2581,11 +2339,10 @@ Instruction parallel_compare_e_b(Register dst, Register src0, Register src1) { return instr; } -// Equal to than comparison as 8 halfwords (16 bits) Instruction parallel_compare_e_h(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); // VEX.128.66.0F.WIG 75 /r VPCMPEQW xmm1, xmm2, xmm3/m128 // reg, vex, r/m InstructionX86 instr(0x75); @@ -2594,11 +2351,10 @@ Instruction parallel_compare_e_h(Register dst, Register src0, Register src1) { return instr; } -// Equal to than comparison as 4 words (32 bits) Instruction parallel_compare_e_w(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); // VEX.128.66.0F.WIG 76 /r VPCMPEQD xmm1, xmm2, xmm3/m128 // reg, vex, r/m InstructionX86 instr(0x76); @@ -2607,11 +2363,10 @@ Instruction parallel_compare_e_w(Register dst, Register src0, Register src1) { return instr; } -// Greater than comparison as 16 bytes (8 bits) Instruction parallel_compare_gt_b(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); // VEX.128.66.0F.WIG 64 /r VPCMPGTB xmm1, xmm2, xmm3/m128 // reg, vex, r/m InstructionX86 instr(0x64); @@ -2620,11 +2375,10 @@ Instruction parallel_compare_gt_b(Register dst, Register src0, Register src1) { return instr; } -// Greater than comparison as 8 halfwords (16 bits) Instruction parallel_compare_gt_h(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); // VEX.128.66.0F.WIG 65 /r VPCMPGTW xmm1, xmm2, xmm3/m128 // reg, vex, r/m InstructionX86 instr(0x65); @@ -2633,11 +2387,10 @@ Instruction parallel_compare_gt_h(Register dst, Register src0, Register src1) { return instr; } -// Greater than comparison as 4 words (32 bits) Instruction parallel_compare_gt_w(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + 
ASSERT(src1.is_128bit_simd()); // VEX.128.66.0F.WIG 66 /r VPCMPGTD xmm1, xmm2, xmm3/m128 // reg, vex, r/m InstructionX86 instr(0x66); @@ -2647,9 +2400,9 @@ Instruction parallel_compare_gt_w(Register dst, Register src0, Register src1) { } Instruction vpunpcklqdq(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); // VEX.128.66.0F.WIG 6C/r VPUNPCKLQDQ xmm1, xmm2, xmm3/m128 // reg, vex, r/m InstructionX86 instr(0x6c); @@ -2663,9 +2416,9 @@ Instruction pcpyld_swapped(Register dst, Register src0, Register src1) { } Instruction pcpyud(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); // VEX.128.66.0F.WIG 6D/r VPUNPCKHQDQ xmm1, xmm2, xmm3/m128 // reg, vex, r/m InstructionX86 instr(0x6d); @@ -2675,9 +2428,9 @@ Instruction pcpyud(Register dst, Register src0, Register src1) { } Instruction vpsubd(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); // VEX.128.66.0F.WIG FA /r VPSUBD xmm1, xmm2, xmm3/m128 // reg, vec, r/m InstructionX86 instr(0xfa); @@ -2687,8 +2440,8 @@ Instruction vpsubd(Register dst, Register src0, Register src1) { } Instruction vpsrldq(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); // VEX.128.66.0F.WIG 73 /3 ib VPSRLDQ xmm1, xmm2, imm8 InstructionX86 instr(0x73); instr.set_vex_modrm_and_rex(3, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, @@ -2698,8 +2451,8 @@ Instruction vpsrldq(Register dst, Register src, u8 imm) { } Instruction vpslldq(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); // VEX.128.66.0F.WIG 73 /7 ib VPSLLDQ xmm1, xmm2, imm8 InstructionX86 instr(0x73); instr.set_vex_modrm_and_rex(7, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, @@ -2709,8 +2462,8 @@ Instruction vpslldq(Register dst, Register src, u8 imm) { } Instruction vpshuflw(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); // VEX.128.F2.0F.WIG 70 /r ib VPSHUFLW xmm1, xmm2/m128, imm8 InstructionX86 instr(0x70); instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false, @@ -2720,8 +2473,8 @@ Instruction vpshuflw(Register dst, Register src, u8 imm) { } Instruction vpshufhw(Register dst, Register src, u8 imm) { - ASSERT(dst.is_xmm()); - ASSERT(src.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src.is_128bit_simd()); // VEX.128.F3.0F.WIG 70 /r ib VPSHUFHW xmm1, xmm2/m128, imm8 InstructionX86 instr(0x70); instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false, @@ -2731,9 +2484,9 @@ Instruction vpshufhw(Register dst, Register src, u8 imm) { } Instruction vpackuswb(Register dst, Register src0, Register src1) { - ASSERT(dst.is_xmm()); - ASSERT(src0.is_xmm()); - ASSERT(src1.is_xmm()); + ASSERT(dst.is_128bit_simd()); + ASSERT(src0.is_128bit_simd()); + ASSERT(src1.is_128bit_simd()); // VEX.128.66.0F.WIG 67 /r VPACKUSWB xmm1, xmm2, xmm3/m128 
// reg, vex, r/m diff --git a/goalc/emitter/Instruction.h b/goalc/emitter/Instruction.h index 3a32a353395..e22055a6d3d 100644 --- a/goalc/emitter/Instruction.h +++ b/goalc/emitter/Instruction.h @@ -1,6 +1,7 @@ #pragma once #include + #include "common/common_types.h" #include "common/util/Assert.h" @@ -10,35 +11,31 @@ namespace emitter { */ template struct InstructionImpl { - /*! - * Emit into a buffer and return how many bytes written (can be zero) - */ - u8 emit(u8* buffer) const { - return static_cast(this)->emit(buffer); - } + /*! + * Emit into a buffer and return how many bytes written (can be zero) + */ + u8 emit(u8* buffer) const { return static_cast(this)->emit(buffer); } - u8 length() const { - return static_cast(this)->length(); - } + u8 length() const { return static_cast(this)->length(); } }; +// TODO probably separate these because x86 has a ton + struct InstructionARM64 : InstructionImpl { - // The ARM instruction stream is a sequence of word-aligned words. Each ARM instruction is a single 32-bit word in that stream. - // The encoding of an ARM instruction is: - // TODO - // https://iitd-plos.github.io/col718/ref/arm-instructionset.pdf - u32 instruction_encoding; + // The ARM instruction stream is a sequence of word-aligned words. Each ARM instruction is a + // single 32-bit word in that stream. The encoding of an ARM instruction is: + // TODO + // https://iitd-plos.github.io/col718/ref/arm-instructionset.pdf + u32 instruction_encoding; - InstructionARM64(u32 encoding) : instruction_encoding(encoding) {} + InstructionARM64(u32 encoding) : instruction_encoding(encoding) {} - uint8_t emit(uint8_t* buffer) const { - memcpy(buffer, &instruction_encoding, 4); - return 4; - } + uint8_t emit(uint8_t* buffer) const { + memcpy(buffer, &instruction_encoding, 4); + return 4; + } - uint8_t length() const { - return 4; - } + uint8_t length() const { return 4; } }; /*! @@ -168,7 +165,7 @@ struct VEX2 { }; struct InstructionX86 : InstructionImpl { - enum Flags { + enum Flags { kOp2Set = (1 << 0), kOp3Set = (1 << 1), kIsNull = (1 << 2), diff --git a/goalc/emitter/ObjectGenerator.cpp b/goalc/emitter/ObjectGenerator.cpp index 6735c6db738..c98bcca2b14 100644 --- a/goalc/emitter/ObjectGenerator.cpp +++ b/goalc/emitter/ObjectGenerator.cpp @@ -386,7 +386,7 @@ void ObjectGenerator::handle_temp_static_ptr_links(int seg) { * m_jump_temp_links_by_seg patching after memory layout is done */ void ObjectGenerator::handle_temp_jump_links(int seg) { - #ifndef __aarch64__ +#ifndef __aarch64__ for (const auto& link : m_jump_temp_links_by_seg.at(seg)) { // we need to compute three offsets, all relative to the start of data. // 1). the location of the patch (the immediate of the opcode) @@ -412,10 +412,9 @@ void ObjectGenerator::handle_temp_jump_links(int seg) { patch_data(seg, patch_location, dest_rip - source_rip); } - #else - // TODO - ARM64 - #endif - +#else +// TODO - ARM64 +#endif } /*! 
@@ -424,7 +423,7 @@ void ObjectGenerator::handle_temp_jump_links(int seg) { * after memory layout is done and before link tables are generated */ void ObjectGenerator::handle_temp_instr_sym_links(int seg) { - #ifndef __aarch64__ +#ifndef __aarch64__ for (const auto& links : m_symbol_instr_temp_links_by_seg.at(seg)) { const auto& sym_name = links.first; for (const auto& link : links.second) { @@ -442,10 +441,9 @@ void ObjectGenerator::handle_temp_instr_sym_links(int seg) { m_sym_links_by_seg.at(seg)[sym_name].push_back(offset_of_instruction + offset_in_instruction); } } - #else - // TODO - ARM64 - #endif - +#else +// TODO - ARM64 +#endif } void ObjectGenerator::handle_temp_rip_func_links(int seg) { @@ -549,7 +547,7 @@ void ObjectGenerator::emit_link_ptr(int seg) { } void ObjectGenerator::emit_link_rip(int seg) { - #ifndef __aarch64__ +#ifndef __aarch64__ auto& out = m_link_by_seg.at(seg); for (auto& rec : m_rip_links_by_seg.at(seg)) { // kind (u8) @@ -575,10 +573,9 @@ void ObjectGenerator::emit_link_rip(int seg) { src_func.instruction_to_byte_in_data.at(rec.instr.instr_id) + src_instr.offset_of_disp(), out); } - #else - // TODO - ARM64 - #endif - +#else +// TODO - ARM64 +#endif } void ObjectGenerator::emit_link_table(int seg, const TypeSystem* ts) { diff --git a/goalc/emitter/Register.h b/goalc/emitter/Register.h index 44ff8df3323..9adc48db2df 100644 --- a/goalc/emitter/Register.h +++ b/goalc/emitter/Register.h @@ -145,12 +145,25 @@ class Register { // intentionally not explicit so we can use X86_REGs in place of Registers Register(int id) : m_id(id) {} - bool is_xmm() const { return m_id >= XMM0 && m_id <= XMM15; } + bool is_128bit_simd() const { +#ifndef __aarch64__ + return m_id >= XMM0 && m_id <= XMM15; +#else + return m_id >= Q0 && m_id <= Q31; +#endif + } - bool is_gpr() const { return m_id >= RAX && m_id <= R15; } + bool is_gpr() const { +#ifndef __aarch64__ + return m_id >= RAX && m_id <= R15; +#else + return m_id >= X0 && m_id <= X30; +#endif + } int hw_id() const { - if (is_xmm()) { + // TODO - ARM64, even needed? 
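      // ------------------------------------------------------------------------
      // Sketch only, not part of this patch: if hw_id() is kept on ARM64, the
      // subtraction bases below would presumably need the same #ifdef split as
      // the predicates above, using the (assumed) Q0 / X0 enum bases instead of
      // XMM0 / RAX:
      //
      //   #ifdef __aarch64__
      //     if (is_128bit_simd()) { return m_id - Q0; }  // Q0..Q31 -> 0..31
      //     if (is_gpr())         { return m_id - X0; }  // X0..X30 -> 0..30
      //   #endif
      // ------------------------------------------------------------------------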
+ if (is_128bit_simd()) { return m_id - XMM0; } else if (is_gpr()) { return m_id - RAX; diff --git a/test/test_CodeTester.cpp b/test/test_CodeTester.cpp index a18ffb60ebc..4df693677e8 100644 --- a/test/test_CodeTester.cpp +++ b/test/test_CodeTester.cpp @@ -13,23 +13,23 @@ using namespace emitter; -// TEST(CodeTester, prologue) { -// CodeTester tester; -// tester.init_code_buffer(256); -// tester.emit_push_all_gprs(); -// // check we generate the right code for pushing all gpr's -// EXPECT_EQ(tester.dump_to_hex_string(), -// "50 51 52 53 54 55 56 57 41 50 41 51 41 52 41 53 41 54 41 55 41 56 41 57"); -// } - -// TEST(CodeTester, epilogue) { -// CodeTester tester; -// tester.init_code_buffer(256); -// tester.emit_pop_all_gprs(); -// // check we generate the right code for popping all gpr's -// EXPECT_EQ(tester.dump_to_hex_string(), -// "41 5f 41 5e 41 5d 41 5c 41 5b 41 5a 41 59 41 58 5f 5e 5d 5c 5b 5a 59 58"); -// } +TEST(CodeTester, prologue) { + CodeTester tester; + tester.init_code_buffer(256); + tester.emit_push_all_gprs(); + // check we generate the right code for pushing all gpr's + EXPECT_EQ(tester.dump_to_hex_string(), + "50 51 52 53 54 55 56 57 41 50 41 51 41 52 41 53 41 54 41 55 41 56 41 57"); +} + +TEST(CodeTester, epilogue) { + CodeTester tester; + tester.init_code_buffer(256); + tester.emit_pop_all_gprs(); + // check we generate the right code for popping all gpr's + EXPECT_EQ(tester.dump_to_hex_string(), + "41 5f 41 5e 41 5d 41 5c 41 5b 41 5a 41 59 41 58 5f 5e 5d 5c 5b 5a 59 58"); +} TEST(CodeTester, execute_return) { CodeTester tester; @@ -50,7 +50,7 @@ TEST(CodeTester, execute_push_pop_gprs) { tester.execute(); } -TEST(CodeTester, xmm_store_128) { +TEST(CodeTester, simd_store_128) { CodeTester tester; tester.init_code_buffer(256); // movdqa [rbx], xmm3 diff --git a/test/test_emitter.cpp b/test/test_emitter.cpp index e59486c4516..46ecedfd49f 100644 --- a/test/test_emitter.cpp +++ b/test/test_emitter.cpp @@ -627,7 +627,8 @@ // std::vector u64_constants = {0, UINT64_MAX, INT64_MAX, 7, 12}; // // test we can load a 64-bit constant into all gprs, move it to any other gpr, and return it. -// // rsp is skipping because that's the stack pointer and would prevent us from popping gprs after +// // rsp is skipping because that's the stack pointer and would prevent us from popping gprs +// after // CodeTester tester; // tester.init_code_buffer(256); @@ -3216,10 +3217,11 @@ // float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; // // run! 
-// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 3 * sizeof(float) + 3, 0, 0), 3.45f); -// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 2 * sizeof(float) + 3, 0, 0), 1.23f); -// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 4 * sizeof(float) + 3, 0, 0), 5.67f); -// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 5 * sizeof(float) + 3, 0, 0), 0); +// EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 3 * sizeof(float) + 3, 0, +// 0), 3.45f); EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 2 * sizeof(float) + 3, +// 0, 0), 1.23f); EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 4 * sizeof(float) + +// 3, 0, 0), 5.67f); EXPECT_FLOAT_EQ(tester.execute_ret((u64)memory, 5 * +// sizeof(float) + 3, 0, 0), 0); // iter++; // } From 19b5eea1660af3e2e3f2aaeaa42de14abf943cbc Mon Sep 17 00:00:00 2001 From: Tyler Wilding Date: Wed, 3 Jan 2024 17:51:04 -0500 Subject: [PATCH 07/12] goalc: remove some temp changes --- goalc/emitter/IGenARM64.cpp | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/goalc/emitter/IGenARM64.cpp b/goalc/emitter/IGenARM64.cpp index 03e75e5be0a..9212edbee4e 100644 --- a/goalc/emitter/IGenARM64.cpp +++ b/goalc/emitter/IGenARM64.cpp @@ -363,17 +363,7 @@ Instruction load_reg_offset_xmm32(Register xmm_dest, Register base, s64 offset) // LOADS n' STORES - SIMD (128-bit, QWORDS) //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -Instruction store128_gpr64_simd_reg(Register gpr_addr, Register simd_reg) { - // STR Qs, [Xd] - // ASSERT(gpr_addr.is_gpr()); - // ASSERT(simd_reg.is_128bit_simd()); - // return Instruction(0b11111001); - // InstructionX86 instr(0x66); - // instr.set_op2(0x0f); - // instr.set_op3(0x7f); - // instr.set_modrm_and_rex_for_reg_addr(xmm_value.hw_id(), gpr_addr.hw_id(), false); - // instr.swap_op0_rex(); - // return instr; +Instruction store128_gpr64_xmm128(Register gpr_addr, Register xmm_value) { return Instruction(0b0); } From 1e281d37d686c1d13ea9aed9c661a76c7bdcb681 Mon Sep 17 00:00:00 2001 From: Tyler Wilding Date: Wed, 3 Jan 2024 17:28:14 -0500 Subject: [PATCH 08/12] ci: build MacOS arm64 on x86 runners --- .github/workflows/build-matrix.yaml | 16 ++++++++-------- .github/workflows/macos-build-arm.yaml | 2 +- CMakePresets.json | 10 ++++++++++ 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build-matrix.yaml b/.github/workflows/build-matrix.yaml index 59601c577fe..e96f7766c06 100644 --- a/.github/workflows/build-matrix.yaml +++ b/.github/workflows/build-matrix.yaml @@ -52,11 +52,11 @@ jobs: cmakePreset: "Release-macos-clang" cachePrefix: "" - # Q4 2023 there will hopefully be native arm64 runners - # https://github.com/github/roadmap/issues/528 - # build_macos_arm: - # name: "🍎 MacOS" - # uses: ./.github/workflows/macos-build-arm.yaml - # with: - # cmakePreset: "Release-macos-clang" - # cachePrefix: "" + # There are ARM64 macOS runners, but they aren't free _yet_ + # limited to the large runners right now + build_macos_arm: + name: "🍎 MacOS" + uses: ./.github/workflows/macos-build-arm.yaml + with: + cmakePreset: "Release-macos-arm64-clang" + cachePrefix: "" diff --git a/.github/workflows/macos-build-arm.yaml b/.github/workflows/macos-build-arm.yaml index cc7b56ac29f..24c6edadc72 100644 --- a/.github/workflows/macos-build-arm.yaml +++ b/.github/workflows/macos-build-arm.yaml @@ -13,7 +13,7 @@ on: jobs: build: name: ARM - runs-on: macos-latest + runs-on: macos-12 timeout-minutes: 120 env: # overrides: https://github.com/mbitsnbites/buildcache/blob/master/doc/configuration.md diff --git a/CMakePresets.json 
b/CMakePresets.json index 2e75ce28cbf..3b2a9940e56 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -154,6 +154,16 @@ "description": "Build with Clang as Release without Debug Symbols", "inherits": ["base-macos-release", "base-clang"] }, + { + "name": "Release-macos-arm64-clang", + "displayName": "MacOS ARM64 Release (clang)", + "description": "Build with Clang, cross compiled for ARM64, as Release without Debug Symbols", + "inherits": ["base-macos-release", "base-clang"], + "cacheVariables": { + "CMAKE_C_COMPILER_TARGET": "arm64-apple-darwin", + "CMAKE_CXX_COMPILER_TARGET": "arm64-apple-darwin" + } + }, { "name": "Release-macos-clang-static", "displayName": "MacOS Static Release (clang)", From 0ca6a5a5940f71362bd8cba29a685daae17e60b6 Mon Sep 17 00:00:00 2001 From: Tyler Wilding Date: Wed, 3 Jan 2024 17:56:47 -0500 Subject: [PATCH 09/12] ci: disable macOS ARM tests for now --- .github/workflows/macos-build-arm.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/macos-build-arm.yaml b/.github/workflows/macos-build-arm.yaml index 24c6edadc72..2cb040c5a01 100644 --- a/.github/workflows/macos-build-arm.yaml +++ b/.github/workflows/macos-build-arm.yaml @@ -51,8 +51,9 @@ jobs: - name: Build Project run: cmake --build build --parallel $((`sysctl -n hw.logicalcpu`)) - - name: Run Tests - run: ./test.sh + # TODO - soon TM + # - name: Run Tests + # run: ./test.sh - name: Upload artifact uses: actions/upload-artifact@v3 From 8bb8dfe2724b7ada826af07534d86ef4912f913a Mon Sep 17 00:00:00 2001 From: Tyler Wilding Date: Wed, 3 Jan 2024 21:00:15 -0500 Subject: [PATCH 10/12] ci: no need to install rosetta (i think) --- .github/workflows/macos-build-arm.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/macos-build-arm.yaml b/.github/workflows/macos-build-arm.yaml index 2cb040c5a01..34e77c286b7 100644 --- a/.github/workflows/macos-build-arm.yaml +++ b/.github/workflows/macos-build-arm.yaml @@ -27,9 +27,6 @@ jobs: - name: Checkout Repository uses: actions/checkout@v4 - - name: Set up ARM64 environment - run: sudo softwareupdate --install-rosetta --agree-to-license - - name: Install Package Dependencies run: arch -arm64 brew install cmake ninja From 37678479eca8d4db5723d1ba9ec8159295ba26f0 Mon Sep 17 00:00:00 2001 From: Tyler Wilding Date: Wed, 3 Jan 2024 21:01:45 -0500 Subject: [PATCH 11/12] goalc: resolve x86 compilation error --- .github/workflows/macos-build-arm.yaml | 7 ++++++- game/CMakeLists.txt | 3 ++- goalc/compiler/IR.cpp | 3 +-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/.github/workflows/macos-build-arm.yaml b/.github/workflows/macos-build-arm.yaml index 34e77c286b7..9a387559508 100644 --- a/.github/workflows/macos-build-arm.yaml +++ b/.github/workflows/macos-build-arm.yaml @@ -27,8 +27,13 @@ jobs: - name: Checkout Repository uses: actions/checkout@v4 + # TODO - not relevant on an intel runner + # - name: Set up ARM64 environment + # run: sudo softwareupdate --install-rosetta --agree-to-license + - name: Install Package Dependencies - run: arch -arm64 brew install cmake ninja + # TODO - arch -arm64 + run: brew install cmake ninja - name: Setup Buildcache uses: mikehardy/buildcache-action@v2.1.0 diff --git a/game/CMakeLists.txt b/game/CMakeLists.txt index 30e2778c363..b28c9f1bd57 100644 --- a/game/CMakeLists.txt +++ b/game/CMakeLists.txt @@ -10,7 +10,8 @@ else() message(STATUS "Non-ARM64 architecture detected") endif() -if(ARM64_ARCH) +# TODO - kinda a gross hack, should pass in an explicit flag later 
+if(ARM64_ARCH OR CMAKE_CXX_COMPILER_TARGET STREQUAL "arm64-apple-darwin") # Add your ARM64-specific configuration or build options here set(OG_ASM_FUNCS_FILE kernel/asm_funcs_arm64.s) enable_language(ASM) diff --git a/goalc/compiler/IR.cpp b/goalc/compiler/IR.cpp index 37ec8ee8168..f9e954d7ac3 100644 --- a/goalc/compiler/IR.cpp +++ b/goalc/compiler/IR.cpp @@ -863,8 +863,7 @@ void IR_ConditionalBranch::do_codegen(emitter::ObjectGenerator* gen, const AllocationResult& allocs, emitter::IR_Record irec) { #ifndef __aarch64__ - Instruction jump_instr; - jump_instr = InstructionX86(0); + Instruction jump_instr = InstructionX86(0); ASSERT(m_resolved); switch (condition.kind) { case ConditionKind::EQUAL: From 92ee8921273b88f5b3db30b16b19cebf90d7c00c Mon Sep 17 00:00:00 2001 From: Tyler Wilding Date: Wed, 3 Jan 2024 21:54:29 -0500 Subject: [PATCH 12/12] cmake: coerce cmake to work --- CMakeLists.txt | 6 +++++- game/CMakeLists.txt | 3 +-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 791677828ca..f434926f55c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -103,7 +103,6 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") message(STATUS "AppleClang detected - Setting Defaults") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \ - -march=native \ -Wall \ -Winit-self \ -ggdb \ @@ -121,6 +120,11 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") -fdiagnostics-color=always" ) + # TODO - make a proper flag for arm compiling + if(NOT CMAKE_CXX_COMPILER_TARGET STREQUAL "arm64-apple-darwin") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") + endif() + # additional c++ flags for release mode for our projects if(CMAKE_BUILD_TYPE MATCHES "Release") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") diff --git a/game/CMakeLists.txt b/game/CMakeLists.txt index b28c9f1bd57..cfb503270f9 100644 --- a/game/CMakeLists.txt +++ b/game/CMakeLists.txt @@ -16,8 +16,7 @@ if(ARM64_ARCH OR CMAKE_CXX_COMPILER_TARGET STREQUAL "arm64-apple-darwin") set(OG_ASM_FUNCS_FILE kernel/asm_funcs_arm64.s) enable_language(ASM) set(CMAKE_ASM_SOURCE_FILE_EXTENSIONS ${CMAKE_ASM_SOURCE_FILE_EXTENSIONS} s) - # set(CMAKE_ASM_COMPILE_OBJECT "${CMAKE_ASM_COMPILER} -o ") - set_source_files_properties(${OG_ASM_FUNCS_FILE} PROPERTIES COMPILE_FLAGS "-g") + set_source_files_properties(${OG_ASM_FUNCS_FILE} PROPERTIES COMPILE_FLAGS "-arch arm64 -g") else() set(OG_ASM_FUNCS_FILE kernel/asm_funcs_x86_64.asm) enable_language(ASM_NASM)