From f9713b68d1570025267739bf784c2b27a2847820 Mon Sep 17 00:00:00 2001 From: pp4io Date: Wed, 6 May 2020 13:32:43 +0200 Subject: [PATCH 1/2] Add keva, safex. Disable GPU support by default Integrate newest xmrig changes --- CMakeLists.txt | 4 +- xmrstak/backend/cpu/cpuType.cpp | 7 + xmrstak/backend/cpu/cpuType.hpp | 8 + xmrstak/backend/cpu/crypto/cryptonight.h | 2 +- .../backend/cpu/crypto/cryptonight_aesni.h | 4 + .../backend/cpu/crypto/randomx/aes_hash.cpp | 23 +- .../randomx/asm/program_epilogue_store.inc | 1 + .../crypto/randomx/asm/program_loop_load.inc | 20 +- .../randomx/asm/program_loop_load_xop.inc | 24 + .../crypto/randomx/asm/program_loop_store.inc | 4 +- .../randomx/asm/program_xmm_constants.inc | 2 +- .../cpu/crypto/randomx/blake2/blake2b.c | 2 +- .../cpu/crypto/randomx/configuration.h | 2 +- .../crypto/randomx/jit_compiler_fallback.hpp | 3 +- .../cpu/crypto/randomx/jit_compiler_x86.cpp | 958 +++++++++--------- .../cpu/crypto/randomx/jit_compiler_x86.hpp | 59 +- .../crypto/randomx/jit_compiler_x86_static.S | 14 +- .../randomx/jit_compiler_x86_static.asm | 17 +- .../randomx/jit_compiler_x86_static.hpp | 1 + .../backend/cpu/crypto/randomx/randomx.cpp | 76 +- xmrstak/backend/cpu/crypto/randomx/randomx.h | 19 +- .../cpu/crypto/randomx/virtual_machine.hpp | 4 + .../cpu/crypto/randomx/vm_compiled.cpp | 7 +- xmrstak/backend/cpu/minethd.cpp | 20 +- xmrstak/backend/cryptonight.hpp | 18 +- xmrstak/jconf.cpp | 4 + xmrstak/pools.tpl | 5 + 27 files changed, 771 insertions(+), 537 deletions(-) create mode 100644 xmrstak/backend/cpu/crypto/randomx/asm/program_loop_load_xop.inc diff --git a/CMakeLists.txt b/CMakeLists.txt index 79a0fab34..21bc949a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,7 +71,7 @@ option(CMAKE_LINK_STATIC "link as much as possible libraries static" OFF) #option(CUDA_USE_STATIC_CUDA_RUNTIME "Use the static version of the CUDA runtime library if available" OFF) #set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE BOOL "Use the static version of the CUDA runtime library if available" FORCE) -option(CUDA_ENABLE "Enable or disable CUDA support (NVIDIA backend)" ON) +option(CUDA_ENABLE "Enable or disable CUDA support (NVIDIA backend)" OFF) if(CUDA_ENABLE) find_package(CUDA 9.0) @@ -205,7 +205,7 @@ endif() # Find OpenCL ############################################################################### -option(OpenCL_ENABLE "Enable or disable OpenCL spport (AMD GPU support)" ON) +option(OpenCL_ENABLE "Enable or disable OpenCL spport (AMD GPU support)" OFF) if(OpenCL_ENABLE) # try to find AMD OpenCL before NVIDIA OpenCL find_path(OpenCL_INCLUDE_DIR diff --git a/xmrstak/backend/cpu/cpuType.cpp b/xmrstak/backend/cpu/cpuType.cpp index 239fe4160..ff7ce74cb 100644 --- a/xmrstak/backend/cpu/cpuType.cpp +++ b/xmrstak/backend/cpu/cpuType.cpp @@ -107,5 +107,12 @@ Model getModel() return result; } +bool firstHasBMI2() +{ + int32_t cpu_info[4]; + cpuid(7, 0, cpu_info); + return has_feature(cpu_info[1], 8); +} + } // namespace cpu } // namespace xmrstak diff --git a/xmrstak/backend/cpu/cpuType.hpp b/xmrstak/backend/cpu/cpuType.hpp index 057d1bc74..f9f2e616c 100644 --- a/xmrstak/backend/cpu/cpuType.hpp +++ b/xmrstak/backend/cpu/cpuType.hpp @@ -27,5 +27,13 @@ Model getModel(); */ int32_t get_masked(int32_t val, int32_t h, int32_t l); +bool firstHasBMI2(); + +inline bool hasBMI2() +{ + static bool bmi2 = firstHasBMI2(); + return bmi2; +} + } // namespace cpu } // namespace xmrstak diff --git a/xmrstak/backend/cpu/crypto/cryptonight.h b/xmrstak/backend/cpu/crypto/cryptonight.h index 0b5ec04e2..83ea50c4d 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight.h +++ b/xmrstak/backend/cpu/crypto/cryptonight.h @@ -124,7 +124,7 @@ struct randomX_global_ctx } printer::inst()->print_msg(LDEBUG,"allocate dataset/cache for numa %u", uint32_t(numaId)); #ifdef __linux__ - randomx_dataset* dataset = randomx_alloc_dataset(static_cast(RANDOMX_FLAG_LARGE_PAGES | RANDOMX_FLAG_LARGE_PAGES_1G)); + randomx_dataset* dataset = randomx_alloc_dataset(static_cast(RANDOMX_FLAG_LARGE_PAGES | RANDOMX_FLAG_1GB_PAGES)); if (!dataset) { printer::inst()->print_msg(LDEBUG,"Warning: dataset allocation with 1 GiB pages failed"); diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index 10b590bf0..0201ddf93 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -150,6 +150,10 @@ struct RandomX_generator randomx_apply_config(RandomX_WowneroConfig); else if(ALGO == randomX_arqma) randomx_apply_config(RandomX_ArqmaConfig); + else if(ALGO == randomX_safex) + randomx_apply_config(RandomX_SafexConfig); + else if(ALGO == randomX_keva) + randomx_apply_config(RandomX_KevaConfig); } for(size_t i = 0; i < N; i++) diff --git a/xmrstak/backend/cpu/crypto/randomx/aes_hash.cpp b/xmrstak/backend/cpu/crypto/randomx/aes_hash.cpp index 4a400d0a8..1898a2c55 100644 --- a/xmrstak/backend/cpu/crypto/randomx/aes_hash.cpp +++ b/xmrstak/backend/cpu/crypto/randomx/aes_hash.cpp @@ -234,7 +234,7 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi rx_vec_i128 fill_state2 = rx_load_vec_i128((rx_vec_i128*)fill_state + 2); rx_vec_i128 fill_state3 = rx_load_vec_i128((rx_vec_i128*)fill_state + 3); - constexpr int PREFETCH_DISTANCE = 4096; + constexpr int PREFETCH_DISTANCE = 7168; const char* prefetchPtr = ((const char*)scratchpad) + PREFETCH_DISTANCE; scratchpadEnd -= PREFETCH_DISTANCE; @@ -258,8 +258,25 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi rx_prefetch_t0(prefetchPtr); - scratchpadPtr += 64; - prefetchPtr += 64; + hash_state0 = aesenc(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 4)); + hash_state1 = aesdec(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 5)); + hash_state2 = aesenc(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 6)); + hash_state3 = aesdec(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 7)); + + fill_state0 = aesdec(fill_state0, key0); + fill_state1 = aesenc(fill_state1, key1); + fill_state2 = aesdec(fill_state2, key2); + fill_state3 = aesenc(fill_state3, key3); + + rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 4, fill_state0); + rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 5, fill_state1); + rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 6, fill_state2); + rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 7, fill_state3); + + rx_prefetch_t0(prefetchPtr + 64); + + scratchpadPtr += 128; + prefetchPtr += 128; } prefetchPtr = (const char*) scratchpad; scratchpadEnd += PREFETCH_DISTANCE; diff --git a/xmrstak/backend/cpu/crypto/randomx/asm/program_epilogue_store.inc b/xmrstak/backend/cpu/crypto/randomx/asm/program_epilogue_store.inc index b94fa4d99..82067d191 100644 --- a/xmrstak/backend/cpu/crypto/randomx/asm/program_epilogue_store.inc +++ b/xmrstak/backend/cpu/crypto/randomx/asm/program_epilogue_store.inc @@ -1,4 +1,5 @@ ;# save VM register values + add rsp, 40 pop rcx mov qword ptr [rcx+0], r8 mov qword ptr [rcx+8], r9 diff --git a/xmrstak/backend/cpu/crypto/randomx/asm/program_loop_load.inc b/xmrstak/backend/cpu/crypto/randomx/asm/program_loop_load.inc index c29332313..5d8a84918 100644 --- a/xmrstak/backend/cpu/crypto/randomx/asm/program_loop_load.inc +++ b/xmrstak/backend/cpu/crypto/randomx/asm/program_loop_load.inc @@ -1,5 +1,5 @@ lea rcx, [rsi+rax] - push rcx + mov [rsp+16], rcx xor r8, qword ptr [rcx+0] xor r9, qword ptr [rcx+8] xor r10, qword ptr [rcx+16] @@ -9,7 +9,7 @@ xor r14, qword ptr [rcx+48] xor r15, qword ptr [rcx+56] lea rcx, [rsi+rdx] - push rcx + mov [rsp+24], rcx cvtdq2pd xmm0, qword ptr [rcx+0] cvtdq2pd xmm1, qword ptr [rcx+8] cvtdq2pd xmm2, qword ptr [rcx+16] @@ -18,11 +18,11 @@ cvtdq2pd xmm5, qword ptr [rcx+40] cvtdq2pd xmm6, qword ptr [rcx+48] cvtdq2pd xmm7, qword ptr [rcx+56] - andps xmm4, xmm13 - andps xmm5, xmm13 - andps xmm6, xmm13 - andps xmm7, xmm13 - orps xmm4, xmm14 - orps xmm5, xmm14 - orps xmm6, xmm14 - orps xmm7, xmm14 + andpd xmm4, xmm13 + andpd xmm5, xmm13 + andpd xmm6, xmm13 + andpd xmm7, xmm13 + orpd xmm4, xmm14 + orpd xmm5, xmm14 + orpd xmm6, xmm14 + orpd xmm7, xmm14 diff --git a/xmrstak/backend/cpu/crypto/randomx/asm/program_loop_load_xop.inc b/xmrstak/backend/cpu/crypto/randomx/asm/program_loop_load_xop.inc new file mode 100644 index 000000000..560559a0a --- /dev/null +++ b/xmrstak/backend/cpu/crypto/randomx/asm/program_loop_load_xop.inc @@ -0,0 +1,24 @@ + lea rcx, [rsi+rax] + mov [rsp+16], rcx + xor r8, qword ptr [rcx+0] + xor r9, qword ptr [rcx+8] + xor r10, qword ptr [rcx+16] + xor r11, qword ptr [rcx+24] + xor r12, qword ptr [rcx+32] + xor r13, qword ptr [rcx+40] + xor r14, qword ptr [rcx+48] + xor r15, qword ptr [rcx+56] + lea rcx, [rsi+rdx] + mov [rsp+24], rcx + cvtdq2pd xmm0, qword ptr [rcx+0] + cvtdq2pd xmm1, qword ptr [rcx+8] + cvtdq2pd xmm2, qword ptr [rcx+16] + cvtdq2pd xmm3, qword ptr [rcx+24] + cvtdq2pd xmm4, qword ptr [rcx+32] + cvtdq2pd xmm5, qword ptr [rcx+40] + cvtdq2pd xmm6, qword ptr [rcx+48] + cvtdq2pd xmm7, qword ptr [rcx+56] + vpcmov xmm4, xmm4, xmm14, xmm13 + vpcmov xmm5, xmm5, xmm14, xmm13 + vpcmov xmm6, xmm6, xmm14, xmm13 + vpcmov xmm7, xmm7, xmm14, xmm13 diff --git a/xmrstak/backend/cpu/crypto/randomx/asm/program_loop_store.inc b/xmrstak/backend/cpu/crypto/randomx/asm/program_loop_store.inc index 1ba1635c6..f579bb0c5 100644 --- a/xmrstak/backend/cpu/crypto/randomx/asm/program_loop_store.inc +++ b/xmrstak/backend/cpu/crypto/randomx/asm/program_loop_store.inc @@ -1,4 +1,4 @@ - pop rcx + mov rcx, [rsp+24] mov qword ptr [rcx+0], r8 mov qword ptr [rcx+8], r9 mov qword ptr [rcx+16], r10 @@ -7,7 +7,7 @@ mov qword ptr [rcx+40], r13 mov qword ptr [rcx+48], r14 mov qword ptr [rcx+56], r15 - pop rcx + mov rcx, [rsp+16] xorpd xmm0, xmm4 xorpd xmm1, xmm5 xorpd xmm2, xmm6 diff --git a/xmrstak/backend/cpu/crypto/randomx/asm/program_xmm_constants.inc b/xmrstak/backend/cpu/crypto/randomx/asm/program_xmm_constants.inc index 296237a45..cb4b5430d 100644 --- a/xmrstak/backend/cpu/crypto/randomx/asm/program_xmm_constants.inc +++ b/xmrstak/backend/cpu/crypto/randomx/asm/program_xmm_constants.inc @@ -1,5 +1,5 @@ mantissaMask: - db 255, 255, 255, 255, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 0 + db 0, 0, 192, 255, 255, 255, 255, 0, 0, 0, 192, 255, 255, 255, 255, 0 exp240: db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 scaleMask: diff --git a/xmrstak/backend/cpu/crypto/randomx/blake2/blake2b.c b/xmrstak/backend/cpu/crypto/randomx/blake2/blake2b.c index d2e028878..24305c3c0 100644 --- a/xmrstak/backend/cpu/crypto/randomx/blake2/blake2b.c +++ b/xmrstak/backend/cpu/crypto/randomx/blake2/blake2b.c @@ -45,7 +45,7 @@ static const uint64_t blake2b_IV[8] = { UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f), UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) }; -static const unsigned int blake2b_sigma[12][16] = { +static const uint8_t blake2b_sigma[12][16] = { {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, diff --git a/xmrstak/backend/cpu/crypto/randomx/configuration.h b/xmrstak/backend/cpu/crypto/randomx/configuration.h index 678cb2f8b..e51b2a92a 100644 --- a/xmrstak/backend/cpu/crypto/randomx/configuration.h +++ b/xmrstak/backend/cpu/crypto/randomx/configuration.h @@ -41,7 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define RANDOMX_DATASET_MAX_SIZE 2181038080 // Increase it if some configs use larger programs -#define RANDOMX_PROGRAM_MAX_SIZE 512 +#define RANDOMX_PROGRAM_MAX_SIZE 320 // Increase it if some configs use larger scratchpad #define RANDOMX_SCRATCHPAD_L3_MAX_SIZE 2097152 diff --git a/xmrstak/backend/cpu/crypto/randomx/jit_compiler_fallback.hpp b/xmrstak/backend/cpu/crypto/randomx/jit_compiler_fallback.hpp index bc3638589..225076637 100644 --- a/xmrstak/backend/cpu/crypto/randomx/jit_compiler_fallback.hpp +++ b/xmrstak/backend/cpu/crypto/randomx/jit_compiler_fallback.hpp @@ -44,7 +44,8 @@ namespace randomx { JitCompilerFallback() { throw std::runtime_error("JIT compilation is not supported on this platform"); } - void generateProgram(Program&, ProgramConfiguration&) { + void prepare() {} + void generateProgram(Program&, ProgramConfiguration&, uint32_t) { } void generateProgramLight(Program&, ProgramConfiguration&, uint32_t) { diff --git a/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86.cpp b/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86.cpp index ffc4b1ab8..6e92a48bd 100644 --- a/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86.cpp +++ b/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86.cpp @@ -85,118 +85,41 @@ namespace randomx { */ - const uint8_t* codePrefetchScratchpad = (uint8_t*)&randomx_prefetch_scratchpad; - const uint8_t* codePrefetchScratchpadEnd = (uint8_t*)&randomx_prefetch_scratchpad_end; - const uint8_t* codePrologue = (uint8_t*)&randomx_program_prologue; - const uint8_t* codeLoopBegin = (uint8_t*)&randomx_program_loop_begin; - const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load; - const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start; - const uint8_t* codeReadDataset = (uint8_t*)&randomx_program_read_dataset; - const uint8_t* codeReadDatasetLightSshInit = (uint8_t*)&randomx_program_read_dataset_sshash_init; - const uint8_t* codeReadDatasetLightSshFin = (uint8_t*)&randomx_program_read_dataset_sshash_fin; - const uint8_t* codeDatasetInit = (uint8_t*)&randomx_dataset_init; - const uint8_t* codeLoopStore = (uint8_t*)&randomx_program_loop_store; - const uint8_t* codeLoopEnd = (uint8_t*)&randomx_program_loop_end; - const uint8_t* codeEpilogue = (uint8_t*)&randomx_program_epilogue; - const uint8_t* codeProgramEnd = (uint8_t*)&randomx_program_end; - const uint8_t* codeShhLoad = (uint8_t*)&randomx_sshash_load; - const uint8_t* codeShhPrefetch = (uint8_t*)&randomx_sshash_prefetch; - const uint8_t* codeShhEnd = (uint8_t*)&randomx_sshash_end; - const uint8_t* codeShhInit = (uint8_t*)&randomx_sshash_init; - - const int32_t prefetchScratchpadSize = codePrefetchScratchpadEnd - codePrefetchScratchpad; - const int32_t prologueSize = codeLoopBegin - codePrologue; - const int32_t loopLoadSize = codeProgamStart - codeLoopLoad; - const int32_t readDatasetSize = codeReadDatasetLightSshInit - codeReadDataset; - const int32_t readDatasetLightInitSize = codeReadDatasetLightSshFin - codeReadDatasetLightSshInit; - const int32_t readDatasetLightFinSize = codeLoopStore - codeReadDatasetLightSshFin; - const int32_t loopStoreSize = codeLoopEnd - codeLoopStore; - const int32_t datasetInitSize = codeEpilogue - codeDatasetInit; - const int32_t epilogueSize = codeShhLoad - codeEpilogue; - const int32_t codeSshLoadSize = codeShhPrefetch - codeShhLoad; - const int32_t codeSshPrefetchSize = codeShhEnd - codeShhPrefetch; - const int32_t codeSshInitSize = codeProgramEnd - codeShhInit; - - const int32_t epilogueOffset = (CodeSize - epilogueSize) & ~63; - constexpr int32_t superScalarHashOffset = 32768; + #define codePrefetchScratchpad ((uint8_t*)&randomx_prefetch_scratchpad) + #define codePrefetchScratchpadEnd ((uint8_t*)&randomx_prefetch_scratchpad_end) + #define codePrologue ((uint8_t*)&randomx_program_prologue) + #define codeLoopBegin ((uint8_t*)&randomx_program_loop_begin) + #define codeLoopLoad ((uint8_t*)&randomx_program_loop_load) + #define codeLoopLoadXOP ((uint8_t*)&randomx_program_loop_load_xop) + #define codeProgamStart ((uint8_t*)&randomx_program_start) + #define codeReadDatasetLightSshInit ((uint8_t*)&randomx_program_read_dataset_sshash_init) + #define codeReadDatasetLightSshFin ((uint8_t*)&randomx_program_read_dataset_sshash_fin) + #define codeDatasetInit ((uint8_t*)&randomx_dataset_init) + #define codeLoopStore ((uint8_t*)&randomx_program_loop_store) + #define codeLoopEnd ((uint8_t*)&randomx_program_loop_end) + #define codeEpilogue ((uint8_t*)&randomx_program_epilogue) + #define codeProgramEnd ((uint8_t*)&randomx_program_end) + #define codeShhLoad ((uint8_t*)&randomx_sshash_load) + #define codeShhPrefetch ((uint8_t*)&randomx_sshash_prefetch) + #define codeShhEnd ((uint8_t*)&randomx_sshash_end) + #define codeShhInit ((uint8_t*)&randomx_sshash_init) + + #define prefetchScratchpadSize (codePrefetchScratchpadEnd - codePrefetchScratchpad) + #define prologueSize (codeLoopBegin - codePrologue) + #define loopLoadSize (codeLoopLoadXOP - codeLoopLoad) + #define loopLoadXOPSize (codeProgamStart - codeLoopLoadXOP) + #define readDatasetLightInitSize (codeReadDatasetLightSshFin - codeReadDatasetLightSshInit) + #define readDatasetLightFinSize (codeLoopStore - codeReadDatasetLightSshFin) + #define loopStoreSize (codeLoopEnd - codeLoopStore) + #define datasetInitSize (codeEpilogue - codeDatasetInit) + #define epilogueSize (codeShhLoad - codeEpilogue) + #define codeSshLoadSize (codeShhPrefetch - codeShhLoad) + #define codeSshPrefetchSize (codeShhEnd - codeShhPrefetch) + #define codeSshInitSize (codeProgramEnd - codeShhInit) + + #define epilogueOffset ((CodeSize - epilogueSize) & ~63) - static const uint8_t REX_ADD_RR[] = { 0x4d, 0x03 }; - static const uint8_t REX_ADD_RM[] = { 0x4c, 0x03 }; - static const uint8_t REX_SUB_RR[] = { 0x4d, 0x2b }; - static const uint8_t REX_SUB_RM[] = { 0x4c, 0x2b }; - static const uint8_t REX_MOV_RR[] = { 0x41, 0x8b }; - static const uint8_t REX_MOV_RR64[] = { 0x49, 0x8b }; - static const uint8_t REX_MOV_R64R[] = { 0x4c, 0x8b }; - static const uint8_t REX_IMUL_RR[] = { 0x4d, 0x0f, 0xaf }; - static const uint8_t REX_IMUL_RRI[] = { 0x4d, 0x69 }; - static const uint8_t REX_IMUL_RM[] = { 0x4c, 0x0f, 0xaf }; - static const uint8_t REX_MUL_R[] = { 0x49, 0xf7 }; - static const uint8_t REX_MUL_M[] = { 0x48, 0xf7 }; - static const uint8_t REX_81[] = { 0x49, 0x81 }; - static const uint8_t AND_EAX_I = 0x25; - static const uint8_t MOV_EAX_I = 0xb8; - static const uint8_t MOV_RAX_I[] = { 0x48, 0xb8 }; - static const uint8_t MOV_RCX_I[] = { 0x48, 0xb9 }; - static const uint8_t REX_LEA[] = { 0x4f, 0x8d }; - static const uint8_t REX_MUL_MEM[] = { 0x48, 0xf7, 0x24, 0x0e }; - static const uint8_t REX_IMUL_MEM[] = { 0x48, 0xf7, 0x2c, 0x0e }; - static const uint8_t REX_SHR_RAX[] = { 0x48, 0xc1, 0xe8 }; - static const uint8_t RAX_ADD_SBB_1[] = { 0x48, 0x83, 0xC0, 0x01, 0x48, 0x83, 0xD8, 0x00 }; - static const uint8_t MUL_RCX[] = { 0x48, 0xf7, 0xe1 }; - static const uint8_t REX_SHR_RDX[] = { 0x48, 0xc1, 0xea }; - static const uint8_t REX_SH[] = { 0x49, 0xc1 }; - static const uint8_t MOV_RCX_RAX_SAR_RCX_63[] = { 0x48, 0x89, 0xc1, 0x48, 0xc1, 0xf9, 0x3f }; - static const uint8_t AND_ECX_I[] = { 0x81, 0xe1 }; - static const uint8_t ADD_RAX_RCX[] = { 0x48, 0x01, 0xC8 }; - static const uint8_t SAR_RAX_I8[] = { 0x48, 0xC1, 0xF8 }; - static const uint8_t NEG_RAX[] = { 0x48, 0xF7, 0xD8 }; - static const uint8_t ADD_R_RAX[] = { 0x4C, 0x03 }; - static const uint8_t XOR_EAX_EAX[] = { 0x33, 0xC0 }; - static const uint8_t ADD_RDX_R[] = { 0x4c, 0x01 }; - static const uint8_t SUB_RDX_R[] = { 0x4c, 0x29 }; - static const uint8_t SAR_RDX_I8[] = { 0x48, 0xC1, 0xFA }; - static const uint8_t TEST_RDX_RDX[] = { 0x48, 0x85, 0xD2 }; - static const uint8_t SETS_AL_ADD_RDX_RAX[] = { 0x0F, 0x98, 0xC0, 0x48, 0x03, 0xD0 }; - static const uint8_t REX_NEG[] = { 0x49, 0xF7 }; - static const uint8_t REX_XOR_RR[] = { 0x4D, 0x33 }; - static const uint8_t REX_XOR_RI[] = { 0x49, 0x81 }; - static const uint8_t REX_XOR_RM[] = { 0x4c, 0x33 }; - static const uint8_t REX_ROT_CL[] = { 0x49, 0xd3 }; - static const uint8_t REX_ROT_I8[] = { 0x49, 0xc1 }; - static const uint8_t SHUFPD[] = { 0x66, 0x0f, 0xc6 }; - static const uint8_t REX_ADDPD[] = { 0x66, 0x41, 0x0f, 0x58 }; - static const uint8_t REX_CVTDQ2PD_XMM12[] = { 0xf3, 0x44, 0x0f, 0xe6, 0x24, 0x06 }; - static const uint8_t REX_SUBPD[] = { 0x66, 0x41, 0x0f, 0x5c }; - static const uint8_t REX_XORPS[] = { 0x41, 0x0f, 0x57 }; - static const uint8_t REX_MULPD[] = { 0x66, 0x41, 0x0f, 0x59 }; - static const uint8_t REX_MAXPD[] = { 0x66, 0x41, 0x0f, 0x5f }; - static const uint8_t REX_DIVPD[] = { 0x66, 0x41, 0x0f, 0x5e }; - static const uint8_t SQRTPD[] = { 0x66, 0x0f, 0x51 }; - static const uint8_t AND_OR_MOV_LDMXCSR[] = { 0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x89, 0x44, 0x24, 0xFC, 0x0F, 0xAE, 0x54, 0x24, 0xFC }; - static const uint8_t ROL_RAX[] = { 0x48, 0xc1, 0xc0 }; - static const uint8_t XOR_ECX_ECX[] = { 0x33, 0xC9 }; - static const uint8_t REX_CMP_R32I[] = { 0x41, 0x81 }; - static const uint8_t REX_CMP_M32I[] = { 0x81, 0x3c, 0x06 }; - static const uint8_t MOVAPD[] = { 0x66, 0x0f, 0x29 }; - static const uint8_t REX_MOV_MR[] = { 0x4c, 0x89 }; - static const uint8_t REX_XOR_EAX[] = { 0x41, 0x33 }; - static const uint8_t SUB_EBX[] = { 0x83, 0xEB, 0x01 }; - static const uint8_t JNZ[] = { 0x0f, 0x85 }; - static const uint8_t JMP = 0xe9; - static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 }; - static const uint8_t REX_XCHG[] = { 0x4d, 0x87 }; - static const uint8_t REX_ANDPS_XMM12[] = { 0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6 }; - static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f }; - static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 }; - static const uint8_t CALL = 0xe8; - static const uint8_t REX_ADD_I[] = { 0x49, 0x81 }; - static const uint8_t REX_TEST[] = { 0x49, 0xF7 }; - static const uint8_t JZ[] = { 0x0f, 0x84 }; - static const uint8_t JZ_SHORT = 0x74; - static const uint8_t RET = 0xc3; - static const uint8_t LEA_32[] = { 0x41, 0x8d }; - static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 }; - static const uint8_t ADD_EBX_I[] = { 0x81, 0xc3 }; + constexpr int32_t superScalarHashOffset = 32768; static const uint8_t NOP1[] = { 0x90 }; static const uint8_t NOP2[] = { 0x66, 0x90 }; @@ -241,14 +164,8 @@ namespace randomx { # endif } - std::atomic JitCompilerX86::flags_set(0); - uint64_t JitCompilerX86::flags = 0; // CPU-specific tweaks void JitCompilerX86::applyTweaks() { - - if(flags_set.fetch_add(1) != 0) - return; - int32_t info[4]; cpuid(0, info); @@ -258,45 +175,36 @@ namespace randomx { manufacturer[2] = info[2]; manufacturer[3] = 0; - struct - { - unsigned int stepping : 4; - unsigned int model : 4; - unsigned int family : 4; - unsigned int processor_type : 2; - unsigned int reserved1 : 2; - unsigned int ext_model : 4; - unsigned int ext_family : 8; - unsigned int reserved2 : 4; - } processor_info; - - cpuid(1, info); - memcpy(&processor_info, info, sizeof(processor_info)); - if (strcmp((const char*)manufacturer, "GenuineIntel") == 0) { + struct + { + unsigned int stepping : 4; + unsigned int model : 4; + unsigned int family : 4; + unsigned int processor_type : 2; + unsigned int reserved1 : 2; + unsigned int ext_model : 4; + unsigned int ext_family : 8; + unsigned int reserved2 : 4; + } processor_info; + + cpuid(1, info); + memcpy(&processor_info, info, sizeof(processor_info)); + // Intel JCC erratum mitigation if (processor_info.family == 6) { const uint32_t model = processor_info.model | (processor_info.ext_model << 4); const uint32_t stepping = processor_info.stepping; // Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf - set_flag(BRANCHES_WITHIN_32B, + BranchesWithin32B = ((model == 0x4E) && (stepping == 0x3)) || ((model == 0x55) && (stepping == 0x4)) || ((model == 0x5E) && (stepping == 0x3)) || ((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) || ((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) || ((model == 0xA6) && (stepping == 0x0)) || - ((model == 0xAE) && (stepping == 0xA))); - } - load_win_msrs({ { 0x1a4, 7 } }); - } - - if (strcmp((const char*)manufacturer, "AuthenticAMD") == 0) { - if(processor_info.family == 0x17) - { - set_flag(AMD_RYZEN_FAMILY, true); - load_win_msrs({ { 0xc0011022, 0x510000 }, { 0xc001102b, 0x1808cc16}, { 0xc0011020, 0 } }); + ((model == 0xAE) && (stepping == 0xA)); } } } @@ -305,23 +213,53 @@ namespace randomx { JitCompilerX86::JitCompilerX86() { applyTweaks(); + + int32_t info[4]; + cpuid(1, info); + hasAVX = ((info[2] & (1 << 27)) != 0) && ((info[2] & (1 << 28)) != 0); + + cpuid(0x80000001, info); + hasXOP = ((info[2] & (1 << 11)) != 0); + allocatedCode = (uint8_t*)allocExecutableMemory(CodeSize * 2); // Shift code base address to improve caching - all threads will use different L2/L3 cache sets code = allocatedCode + (codeOffset.fetch_add(59 * 64) % CodeSize); memcpy(code, codePrologue, prologueSize); + if (hasXOP) { + memcpy(code + prologueSize, codeLoopLoadXOP, loopLoadXOPSize); + } + else { + memcpy(code + prologueSize, codeLoopLoad, loopLoadSize); + } memcpy(code + epilogueOffset, codeEpilogue, epilogueSize); + + codePosFirst = prologueSize + (hasXOP ? loopLoadXOPSize : loopLoadSize); + +# ifdef XMRIG_FIX_RYZEN + mainLoopBounds.first = code + prologueSize; + mainLoopBounds.second = code + epilogueOffset; +# endif } JitCompilerX86::~JitCompilerX86() { freePagedMemory(allocatedCode, CodeSize); } - void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg) { + void JitCompilerX86::prepare() { + for (size_t i = 0; i < sizeof(engine); i += 64) + rx_prefetch_nta((const char*)(&engine) + i); + for (size_t i = 0; i < sizeof(RandomX_CurrentConfig); i += 64) + rx_prefetch_nta((const char*)(&RandomX_CurrentConfig) + i); + } + + void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) { + vm_flags = flags; + generateProgramPrologue(prog, pcfg); - + uint8_t* p; uint32_t n; - if (check_flag(AMD_RYZEN_FAMILY)) { + if (flags & RANDOMX_FLAG_AMD) { p = RandomX_CurrentConfig.codeReadDatasetRyzenTweaked; n = RandomX_CurrentConfig.codeReadDatasetRyzenTweakedSize; } @@ -331,16 +269,17 @@ namespace randomx { } memcpy(code + codePos, p, n); codePos += n; - + generateProgramEpilogue(prog, pcfg); } void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg, uint32_t datasetOffset) { generateProgramPrologue(prog, pcfg); emit(RandomX_CurrentConfig.codeReadDatasetLightSshInitTweaked, readDatasetLightInitSize, code, codePos); - emit(ADD_EBX_I, code, codePos); + *(uint32_t*)(code + codePos) = 0xc381; + codePos += 2; emit32(datasetOffset / CacheLineSize, code, codePos); - emitByte(CALL, code, codePos); + emitByte(0xe8, code, codePos); emit32(superScalarHashOffset - (codePos + 4), code, codePos); emit(codeReadDatasetLightSshFin, readDatasetLightFinSize, code, codePos); generateProgramEpilogue(prog, pcfg); @@ -358,21 +297,12 @@ namespace randomx { } emit(codeShhLoad, codeSshLoadSize, code, codePos); if (j < RandomX_CurrentConfig.CacheAccesses - 1) { - emit(REX_MOV_RR64, code, codePos); - emitByte(0xd8 + prog.getAddressRegister(), code, codePos); + *(uint32_t*)(code + codePos) = 0xd88b49 + (static_cast(prog.getAddressRegister()) << 16); + codePos += 3; emit(RandomX_CurrentConfig.codeShhPrefetchTweaked, codeSshPrefetchSize, code, codePos); -#ifdef RANDOMX_ALIGN - int align = (codePos % 16); - while (align != 0) { - int nopSize = 16 - align; - if (nopSize > 8) nopSize = 8; - emit(NOPX[nopSize - 1], nopSize, code, codePos); - align = (codePos % 16); - } -#endif } } - emitByte(RET, code, codePos); + emitByte(0xc3, code, codePos); } template @@ -388,11 +318,17 @@ namespace randomx { code[codePos + 5] = 0xc0 + pcfg.readReg1; *(uint32_t*)(code + codePos + 10) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated; *(uint32_t*)(code + codePos + 20) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated; + if (hasAVX) { + uint32_t* p = (uint32_t*)(code + codePos + 67); + *p = (*p & 0xFF000000U) | 0x0077F8C5U; + } + +# ifdef XMRIG_FIX_RYZEN + xmrig::Rx::setMainLoopBounds(mainLoopBounds); +# endif - codePos = prologueSize; - memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask)); - memcpy(code + codePos, codeLoopLoad, loopLoadSize); - codePos += loopLoadSize; + memcpy(code + prologueSize - 48, &pcfg.eMask, sizeof(pcfg.eMask)); + codePos = codePosFirst; //mark all registers as used uint64_t* r = (uint64_t*)registerUsage; @@ -402,28 +338,43 @@ namespace randomx { r[j] = k; } - for (int i = 0, n = static_cast(RandomX_CurrentConfig.ProgramSize); i < n; ++i) { - Instruction instr = prog(i); - *((uint64_t*)&instr) &= (uint64_t(-1) - (0xFFFF << 8)) | ((RegistersCount - 1) << 8) | ((RegistersCount - 1) << 16); - (this->*(engine[instr.opcode]))(instr); + constexpr uint64_t instr_mask = (uint64_t(-1) - (0xFFFF << 8)) | ((RegistersCount - 1) << 8) | ((RegistersCount - 1) << 16); + for (int i = 0, n = static_cast(RandomX_CurrentConfig.ProgramSize); i < n; i += 4) { + Instruction& instr1 = prog(i); + Instruction& instr2 = prog(i + 1); + Instruction& instr3 = prog(i + 2); + Instruction& instr4 = prog(i + 3); + + InstructionGeneratorX86 gen1 = engine[instr1.opcode]; + InstructionGeneratorX86 gen2 = engine[instr2.opcode]; + InstructionGeneratorX86 gen3 = engine[instr3.opcode]; + InstructionGeneratorX86 gen4 = engine[instr4.opcode]; + + *((uint64_t*)&instr1) &= instr_mask; + (this->*gen1)(instr1); + + *((uint64_t*)&instr2) &= instr_mask; + (this->*gen2)(instr2); + + *((uint64_t*)&instr3) &= instr_mask; + (this->*gen3)(instr3); + + *((uint64_t*)&instr4) &= instr_mask; + (this->*gen4)(instr4); } - emit(REX_MOV_RR, code, codePos); - emitByte(0xc0 + pcfg.readReg2, code, codePos); - emit(REX_XOR_EAX, code, codePos); - emitByte(0xc0 + pcfg.readReg3, code, codePos); + *(uint64_t*)(code + codePos) = 0xc03341c08b41ull + (static_cast(pcfg.readReg2) << 16) + (static_cast(pcfg.readReg3) << 40); + codePos += 6; } void JitCompilerX86::generateProgramEpilogue(Program& prog, ProgramConfiguration& pcfg) { - emit(REX_MOV_RR64, code, codePos); - emitByte(0xc0 + pcfg.readReg0, code, codePos); - emit(REX_XOR_RAX_R64, code, codePos); - emitByte(0xc0 + pcfg.readReg1, code, codePos); + *(uint64_t*)(code + codePos) = 0xc03349c08b49ull + (static_cast(pcfg.readReg0) << 16) + (static_cast(pcfg.readReg1) << 40); + codePos += 6; emit(RandomX_CurrentConfig.codePrefetchScratchpadTweaked, prefetchScratchpadSize, code, codePos); memcpy(code + codePos, codeLoopStore, loopStoreSize); codePos += loopStoreSize; - if (check_flag(BRANCHES_WITHIN_32B)) { + if (BranchesWithin32B) { const uint32_t branch_begin = static_cast(codePos); const uint32_t branch_end = static_cast(branch_begin + 9); @@ -438,14 +389,27 @@ namespace randomx { } } - emit(SUB_EBX, code, codePos); - emit(JNZ, code, codePos); + *(uint64_t*)(code + codePos) = 0x850f01eb83ull; + codePos += 5; emit32(prologueSize - codePos - 4, code, codePos); - emitByte(JMP, code, codePos); + emitByte(0xe9, code, codePos); emit32(epilogueOffset - codePos - 4, code, codePos); } void JitCompilerX86::generateSuperscalarCode(Instruction& instr, std::vector &reciprocalCache) { + static constexpr uint8_t REX_SUB_RR[] = { 0x4d, 0x2b }; + static constexpr uint8_t REX_MOV_RR64[] = { 0x49, 0x8b }; + static constexpr uint8_t REX_MOV_R64R[] = { 0x4c, 0x8b }; + static constexpr uint8_t REX_IMUL_RR[] = { 0x4d, 0x0f, 0xaf }; + static constexpr uint8_t REX_IMUL_RM[] = { 0x4c, 0x0f, 0xaf }; + static constexpr uint8_t REX_MUL_R[] = { 0x49, 0xf7 }; + static constexpr uint8_t REX_81[] = { 0x49, 0x81 }; + static constexpr uint8_t MOV_RAX_I[] = { 0x48, 0xb8 }; + static constexpr uint8_t REX_LEA[] = { 0x4f, 0x8d }; + static constexpr uint8_t REX_XOR_RR[] = { 0x4D, 0x33 }; + static constexpr uint8_t REX_XOR_RI[] = { 0x49, 0x81 }; + static constexpr uint8_t REX_ROT_I8[] = { 0x49, 0xc1 }; + switch ((SuperscalarInstructionType)instr.opcode) { case randomx::SuperscalarInstructionType::ISUB_R: @@ -484,33 +448,21 @@ namespace randomx { emit(REX_81, code, codePos); emitByte(0xc0 + instr.dst, code, codePos); emit32(instr.getImm32(), code, codePos); -#ifdef RANDOMX_ALIGN - emit(NOP1, code, codePos); -#endif break; case randomx::SuperscalarInstructionType::IXOR_C8: emit(REX_XOR_RI, code, codePos); emitByte(0xf0 + instr.dst, code, codePos); emit32(instr.getImm32(), code, codePos); -#ifdef RANDOMX_ALIGN - emit(NOP1, code, codePos); -#endif break; case randomx::SuperscalarInstructionType::IADD_C9: emit(REX_81, code, codePos); emitByte(0xc0 + instr.dst, code, codePos); emit32(instr.getImm32(), code, codePos); -#ifdef RANDOMX_ALIGN - emit(NOP2, code, codePos); -#endif break; case randomx::SuperscalarInstructionType::IXOR_C9: emit(REX_XOR_RI, code, codePos); emitByte(0xf0 + instr.dst, code, codePos); emit32(instr.getImm32(), code, codePos); -#ifdef RANDOMX_ALIGN - emit(NOP2, code, codePos); -#endif break; case randomx::SuperscalarInstructionType::IMULH_R: emit(REX_MOV_RR64, code, codePos); @@ -540,30 +492,33 @@ namespace randomx { } template - FORCE_INLINE void JitCompilerX86::genAddressReg(const Instruction& instr, uint8_t* code, int& codePos) { - const uint32_t src = *((uint32_t*)&instr) & 0xFF0000; + FORCE_INLINE void JitCompilerX86::genAddressReg(const Instruction& instr, const uint32_t src, uint8_t* code, uint32_t& codePos) { + *(uint32_t*)(code + codePos) = (rax ? 0x24808d41 : 0x24888d41) + (src << 16); - *(uint32_t*)(code + codePos) = (rax ? 0x24808d41 : 0x24888d41) + src; - codePos += (src == (RegisterNeedsSib << 16)) ? 4 : 3; + constexpr uint32_t add_table = 0x33333333u + (1u << (RegisterNeedsSib * 4)); + codePos += (add_table >> (src * 4)) & 0xf; emit32(instr.getImm32(), code, codePos); - if (rax) - emitByte(AND_EAX_I, code, codePos); - else - emit(AND_ECX_I, code, codePos); + if (rax) { + emitByte(0x25, code, codePos); + } + else { + *(uint32_t*)(code + codePos) = 0xe181; + codePos += 2; + } emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask, code, codePos); } - template void JitCompilerX86::genAddressReg(const Instruction& instr, uint8_t* code, int& codePos); - template void JitCompilerX86::genAddressReg(const Instruction& instr, uint8_t* code, int& codePos); + template void JitCompilerX86::genAddressReg(const Instruction& instr, const uint32_t src, uint8_t* code, uint32_t& codePos); + template void JitCompilerX86::genAddressReg(const Instruction& instr, const uint32_t src, uint8_t* code, uint32_t& codePos); - FORCE_INLINE void JitCompilerX86::genAddressRegDst(const Instruction& instr, uint8_t* code, int& codePos) { + FORCE_INLINE void JitCompilerX86::genAddressRegDst(const Instruction& instr, uint8_t* code, uint32_t& codePos) { const uint32_t dst = static_cast(instr.dst) << 16; *(uint32_t*)(code + codePos) = 0x24808d41 + dst; codePos += (dst == (RegisterNeedsSib << 16)) ? 4 : 3; emit32(instr.getImm32(), code, codePos); - emitByte(AND_EAX_I, code, codePos); + emitByte(0x25, code, codePos); if (instr.getModCond() < StoreL3Condition) { emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask, code, codePos); } @@ -572,233 +527,270 @@ namespace randomx { } } - FORCE_INLINE void JitCompilerX86::genAddressImm(const Instruction& instr, uint8_t* code, int& codePos) { + FORCE_INLINE void JitCompilerX86::genAddressImm(const Instruction& instr, uint8_t* code, uint32_t& codePos) { emit32(instr.getImm32() & ScratchpadL3Mask, code, codePos); } - static const uint32_t template_IADD_RS[8] = { - 0x048d4f, - 0x0c8d4f, - 0x148d4f, - 0x1c8d4f, - 0x248d4f, - 0xac8d4f, - 0x348d4f, - 0x3c8d4f, - }; - void JitCompilerX86::h_IADD_RS(const Instruction& instr) { - int pos = codePos; + uint32_t pos = codePos; uint8_t* const p = code + pos; - const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | instr.dst; - *(uint32_t*)(p) = template_IADD_RS[instr.dst] | (sib << 24); + const uint32_t dst = instr.dst; + const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | dst; + + uint32_t k = 0x048d4f + (dst << 19); + if (dst == RegisterNeedsDisplacement) + k = 0xac8d4f; + + *(uint32_t*)(p) = k | (sib << 24); *(uint32_t*)(p + 4) = instr.getImm32(); - pos += ((instr.dst == RegisterNeedsDisplacement) ? 8 : 4); + pos += ((dst == RegisterNeedsDisplacement) ? 8 : 4); - registerUsage[instr.dst] = pos; + registerUsage[dst] = pos; codePos = pos; } - static const uint32_t template_IADD_M[8] = { - 0x0604034c, - 0x060c034c, - 0x0614034c, - 0x061c034c, - 0x0624034c, - 0x062c034c, - 0x0634034c, - 0x063c034c, - }; - void JitCompilerX86::h_IADD_M(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; - - if (instr.src != instr.dst) { - genAddressReg(instr, p, pos); - emit32(template_IADD_M[instr.dst], p, pos); + uint32_t pos = codePos; + + const uint32_t src = instr.src; + const uint32_t dst = instr.dst; + + if (src != dst) { + genAddressReg(instr, src, p, pos); + emit32(0x0604034c + (dst << 19), p, pos); } else { - emit(REX_ADD_RM, p, pos); - emitByte(0x86 + 8 * instr.dst, p, pos); + *(uint32_t*)(p + pos) = 0x86034c + (dst << 19); + pos += 3; genAddressImm(instr, p, pos); } - registerUsage[instr.dst] = pos; + registerUsage[dst] = pos; codePos = pos; } - void JitCompilerX86::genSIB(int scale, int index, int base, uint8_t* code, int& codePos) { + void JitCompilerX86::genSIB(int scale, int index, int base, uint8_t* code, uint32_t& codePos) { emitByte((scale << 6) | (index << 3) | base, code, codePos); } void JitCompilerX86::h_ISUB_R(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; + uint32_t pos = codePos; - if (instr.src != instr.dst) { - emit(REX_SUB_RR, p, pos); - emitByte(0xc0 + 8 * instr.dst + instr.src, p, pos); + const uint32_t src = instr.src; + const uint32_t dst = instr.dst; + + if (src != dst) { + *(uint32_t*)(p + pos) = 0xc02b4d + (dst << 19) + (src << 16); + pos += 3; } else { - emit(REX_81, p, pos); - emitByte(0xe8 + instr.dst, p, pos); + *(uint32_t*)(p + pos) = 0xe88149 + (dst << 16); + pos += 3; emit32(instr.getImm32(), p, pos); } - registerUsage[instr.dst] = pos; + registerUsage[dst] = pos; codePos = pos; } void JitCompilerX86::h_ISUB_M(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; - - if (instr.src != instr.dst) { - genAddressReg(instr, p, pos); - emit(REX_SUB_RM, p, pos); - emitByte(0x04 + 8 * instr.dst, p, pos); - emitByte(0x06, p, pos); + uint32_t pos = codePos; + + const uint32_t src = instr.src; + const uint32_t dst = instr.dst; + + if (src != dst) { + genAddressReg(instr, src, p, pos); + emit32(0x06042b4c + (dst << 19), p, pos); } else { - emit(REX_SUB_RM, p, pos); - emitByte(0x86 + 8 * instr.dst, p, pos); + *(uint32_t*)(p + pos) = 0x862b4c + (dst << 19); + pos += 3; genAddressImm(instr, p, pos); } - registerUsage[instr.dst] = pos; + registerUsage[dst] = pos; codePos = pos; } void JitCompilerX86::h_IMUL_R(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; - - if (instr.src != instr.dst) { - emit(REX_IMUL_RR, p, pos); - emitByte(0xc0 + 8 * instr.dst + instr.src, p, pos); + uint32_t pos = codePos; + + const uint32_t src = instr.src; + const uint32_t dst = instr.dst; + + if (src != dst) { + emit32(0xc0af0f4d + ((dst * 8 + src) << 24), p, pos); } else { - emit(REX_IMUL_RRI, p, pos); - emitByte(0xc0 + 9 * instr.dst, p, pos); + *(uint32_t*)(p + pos) = 0xc0694d + (((dst << 3) + dst) << 16); + pos += 3; emit32(instr.getImm32(), p, pos); } - registerUsage[instr.dst] = pos; + registerUsage[dst] = pos; codePos = pos; } void JitCompilerX86::h_IMUL_M(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; - - if (instr.src != instr.dst) { - genAddressReg(instr, p, pos); - emit(REX_IMUL_RM, p, pos); - emitByte(0x04 + 8 * instr.dst, p, pos); - emitByte(0x06, p, pos); + uint32_t pos = codePos; + + const uint64_t src = instr.src; + const uint64_t dst = instr.dst; + + if (src != dst) { + genAddressReg(instr, src, p, pos); + *(uint64_t*)(p + pos) = 0x0604af0f4cull + (dst << 27); + pos += 5; } else { - emit(REX_IMUL_RM, p, pos); - emitByte(0x86 + 8 * instr.dst, p, pos); + emit32(0x86af0f4c + (dst << 27), p, pos); genAddressImm(instr, p, pos); } - registerUsage[instr.dst] = pos; + registerUsage[dst] = pos; codePos = pos; } void JitCompilerX86::h_IMULH_R(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; + uint32_t pos = codePos; + + const uint32_t src = instr.src; + const uint32_t dst = instr.dst; - emit(REX_MOV_RR64, p, pos); - emitByte(0xc0 + instr.dst, p, pos); - emit(REX_MUL_R, p, pos); - emitByte(0xe0 + instr.src, p, pos); - emit(REX_MOV_R64R, p, pos); - emitByte(0xc2 + 8 * instr.dst, p, pos); + *(uint32_t*)(p + pos) = 0xc08b49 + (dst << 16); + *(uint32_t*)(p + pos + 3) = 0xe0f749 + (src << 16); + *(uint32_t*)(p + pos + 6) = 0xc28b4c + (dst << 19); + pos += 9; - registerUsage[instr.dst] = pos; + registerUsage[dst] = pos; + codePos = pos; + } + + void JitCompilerX86::h_IMULH_R_BMI2(const Instruction& instr) { + uint8_t* const p = code; + uint32_t pos = codePos; + + const uint32_t src = instr.src; + const uint32_t dst = instr.dst; + + *(uint32_t*)(p + pos) = 0xC4D08B49 + (dst << 16); + *(uint32_t*)(p + pos + 4) = 0xC0F6FB42 + (dst << 27) + (src << 24); + pos += 8; + + registerUsage[dst] = pos; codePos = pos; } void JitCompilerX86::h_IMULH_M(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; - - if (instr.src != instr.dst) { - genAddressReg(instr, p, pos); - emit(REX_MOV_RR64, p, pos); - emitByte(0xc0 + instr.dst, p, pos); - emit(REX_MUL_MEM, p, pos); + uint32_t pos = codePos; + + const uint64_t src = instr.src; + const uint64_t dst = instr.dst; + + if (src != dst) { + genAddressReg(instr, src, p, pos); + *(uint64_t*)(p + pos) = 0x0e24f748c08b49ull + (dst << 16); + pos += 7; } else { - emit(REX_MOV_RR64, p, pos); - emitByte(0xc0 + instr.dst, p, pos); - emit(REX_MUL_M, p, pos); - emitByte(0xa6, p, pos); + *(uint64_t*)(p + pos) = 0xa6f748c08b49ull + (dst << 16); + pos += 6; genAddressImm(instr, p, pos); } - emit(REX_MOV_R64R, p, pos); - emitByte(0xc2 + 8 * instr.dst, p, pos); + *(uint32_t*)(p + pos) = 0xc28b4c + (dst << 19); + pos += 3; + + registerUsage[dst] = pos; + codePos = pos; + } + + void JitCompilerX86::h_IMULH_M_BMI2(const Instruction& instr) { + uint8_t* const p = code; + uint32_t pos = codePos; + + const uint64_t src = instr.src; + const uint64_t dst = instr.dst; + + if (src != dst) { + genAddressReg(instr, src, p, pos); + *(uint32_t*)(p + pos) = static_cast(0xC4D08B49 + (dst << 16)); + *(uint64_t*)(p + pos + 4) = 0x0E04F6FB62ULL + (dst << 27); + pos += 9; + } + else { + *(uint64_t*)(p + pos) = 0x86F6FB62C4D08B49ULL + (dst << 16) + (dst << 59); + *(uint32_t*)(p + pos + 8) = instr.getImm32() & ScratchpadL3Mask; + pos += 12; + } - registerUsage[instr.dst] = pos; + registerUsage[dst] = pos; codePos = pos; } void JitCompilerX86::h_ISMULH_R(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; - - emit(REX_MOV_RR64, p, pos); - emitByte(0xc0 + instr.dst, p, pos); - emit(REX_MUL_R, p, pos); - emitByte(0xe8 + instr.src, p, pos); - emit(REX_MOV_R64R, p, pos); - emitByte(0xc2 + 8 * instr.dst, p, pos); - - registerUsage[instr.dst] = pos; + uint32_t pos = codePos; + + const uint64_t src = instr.src; + const uint64_t dst = instr.dst; + + *(uint64_t*)(p + pos) = 0x8b4ce8f749c08b49ull + (dst << 16) + (src << 40); + pos += 8; + emitByte(0xc2 + 8 * dst, p, pos); + + registerUsage[dst] = pos; codePos = pos; } void JitCompilerX86::h_ISMULH_M(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; - - if (instr.src != instr.dst) { - genAddressReg(instr, p, pos); - emit(REX_MOV_RR64, p, pos); - emitByte(0xc0 + instr.dst, p, pos); - emit(REX_IMUL_MEM, p, pos); + uint32_t pos = codePos; + + const uint64_t src = instr.src; + const uint64_t dst = instr.dst; + + if (src != dst) { + genAddressReg(instr, src, p, pos); + *(uint64_t*)(p + pos) = 0x0e2cf748c08b49ull + (dst << 16); + pos += 7; } else { - emit(REX_MOV_RR64, p, pos); - emitByte(0xc0 + instr.dst, p, pos); - emit(REX_MUL_M, p, pos); - emitByte(0xae, p, pos); + *(uint64_t*)(p + pos) = 0xaef748c08b49ull + (dst << 16); + pos += 6; genAddressImm(instr, p, pos); } - emit(REX_MOV_R64R, p, pos); - emitByte(0xc2 + 8 * instr.dst, p, pos); + *(uint32_t*)(p + pos) = 0xc28b4c + (dst << 19); + pos += 3; - registerUsage[instr.dst] = pos; + registerUsage[dst] = pos; codePos = pos; } void JitCompilerX86::h_IMUL_RCP(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; + uint32_t pos = codePos; uint64_t divisor = instr.getImm32(); if (!isZeroOrPowerOf2(divisor)) { - emit(MOV_RAX_I, p, pos); + *(uint32_t*)(p + pos) = 0xb848; + pos += 2; + emit64(randomx_reciprocal_fast(divisor), p, pos); - emit(REX_IMUL_RM, p, pos); - emitByte(0xc0 + 8 * instr.dst, p, pos); - registerUsage[instr.dst] = pos; + + const uint32_t dst = instr.dst; + emit32(0xc0af0f4c + (dst << 27), p, pos); + + registerUsage[dst] = pos; } codePos = pos; @@ -806,102 +798,112 @@ namespace randomx { void JitCompilerX86::h_INEG_R(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; - - emit(REX_NEG, p, pos); - emitByte(0xd8 + instr.dst, p, pos); + uint32_t pos = codePos; - registerUsage[instr.dst] = pos; + const uint32_t dst = instr.dst; + *(uint32_t*)(p + pos) = 0xd8f749 + (dst << 16); + pos += 3; + + registerUsage[dst] = pos; codePos = pos; } void JitCompilerX86::h_IXOR_R(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; - - if (instr.src != instr.dst) { - emit(REX_XOR_RR, p, pos); - emitByte(0xc0 + 8 * instr.dst + instr.src, p, pos); + uint32_t pos = codePos; + + const uint64_t src = instr.src; + const uint64_t dst = instr.dst; + + if (src != dst) { + *(uint32_t*)(p + pos) = 0xc0334d + (((dst << 3) + src) << 16); + pos += 3; } else { - emit(REX_XOR_RI, p, pos); - emitByte(0xf0 + instr.dst, p, pos); - emit32(instr.getImm32(), p, pos); + const uint64_t imm = instr.getImm32(); + *(uint64_t*)(p + pos) = (imm << 24) + 0xf08149 + (dst << 16); + pos += 7; } - registerUsage[instr.dst] = pos; + registerUsage[dst] = pos; codePos = pos; } void JitCompilerX86::h_IXOR_M(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; - - if (instr.src != instr.dst) { - genAddressReg(instr, p, pos); - emit(REX_XOR_RM, p, pos); - emitByte(0x04 + 8 * instr.dst, p, pos); - emitByte(0x06, p, pos); + uint32_t pos = codePos; + + const uint64_t src = instr.src; + const uint64_t dst = instr.dst; + + if (src != dst) { + genAddressReg(instr, src, p, pos); + emit32(0x0604334c + (dst << 19), p, pos); } else { - emit(REX_XOR_RM, p, pos); - emitByte(0x86 + 8 * instr.dst, p, pos); + *(uint32_t*)(p + pos) = 0x86334c + (dst << 19); + pos += 3; genAddressImm(instr, p, pos); } - registerUsage[instr.dst] = pos; + registerUsage[dst] = pos; codePos = pos; } void JitCompilerX86::h_IROR_R(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; - - if (instr.src != instr.dst) { - emit(REX_MOV_RR, p, pos); - emitByte(0xc8 + instr.src, p, pos); - emit(REX_ROT_CL, p, pos); - emitByte(0xc8 + instr.dst, p, pos); + uint32_t pos = codePos; + + const uint64_t src = instr.src; + const uint64_t dst = instr.dst; + + if (src != dst) { + *(uint64_t*)(p + pos) = 0xc8d349c88b41ull + (src << 16) + (dst << 40); + pos += 6; } else { - emit(REX_ROT_I8, p, pos); - emitByte(0xc8 + instr.dst, p, pos); + *(uint32_t*)(p + pos) = 0xc8c149 + (dst << 16); + pos += 3; emitByte(instr.getImm32() & 63, p, pos); } - registerUsage[instr.dst] = pos; + registerUsage[dst] = pos; codePos = pos; } void JitCompilerX86::h_IROL_R(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; + uint32_t pos = codePos; + + const uint64_t src = instr.src; + const uint64_t dst = instr.dst; - if (instr.src != instr.dst) { - emit(REX_MOV_RR, p, pos); - emitByte(0xc8 + instr.src, p, pos); - emit(REX_ROT_CL, p, pos); - emitByte(0xc0 + instr.dst, p, pos); + if (src != dst) { + *(uint64_t*)(p + pos) = 0xc0d349c88b41ull + (src << 16) + (dst << 40); + pos += 6; } else { - emit(REX_ROT_I8, p, pos); - emitByte(0xc0 + instr.dst, p, pos); + *(uint32_t*)(p + pos) = 0xc0c149 + (dst << 16); + pos += 3; emitByte(instr.getImm32() & 63, p, pos); } - registerUsage[instr.dst] = pos; + registerUsage[dst] = pos; codePos = pos; } void JitCompilerX86::h_ISWAP_R(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; - - if (instr.src != instr.dst) { - emit(REX_XCHG, p, pos); - emitByte(0xc0 + instr.src + 8 * instr.dst, p, pos); - registerUsage[instr.dst] = pos; - registerUsage[instr.src] = pos; + uint32_t pos = codePos; + + const uint32_t src = instr.src; + const uint32_t dst = instr.dst; + + if (src != dst) { + *(uint32_t*)(p + pos) = 0xc0874d + (((dst << 3) + src) << 16); + pos += 3; + registerUsage[dst] = pos; + registerUsage[src] = pos; } codePos = pos; @@ -909,132 +911,174 @@ namespace randomx { void JitCompilerX86::h_FSWAP_R(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; - - emit(SHUFPD, p, pos); - emitByte(0xc0 + 9 * instr.dst, p, pos); - emitByte(1, p, pos); + uint32_t pos = codePos; + + const uint64_t dst = instr.dst; + + *(uint64_t*)(p + pos) = 0x01c0c60f66ull + (((dst << 3) + dst) << 24); + pos += 5; codePos = pos; } void JitCompilerX86::h_FADD_R(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; + uint32_t pos = codePos; - const uint32_t dst = instr.dst % RegisterCountFlt; - const uint32_t src = instr.src % RegisterCountFlt; - emit(REX_ADDPD, p, pos); - emitByte(0xc0 + src + 8 * dst, p, pos); + const uint64_t dst = instr.dst % RegisterCountFlt; + const uint64_t src = instr.src % RegisterCountFlt; + + *(uint64_t*)(p + pos) = 0xc0580f4166ull + (((dst << 3) + src) << 32); + pos += 5; codePos = pos; } void JitCompilerX86::h_FADD_M(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; - + uint32_t pos = codePos; + + const uint32_t src = instr.src; const uint32_t dst = instr.dst % RegisterCountFlt; - genAddressReg(instr, p, pos); - emit(REX_CVTDQ2PD_XMM12, p, pos); - emit(REX_ADDPD, p, pos); - emitByte(0xc4 + 8 * dst, p, pos); + + genAddressReg(instr, src, p, pos); + *(uint64_t*)(p + pos) = 0x41660624e60f44f3ull; + *(uint32_t*)(p + pos + 8) = 0xc4580f + (dst << 19); + pos += 11; codePos = pos; } void JitCompilerX86::h_FSUB_R(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; - - const uint32_t dst = instr.dst % RegisterCountFlt; - const uint32_t src = instr.src % RegisterCountFlt; - emit(REX_SUBPD, p, pos); - emitByte(0xc0 + src + 8 * dst, p, pos); + uint32_t pos = codePos; + + const uint64_t dst = instr.dst % RegisterCountFlt; + const uint64_t src = instr.src % RegisterCountFlt; + + *(uint64_t*)(p + pos) = 0xc05c0f4166ull + (((dst << 3) + src) << 32); + pos += 5; codePos = pos; } void JitCompilerX86::h_FSUB_M(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; - + uint32_t pos = codePos; + + const uint32_t src = instr.src; const uint32_t dst = instr.dst % RegisterCountFlt; - genAddressReg(instr, p, pos); - emit(REX_CVTDQ2PD_XMM12, p, pos); - emit(REX_SUBPD, p, pos); - emitByte(0xc4 + 8 * dst, p, pos); + + genAddressReg(instr, src, p, pos); + *(uint64_t*)(p + pos) = 0x41660624e60f44f3ull; + *(uint32_t*)(p + pos + 8) = 0xc45c0f + (dst << 19); + pos += 11; codePos = pos; } void JitCompilerX86::h_FSCAL_R(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; - + uint32_t pos = codePos; + const uint32_t dst = instr.dst % RegisterCountFlt; - emit(REX_XORPS, p, pos); - emitByte(0xc7 + 8 * dst, p, pos); + + emit32(0xc7570f41 + (dst << 27), p, pos); codePos = pos; } void JitCompilerX86::h_FMUL_R(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; + uint32_t pos = codePos; - const uint32_t dst = instr.dst % RegisterCountFlt; - const uint32_t src = instr.src % RegisterCountFlt; - emit(REX_MULPD, p, pos); - emitByte(0xe0 + src + 8 * dst, p, pos); + const uint64_t dst = instr.dst % RegisterCountFlt; + const uint64_t src = instr.src % RegisterCountFlt; + + *(uint64_t*)(p + pos) = 0xe0590f4166ull + (((dst << 3) + src) << 32); + pos += 5; codePos = pos; } void JitCompilerX86::h_FDIV_M(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; - - const uint32_t dst = instr.dst % RegisterCountFlt; - genAddressReg(instr, p, pos); - emit(REX_CVTDQ2PD_XMM12, p, pos); - emit(REX_ANDPS_XMM12, p, pos); - emit(REX_DIVPD, p, pos); - emitByte(0xe4 + 8 * dst, p, pos); + uint32_t pos = codePos; + + const uint32_t src = instr.src; + const uint64_t dst = instr.dst % RegisterCountFlt; + + genAddressReg(instr, src, p, pos); + + *(uint64_t*)(p + pos) = 0x0624e60f44f3ull; + pos += 6; + if (hasXOP) { + *(uint64_t*)(p + pos) = 0xd0e6a218488full; + pos += 6; + } + else { + *(uint64_t*)(p + pos) = 0xe6560f45e5540f45ull; + pos += 8; + } + *(uint64_t*)(p + pos) = 0xe45e0f4166ull + (dst << 35); + pos += 5; codePos = pos; } void JitCompilerX86::h_FSQRT_R(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; - + uint32_t pos = codePos; + const uint32_t dst = instr.dst % RegisterCountFlt; - emit(SQRTPD, p, pos); - emitByte(0xe4 + 9 * dst, p, pos); + + emit32(0xe4510f66 + (((dst << 3) + dst) << 24), p, pos); codePos = pos; } - static const uint8_t AND_OR_MOV_LDMXCSR_RYZEN[] = { 0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x3B, 0x44, 0x24, 0xFC, 0x74, 0x09, 0x89, 0x44, 0x24, 0xFC, 0x0F, 0xAE, 0x54, 0x24, 0xFC }; - void JitCompilerX86::h_CFROUND(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; - - emit(REX_MOV_RR64, p, pos); - emitByte(0xc0 + instr.src, p, pos); - int rotate = (13 - (instr.getImm32() & 63)) & 63; - if (rotate != 0) { - emit(ROL_RAX, p, pos); - emitByte(rotate, p, pos); + uint32_t pos = codePos; + + const uint32_t src = instr.src; + + *(uint32_t*)(p + pos) = 0x00C08B49 + (src << 16); + const int rotate = (static_cast(instr.getImm32() & 63) - 2) & 63; + *(uint32_t*)(p + pos + 3) = 0x00C8C148 + (rotate << 24); + + if (vm_flags & RANDOMX_FLAG_AMD) { + *(uint64_t*)(p + pos + 7) = 0x742024443B0CE083ULL; + *(uint64_t*)(p + pos + 15) = 0x8900EB0414AE0F0AULL; + *(uint32_t*)(p + pos + 23) = 0x202444; + pos += 26; } + else { + *(uint64_t*)(p + pos + 7) = 0x0414AE0F0CE083ULL; + pos += 14; + } + + codePos = pos; + } + + void JitCompilerX86::h_CFROUND_BMI2(const Instruction& instr) { + uint8_t* const p = code; + uint32_t pos = codePos; + + const uint64_t src = instr.src; + + const uint64_t rotate = (static_cast(instr.getImm32() & 63) - 2) & 63; + *(uint64_t*)(p + pos) = 0xC0F0FBC3C4ULL | (src << 32) | (rotate << 40); - if (check_flag(AMD_RYZEN_FAMILY)) { - emit(AND_OR_MOV_LDMXCSR_RYZEN, p, pos); + if (vm_flags & RANDOMX_FLAG_AMD) { + *(uint64_t*)(p + pos + 6) = 0x742024443B0CE083ULL; + *(uint64_t*)(p + pos + 14) = 0x8900EB0414AE0F0AULL; + *(uint32_t*)(p + pos + 22) = 0x202444; + pos += 25; } else { - emit(AND_OR_MOV_LDMXCSR, p, pos); + *(uint64_t*)(p + pos + 6) = 0x0414AE0F0CE083ULL; + pos += 13; } codePos = pos; @@ -1042,12 +1086,12 @@ namespace randomx { void JitCompilerX86::h_CBRANCH(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; + uint32_t pos = codePos; const int reg = instr.dst; int32_t jmp_offset = registerUsage[reg] - (pos + 16); - if (check_flag(BRANCHES_WITHIN_32B)) { + if (BranchesWithin32B) { const uint32_t branch_begin = static_cast(pos + 7); const uint32_t branch_end = static_cast(branch_begin + ((jmp_offset >= -128) ? 9 : 13)); @@ -1059,22 +1103,20 @@ namespace randomx { } } - emit(REX_ADD_I, p, pos); - emitByte(0xc0 + reg, p, pos); + *(uint32_t*)(p + pos) = 0x00c08149 + (reg << 16); const int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset; - const uint32_t imm = (instr.getImm32() | (1UL << shift)) & ~(1UL << (shift - 1)); - emit32(imm, p, pos); - emit(REX_TEST, p, pos); - emitByte(0xc0 + reg, p, pos); - emit32(RandomX_CurrentConfig.ConditionMask_Calculated << shift, p, pos); + *(uint32_t*)(p + pos + 3) = (instr.getImm32() | (1UL << shift)) & ~(1UL << (shift - 1)); + *(uint32_t*)(p + pos + 7) = 0x00c0f749 + (reg << 16); + *(uint32_t*)(p + pos + 10) = RandomX_CurrentConfig.ConditionMask_Calculated << shift; + pos += 14; if (jmp_offset >= -128) { - emitByte(JZ_SHORT, p, pos); - emitByte(jmp_offset, p, pos); + *(uint32_t*)(p + pos) = 0x74 + (jmp_offset << 8); + pos += 2; } else { - emit(JZ, p, pos); - emit32(jmp_offset - 4, p, pos); + *(uint64_t*)(p + pos) = 0x840f + ((static_cast(jmp_offset) - 4) << 16); + pos += 6; } //mark all registers as used @@ -1090,20 +1132,18 @@ namespace randomx { void JitCompilerX86::h_ISTORE(const Instruction& instr) { uint8_t* const p = code; - int pos = codePos; + uint32_t pos = codePos; genAddressRegDst(instr, p, pos); - emit(REX_MOV_MR, p, pos); - emitByte(0x04 + 8 * instr.src, p, pos); - emitByte(0x06, p, pos); + emit32(0x0604894c + (static_cast(instr.src) << 19), p, pos); codePos = pos; } void JitCompilerX86::h_NOP(const Instruction& instr) { - emit(NOP1, code, codePos); + emitByte(0x90, code, codePos); } - InstructionGeneratorX86 JitCompilerX86::engine[256] = {}; + alignas(64) InstructionGeneratorX86 JitCompilerX86::engine[256] = {}; } diff --git a/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86.hpp b/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86.hpp index b47ff6ec5..c8a60c1da 100644 --- a/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86.hpp +++ b/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86.hpp @@ -31,7 +31,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include #include "crypto/randomx/common.hpp" namespace randomx { @@ -50,7 +49,8 @@ namespace randomx { public: JitCompilerX86(); ~JitCompilerX86(); - void generateProgram(Program&, ProgramConfiguration&); + void prepare(); + void generateProgram(Program&, ProgramConfiguration&, uint32_t); void generateProgramLight(Program&, ProgramConfiguration&, uint32_t); template void generateSuperscalarHash(SuperscalarProgram (&programs)[N], std::vector &); @@ -66,62 +66,56 @@ namespace randomx { } size_t getCodeSize(); - static InstructionGeneratorX86 engine[256]; + alignas(64) static InstructionGeneratorX86 engine[256]; + int registerUsage[RegistersCount]; - uint8_t* allocatedCode; uint8_t* code; - int32_t codePos; + uint32_t codePos; + uint32_t codePosFirst; + uint32_t vm_flags; - static std::atomic flags_set; - static constexpr uint64_t BRANCHES_WITHIN_32B = 1; - static constexpr uint64_t AMD_RYZEN_FAMILY = 2; - static uint64_t flags; +# ifdef XMRIG_FIX_RYZEN + std::pair mainLoopBounds; +# endif - static inline bool check_flag(uint64_t f) - { - return (flags & f) != 0; - } - - static inline void set_flag(uint64_t f, bool v) - { - if(v) - flags |= f; - else - flags &= ~f; - } + bool BranchesWithin32B = false; + bool hasAVX; + bool hasXOP; + + uint8_t* allocatedCode; - static void applyTweaks(); + void applyTweaks(); void generateProgramPrologue(Program&, ProgramConfiguration&); void generateProgramEpilogue(Program&, ProgramConfiguration&); template - static void genAddressReg(const Instruction&, uint8_t* code, int& codePos); - static void genAddressRegDst(const Instruction&, uint8_t* code, int& codePos); - static void genAddressImm(const Instruction&, uint8_t* code, int& codePos); - static void genSIB(int scale, int index, int base, uint8_t* code, int& codePos); + static void genAddressReg(const Instruction&, const uint32_t src, uint8_t* code, uint32_t& codePos); + static void genAddressRegDst(const Instruction&, uint8_t* code, uint32_t& codePos); + static void genAddressImm(const Instruction&, uint8_t* code, uint32_t& codePos); + static void genSIB(int scale, int index, int base, uint8_t* code, uint32_t& codePos); void generateSuperscalarCode(Instruction &, std::vector &); - static void emitByte(uint8_t val, uint8_t* code, int& codePos) { + static void emitByte(uint8_t val, uint8_t* code, uint32_t& codePos) { code[codePos] = val; ++codePos; } - static void emit32(uint32_t val, uint8_t* code, int& codePos) { + static void emit32(uint32_t val, uint8_t* code, uint32_t& codePos) { memcpy(code + codePos, &val, sizeof val); codePos += sizeof val; } - static void emit64(uint64_t val, uint8_t* code, int& codePos) { + static void emit64(uint64_t val, uint8_t* code, uint32_t& codePos) { memcpy(code + codePos, &val, sizeof val); codePos += sizeof val; } template - static void emit(const uint8_t (&src)[N], uint8_t* code, int& codePos) { + static void emit(const uint8_t (&src)[N], uint8_t* code, uint32_t& codePos) { emit(src, N, code, codePos); } - static void emit(const uint8_t* src, size_t count, uint8_t* code, int& codePos) { + static void emit(const uint8_t* src, size_t count, uint8_t* code, uint32_t& codePos) { memcpy(code + codePos, src, count); codePos += count; } @@ -133,7 +127,9 @@ namespace randomx { void h_IMUL_R(const Instruction&); void h_IMUL_M(const Instruction&); void h_IMULH_R(const Instruction&); + void h_IMULH_R_BMI2(const Instruction&); void h_IMULH_M(const Instruction&); + void h_IMULH_M_BMI2(const Instruction&); void h_ISMULH_R(const Instruction&); void h_ISMULH_M(const Instruction&); void h_IMUL_RCP(const Instruction&); @@ -154,6 +150,7 @@ namespace randomx { void h_FSQRT_R(const Instruction&); void h_CBRANCH(const Instruction&); void h_CFROUND(const Instruction&); + void h_CFROUND_BMI2(const Instruction&); void h_ISTORE(const Instruction&); void h_NOP(const Instruction&); }; diff --git a/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86_static.S b/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86_static.S index 50019b7e5..9f3a5bf18 100644 --- a/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86_static.S +++ b/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86_static.S @@ -43,6 +43,7 @@ .global DECL(randomx_program_prologue_first_load) .global DECL(randomx_program_loop_begin) .global DECL(randomx_program_loop_load) +.global DECL(randomx_program_loop_load_xop) .global DECL(randomx_program_start) .global DECL(randomx_program_read_dataset) .global DECL(randomx_program_read_dataset_ryzen) @@ -93,7 +94,15 @@ DECL(randomx_program_prologue_first_load): and eax, RANDOMX_SCRATCHPAD_MASK ror rdx, 32 and edx, RANDOMX_SCRATCHPAD_MASK - stmxcsr dword ptr [rsp-20] + sub rsp, 40 + mov dword ptr [rsp], 0x9FC0 + mov dword ptr [rsp+4], 0xBFC0 + mov dword ptr [rsp+8], 0xDFC0 + mov dword ptr [rsp+12], 0xFFC0 + mov dword ptr [rsp+32], -1 + nop + nop + nop jmp DECL(randomx_program_loop_begin) .balign 64 @@ -106,6 +115,9 @@ DECL(randomx_program_loop_begin): DECL(randomx_program_loop_load): #include "asm/program_loop_load.inc" +DECL(randomx_program_loop_load_xop): + #include "asm/program_loop_load_xop.inc" + DECL(randomx_program_start): nop diff --git a/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86_static.asm b/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86_static.asm index 189c464c5..e36e5aafa 100644 --- a/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86_static.asm +++ b/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86_static.asm @@ -34,6 +34,7 @@ PUBLIC randomx_program_prologue PUBLIC randomx_program_prologue_first_load PUBLIC randomx_program_loop_begin PUBLIC randomx_program_loop_load +PUBLIC randomx_program_loop_load_xop PUBLIC randomx_program_start PUBLIC randomx_program_read_dataset PUBLIC randomx_program_read_dataset_ryzen @@ -81,7 +82,15 @@ randomx_program_prologue_first_load PROC and eax, RANDOMX_SCRATCHPAD_MASK ror rdx, 32 and edx, RANDOMX_SCRATCHPAD_MASK - stmxcsr dword ptr [rsp-20] + sub rsp, 40 + mov dword ptr [rsp], 9FC0h + mov dword ptr [rsp+4], 0BFC0h + mov dword ptr [rsp+8], 0DFC0h + mov dword ptr [rsp+12], 0FFC0h + mov dword ptr [rsp+32], -1 + nop + nop + nop jmp randomx_program_loop_begin randomx_program_prologue_first_load ENDP @@ -97,6 +106,10 @@ randomx_program_loop_load PROC include asm/program_loop_load.inc randomx_program_loop_load ENDP +randomx_program_loop_load_xop PROC + include asm/program_loop_load_xop.inc +randomx_program_loop_load_xop ENDP + randomx_program_start PROC nop randomx_program_start ENDP @@ -226,4 +239,4 @@ _RANDOMX_JITX86_STATIC ENDS ENDIF -END +END \ No newline at end of file diff --git a/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86_static.hpp b/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86_static.hpp index b0a7c5acb..6523f9c47 100644 --- a/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86_static.hpp +++ b/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86_static.hpp @@ -35,6 +35,7 @@ extern "C" { void randomx_program_prologue_first_load(); void randomx_program_loop_begin(); void randomx_program_loop_load(); + void randomx_program_loop_load_xop(); void randomx_program_start(); void randomx_program_read_dataset(); void randomx_program_read_dataset_ryzen(); diff --git a/xmrstak/backend/cpu/crypto/randomx/randomx.cpp b/xmrstak/backend/cpu/crypto/randomx/randomx.cpp index 1c6b048d2..ab55b25df 100644 --- a/xmrstak/backend/cpu/crypto/randomx/randomx.cpp +++ b/xmrstak/backend/cpu/crypto/randomx/randomx.cpp @@ -34,7 +34,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "crypto/randomx/vm_compiled.hpp" #include "crypto/randomx/vm_compiled_light.hpp" #include "crypto/randomx/blake2/blake2.h" + #include "crypto/randomx/jit_compiler_x86_static.hpp" +#include "crypto/common/VirtualMemory.h" + +#include "cpuType.hpp" + +#include #include @@ -87,6 +93,18 @@ RandomX_ConfigurationArqma::RandomX_ConfigurationArqma() ScratchpadL3_Size = 262144; } +RandomX_ConfigurationSafex::RandomX_ConfigurationSafex() +{ + ArgonSalt = "RandomSFX\x01"; +} + +RandomX_ConfigurationKeva::RandomX_ConfigurationKeva() +{ + ArgonSalt = "RandomKV\x01"; + ScratchpadL2_Size = 131072; + ScratchpadL3_Size = 1048576; +} + RandomX_ConfigurationBase::RandomX_ConfigurationBase() : ArgonMemory(262144) , ArgonIterations(3) @@ -193,16 +211,27 @@ void RandomX_ConfigurationBase::Apply() #if defined(_M_X64) || defined(__x86_64__) *(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1; - const uint32_t DatasetBaseMask = DatasetBaseSize - RANDOMX_DATASET_ITEM_SIZE; - *(uint32_t*)(codeReadDatasetTweaked + 7) = DatasetBaseMask; - *(uint32_t*)(codeReadDatasetTweaked + 23) = DatasetBaseMask; - *(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask; + // Not needed right now because all variants use default dataset base size + //const uint32_t DatasetBaseMask = DatasetBaseSize - RANDOMX_DATASET_ITEM_SIZE; + //*(uint32_t*)(codeReadDatasetTweaked + 9) = DatasetBaseMask; + //*(uint32_t*)(codeReadDatasetTweaked + 24) = DatasetBaseMask; + //*(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask; *(uint32_t*)(codePrefetchScratchpadTweaked + 4) = ScratchpadL3Mask64_Calculated; *(uint32_t*)(codePrefetchScratchpadTweaked + 18) = ScratchpadL3Mask64_Calculated; #define JIT_HANDLE(x, prev) randomx::JitCompilerX86::engine[k] = &randomx::JitCompilerX86::h_##x +#elif defined(XMRIG_ARMv8) + + Log2_ScratchpadL1 = Log2(ScratchpadL1_Size); + Log2_ScratchpadL2 = Log2(ScratchpadL2_Size); + Log2_ScratchpadL3 = Log2(ScratchpadL3_Size); + Log2_DatasetBaseSize = Log2(DatasetBaseSize); + Log2_CacheSize = Log2((ArgonMemory * randomx::ArgonBlockSize) / randomx::CacheLineSize); + +#define JIT_HANDLE(x, prev) randomx::JitCompilerA64::engine[k] = &randomx::JitCompilerA64::h_##x + #else #define JIT_HANDLE(x, prev) #endif @@ -214,14 +243,29 @@ void RandomX_ConfigurationBase::Apply() CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \ for (; k < CEIL_##x; ++k) { JIT_HANDLE(x, prev); } +#define INST_HANDLE2(x, func_name, prev) \ + CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \ + for (; k < CEIL_##x; ++k) { JIT_HANDLE(func_name, prev); } + INST_HANDLE(IADD_RS, NULL); INST_HANDLE(IADD_M, IADD_RS); INST_HANDLE(ISUB_R, IADD_M); INST_HANDLE(ISUB_M, ISUB_R); INST_HANDLE(IMUL_R, ISUB_M); INST_HANDLE(IMUL_M, IMUL_R); - INST_HANDLE(IMULH_R, IMUL_M); - INST_HANDLE(IMULH_M, IMULH_R); + +#if defined(_M_X64) || defined(__x86_64__) + if (xmrstak::cpu::hasBMI2()) { + INST_HANDLE2(IMULH_R, IMULH_R_BMI2, IMUL_M); + INST_HANDLE2(IMULH_M, IMULH_M_BMI2, IMULH_R); + } + else +#endif + { + INST_HANDLE(IMULH_R, IMUL_M); + INST_HANDLE(IMULH_M, IMULH_R); + } + INST_HANDLE(ISMULH_R, IMULH_M); INST_HANDLE(ISMULH_M, ISMULH_R); INST_HANDLE(IMUL_RCP, ISMULH_M); @@ -241,7 +285,17 @@ void RandomX_ConfigurationBase::Apply() INST_HANDLE(FDIV_M, FMUL_R); INST_HANDLE(FSQRT_R, FDIV_M); INST_HANDLE(CBRANCH, FSQRT_R); - INST_HANDLE(CFROUND, CBRANCH); + +#if defined(_M_X64) || defined(__x86_64__) + if (xmrstak::cpu::hasBMI2()) { + INST_HANDLE2(CFROUND, CFROUND_BMI2, CBRANCH); + } + else +#endif + { + INST_HANDLE(CFROUND, CBRANCH); + } + INST_HANDLE(ISTORE, CFROUND); INST_HANDLE(NOP, ISTORE); #undef INST_HANDLE @@ -251,8 +305,12 @@ RandomX_ConfigurationMonero RandomX_MoneroConfig; RandomX_ConfigurationWownero RandomX_WowneroConfig; RandomX_ConfigurationLoki RandomX_LokiConfig; RandomX_ConfigurationArqma RandomX_ArqmaConfig; +RandomX_ConfigurationSafex RandomX_SafexConfig; +RandomX_ConfigurationKeva RandomX_KevaConfig; + +alignas(64) RandomX_ConfigurationBase RandomX_CurrentConfig; -RandomX_ConfigurationBase RandomX_CurrentConfig; +static std::mutex vm_pool_mutex; extern "C" { @@ -327,7 +385,7 @@ extern "C" { dataset = new randomx_dataset(); if (flags & RANDOMX_FLAG_LARGE_PAGES) { dataset->dealloc = &randomx::deallocDataset; - if(flags & RANDOMX_FLAG_LARGE_PAGES_1G) { + if(flags & RANDOMX_FLAG_1GB_PAGES) { dataset->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(RANDOMX_DATASET_MAX_SIZE, 1024u); } else { diff --git a/xmrstak/backend/cpu/crypto/randomx/randomx.h b/xmrstak/backend/cpu/crypto/randomx/randomx.h index 4dbecaef7..5157179dc 100644 --- a/xmrstak/backend/cpu/crypto/randomx/randomx.h +++ b/xmrstak/backend/cpu/crypto/randomx/randomx.h @@ -48,7 +48,8 @@ enum randomx_flags { RANDOMX_FLAG_HARD_AES = 2, RANDOMX_FLAG_FULL_MEM = 4, RANDOMX_FLAG_JIT = 8, - RANDOMX_FLAG_LARGE_PAGES_1G = 16 + RANDOMX_FLAG_1GB_PAGES = 16, + RANDOMX_FLAG_AMD = 64, }; @@ -118,9 +119,9 @@ struct RandomX_ConfigurationBase rx_vec_i128 fillAes4Rx4_Key[8]; uint8_t codeShhPrefetchTweaked[20]; - uint8_t codeReadDatasetTweaked[72]; + uint8_t codeReadDatasetTweaked[64]; uint32_t codeReadDatasetTweakedSize; - uint8_t codeReadDatasetRyzenTweaked[72]; + uint8_t codeReadDatasetRyzenTweaked[76]; uint32_t codeReadDatasetRyzenTweakedSize; uint8_t codeReadDatasetLightSshInitTweaked[68]; uint8_t codePrefetchScratchpadTweaked[32]; @@ -137,6 +138,14 @@ struct RandomX_ConfigurationBase uint32_t ConditionMask_Calculated; +#if defined(XMRIG_ARMv8) + uint32_t Log2_ScratchpadL1; + uint32_t Log2_ScratchpadL2; + uint32_t Log2_ScratchpadL3; + uint32_t Log2_DatasetBaseSize; + uint32_t Log2_CacheSize; +#endif + int CEIL_IADD_RS; int CEIL_IADD_M; int CEIL_ISUB_R; @@ -173,11 +182,15 @@ struct RandomX_ConfigurationMonero : public RandomX_ConfigurationBase {}; struct RandomX_ConfigurationWownero : public RandomX_ConfigurationBase { RandomX_ConfigurationWownero(); }; struct RandomX_ConfigurationLoki : public RandomX_ConfigurationBase { RandomX_ConfigurationLoki(); }; struct RandomX_ConfigurationArqma : public RandomX_ConfigurationBase { RandomX_ConfigurationArqma(); }; +struct RandomX_ConfigurationSafex : public RandomX_ConfigurationBase { RandomX_ConfigurationSafex(); }; +struct RandomX_ConfigurationKeva : public RandomX_ConfigurationBase { RandomX_ConfigurationKeva(); }; extern RandomX_ConfigurationMonero RandomX_MoneroConfig; extern RandomX_ConfigurationWownero RandomX_WowneroConfig; extern RandomX_ConfigurationLoki RandomX_LokiConfig; extern RandomX_ConfigurationArqma RandomX_ArqmaConfig; +extern RandomX_ConfigurationSafex RandomX_SafexConfig; +extern RandomX_ConfigurationKeva RandomX_KevaConfig; extern RandomX_ConfigurationBase RandomX_CurrentConfig; diff --git a/xmrstak/backend/cpu/crypto/randomx/virtual_machine.hpp b/xmrstak/backend/cpu/crypto/randomx/virtual_machine.hpp index d3718d04d..3fdd86df4 100644 --- a/xmrstak/backend/cpu/crypto/randomx/virtual_machine.hpp +++ b/xmrstak/backend/cpu/crypto/randomx/virtual_machine.hpp @@ -46,6 +46,9 @@ class randomx_vm virtual void run(void* seed) = 0; void resetRoundingMode(); + void setFlags(uint32_t flags) { vm_flags = flags; } + uint32_t getFlags() const { return vm_flags; } + randomx::RegisterFile *getRegisterFile() { return ® } @@ -71,6 +74,7 @@ class randomx_vm randomx_dataset* datasetPtr; }; uint64_t datasetOffset; + uint32_t vm_flags; }; namespace randomx { diff --git a/xmrstak/backend/cpu/crypto/randomx/vm_compiled.cpp b/xmrstak/backend/cpu/crypto/randomx/vm_compiled.cpp index 0d18a3634..501bb8c70 100644 --- a/xmrstak/backend/cpu/crypto/randomx/vm_compiled.cpp +++ b/xmrstak/backend/cpu/crypto/randomx/vm_compiled.cpp @@ -41,16 +41,19 @@ namespace randomx { template void CompiledVm::run(void* seed) { + compiler.prepare(); VmBase::generateProgram(seed); randomx_vm::initialize(); - compiler.generateProgram(program, config); + compiler.generateProgram(program, config, randomx_vm::getFlags()); mem.memory = datasetPtr->memory + datasetOffset; execute(); } template void CompiledVm::execute() { - +#ifdef XMRIG_ARM + memcpy(reg.f, config.eMask, sizeof(config.eMask)); +#endif compiler.getProgramFunc()(reg, mem, scratchpad, RandomX_CurrentConfig.ProgramIterations); } diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index b1e5a0497..dfe1d2f8a 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -468,6 +468,12 @@ void minethd::func_multi_selector(cryptonight_ctx** ctx, minethd::cn_on_new_job& case randomX_arqma: algv = 3; break; + case randomX_safex: + algv = 4; + break; + case randomX_keva: + algv = 5; + break; default: algv = 0; break; @@ -488,7 +494,15 @@ void minethd::func_multi_selector(cryptonight_ctx** ctx, minethd::cn_on_new_job& //arqma RandomX_hash::template hash, - RandomX_hash::template hash + RandomX_hash::template hash, + + //safex + RandomX_hash::template hash, + RandomX_hash::template hash, + + //keva + RandomX_hash::template hash, + RandomX_hash::template hash }; std::bitset<1> digit; @@ -503,7 +517,9 @@ void minethd::func_multi_selector(cryptonight_ctx** ctx, minethd::cn_on_new_job& {randomX, RandomX_generator::template cn_on_new_job}, {randomX_loki, RandomX_generator::template cn_on_new_job}, {randomX_wow, RandomX_generator::template cn_on_new_job}, - {randomX_arqma, RandomX_generator::template cn_on_new_job} + {randomX_arqma, RandomX_generator::template cn_on_new_job}, + {randomX_safex, RandomX_generator::template cn_on_new_job}, + {randomX_keva, RandomX_generator::template cn_on_new_job} }; auto it = on_new_job_map.find(algo.Id()); diff --git a/xmrstak/backend/cryptonight.hpp b/xmrstak/backend/cryptonight.hpp index 02bef121c..04afc9fee 100644 --- a/xmrstak/backend/cryptonight.hpp +++ b/xmrstak/backend/cryptonight.hpp @@ -13,8 +13,10 @@ enum xmrstak_algo_id randomX = 1, randomX_loki = 2, randomX_wow = 3, - randomX_arqma = 4 - + randomX_arqma = 4, + randomX_safex = 5, + randomX_keva = 6 + //cryptonight_turtle = start_derived_algo_id, // please add the algorithm name to get_algo_name() }; @@ -25,13 +27,15 @@ enum xmrstak_algo_id */ inline std::string get_algo_name(xmrstak_algo_id algo_id) { - static std::array base_algo_names = + static std::array base_algo_names = {{ "invalid_algo", "randomx", "randomx_loki", "randomx_wow", - "randomx_arqma" + "randomx_arqma", + "randomx_safex", + "randomx_keva" }}; static std::array derived_algo_names = @@ -140,12 +144,14 @@ constexpr uint32_t RX_ARQMA_ITER = 0x10000; inline xmrstak_algo POW(xmrstak_algo_id algo_id) { - static std::array pow = {{ + static std::array pow = {{ {invalid_algo}, {randomX, randomX, _2MiB, _256KiB, _16KiB}, {randomX_loki, randomX_loki, _2MiB, _256KiB, _16KiB}, {randomX_wow, randomX_wow, _2MiB/2, _256KiB/2, _16KiB}, - {randomX_arqma, randomX_arqma, _2MiB/8, _256KiB/2, _16KiB} + {randomX_arqma, randomX_arqma, _2MiB/8, _256KiB/2, _16KiB}, + {randomX_safex, randomX_safex, _2MiB, _256KiB, _16KiB}, + {randomX_keva, randomX_keva, _2MiB/4, _256KiB/2, _16KiB} }}; return pow[algo_id]; diff --git a/xmrstak/jconf.cpp b/xmrstak/jconf.cpp index c4ef358fb..b6abe22f0 100644 --- a/xmrstak/jconf.cpp +++ b/xmrstak/jconf.cpp @@ -102,11 +102,15 @@ constexpr size_t iConfigCnt = (sizeof(oConfigValues) / sizeof(oConfigValues[0])) xmrstak::coin_selection coins[] = { // name, userpool, default_pool_suggestion {"arqma", {POW(randomX_arqma)}, {POW(randomX_arqma)}, nullptr}, + {"keva", {POW(randomX_keva)}, {POW(randomX_keva)}, nullptr}, {"loki", {POW(randomX_loki)}, {POW(randomX_loki)}, nullptr}, {"monero", {POW(randomX)}, {POW(randomX)}, nullptr}, + {"safex", {POW(randomX_safex)}, {POW(randomX_safex)}, nullptr}, {"randomx", {POW(randomX)}, {POW(randomX)}, nullptr}, {"randomx_arqma", {POW(randomX_arqma)}, {POW(randomX_arqma)}, nullptr}, + {"randomx_keva", {POW(randomX_keva)}, {POW(randomX_keva)}, nullptr}, {"randomx_loki", {POW(randomX_loki)}, {POW(randomX_loki)}, nullptr}, + {"randomx_safex", {POW(randomX_safex)}, {POW(randomX_safex)}, nullptr}, {"randomx_wow", {POW(randomX_wow)}, {POW(randomX_wow)}, nullptr}, {"wownero", {POW(randomX_wow)}, {POW(randomX_wow)}, nullptr} }; diff --git a/xmrstak/pools.tpl b/xmrstak/pools.tpl index caa7395b7..a91993ad7 100644 --- a/xmrstak/pools.tpl +++ b/xmrstak/pools.tpl @@ -20,19 +20,24 @@ POOLCONF], * Currency to mine. Supported values: * * arqma + * keva * loki * monero + * safex * wownero * * Native algorithms which do not depend on any block versions: * * # 256KiB scratchpad memory * randomx_arqma + * # 512KiB scratchpad memory + * randomx_keva * # 1MiB scratchpad memory * randomx_wow * # 2MiB scratchpad memory * randomx * randomx_loki + * randomx_safex */ "currency" : "CURRENCY", From 37200ffa75b28a90cafc4f04f929339cdee96a1a Mon Sep 17 00:00:00 2001 From: fireice-uk Date: Wed, 6 May 2020 13:21:11 +0100 Subject: [PATCH 2/2] incr version --- xmrstak/version.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xmrstak/version.cpp b/xmrstak/version.cpp index dec78d1b3..71ca08890 100644 --- a/xmrstak/version.cpp +++ b/xmrstak/version.cpp @@ -20,7 +20,7 @@ #endif #define XMR_STAK_NAME "xmr-stak-rx" -#define XMR_STAK_VERSION "1.0.4-rx" +#define XMR_STAK_VERSION "1.0.5-rx" #if defined(_WIN32) #define OS_TYPE "win"