From c89b113715f952710832190afbec4e6713693d82 Mon Sep 17 00:00:00 2001
From: Dakoda Greaves
Date: Tue, 30 Apr 2024 15:55:43 -0700
Subject: [PATCH] crypto: add multi-block intel avx2 wrapper for sha256

Add TransformD64WrapperAVX2, which computes the double-SHA256 of a
64-byte input through sha256_avx2, and install it as transfrom_ptr_d64
whenever the AVX2 transform is selected.

On Win64, call AVXEnabled() instead of testing the function's address,
which is always true and selected the AVX2 code unconditionally.
---
Review notes (not part of the commit):
 - Template parameter lists were missing from the received text and were
   reconstructed (`template<transform_type_avx2 tr>`, `<sha256_avx2>`, and
   the `template<transform_type tr>` context line); verify against the tree.
 - Indentation was reconstructed assuming 4-space indents.

 src/crypto/sha256.cpp | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 65 insertions(+), 4 deletions(-)

diff --git a/src/crypto/sha256.cpp b/src/crypto/sha256.cpp
index 1bd78ca693e..3813f719971 100644
--- a/src/crypto/sha256.cpp
+++ b/src/crypto/sha256.cpp
@@ -657,6 +657,7 @@ void TransformD64(unsigned char* out, const unsigned char* in)
 }
 
 typedef void (*transform_type) (uint32_t*, const unsigned char*, size_t);
+typedef void (*transform_type_avx2) (const void*, const uint64_t, void*);
 typedef void (*transform_d64_type)(unsigned char*, const unsigned char*);
 
 /** Define a function pointer for Transform */
@@ -666,6 +667,62 @@ transform_d64_type transfrom_ptr_d64_2way = nullptr;
 transform_d64_type transfrom_ptr_d64_4way = nullptr;
 transform_d64_type transfrom_ptr_d64_8way = nullptr;
 
+#if USE_AVX2
+transform_type_avx2 transform_ptr_avx2 = sha256_avx2;
+
+/**
+ * Double-SHA256 of a single 64-byte input, built on the AVX2 transform `tr`.
+ *
+ * Pass 1 compresses `in` followed by a fixed padding block (0x80 marker,
+ * big-endian bit length 0x0200 = 512). Pass 2 compresses `buffer2`, which
+ * holds the 32-byte pass-1 digest plus its own padding (0x80 marker, bit
+ * length 0x0100 = 256). The 32-byte result is written big-endian to `out`.
+ *
+ * `tr` is invoked as tr(data, 1, state); the middle argument is presumably
+ * the number of 64-byte blocks to process -- confirm against sha256_avx2.
+ */
+template<transform_type_avx2 tr>
+void TransformD64WrapperAVX2(unsigned char* out, const unsigned char* in)
+{
+    uint32_t s[8];
+    static const unsigned char padding1[64] = {
+        0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0
+    };
+    unsigned char buffer2[64] = {
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0
+    };
+    sha256::Initialize(s);
+    tr(in, 1, s);
+    tr(padding1, 1, s);
+    // Serialize the pass-1 state into the first 32 bytes of the pass-2 block.
+    WriteBE32(buffer2 + 0, s[0]);
+    WriteBE32(buffer2 + 4, s[1]);
+    WriteBE32(buffer2 + 8, s[2]);
+    WriteBE32(buffer2 + 12, s[3]);
+    WriteBE32(buffer2 + 16, s[4]);
+    WriteBE32(buffer2 + 20, s[5]);
+    WriteBE32(buffer2 + 24, s[6]);
+    WriteBE32(buffer2 + 28, s[7]);
+    sha256::Initialize(s);
+    tr(buffer2, 1, s);
+    // Serialize the pass-2 state as the final 32-byte digest.
+    WriteBE32(out + 0, s[0]);
+    WriteBE32(out + 4, s[1]);
+    WriteBE32(out + 8, s[2]);
+    WriteBE32(out + 12, s[3]);
+    WriteBE32(out + 16, s[4]);
+    WriteBE32(out + 20, s[5]);
+    WriteBE32(out + 24, s[6]);
+    WriteBE32(out + 28, s[7]);
+}
+#endif
+
 template<transform_type tr>
 void TransformD64Wrapper(unsigned char* out, const unsigned char* in)
 {
@@ -810,11 +867,15 @@ bool AVXEnabled()
 void inline Initialize_transform_ptr(void)
 {
 #if USE_AVX2 && defined(__linux__)
-    if (__builtin_cpu_supports("avx2"))
-        sha256::transform_ptr = sha256::Transform_AVX2;
+    if (__builtin_cpu_supports("avx2")) {
+        sha256::transform_ptr = sha256::Transform_AVX2;
+        sha256::transfrom_ptr_d64 = sha256::TransformD64WrapperAVX2<sha256_avx2>;
+    }
 #elif USE_AVX2 && defined(__WIN64__)
-    if (AVXEnabled)
-        sha256::transform_ptr = sha256::Transform_AVX2;
+    if (AVXEnabled()) {
+        sha256::transform_ptr = sha256::Transform_AVX2;
+        sha256::transfrom_ptr_d64 = sha256::TransformD64WrapperAVX2<sha256_avx2>;
+    }
 #endif
 #if defined(USE_ASM) && (defined(__x86_64__) || defined(__amd64__) || defined(__i386__))
     bool have_sse4 = false;