Skip to content

Commit

Permalink
8-way AVX2 implementation for double SHA256 on 64-byte inputs
Browse files Browse the repository at this point in the history
Cherry-picked from: 4437d6e
  • Loading branch information
sipa authored and xanimo committed Apr 17, 2024
1 parent 499a68b commit 457f90c
Show file tree
Hide file tree
Showing 8 changed files with 402 additions and 7 deletions.
22 changes: 22 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ fi
# compatibility.
AX_CHECK_COMPILE_FLAG([-msse4.2],[[SSE42_CXXFLAGS="-msse4.2"]],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-msse4.1],[[SSE41_CXXFLAGS="-msse4.1"]],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-mavx -mavx2],[[AVX2_CXXFLAGS="-mavx -mavx2"]],,[[$CXXFLAG_WERROR]])

TEMP_CXXFLAGS="$CXXFLAGS"
CXXFLAGS="$CXXFLAGS $SSE42_CXXFLAGS"
Expand Down Expand Up @@ -325,6 +326,25 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
)
CXXFLAGS="$TEMP_CXXFLAGS"

TEMP_CXXFLAGS="$CXXFLAGS"
CXXFLAGS="$CXXFLAGS $AVX2_CXXFLAGS"
AC_MSG_CHECKING(for AVX2 intrinsics)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <stdint.h>
#if defined(_MSC_VER)
#include <immintrin.h>
#elif defined(__GNUC__) && defined(__AVX2__)
#include <x86intrin.h>
#endif
]],[[
__m256i l = _mm256_set1_epi32(0);
return _mm256_extract_epi32(l, 7);
]])],
[ AC_MSG_RESULT(yes); enable_avx2=yes; AC_DEFINE(ENABLE_AVX2, 1, [Define this symbol to build code that uses AVX2 intrinsics]) ],
[ AC_MSG_RESULT(no)]
)
CXXFLAGS="$TEMP_CXXFLAGS"

CPPFLAGS="$CPPFLAGS -DHAVE_BUILD_INFO -D__STDC_FORMAT_MACROS"

AC_ARG_WITH([utils],
Expand Down Expand Up @@ -1226,6 +1246,7 @@ AM_CONDITIONAL([WORDS_BIGENDIAN],[test x$ac_cv_c_bigendian = xyes])
AM_CONDITIONAL([USE_SCRYPT_SSE2], [test x$use_scrypt_sse2 = xyes])
AM_CONDITIONAL([ENABLE_HWCRC32],[test x$enable_hwcrc32 = xyes])
AM_CONDITIONAL([ENABLE_SSE41],[test x$enable_sse41 = xyes])
AM_CONDITIONAL([ENABLE_AVX2],[test x$enable_avx2 = xyes])
AM_CONDITIONAL([USE_ASM],[test x$use_asm = xyes])

AC_DEFINE(CLIENT_VERSION_MAJOR, _CLIENT_VERSION_MAJOR, [Major version])
Expand Down Expand Up @@ -1268,6 +1289,7 @@ AC_SUBST(SANITIZER_CXXFLAGS)
AC_SUBST(SANITIZER_LDFLAGS)
AC_SUBST(SSE42_CXXFLAGS)
AC_SUBST(SSE41_CXXFLAGS)
AC_SUBST(AVX2_CXXFLAGS)
AC_SUBST(LIBTOOL_APP_LDFLAGS)
AC_SUBST(USE_UPNP)
AC_SUBST(USE_QRCODE)
Expand Down
15 changes: 14 additions & 1 deletion src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ LIBDOGECOIN_CLI=libdogecoin_cli.a
LIBDOGECOIN_UTIL=libdogecoin_util.a
LIBDOGECOIN_CRYPTO=crypto/libdogecoin_crypto.a
LIBDOGECOIN_CRYPTO_SSE41=crypto/libdogecoin_crypto_sse41.a
LIBDOGECOIN_CRYPTO_AVX2=crypto/libdogecoin_crypto_avx2.a
LIBDOGECOINQT=qt/libdogecoinqt.a
LIBSECP256K1=secp256k1/libsecp256k1.la

Expand All @@ -52,6 +53,7 @@ $(LIBSECP256K1): $(wildcard secp256k1/src/*) $(wildcard secp256k1/include/*)
EXTRA_LIBRARIES += \
$(LIBDOGECOIN_CRYPTO) \
$(LIBDOGECOIN_CRYPTO_SSE41) \
$(LIBDOGECOIN_CRYPTO_AVX2) \
$(LIBDOGECOIN_UTIL) \
$(LIBDOGECOIN_COMMON) \
$(LIBDOGECOIN_CONSENSUS) \
Expand Down Expand Up @@ -289,6 +291,14 @@ crypto_libdogecoin_crypto_sse41_a_CPPFLAGS += -DENABLE_SSE41
endif
crypto_libdogecoin_crypto_sse41_a_SOURCES = crypto/sha256_sse41.cpp

crypto_libdogecoin_crypto_avx2_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
crypto_libdogecoin_crypto_avx2_a_CPPFLAGS = $(AM_CPPFLAGS)
if ENABLE_AVX2
crypto_libdogecoin_crypto_avx2_a_CXXFLAGS += $(AVX2_CXXFLAGS)
crypto_libdogecoin_crypto_avx2_a_CPPFLAGS += -DENABLE_AVX2
endif
crypto_libdogecoin_crypto_avx2_a_SOURCES = crypto/sha256_avx2.cpp

# consensus: shared between all executables that validate any consensus rules.
libdogecoin_consensus_a_CPPFLAGS = $(AM_CPPFLAGS) $(BITCOIN_INCLUDES)
libdogecoin_consensus_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
Expand Down Expand Up @@ -414,6 +424,7 @@ dogecoind_LDADD = \
$(LIBDOGECOIN_CONSENSUS) \
$(LIBDOGECOIN_CRYPTO) \
$(LIBDOGECOIN_CRYPTO_SSE41) \
$(LIBDOGECOIN_CRYPTO_AVX2) \
$(LIBLEVELDB) \
$(LIBMEMENV) \
$(LIBSECP256K1)
Expand All @@ -435,7 +446,8 @@ dogecoin_cli_LDADD = \
$(LIBUNIVALUE) \
$(LIBDOGECOIN_UTIL) \
$(LIBDOGECOIN_CRYPTO) \
$(LIBDOGECOIN_CRYPTO_SSE41)
$(LIBDOGECOIN_CRYPTO_SSE41) \
$(LIBDOGECOIN_CRYPTO_AVX2)

dogecoin_cli_LDADD += $(BOOST_LIBS) $(SSL_LIBS) $(CRYPTO_LIBS) $(EVENT_LIBS)
#
Expand All @@ -457,6 +469,7 @@ dogecoin_tx_LDADD = \
$(LIBDOGECOIN_CONSENSUS) \
$(LIBDOGECOIN_CRYPTO) \
$(LIBDOGECOIN_CRYPTO_SSE41) \
$(LIBDOGECOIN_CRYPTO_AVX2) \
$(LIBSECP256K1)

dogecoin_tx_LDADD += $(BOOST_LIBS) $(CRYPTO_LIBS)
Expand Down
1 change: 1 addition & 0 deletions src/Makefile.bench.include
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ bench_bench_dogecoin_LDADD = \
$(LIBDOGECOIN_CONSENSUS) \
$(LIBDOGECOIN_CRYPTO) \
$(LIBDOGECOIN_CRYPTO_SSE41) \
$(LIBDOGECOIN_CRYPTO_AVX2) \
$(LIBLEVELDB) \
$(LIBMEMENV) \
$(LIBSECP256K1) \
Expand Down
2 changes: 1 addition & 1 deletion src/Makefile.qt.include
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ endif
if ENABLE_ZMQ
qt_dogecoin_qt_LDADD += $(LIBDOGECOIN_ZMQ) $(ZMQ_LIBS)
endif
qt_dogecoin_qt_LDADD += $(LIBDOGECOIN_CLI) $(LIBDOGECOIN_COMMON) $(LIBDOGECOIN_UTIL) $(LIBDOGECOIN_CONSENSUS) $(LIBDOGECOIN_CRYPTO) $(LIBDOGECOIN_CRYPTO_SSE41) $(LIBUNIVALUE) $(LIBLEVELDB) $(LIBMEMENV) \
qt_dogecoin_qt_LDADD += $(LIBDOGECOIN_CLI) $(LIBDOGECOIN_COMMON) $(LIBDOGECOIN_UTIL) $(LIBDOGECOIN_CONSENSUS) $(LIBDOGECOIN_CRYPTO) $(LIBDOGECOIN_CRYPTO_SSE41) $(LIBDOGECOIN_CRYPTO_AVX2) $(LIBUNIVALUE) $(LIBLEVELDB) $(LIBMEMENV) \
$(BOOST_LIBS) $(QT_LIBS) $(QT_DBUS_LIBS) $(QR_LIBS) $(BDB_LIBS) $(SSL_LIBS) $(CRYPTO_LIBS) $(MINIUPNPC_LIBS) $(LIBSECP256K1) \
$(EVENT_PTHREADS_LIBS) $(EVENT_LIBS)
qt_dogecoin_qt_LDFLAGS = $(RELDFLAGS) $(AM_LDFLAGS) $(QT_LDFLAGS) $(LIBTOOL_APP_LDFLAGS)
Expand Down
2 changes: 1 addition & 1 deletion src/Makefile.qttest.include
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ endif
if ENABLE_ZMQ
qt_test_test_dogecoin_qt_LDADD += $(LIBDOGECOIN_ZMQ) $(ZMQ_LIBS)
endif
qt_test_test_dogecoin_qt_LDADD += $(LIBDOGECOIN_CLI) $(LIBDOGECOIN_COMMON) $(LIBDOGECOIN_UTIL) $(LIBDOGECOIN_CONSENSUS) $(LIBDOGECOIN_CRYPTO) $(LIBDOGECOIN_CRYPTO_SSE41) $(LIBUNIVALUE) $(LIBLEVELDB) \
qt_test_test_dogecoin_qt_LDADD += $(LIBDOGECOIN_CLI) $(LIBDOGECOIN_COMMON) $(LIBDOGECOIN_UTIL) $(LIBDOGECOIN_CONSENSUS) $(LIBDOGECOIN_CRYPTO) $(LIBDOGECOIN_CRYPTO_SSE41) $(LIBDOGECOIN_CRYPTO_AVX2) $(LIBUNIVALUE) $(LIBLEVELDB) \
$(LIBLEVELDB_SSE42) $(LIBMEMENV) $(BOOST_LIBS) $(QT_DBUS_LIBS) $(QT_TEST_LIBS) $(QT_LIBS) \
$(QR_LIBS) $(BDB_LIBS) $(SSL_LIBS) $(CRYPTO_LIBS) $(MINIUPNPC_LIBS) $(LIBSECP256K1) \
$(EVENT_PTHREADS_LIBS) $(EVENT_LIBS)
Expand Down
3 changes: 2 additions & 1 deletion src/Makefile.test.include
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ endif

test_test_dogecoin_SOURCES = $(BITCOIN_TESTS) $(JSON_TEST_FILES) $(RAW_TEST_FILES)
test_test_dogecoin_CPPFLAGS = $(AM_CPPFLAGS) $(BITCOIN_INCLUDES) -I$(builddir)/test/ $(TESTDEFS) $(EVENT_CFLAGS)
test_test_dogecoin_LDADD = $(LIBDOGECOIN_SERVER) $(LIBDOGECOIN_CLI) $(LIBDOGECOIN_COMMON) $(LIBDOGECOIN_UTIL) $(LIBDOGECOIN_CONSENSUS) $(LIBDOGECOIN_CRYPTO) $(LIBDOGECOIN_CRYPTO_SSE41) $(LIBUNIVALUE) \
test_test_dogecoin_LDADD = $(LIBDOGECOIN_SERVER) $(LIBDOGECOIN_CLI) $(LIBDOGECOIN_COMMON) $(LIBDOGECOIN_UTIL) $(LIBDOGECOIN_CONSENSUS) $(LIBDOGECOIN_CRYPTO) $(LIBDOGECOIN_CRYPTO_SSE41) $(LIBDOGECOIN_CRYPTO_AVX2) $(LIBUNIVALUE) \
$(LIBLEVELDB) $(LIBLEVELDB_SSE42) $(LIBMEMENV) $(BOOST_LIBS) $(BOOST_UNIT_TEST_FRAMEWORK_LIB) $(LIBSECP256K1) $(EVENT_LIBS) $(EVENT_PTHREADS_LIBS)
test_test_dogecoin_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
if ENABLE_WALLET
Expand Down Expand Up @@ -181,6 +181,7 @@ test_test_dogecoin_fuzzy_LDADD = \
$(LIBDOGECOIN_CONSENSUS) \
$(LIBDOGECOIN_CRYPTO) \
$(LIBDOGECOIN_CRYPTO_SSE41) \
$(LIBDOGECOIN_CRYPTO_AVX2) \
$(LIBSECP256K1)

test_test_dogecoin_fuzzy_LDADD += $(BOOST_LIBS) $(CRYPTO_LIBS)
Expand Down
35 changes: 32 additions & 3 deletions src/crypto/sha256.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,17 @@ namespace sha256d64_sse41
void Transform_4way(unsigned char* out, const unsigned char* in);
}

namespace sha256d64_avx2
{
void Transform_8way(unsigned char* out, const unsigned char* in);
}

// Internal implementation code.
namespace
{
/// Internal SHA-256 implementation.
namespace sha256
{
#ifndef USE_AVX2
uint32_t inline Ch(uint32_t x, uint32_t y, uint32_t z) { return z ^ (x & (y ^ z)); }
uint32_t inline Maj(uint32_t x, uint32_t y, uint32_t z) { return (x & y) | (z & (x | y)); }
uint32_t inline Sigma0(uint32_t x) { return (x >> 2 | x << 30) ^ (x >> 13 | x << 19) ^ (x >> 22 | x << 10); }
Expand All @@ -79,7 +83,6 @@ void inline Round(uint32_t a, uint32_t b, uint32_t c, uint32_t& d, uint32_t e, u
d += t1;
h = t1 + t2;
}
#endif

/** Initialize SHA-256 state. */
void inline Initialize(uint32_t* s)
Expand Down Expand Up @@ -668,19 +671,37 @@ bool SelfTest(TransformType tr) {
TransformType Transform = ::Transform;
TransformD64Type TransformD64 = sha256::TransformD64;
TransformD64Type TransformD64_4way = nullptr;
TransformD64Type TransformD64_8way = nullptr;

#if defined(USE_ASM) && (defined(__x86_64__) || defined(__amd64__))
// We can't use cpuid.h's __get_cpuid as it does not support subleafs.
void inline cpuid(uint32_t leaf, uint32_t subleaf, uint32_t& a, uint32_t& b, uint32_t& c, uint32_t& d)
{
__asm__ ("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(leaf), "2"(subleaf));
}
#endif
} // namespace


std::string SHA256AutoDetect()
{
std::string ret = "standard";
#if defined(USE_ASM) && (defined(__x86_64__) || defined(__amd64__))
uint32_t eax, ebx, ecx, edx;
if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) && (ecx >> 19) & 1) {
cpuid(1, 0, eax, ebx, ecx, edx);
if ((ecx >> 19) & 1) {
Transform = sha256_sse4::Transform;
TransformD64 = TransformD64Wrapper<sha256_sse4::Transform>;
#if defined(ENABLE_SSE41) && !defined(BUILD_BITCOIN_INTERNAL)
TransformD64_4way = sha256d64_sse41::Transform_4way;
ret = "sse4(1way+4way)";
#if defined(ENABLE_AVX2) && !defined(BUILD_BITCOIN_INTERNAL)
cpuid(7, 0, eax, ebx, ecx, edx);
if ((ebx >> 5) & 1) {
TransformD64_8way = sha256d64_avx2::Transform_8way;
ret += ",avx2(8way)";
}
#endif
#else
ret = "sse4";
#endif
Expand Down Expand Up @@ -750,6 +771,14 @@ CSHA256& CSHA256::Reset()

void SHA256D64(unsigned char* out, const unsigned char* in, size_t blocks)
{
if (TransformD64_8way) {
while (blocks >= 8) {
TransformD64_8way(out, in);
out += 256;
in += 512;
blocks -= 8;
}
}
if (TransformD64_4way) {
while (blocks >= 4) {
TransformD64_4way(out, in);
Expand Down
Loading

0 comments on commit 457f90c

Please sign in to comment.