diff --git a/src/bench/scrypt.cpp b/src/bench/scrypt.cpp index f05fdd9bda3..2b75f9ef738 100644 --- a/src/bench/scrypt.cpp +++ b/src/bench/scrypt.cpp @@ -2,6 +2,7 @@ #include #include "bench.h" +#include "crypto/hwcap.h" #include "crypto/scrypt.h" #include "uint256.h" #include "utiltime.h" @@ -15,9 +16,8 @@ static void Scrypt(benchmark::State& state) uint256 output; std::vector in(BUFFER_SIZE, 0); -#ifdef USE_SSE2 - scrypt_detect_sse2(); -#endif // USE_SSE2 + HardwareCapabilities capabilities = DetectHWCapabilities(); + scrypt_select_implementation(capabilities); while (state.KeepRunning()) { diff --git a/src/crypto/hwcap.cpp b/src/crypto/hwcap.cpp index aec4ac40548..1d145622af5 100644 --- a/src/crypto/hwcap.cpp +++ b/src/crypto/hwcap.cpp @@ -17,7 +17,23 @@ HardwareCapabilities DetectHWCapabilities() // generic x86_64 and i686 detection #if defined(HAVE_GETCPUID) uint32_t eax, ebx, ecx, edx; - GetCPUID(1, 0, eax, ebx, ecx, edx); + bool fUsingSSE2; +#if defined(USE_SSE2_ALWAYS) + fUsingSSE2 = true; +#else // USE_SSE2_ALWAYS + // 32bit x86 Linux or Windows, detect cpuid features + edx=0; +#if defined(_MSC_VER) + // MSVC: __cpuid writes EAX, EBX, ECX, EDX into x86cpuid[0..3] + int x86cpuid[4]; + __cpuid(x86cpuid, 1); + edx = (unsigned int)x86cpuid[3]; +#else // _MSC_VER + // Linux or i686-w64-mingw32 (gcc-4.6.3) + // fill the eax/ebx/ecx/edx locals declared at the top of this block + GetCPUID(1, 0, eax, ebx, ecx, edx); +#endif // _MSC_VER +#endif // detect SSE2 #if defined(USE_SSE2) diff --git a/src/crypto/hwcap.h b/src/crypto/hwcap.h index aeefac80609..77701ae05a6 100644 --- a/src/crypto/hwcap.h +++ b/src/crypto/hwcap.h @@ -5,7 +5,15 @@ #ifndef BITCOIN_CRYPTO_HWCAP_H #define BITCOIN_CRYPTO_HWCAP_H -#include "compat/cpuid.h" +#if defined(USE_SSE2) && !defined(USE_SSE2_ALWAYS) +#ifdef _MSC_VER +// MSVC 64bit is unable to use inline asm +#include +#else +// GCC Linux or i686-w64-mingw32 +#include +#endif +#endif struct HardwareCapabilities { bool has_sse2; diff --git a/src/crypto/scrypt-sse2.cpp b/src/crypto/scrypt-sse2.cpp index 7241abe9e30..1c1df71efcc 100644 
--- a/src/crypto/scrypt-sse2.cpp +++ b/src/crypto/scrypt-sse2.cpp @@ -38,7 +38,11 @@ // this entire functionality is experimental EXPERIMENTAL_FEATURE -static inline void xor_salsa8_sse2(__m128i B[4], const __m128i Bx[4]) +namespace scrypt_sse2 { + +namespace { + +inline void xor_salsa8_sse2(__m128i B[4], const __m128i Bx[4]) { __m128i X0, X1, X2, X3; __m128i T; @@ -95,7 +99,9 @@ static inline void xor_salsa8_sse2(__m128i B[4], const __m128i Bx[4]) B[3] = _mm_add_epi32(B[3], X3); } -void scrypt_1024_1_1_256_sp_sse2(const char *input, char *output, char *scratchpad) +} // anon namespace + +void scrypt_1024_1_1_256_sp(const char *input, char *output, char *scratchpad) { uint8_t B[128]; union { @@ -137,3 +143,5 @@ void scrypt_1024_1_1_256_sp_sse2(const char *input, char *output, char *scratchp PBKDF2_SHA256((const uint8_t *)input, 80, B, 128, 1, (uint8_t *)output, 32); } + +} // namespace scrypt_sse2 diff --git a/src/crypto/scrypt.cpp b/src/crypto/scrypt.cpp index af6621f0a3e..07c8ef39433 100644 --- a/src/crypto/scrypt.cpp +++ b/src/crypto/scrypt.cpp @@ -29,20 +29,12 @@ #include "crypto/scrypt.h" #include "crypto/hmac_sha256.h" +#include "crypto/hwcap.h" +#include "support/experimental.h" #include #include #include -#if defined(USE_SSE2) && !defined(USE_SSE2_ALWAYS) -#ifdef _MSC_VER -// MSVC 64bit is unable to use inline asm -#include -#else -// GCC Linux or i686-w64-mingw32 -#include -#endif -#endif - #ifndef __FreeBSD__ static inline uint32_t be32dec(const void *pp) { @@ -59,8 +51,8 @@ static inline void be32enc(void *pp, uint32_t x) p[1] = (x >> 16) & 0xff; p[0] = (x >> 24) & 0xff; } - #endif + /** * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen): * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and @@ -116,9 +108,13 @@ PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, const uint8_t *salt, } } +namespace scrypt_generic { + +namespace { + #define ROTL(a, b) (((a) << (b)) | ((a) >> (32 - (b)))) -static inline void 
xor_salsa8(uint32_t B[16], const uint32_t Bx[16]) +inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16]) { uint32_t x00,x01,x02,x03,x04,x05,x06,x07,x08,x09,x10,x11,x12,x13,x14,x15; int i; @@ -184,7 +180,9 @@ static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16]) B[15] += x15; } -void scrypt_1024_1_1_256_sp_generic(const char *input, char *output, char *scratchpad) +} // anon namespace + +void scrypt_1024_1_1_256_sp(const char *input, char *output, char *scratchpad) { uint8_t B[128]; uint32_t X[32]; @@ -217,44 +215,23 @@ void scrypt_1024_1_1_256_sp_generic(const char *input, char *output, char *scrat PBKDF2_SHA256((const uint8_t *)input, 80, B, 128, 1, (uint8_t *)output, 32); } -#if defined(USE_SSE2) -// By default, set to generic scrypt function. This will prevent crash in case when scrypt_detect_sse2() wasn't called -void (*scrypt_1024_1_1_256_sp_detected)(const char *input, char *output, char *scratchpad) = &scrypt_1024_1_1_256_sp_generic; +} // scrypt_generic namespace + +// by default, use the generic version +void (*scrypt_1024_1_1_256_sp)(const char *input, char *output, char *scratchpad) = &scrypt_generic::scrypt_1024_1_1_256_sp; -bool scrypt_detect_sse2() +bool scrypt_select_implementation(const HardwareCapabilities capabilities) { - bool fUsingSSE2; -#if defined(USE_SSE2_ALWAYS) - fUsingSSE2 = true; -#else // USE_SSE2_ALWAYS - // 32bit x86 Linux or Windows, detect cpuid features - unsigned int cpuid_edx=0; -#if defined(_MSC_VER) - // MSVC - int x86cpuid[4]; - __cpuid(x86cpuid, 1); - cpuid_edx = (unsigned int)buffer[3]; -#else // _MSC_VER - // Linux or i686-w64-mingw32 (gcc-4.6.3) - unsigned int eax, ebx, ecx; - GetCPUID(1, 0, &eax, &ebx, &ecx, &cpuid_edx); -#endif // _MSC_VER - - if (cpuid_edx & 1<<26) - { - scrypt_1024_1_1_256_sp_detected = &scrypt_1024_1_1_256_sp_sse2; - fUsingSSE2 = true; - } - else - { - scrypt_1024_1_1_256_sp_detected = &scrypt_1024_1_1_256_sp_generic; - fUsingSSE2 = false; - } -#endif // USE_SSE2_ALWAYS +#if 
defined(USE_SSE2) + EXPERIMENTAL_FEATURE - return fUsingSSE2; + if (capabilities.has_sse2) { + scrypt_1024_1_1_256_sp = &scrypt_sse2::scrypt_1024_1_1_256_sp; + return true; + } +#endif // defined(USE_SSE2) + return false; } -#endif void scrypt_1024_1_1_256(const char *input, char *output) { diff --git a/src/crypto/scrypt.h b/src/crypto/scrypt.h index bdf6230a059..db6724cd3ab 100644 --- a/src/crypto/scrypt.h +++ b/src/crypto/scrypt.h @@ -5,6 +5,7 @@ #ifndef BITCOIN_CRYPTO_SCRYPT_H #define BITCOIN_CRYPTO_SCRYPT_H +#include "crypto/hwcap.h" #include #include @@ -14,28 +15,21 @@ static const int SCRYPT_SCRATCHPAD_SIZE = 131072 + 63; -void scrypt_1024_1_1_256(const char *input, char *output); -void scrypt_1024_1_1_256_sp_generic(const char *input, char *output, char *scratchpad); - -#if defined(USE_SSE2) -#if defined(_M_X64) || defined(__x86_64__) || defined(_M_AMD64) || (defined(MAC_OSX) && defined(__i386__)) -#define USE_SSE2_ALWAYS 1 -#define scrypt_1024_1_1_256_sp(input, output, scratchpad) scrypt_1024_1_1_256_sp_sse2((input), (output), (scratchpad)) -#else -#define scrypt_1024_1_1_256_sp(input, output, scratchpad) scrypt_1024_1_1_256_sp_detected((input), (output), (scratchpad)) -#endif - -bool scrypt_detect_sse2(); -void scrypt_1024_1_1_256_sp_sse2(const char *input, char *output, char *scratchpad); -extern void (*scrypt_1024_1_1_256_sp_detected)(const char *input, char *output, char *scratchpad); -#else -#define scrypt_1024_1_1_256_sp(input, output, scratchpad) scrypt_1024_1_1_256_sp_generic((input), (output), (scratchpad)) -#endif - void PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, const uint8_t *salt, size_t saltlen, uint64_t c, uint8_t *buf, size_t dkLen); +namespace scrypt_generic { + void scrypt_1024_1_1_256_sp(const char *input, char *output, char *scratchpad); +} + +namespace scrypt_sse2 { + void scrypt_1024_1_1_256_sp(const char *input, char *output, char *scratchpad); +} + +void scrypt_1024_1_1_256(const char *input, char *output); +bool 
scrypt_select_implementation(const HardwareCapabilities capabilities); + #ifndef __FreeBSD__ static inline uint32_t le32dec(const void *pp) { diff --git a/src/init.cpp b/src/init.cpp index c3f54e4607c..f779098794e 100644 --- a/src/init.cpp +++ b/src/init.cpp @@ -17,7 +17,8 @@ #include "checkpoints.h" #include "compat/sanity.h" #include "consensus/validation.h" -#include "crypto/scrypt.h" // for scrypt_detect_sse2 +#include "crypto/hwcap.h" // for DetectHWCapabilities +#include "crypto/scrypt.h" // for scrypt_select_implementation #include "fs.h" #include "httpserver.h" #include "httprpc.h" @@ -1258,13 +1259,13 @@ bool AppInitMain(boost::thread_group& threadGroup, CScheduler& scheduler) int64_t nStart; -#if defined(USE_SSE2) - if (scrypt_detect_sse2()) { - LogPrintf("scrypt: using SSE2 implementation\n"); + + HardwareCapabilities capabilities = DetectHWCapabilities(); + if (scrypt_select_implementation(capabilities)) { + LogPrintf("scrypt: using SSE2 implementation\n"); } else { - LogPrintf("scrypt: using generic implementation\n"); + LogPrintf("scrypt: using generic implementation\n"); } -#endif // ********************************************************* Step 5: verify wallet database integrity #ifdef ENABLE_WALLET diff --git a/src/test/scrypt_tests.cpp b/src/test/scrypt_tests.cpp index d0131716341..5f18bca13da 100644 --- a/src/test/scrypt_tests.cpp +++ b/src/test/scrypt_tests.cpp @@ -4,6 +4,7 @@ // file COPYING or http://www.opensource.org/licenses/mit-license.php. 
#include +#include "crypto/hwcap.h" #include "crypto/scrypt.h" #include "uint256.h" #include "util.h" @@ -13,13 +14,14 @@ BOOST_AUTO_TEST_SUITE(scrypt_tests) BOOST_AUTO_TEST_CASE(scrypt_hashtest) { +#if defined(USE_SSE2) + // get cpu capabilities + HardwareCapabilities capabilities = DetectHWCapabilities(); +#endif // Test Scrypt hash with known inputs against expected outputs #define HASHCOUNT 5 const char* inputhex[HASHCOUNT] = { "020000004c1271c211717198227392b029a64a7971931d351b387bb80db027f270411e398a07046f7d4a08dd815412a8712f874a7ebf0507e3878bd24e20a3b73fd750a667d2f451eac7471b00de6659", "0200000011503ee6a855e900c00cfdd98f5f55fffeaee9b6bf55bea9b852d9de2ce35828e204eef76acfd36949ae56d1fbe81c1ac9c0209e6331ad56414f9072506a77f8c6faf551eac7471b00389d01", "02000000a72c8a177f523946f42f22c3e86b8023221b4105e8007e59e81f6beb013e29aaf635295cb9ac966213fb56e046dc71df5b3f7f67ceaeab24038e743f883aff1aaafaf551eac7471b0166249b", "010000007824bc3a8a1b4628485eee3024abd8626721f7f870f8ad4d2f33a27155167f6a4009d1285049603888fe85a84b6c803a53305a8d497965a5e896e1a00568359589faf551eac7471b0065434e", "0200000050bfd4e4a307a8cb6ef4aef69abc5c0f2d579648bd80d7733e1ccc3fbc90ed664a7f74006cb11bde87785f229ecd366c2d4e44432832580e0608c579e4cb76f383f7f551eac7471b00c36982" }; const char* expected[HASHCOUNT] = { "00000000002bef4107f882f6115e0b01f348d21195dacd3582aa2dabd7985806" , "00000000003a0d11bdd5eb634e08b7feddcfbbf228ed35d250daf19f1c88fc94", "00000000000b40f895f288e13244728a6c2d9d59d8aff29c65f8dd5114a8ca81", "00000000003007005891cd4923031e99d8e8d72f6e8e7edc6a86181897e105fe", "000000000018f0b426a4afc7130ccb47fa02af730d345b4fe7c7724d3800ec8c" }; -#if defined(USE_SSE2) - scrypt_detect_sse2(); -#endif uint256 scrypthash; std::vector inputbytes; char scratchpad[SCRYPT_SCRATCHPAD_SIZE]; @@ -27,11 +29,13 @@ BOOST_AUTO_TEST_CASE(scrypt_hashtest) inputbytes = ParseHex(inputhex[i]); #if defined(USE_SSE2) // Test SSE2 scrypt - scrypt_1024_1_1_256_sp_sse2((const char*)&inputbytes[0], BEGIN(scrypthash), 
scratchpad); - BOOST_CHECK_EQUAL(scrypthash.ToString().c_str(), expected[i]); + if (capabilities.has_sse2) { + scrypt_sse2::scrypt_1024_1_1_256_sp((const char*)&inputbytes[0], BEGIN(scrypthash), scratchpad); + BOOST_CHECK_EQUAL(scrypthash.ToString().c_str(), expected[i]); + } #endif // Test generic scrypt - scrypt_1024_1_1_256_sp_generic((const char*)&inputbytes[0], BEGIN(scrypthash), scratchpad); + scrypt_generic::scrypt_1024_1_1_256_sp((const char*)&inputbytes[0], BEGIN(scrypthash), scratchpad); BOOST_CHECK_EQUAL(scrypthash.ToString().c_str(), expected[i]); } }