Skip to content

Commit

Permalink
crypto: use generic hwcap detection for scrypt sse2
Browse files Browse the repository at this point in the history
Cherry-picked from: f5c21c4

Additional edits needed for fixing MSVC
  • Loading branch information
patricklodder authored and xanimo committed May 2, 2024
1 parent 74da7e7 commit 21cbc71
Show file tree
Hide file tree
Showing 8 changed files with 92 additions and 84 deletions.
6 changes: 3 additions & 3 deletions src/bench/scrypt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <vector>

#include "bench.h"
#include "crypto/hwcap.h"
#include "crypto/scrypt.h"
#include "uint256.h"
#include "utiltime.h"
Expand All @@ -15,9 +16,8 @@ static void Scrypt(benchmark::State& state)
uint256 output;
std::vector<char> in(BUFFER_SIZE, 0);

#ifdef USE_SSE2
scrypt_detect_sse2();
#endif // USE_SSE2
HardwareCapabilities capabilities = DetectHWCapabilities();
scrypt_select_implementation(capabilities);

while (state.KeepRunning())
{
Expand Down
18 changes: 17 additions & 1 deletion src/crypto/hwcap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,23 @@ HardwareCapabilities DetectHWCapabilities()
// generic x86_64 and i686 detection
#if defined(HAVE_GETCPUID)
uint32_t eax, ebx, ecx, edx;
GetCPUID(1, 0, eax, ebx, ecx, edx);
bool fUsingSSE2;
#if defined(USE_SSE2_ALWAYS)
fUsingSSE2 = true;
#else // USE_SSE2_ALWAYS
// 32bit x86 Linux or Windows, detect cpuid features
edx=0;
#if defined(_MSC_VER)
// MSVC: query CPUID leaf 1 through the __cpuid intrinsic, which fills the
// four-element array with EAX, EBX, ECX and EDX, in that order.
int x86cpuid[4];
__cpuid(x86cpuid, 1);
// EDX is element 3 of the array __cpuid just filled; read it from x86cpuid
// (no variable named `buffer` exists in this scope).
edx = static_cast<uint32_t>(x86cpuid[3]);
#else // _MSC_VER
// Linux or i686-w64-mingw32 (gcc-4.6.3)
unsigned int eax, ebx, ecx;
GetCPUID(1, 0, &eax, &ebx, &ecx, &edx);
#endif // _MSC_VER
#endif

// detect SSE2
#if defined(USE_SSE2)
Expand Down
10 changes: 9 additions & 1 deletion src/crypto/hwcap.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,15 @@
#ifndef BITCOIN_CRYPTO_HWCAP_H
#define BITCOIN_CRYPTO_HWCAP_H

#include "compat/cpuid.h"
#if defined(USE_SSE2) && !defined(USE_SSE2_ALWAYS)
#ifdef _MSC_VER
// MSVC 64bit is unable to use inline asm
#include <intrin.h>
#else
// GCC Linux or i686-w64-mingw32
#include <compat/cpuid.h>
#endif
#endif

struct HardwareCapabilities {
bool has_sse2;
Expand Down
12 changes: 10 additions & 2 deletions src/crypto/scrypt-sse2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,11 @@
// this entire functionality is experimental
EXPERIMENTAL_FEATURE

static inline void xor_salsa8_sse2(__m128i B[4], const __m128i Bx[4])
namespace scrypt_sse2 {

namespace {

inline void xor_salsa8_sse2(__m128i B[4], const __m128i Bx[4])
{
__m128i X0, X1, X2, X3;
__m128i T;
Expand Down Expand Up @@ -95,7 +99,9 @@ static inline void xor_salsa8_sse2(__m128i B[4], const __m128i Bx[4])
B[3] = _mm_add_epi32(B[3], X3);
}

void scrypt_1024_1_1_256_sp_sse2(const char *input, char *output, char *scratchpad)
} // anon namespace

void scrypt_1024_1_1_256_sp(const char *input, char *output, char *scratchpad)
{
uint8_t B[128];
union {
Expand Down Expand Up @@ -137,3 +143,5 @@ void scrypt_1024_1_1_256_sp_sse2(const char *input, char *output, char *scratchp

PBKDF2_SHA256((const uint8_t *)input, 80, B, 128, 1, (uint8_t *)output, 32);
}

} // namespace scrypt_sse2
71 changes: 24 additions & 47 deletions src/crypto/scrypt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,12 @@

#include "crypto/scrypt.h"
#include "crypto/hmac_sha256.h"
#include "crypto/hwcap.h"
#include "support/experimental.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>

#if defined(USE_SSE2) && !defined(USE_SSE2_ALWAYS)
#ifdef _MSC_VER
// MSVC 64bit is unable to use inline asm
#include <intrin.h>
#else
// GCC Linux or i686-w64-mingw32
#include <compat/cpuid.h>
#endif
#endif

#ifndef __FreeBSD__
static inline uint32_t be32dec(const void *pp)
{
Expand All @@ -59,8 +51,8 @@ static inline void be32enc(void *pp, uint32_t x)
p[1] = (x >> 16) & 0xff;
p[0] = (x >> 24) & 0xff;
}

#endif

/**
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
Expand Down Expand Up @@ -116,9 +108,13 @@ PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, const uint8_t *salt,
}
}

namespace scrypt_generic {

namespace {

#define ROTL(a, b) (((a) << (b)) | ((a) >> (32 - (b))))

static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
{
uint32_t x00,x01,x02,x03,x04,x05,x06,x07,x08,x09,x10,x11,x12,x13,x14,x15;
int i;
Expand Down Expand Up @@ -184,7 +180,9 @@ static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
B[15] += x15;
}

void scrypt_1024_1_1_256_sp_generic(const char *input, char *output, char *scratchpad)
} // anon namespace

void scrypt_1024_1_1_256_sp(const char *input, char *output, char *scratchpad)
{
uint8_t B[128];
uint32_t X[32];
Expand Down Expand Up @@ -217,44 +215,23 @@ void scrypt_1024_1_1_256_sp_generic(const char *input, char *output, char *scrat
PBKDF2_SHA256((const uint8_t *)input, 80, B, 128, 1, (uint8_t *)output, 32);
}

#if defined(USE_SSE2)
// By default, point at the generic scrypt function. This prevents a crash if scrypt_detect_sse2() was never called.
void (*scrypt_1024_1_1_256_sp_detected)(const char *input, char *output, char *scratchpad) = &scrypt_1024_1_1_256_sp_generic;
} // namespace scrypt_generic

// Pointer to the scrypt implementation currently in use.
// It is initialised to the generic version, so it is always callable;
// scrypt_select_implementation() may repoint it at the SSE2 version.
void (*scrypt_1024_1_1_256_sp)(const char *input, char *output, char *scratchpad) = &scrypt_generic::scrypt_1024_1_1_256_sp;

bool scrypt_detect_sse2()
bool scrypt_select_implementation(const HardwareCapabilities capabilities)
{
bool fUsingSSE2;
#if defined(USE_SSE2_ALWAYS)
fUsingSSE2 = true;
#else // USE_SSE2_ALWAYS
// 32bit x86 Linux or Windows, detect cpuid features
unsigned int cpuid_edx=0;
#if defined(_MSC_VER)
// MSVC: query CPUID leaf 1 through the __cpuid intrinsic, which fills the
// four-element array with EAX, EBX, ECX and EDX, in that order.
int x86cpuid[4];
__cpuid(x86cpuid, 1);
// EDX is element 3 of the array __cpuid just filled; read it from x86cpuid
// (no variable named `buffer` exists in this scope).
cpuid_edx = static_cast<unsigned int>(x86cpuid[3]);
#else // _MSC_VER
// Linux or i686-w64-mingw32 (gcc-4.6.3)
unsigned int eax, ebx, ecx;
GetCPUID(1, 0, &eax, &ebx, &ecx, &cpuid_edx);
#endif // _MSC_VER

if (cpuid_edx & 1<<26)
{
scrypt_1024_1_1_256_sp_detected = &scrypt_1024_1_1_256_sp_sse2;
fUsingSSE2 = true;
}
else
{
scrypt_1024_1_1_256_sp_detected = &scrypt_1024_1_1_256_sp_generic;
fUsingSSE2 = false;
}
#endif // USE_SSE2_ALWAYS
#if defined(USE_SSE2)
EXPERIMENTAL_FEATURE

return fUsingSSE2;
if (capabilities.has_sse2) {
scrypt_1024_1_1_256_sp = &scrypt_sse2::scrypt_1024_1_1_256_sp;
return true;
}
#endif // defined(USE_SSE2)
return false;
}
#endif

void scrypt_1024_1_1_256(const char *input, char *output)
{
Expand Down
30 changes: 12 additions & 18 deletions src/crypto/scrypt.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#ifndef BITCOIN_CRYPTO_SCRYPT_H
#define BITCOIN_CRYPTO_SCRYPT_H
#include "crypto/hwcap.h"
#include <stdlib.h>
#include <stdint.h>

Expand All @@ -14,28 +15,21 @@

// Size, in bytes, of the scratchpad buffer that callers pass to the
// scrypt_1024_1_1_256_sp() implementations.
// NOTE(review): 131072 == 1024 * 128; the extra 63 bytes presumably leave room
// for aligning the working area to a 64-byte boundary - confirm against the
// implementations.
static const int SCRYPT_SCRATCHPAD_SIZE = 131072 + 63;

void scrypt_1024_1_1_256(const char *input, char *output);
void scrypt_1024_1_1_256_sp_generic(const char *input, char *output, char *scratchpad);

#if defined(USE_SSE2)
#if defined(_M_X64) || defined(__x86_64__) || defined(_M_AMD64) || (defined(MAC_OSX) && defined(__i386__))
#define USE_SSE2_ALWAYS 1
#define scrypt_1024_1_1_256_sp(input, output, scratchpad) scrypt_1024_1_1_256_sp_sse2((input), (output), (scratchpad))
#else
#define scrypt_1024_1_1_256_sp(input, output, scratchpad) scrypt_1024_1_1_256_sp_detected((input), (output), (scratchpad))
#endif

bool scrypt_detect_sse2();
void scrypt_1024_1_1_256_sp_sse2(const char *input, char *output, char *scratchpad);
extern void (*scrypt_1024_1_1_256_sp_detected)(const char *input, char *output, char *scratchpad);
#else
#define scrypt_1024_1_1_256_sp(input, output, scratchpad) scrypt_1024_1_1_256_sp_generic((input), (output), (scratchpad))
#endif

void
PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, const uint8_t *salt,
size_t saltlen, uint64_t c, uint8_t *buf, size_t dkLen);

/**
 * Generic scrypt implementation; this is the one the
 * scrypt_1024_1_1_256_sp function pointer refers to by default.
 *
 * The definition's final step derives 32 bytes into `output` using 80 bytes
 * of `input` as the PBKDF2 password. `scratchpad` is caller-provided working
 * memory; callers size it with SCRYPT_SCRATCHPAD_SIZE.
 */
namespace scrypt_generic {
void scrypt_1024_1_1_256_sp(const char *input, char *output, char *scratchpad);
}

/**
 * SSE2 scrypt implementation, with the same signature as the generic one.
 *
 * scrypt_select_implementation() switches to this version only when the build
 * defines USE_SSE2 and the detected capabilities report has_sse2.
 * Like the generic version, its final step writes 32 bytes to `output` from
 * 80 bytes of `input`; `scratchpad` is caller-provided working memory.
 */
namespace scrypt_sse2 {
void scrypt_1024_1_1_256_sp(const char *input, char *output, char *scratchpad);
}

// Scrypt hash entry point that takes no scratchpad argument.
// NOTE(review): presumably provides its own scratchpad and forwards to
// scrypt_1024_1_1_256_sp - the body is not visible here; confirm.
void scrypt_1024_1_1_256(const char *input, char *output);

// Chooses the scrypt implementation from the detected hardware capabilities.
// Returns true after switching scrypt_1024_1_1_256_sp to the SSE2 version,
// which happens only when built with USE_SSE2 and capabilities.has_sse2 is
// set. Otherwise returns false and leaves the current implementation unchanged.
bool scrypt_select_implementation(const HardwareCapabilities capabilities);

#ifndef __FreeBSD__
static inline uint32_t le32dec(const void *pp)
{
Expand Down
13 changes: 7 additions & 6 deletions src/init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
#include "checkpoints.h"
#include "compat/sanity.h"
#include "consensus/validation.h"
#include "crypto/scrypt.h" // for scrypt_detect_sse2
#include "crypto/hwcap.h" // for DetectHWCapabilities
#include "crypto/scrypt.h" // for scrypt_select_implementation
#include "fs.h"
#include "httpserver.h"
#include "httprpc.h"
Expand Down Expand Up @@ -1258,13 +1259,13 @@ bool AppInitMain(boost::thread_group& threadGroup, CScheduler& scheduler)

int64_t nStart;

#if defined(USE_SSE2)
if (scrypt_detect_sse2()) {
LogPrintf("scrypt: using SSE2 implementation\n");

HardwareCapabilities capabilities = DetectHWCapabilities();
if (scrypt_select_implementation(capabilities)) {
LogPrintf("scrypt: using SSE2 implementation\n");
} else {
LogPrintf("scrypt: using generic implementation\n");
LogPrintf("scrypt: using generic implementation\n");
}
#endif

// ********************************************************* Step 5: verify wallet database integrity
#ifdef ENABLE_WALLET
Expand Down
16 changes: 10 additions & 6 deletions src/test/scrypt_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include <boost/test/unit_test.hpp>

#include "crypto/hwcap.h"
#include "crypto/scrypt.h"
#include "uint256.h"
#include "util.h"
Expand All @@ -13,25 +14,28 @@ BOOST_AUTO_TEST_SUITE(scrypt_tests)

BOOST_AUTO_TEST_CASE(scrypt_hashtest)
{
#if defined(USE_SSE2)
// get cpu capabilities
HardwareCapabilities capabilities = DetectHWCapabilities();
#endif
// Test Scrypt hash with known inputs against expected outputs
#define HASHCOUNT 5
const char* inputhex[HASHCOUNT] = { "020000004c1271c211717198227392b029a64a7971931d351b387bb80db027f270411e398a07046f7d4a08dd815412a8712f874a7ebf0507e3878bd24e20a3b73fd750a667d2f451eac7471b00de6659", "0200000011503ee6a855e900c00cfdd98f5f55fffeaee9b6bf55bea9b852d9de2ce35828e204eef76acfd36949ae56d1fbe81c1ac9c0209e6331ad56414f9072506a77f8c6faf551eac7471b00389d01", "02000000a72c8a177f523946f42f22c3e86b8023221b4105e8007e59e81f6beb013e29aaf635295cb9ac966213fb56e046dc71df5b3f7f67ceaeab24038e743f883aff1aaafaf551eac7471b0166249b", "010000007824bc3a8a1b4628485eee3024abd8626721f7f870f8ad4d2f33a27155167f6a4009d1285049603888fe85a84b6c803a53305a8d497965a5e896e1a00568359589faf551eac7471b0065434e", "0200000050bfd4e4a307a8cb6ef4aef69abc5c0f2d579648bd80d7733e1ccc3fbc90ed664a7f74006cb11bde87785f229ecd366c2d4e44432832580e0608c579e4cb76f383f7f551eac7471b00c36982" };
const char* expected[HASHCOUNT] = { "00000000002bef4107f882f6115e0b01f348d21195dacd3582aa2dabd7985806" , "00000000003a0d11bdd5eb634e08b7feddcfbbf228ed35d250daf19f1c88fc94", "00000000000b40f895f288e13244728a6c2d9d59d8aff29c65f8dd5114a8ca81", "00000000003007005891cd4923031e99d8e8d72f6e8e7edc6a86181897e105fe", "000000000018f0b426a4afc7130ccb47fa02af730d345b4fe7c7724d3800ec8c" };
#if defined(USE_SSE2)
scrypt_detect_sse2();
#endif
uint256 scrypthash;
std::vector<unsigned char> inputbytes;
char scratchpad[SCRYPT_SCRATCHPAD_SIZE];
for (int i = 0; i < HASHCOUNT; i++) {
inputbytes = ParseHex(inputhex[i]);
#if defined(USE_SSE2)
// Test SSE2 scrypt
scrypt_1024_1_1_256_sp_sse2((const char*)&inputbytes[0], BEGIN(scrypthash), scratchpad);
BOOST_CHECK_EQUAL(scrypthash.ToString().c_str(), expected[i]);
if (capabilities.has_sse2) {
scrypt_sse2::scrypt_1024_1_1_256_sp((const char*)&inputbytes[0], BEGIN(scrypthash), scratchpad);
BOOST_CHECK_EQUAL(scrypthash.ToString().c_str(), expected[i]);
}
#endif
// Test generic scrypt
scrypt_1024_1_1_256_sp_generic((const char*)&inputbytes[0], BEGIN(scrypthash), scratchpad);
scrypt_generic::scrypt_1024_1_1_256_sp((const char*)&inputbytes[0], BEGIN(scrypthash), scratchpad);
BOOST_CHECK_EQUAL(scrypthash.ToString().c_str(), expected[i]);
}
}
Expand Down

0 comments on commit 21cbc71

Please sign in to comment.