Skip to content

Commit

Permalink
[Arm64] lj_new_str() crc32 optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
debayang committed May 7, 2018
1 parent 10aeff6 commit 3801d5c
Show file tree
Hide file tree
Showing 3 changed files with 277 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ CCOPT= -O2 -fomit-frame-pointer
CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse
CCOPT_x64=
CCOPT_arm=
CCOPT_arm64=
CCOPT_arm64= -march=armv8-a+crc
CCOPT_ppc=
CCOPT_mips=
#
Expand Down
271 changes: 271 additions & 0 deletions src/arm64/src/lj_str_hash_arm64.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,271 @@
/*
* This file defines a string hash function based on CRC32. It takes advantage
* of Arm64 hardware support (the crc32 instructions) to speed up the CRC32
* computation. The hash functions compute the CRC32 of the length and of up
* to 128 bytes of the given string.
*/

#ifndef _LJ_STR_HASH_ARM64_H_
#define _LJ_STR_HASH_ARM64_H_

#if defined(__aarch64__) && defined(__GNUC__)

#include <stdint.h>
#include <sys/types.h>
#include <unistd.h>
#include <time.h>
#include <sys/auxv.h>
#include <stdio.h>
#include <arm_acle.h>

#include "../../lj_def.h"

#ifndef HWCAP_CRC32
#define HWCAP_CRC32 (1 << 7)
#endif /* HWCAP for crc32 */

#ifndef LJ_AINLINE
#define LJ_AINLINE inline __attribute__((always_inline))
#endif

#ifdef __MINGW32__
#define random() ((long) rand())
#define srandom(seed) srand(seed)
#endif

/* Fallback hash, defined outside this header; arm64_init_constructor()
 * selects it when the CPU does not report CRC32 support.
 */
extern uint32_t lj_str_original_hash(const char *str, size_t lenx);
/* CRC32-based hash; forward declaration, the definition is at the end of
 * this header.
 */
static LJ_AINLINE uint32_t lj_str_hash(const char* str, size_t len);
/* lj_str hash function determined at runtime */
typedef uint32_t (*lj_str_hash_func)(const char *str, size_t lenx);
/* The hash function in use. It is assigned by arm64_init_constructor()
 * depending on whether HWCAP_CRC32 is set in getauxval(AT_HWCAP).
 */
lj_str_hash_func LJ_STR_HASH;

/* View a byte pointer as a pointer to 64-bit words. The address is returned
 * unchanged; no alignment adjustment is made.
 */
static const uint64_t* cast_uint64p(const char* str)
{
  const void *raw = str;
  return raw;
}

/* View a byte pointer as a pointer to 32-bit words. The address is returned
 * unchanged; no alignment adjustment is made.
 */
static const uint32_t* cast_uint32p(const char* str)
{
  const void *raw = str;
  return raw;
}

/* Hash a very short string (lj_str_hash() routes len < 4 here).
 * The first, middle and last bytes together with the length are packed into
 * one 32-bit word, which is then fed through a single CRC32 word step.
 *
 * str: string data; at least `len` bytes are readable.
 * len: string length; must not be zero, because str[len - 1] is read.
 * Returns the 32-bit hash value.
 */
static LJ_AINLINE uint32_t lj_str_hash_1_4(const char* str, uint32_t len)
{
  /* Read the bytes as unsigned. With a signed `char`, a byte >= 0x80 would
   * be sign-extended and the OR below would overwrite the bytes that were
   * already packed into the upper part of `v`. Where `char` is unsigned the
   * result is identical to reading through `str` directly.
   */
  const unsigned char *bytes = (const unsigned char *)str;
  uint32_t v = bytes[0];

  v = (v << 8) | bytes[len >> 1];
  v = (v << 8) | bytes[len - 1];
  v = (v << 8) | len;
  return __crc32cw(0, v);
}

/* Hash a string of 4..15 bytes (lj_str_hash() routes that range here).
 * Two possibly overlapping words are loaded, one from the start and one
 * ending at the last byte: 64-bit words when len >= 8, 32-bit words
 * otherwise. The CRC is computed over the length, then the head word, then
 * the tail word.
 *
 * str: string data; at least `len` bytes are readable.
 * len: string length.
 * Returns the 32-bit hash value.
 */
static LJ_AINLINE uint32_t lj_str_hash_4_16(const char* str, size_t len)
{
  const char *end = str + len;
  uint64_t head_word, tail_word;
  uint32_t crc;

  if (len < 8) {
    head_word = *cast_uint32p(str);
    tail_word = *cast_uint32p(end - 4);
  } else {
    head_word = *cast_uint64p(str);
    tail_word = *cast_uint64p(end - 8);
  }

  crc = __crc32cw(0, len);
  crc = __crc32cd(crc, head_word);
  return __crc32cd(crc, tail_word);
}

/* Hash a string of 16..127 bytes (lj_str_hash() routes that range here).
 * Two accumulators are maintained. The first is started from the CRC of the
 * length. The string is walked in 16-byte steps: the first 8 bytes of each
 * step go to the first accumulator, the next 8 bytes to the second. Inside
 * the loop each CRC result is *added* to its accumulator. The final 16 bytes
 * of the string are then folded in (these may overlap the last loop step),
 * and the two accumulators are combined with one more CRC word step.
 *
 * str: string data; at least `len` bytes are readable.
 * len: string length; the loop bound `len - 16` requires len >= 16.
 * Returns the 32-bit hash value.
 */
static LJ_AINLINE uint32_t lj_str_hash_16_128(const char* str, size_t len)
{
  const char *tail = str + len - 16;
  uint64_t lane_a = __crc32cw(0, len);
  uint64_t lane_b = 0;
  size_t offset = 0;

  while (offset < len - 16) {
    const char *block = str + offset;

    lane_a += __crc32cd(lane_a, *cast_uint64p(block));
    lane_b += __crc32cd(lane_b, *cast_uint64p(block + 8));
    offset += 16;
  }

  lane_a = __crc32cd(lane_a, *cast_uint64p(tail));
  lane_b = __crc32cd(lane_b, *cast_uint64p(tail + 8));

  return __crc32cw(lane_a, lane_b);
}

/* **************************************************************************
*
* Following is code about hashing string with length >= 128
*
* **************************************************************************
*/

/* Pre-computed sampling offsets, indexed by [floor(log2(chunk size))][0 or 1].
 * Filled in by arm64_init_random() and read via get_random_pos_unsafe().
 */
static uint32_t random_pos[32][2];
/* Lookup table: log2_tab[n] == floor(log2(n)) for n in 1..127; entry 0 is -1.
 * log2_floor() adds 8, 16 or 24 to an entry, so the -1 at index 0 yields
 * 7, 15 or 23 when the shifted value is zero.
 */
static const int8_t log2_tab[128] = { -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6 };

/* Return floor(log2(n)) using the 128-entry log2_tab.
 * The value is examined one byte position at a time (shift 0, 8, 16, 24).
 * As soon as `n >> shift` fits the table (<= 127), the table entry plus the
 * shift is returned. When a byte range's upper half is hit (e.g. 128..255),
 * the next shift produces index 0, whose entry is -1, giving shift - 1.
 * Values with bit 31 set fall through every step and yield 31.
 * For n == 0 the table entry -1 is returned converted to uint32_t.
 */
static LJ_AINLINE uint32_t log2_floor(uint32_t n)
{
  int shift;

  for (shift = 0; shift < 32; shift += 8) {
    uint32_t top = n >> shift;
    if (top <= 127) {
      return log2_tab[top] + shift;
    }
  }

  return 31;
}

#define POW2_MASK(n) ((1L << (n)) - 1)
/* Fill `random_pos` with the offsets used when sampling long strings.
 * Rows 0..2 are set to zero. For every other row i below 31, both entries
 * are random values masked to their low i+1 bits, i.e. values in
 * [0, 2**(i+1)). Row 31 is not written here.
 *
 * The generator is seeded from the process id and the current time, mixed
 * with the CRC32 word instruction.
 */
static void arm64_init_random(void)
{
  int order, slot, seed, rand_bits;

  /* rand_bits = ceil(log2(RAND_MAX)) */
  rand_bits = log2_floor(RAND_MAX);
  if ((RAND_MAX & (RAND_MAX - 1)) != 0) {
    rand_bits++;
  }

  /* Seed the generator. */
  seed = __crc32cw(0, getpid());
  seed = __crc32cw(seed, time(NULL));
  srandom(seed);

  /* Chunks smaller than 8 bytes need no random offset. */
  for (order = 0; order < 3; order++) {
    for (slot = 0; slot < 2; slot++) {
      random_pos[order][slot] = 0;
    }
  }

  /* Rows for which one random() value is masked directly. */
  for (; order < rand_bits; order++) {
    for (slot = 0; slot < 2; slot++) {
      random_pos[order][slot] = random() & POW2_MASK(order + 1);
    }
  }

  /* Remaining rows: multiply random() by an earlier row's entry (or by 1 if
   * that entry is zero) before masking.
   */
  for (; order < 31; order++) {
    for (slot = 0; slot < 2; slot++) {
      uint32_t scale = random_pos[order - rand_bits][0];

      if (scale == 0) {
        scale = 1;
      }
      random_pos[order][slot] = (random() * scale) & POW2_MASK(order + 1);
    }
  }
}
#undef POW2_MASK

/* Constructor run at load time: choose the string hash implementation.
 * When the kernel reports the CRC32 extension through AT_HWCAP, the
 * CRC32-based lj_str_hash() is installed in LJ_STR_HASH and the random
 * sampling offsets are initialised. Otherwise lj_str_original_hash() is
 * installed.
 */
void __attribute__((constructor)) arm64_init_constructor(void)
{
  /* Check if crc32 is supported. */
  unsigned long hwcap = getauxval(AT_HWCAP);

  if (hwcap & HWCAP_CRC32) {
    LJ_STR_HASH = lj_str_hash;
    /* arm64_init_random() itself executes CRC32 instructions while
     * seeding, so it must only run when the extension is present. Within
     * this header its output (random_pos) is read only by
     * lj_str_hash_128_above(), which is part of the CRC32 hash path, so
     * nothing is lost by skipping it on the fallback path.
     */
    arm64_init_random();
  } else {
    LJ_STR_HASH = lj_str_original_hash;
  }
}

/* Look up a pre-computed random offset for the given chunk-size order.
 * arm64_init_random() masks each entry to its low chunk_sz_order+1 bits, so
 * the value lies in [0, 2**(chunk_sz_order+1)). It is "unsafe" in the sense
 * that the offset may be greater than the chunk size; the caller has to make
 * sure that "chunk-base + offset" is a valid address. Only the lowest bit of
 * `idx` is used to pick one of the two entries.
 */
static LJ_AINLINE uint32_t get_random_pos_unsafe(uint32_t chunk_sz_order,
    uint32_t idx)
{
  const uint32_t *pair = random_pos[chunk_sz_order];

  return pair[idx & 1];
}

/* Hash a string of 128 bytes or more (lj_str_hash() routes that range here)
 * by sampling 64-bit words instead of reading the whole string.
 * The string is divided into 16 chunks of len / 16 bytes. Two pre-computed
 * random offsets, chosen by floor(log2(chunk size)), select where in a chunk
 * a word is sampled. Two CRC accumulators are fed alternately and combined
 * at the end.
 *
 * str: string data; at least `len` bytes are readable.
 * len: string length.
 * Returns the 32-bit hash value.
 */
static LJ_NOINLINE uint32_t lj_str_hash_128_above(const char* str,
    uint32_t len)
{
  uint32_t chunk_num, chunk_sz, chunk_sz_log2, i, pos1, pos2;
  uint32_t h1, h2;
  /* Holds a full 64-bit sample. It has to be 64 bits wide: with a 32-bit
   * variable the upper four bytes of every loaded word would be discarded
   * before reaching __crc32cd().
   */
  uint64_t v;
  const char* chunk_ptr;

  chunk_num = 16;
  chunk_sz = len / chunk_num;
  chunk_sz_log2 = log2_floor(chunk_sz);

  pos1 = get_random_pos_unsafe(chunk_sz_log2, 0);
  pos2 = get_random_pos_unsafe(chunk_sz_log2, 1);

  h1 = 0;
  h1 = __crc32cw(h1, len);
  h2 = 0;

  /* chunk_num / 2 - 1 iterations. Each one samples a word at offset pos1
   * from chunk_ptr (into h1) and a word at offset pos2 from the following
   * chunk (into h2). chunk_ptr advances by one chunk per iteration.
   */
  for (i = 0, chunk_ptr = str; i < (chunk_num / 2 - 1);
       chunk_ptr += chunk_sz, i++) {

    v = *cast_uint64p(chunk_ptr + pos1);
    h1 = __crc32cd(h1, v);

    v = *cast_uint64p(chunk_ptr + chunk_sz + pos2);
    h2 = __crc32cd(h2, v);
  }

  /* Two more samples relative to where the loop stopped; the second one is
   * taken pos2 bytes back from the last word of the chunk at chunk_ptr.
   */
  v = *cast_uint64p(chunk_ptr + pos1);
  h1 = __crc32cd(h1, v);

  v = *cast_uint64p(chunk_ptr + chunk_sz - 8 - pos2);
  h2 = __crc32cd(h2, v);

  /* Finally mix in the first and the last 8 bytes of the string. */
  h1 = __crc32cd(h1, *cast_uint64p(str));
  h2 = __crc32cd(h2, *cast_uint64p(str + len - 8));

  h1 = __crc32cw(h1, h2);
  return h1;
}


/* CRC32-based string hash entry point: dispatch on the string length to the
 * specialised routine.
 *   len >= 128 -> lj_str_hash_128_above()
 *   len >= 16  -> lj_str_hash_16_128()
 *   len >= 4   -> lj_str_hash_4_16()
 *   otherwise  -> lj_str_hash_1_4()
 * NOTE: "len" should not be zero.
 */
static LJ_AINLINE uint32_t lj_str_hash(const char* str, size_t len)
{
  if (len >= 128) {
    return lj_str_hash_128_above(str, len);
  }
  if (len >= 16) {
    return lj_str_hash_16_128(str, len);
  }
  if (len >= 4) {
    return lj_str_hash_4_16(str, len);
  }
  return lj_str_hash_1_4(str, len);
}

#endif // defined(__aarch64__)
#endif // _LJ_STR_HASH_ARM64_H_
5 changes: 5 additions & 0 deletions src/lj_str.c
Original file line number Diff line number Diff line change
Expand Up @@ -163,13 +163,18 @@ lj_str_indep_hash(GCstr *str) {
return lj_str_original_hash(strdata(str), str->len);
}

#if defined(__aarch64__)
/* AArch64 CRC32 support determined at runtime */
#include "arm64/src/lj_str_hash_arm64.h"
#else /* x64 */
#include "x64/src/lj_str_hash_x64.h"

#if defined(LJ_ARCH_STR_HASH)
#define LJ_STR_HASH LJ_ARCH_STR_HASH
#else
#define LJ_STR_HASH lj_str_original_hash
#endif
#endif

/* Intern a string and return string object. */
GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
Expand Down

0 comments on commit 3801d5c

Please sign in to comment.