Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved performance of rz_bv_copy_nbits and rz_bv_set_range #4740

Open
wants to merge 11 commits into
base: dev
Choose a base branch
from
159 changes: 147 additions & 12 deletions librz/util/bitvector.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@
// SPDX-License-Identifier: LGPL-3.0-only

#include "rz_util.h"
#include <rz_types.h>
#include <stdlib.h>
#include <stdio.h>

// Ceiling division: how many ELEMPER-sized elements are needed to hold N items.
// Both arguments are fully parenthesized so that expression arguments
// (e.g. `1 << 3` or `a ? b : c`) are grouped as the caller wrote them.
#define NELEM(N, ELEMPER) (((N) + (ELEMPER)-1) / (ELEMPER))
#define BV_ELEM_SIZE 8U
// Width in bits of one ut32 chunk as used by the chunk helpers in this file.
#define RZ_BV_CHUNK_SIZE (sizeof(ut32) * CHAR_BIT)
// Size in bytes of a ut32.
#define SIZE_OF_UT32 sizeof(ut32)

// optimization for reversing 8 bits which uses 32 bits
// https://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith32Bits
Expand Down Expand Up @@ -137,7 +140,6 @@ RZ_API RZ_OWN char *rz_bv_as_hex_string(RZ_NONNULL const RzBitVector *bv, bool p
if (!str) {
return NULL;
}

str[0] = '0';
str[1] = 'x';
ut32 j = 2;
Expand Down Expand Up @@ -200,6 +202,79 @@ RZ_API ut32 rz_bv_copy(RZ_NONNULL const RzBitVector *src, RZ_NONNULL RzBitVector
return dst->_elem_len;
}

/**
 * Get a 32-bit chunk from the specified position in the bit vector.
 *
 * Chunks are RZ_BV_CHUNK_SIZE bits wide and always start on a chunk boundary,
 * so chunk `chunk_idx` is exactly word `chunk_idx` of `bv->data`; a chunk
 * addressed by index can never straddle two words.
 * NOTE(review): this relies on `bv->data` being an array of ut32 words -
 * confirm against the RzBitVector definition.
 *
 * \param bv RzBitVector, the bit vector from which to extract the chunk
 * \param chunk_idx ut32, the index of the chunk to retrieve
 * \return chunk ut32, the extracted 32-bit chunk; 0 if `bv` is NULL or the
 *         chunk starts at or beyond `bv->len` bits
 */
RZ_API ut32 rz_bv_get_chunk(const RzBitVector *bv, ut32 chunk_idx) {
	rz_return_val_if_fail(bv, 0);

	// Number of chunks that contain at least one valid bit. Written as
	// div + mod so that the computation cannot overflow for large lengths.
	const ut32 chunk_count = bv->len / RZ_BV_CHUNK_SIZE + (bv->len % RZ_BV_CHUNK_SIZE ? 1 : 0);
	if (chunk_idx >= chunk_count) {
		return 0;
	}

	return bv->data[chunk_idx];
}

/**
 * Set a 32-bit chunk at the specified position in the bit vector.
 *
 * Chunks are RZ_BV_CHUNK_SIZE bits wide and always start on a chunk boundary,
 * so chunk `chunk_idx` is exactly word `chunk_idx` of `bv->data` and the
 * whole word is overwritten; there is no case in which an indexed chunk
 * spans two words.
 * NOTE(review): this relies on `bv->data` being an array of ut32 words -
 * confirm against the RzBitVector definition.
 *
 * Does nothing if `bv` is NULL or the chunk starts at or beyond `bv->len` bits.
 *
 * \param bv RzBitVector, the bit vector in which to set the chunk
 * \param chunk_idx ut32, the index of the chunk to set
 * \param chunk ut32, the 32-bit chunk to set
 */
RZ_API void rz_bv_set_chunk(RzBitVector *bv, ut32 chunk_idx, ut32 chunk) {
	rz_return_if_fail(bv);

	// Number of chunks that contain at least one valid bit. Written as
	// div + mod so that the computation cannot overflow for large lengths.
	const ut32 chunk_count = bv->len / RZ_BV_CHUNK_SIZE + (bv->len % RZ_BV_CHUNK_SIZE ? 1 : 0);
	if (chunk_idx >= chunk_count) {
		return;
	}

	bv->data[chunk_idx] = chunk;
}

/**
* Copy n bits from start position of source to start position of dest, return num of copied bits
* \param src RzBitVector, data source
Expand All @@ -209,24 +284,60 @@ RZ_API ut32 rz_bv_copy(RZ_NONNULL const RzBitVector *src, RZ_NONNULL RzBitVector
* \param nbit ut32, control the size of copy (in bits)
* \return copied_size ut32, Actual copied size
*/

RZ_API ut32 rz_bv_copy_nbits(RZ_NONNULL const RzBitVector *src, ut32 src_start_pos, RZ_NONNULL RzBitVector *dst, ut32 dst_start_pos, ut32 nbit) {
	rz_return_val_if_fail(src && dst, 0);

	// Reject start positions past the end before subtracting: the lengths are
	// unsigned, so `len - start` would wrap around instead of going negative.
	if (src_start_pos > src->len || dst_start_pos > dst->len) {
		return 0;
	}

	// prevent overflow: the requested range must fit in both vectors
	const ut32 max_nbit = RZ_MIN((src->len - src_start_pos), (dst->len - dst_start_pos));
	if (max_nbit < nbit) {
		return 0;
	}

	const ut32 chunk_bits = RZ_BV_CHUNK_SIZE;
	ut32 remaining = nbit;

	// The chunk-wise fast path is only usable when source and destination
	// share the same offset inside a chunk; otherwise they can never be
	// chunk-aligned at the same time and everything is copied bit by bit below.
	// NOTE(review): the fast path relies on rz_bv_get_chunk/rz_bv_set_chunk
	// addressing the same bits as rz_bv_get/rz_bv_set - confirm.
	if (src_start_pos % chunk_bits == dst_start_pos % chunk_bits) {
		// Unaligned prefix: copy single bits up to the next chunk boundary.
		while (remaining > 0 && src_start_pos % chunk_bits != 0) {
			bool bit = rz_bv_get(src, src_start_pos);
			rz_bv_set(dst, dst_start_pos, bit);
			++src_start_pos;
			++dst_start_pos;
			--remaining;
		}

		// Aligned body: every iteration copies one complete chunk, so no
		// masking or merging with the destination contents is needed.
		while (remaining >= chunk_bits) {
			ut32 src_chunk = rz_bv_get_chunk(src, src_start_pos / chunk_bits);
			rz_bv_set_chunk(dst, dst_start_pos / chunk_bits, src_chunk);
			src_start_pos += chunk_bits;
			dst_start_pos += chunk_bits;
			remaining -= chunk_bits;
		}
	}

	// Remaining suffix bits (or the whole range when the offsets differ).
	while (remaining > 0) {
		bool bit = rz_bv_get(src, src_start_pos);
		rz_bv_set(dst, dst_start_pos, bit);
		++src_start_pos;
		++dst_start_pos;
		--remaining;
	}

	// All requested bits were copied.
	return nbit;
}

/**
Expand Down Expand Up @@ -1481,12 +1592,36 @@ RZ_API ut64 rz_bv_to_ut64(RZ_NONNULL const RzBitVector *x) {
*/
RZ_API bool rz_bv_set_range(RZ_NONNULL RzBitVector *bv, ut32 pos_start, ut32 pos_end, bool b) {
rz_return_val_if_fail(bv, false);
if (pos_start > bv->len - 1 || pos_end > bv->len - 1) {

if (pos_start > bv->len - 1 || pos_end > bv->len - 1 || pos_start > pos_end) {
return false;
}

for (ut32 i = pos_start; i <= pos_end; ++i) {
rz_bv_set(bv, i, b);
// Determine the chunk size dynamically
const ut32 RZ_BV_CHUNK_SIZE = SIZE_OF_UT32 * CHAR_BIT;

// Handle unaligned prefix bits
while (pos_start < pos_end && pos_start % RZ_BV_CHUNK_SIZE != 0) {
rz_bv_set(bv, pos_start++, b);
}

// Process aligned chunks
if (pos_start < pos_end) {
ut32 chunk_start = pos_start / RZ_BV_CHUNK_SIZE;
ut32 chunk_end = pos_end / RZ_BV_CHUNK_SIZE;

ut32 fill_value = b ? ~0UL : 0UL;

for (ut32 i = chunk_start; i < chunk_end; ++i) {
rz_bv_set_chunk(bv, i, fill_value);
}

pos_start = chunk_end * RZ_BV_CHUNK_SIZE;
}

// Handle remaining unaligned suffix bits
while (pos_start <= pos_end) {
rz_bv_set(bv, pos_start++, b);
}

return true;
Expand Down
Loading