Skip to content

Commit

Permalink
[enhancement](memory) Add PODArray UT and pick some patch
Browse files Browse the repository at this point in the history
  • Loading branch information
xinyiZzz committed May 30, 2024
1 parent 9932b6e commit 610418d
Show file tree
Hide file tree
Showing 3 changed files with 674 additions and 22 deletions.
62 changes: 44 additions & 18 deletions be/src/vec/common/pod_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,19 @@ class PODArrayBase : private boost::noncopyable,
char* c_end_of_storage = nullptr; /// Does not include pad_right.

/// The amount of memory occupied by the num_elements of the elements.
static size_t byte_size(size_t num_elements) {
#ifndef NDEBUG
    // Initialize so the return value is defined even if DCHECK is compiled
    // out and the multiplication overflows.
    size_t amount = 0;
    // Debug-only guard against overflow in num_elements * ELEMENT_SIZE.
    if (__builtin_mul_overflow(num_elements, ELEMENT_SIZE, &amount)) {
        DCHECK(false)
                << "Amount of memory requested to allocate is more than allowed, num_elements "
                << num_elements << ", ELEMENT_SIZE " << ELEMENT_SIZE;
    }
    return amount;
#else
    return num_elements * ELEMENT_SIZE;
#endif
}

/// Minimum amount of memory to allocate for num_elements, including padding.
static size_t minimum_memory_for_elements(size_t num_elements) {
Expand Down Expand Up @@ -275,6 +287,19 @@ class PODArrayBase : private boost::noncopyable,
#endif
}

template <typename It1, typename It2>
void assert_not_intersects(It1 from_begin [[maybe_unused]], It2 from_end [[maybe_unused]]) {
#ifndef NDEBUG
const char* ptr_begin = reinterpret_cast<const char*>(&*from_begin);
const char* ptr_end = reinterpret_cast<const char*>(&*from_end);

/// Also it's safe if the range is empty.
assert(!((ptr_begin >= c_start && ptr_begin < c_end) ||
(ptr_end > c_start && ptr_end <= c_end)) ||
(ptr_begin == ptr_end));
#endif
}

~PODArrayBase() { dealloc(); }
};

Expand All @@ -296,8 +321,8 @@ class PODArray : public PODArrayBase<sizeof(T), initial_bytes, TAllocator, pad_r
/// We cannot use boost::iterator_adaptor, because it defeats loop vectorization,
/// see https://github.com/ClickHouse/ClickHouse/pull/9442

using iterator = T *;
using const_iterator = const T *;
using iterator = T*;
using const_iterator = const T*;

PODArray() = default;

Expand Down Expand Up @@ -433,6 +458,7 @@ class PODArray : public PODArrayBase<sizeof(T), initial_bytes, TAllocator, pad_r
/// Do not insert into the array a piece of itself. Because with the resize, the iterators on themselves can be invalidated.
template <typename It1, typename It2, typename... TAllocatorParams>
void insert_prepare(It1 from_begin, It2 from_end, TAllocatorParams&&... allocator_params) {
this->assert_not_intersects(from_begin, from_end);
size_t required_capacity = this->size() + (from_end - from_begin);
if (required_capacity > this->capacity())
this->reserve(round_up_to_power_of_two_or_zero(required_capacity),
Expand Down Expand Up @@ -461,14 +487,17 @@ class PODArray : public PODArrayBase<sizeof(T), initial_bytes, TAllocator, pad_r

template <typename It1, typename It2>
void insert(iterator it, It1 from_begin, It2 from_end) {
insert_prepare(from_begin, from_end);

size_t bytes_to_copy = this->byte_size(from_end - from_begin);
size_t bytes_to_move = (end() - it) * sizeof(T);
if (!bytes_to_copy) {
return;
}
size_t bytes_to_move = this->byte_size(end() - it);
insert_prepare(from_begin, from_end);

if (UNLIKELY(bytes_to_move))
memcpy(this->c_end + bytes_to_copy - bytes_to_move, this->c_end - bytes_to_move,
bytes_to_move);
if (UNLIKELY(bytes_to_move)) {
memmove(this->c_end + bytes_to_copy - bytes_to_move, this->c_end - bytes_to_move,
bytes_to_move);
}

memcpy(this->c_end - bytes_to_move, reinterpret_cast<const void*>(&*from_begin),
bytes_to_copy);
Expand All @@ -477,6 +506,7 @@ class PODArray : public PODArrayBase<sizeof(T), initial_bytes, TAllocator, pad_r

template <typename It1, typename It2>
void insert_assume_reserved(It1 from_begin, It2 from_end) {
this->assert_not_intersects(from_begin, from_end);
size_t bytes_to_copy = this->byte_size(from_end - from_begin);
memcpy(this->c_end, reinterpret_cast<const void*>(&*from_begin), bytes_to_copy);
this->c_end += bytes_to_copy;
Expand Down Expand Up @@ -593,6 +623,7 @@ class PODArray : public PODArrayBase<sizeof(T), initial_bytes, TAllocator, pad_r

template <typename It1, typename It2>
void assign(It1 from_begin, It2 from_end) {
this->assert_not_intersects(from_begin, from_end);
size_t required_capacity = from_end - from_begin;
if (required_capacity > this->capacity())
this->reserve(round_up_to_power_of_two_or_zero(required_capacity));
Expand All @@ -604,15 +635,13 @@ class PODArray : public PODArrayBase<sizeof(T), initial_bytes, TAllocator, pad_r

void assign(const PODArray& from) { assign(from.begin(), from.end()); }

/// Remove the range [first, last), shifting the tail [last, end()) down to
/// `first` and moving c_end back accordingly.
/// Elements of a PODArray are PODs, so the tail is relocated with a single
/// memmove (already used by insert() above) instead of an element-by-element
/// copy loop; memmove is required because the ranges may overlap.
void erase(const_iterator first, const_iterator last) {
    auto first_no_const = const_cast<iterator>(first);
    auto last_no_const = const_cast<iterator>(last);

    const size_t items_to_move = end() - last;
    if (items_to_move != 0) {
        memmove(first_no_const, last_no_const, this->byte_size(items_to_move));
    }

    this->c_end = reinterpret_cast<char*>(first_no_const + items_to_move);
}

/// Remove the single element at `pos`; elements after it shift left by one.
void erase(const_iterator pos) { this->erase(pos, pos + 1); }

bool operator==(const PODArray& rhs) const {
if (this->size() != rhs.size()) {
Expand Down
8 changes: 4 additions & 4 deletions be/src/vec/common/pod_array_fwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,12 @@ template <typename T, size_t initial_bytes = 4096, typename TAllocator = Allocat
size_t pad_right_ = 0, size_t pad_left_ = 0>
class PODArray;

/** For columns. Padding is enough to read and write xmm-register at the address of the last element.
* TODO, pad_right is temporarily changed from 15 to 16, will waste 1 bytes,
 * can roll back after fixing the incorrect reinterpret_cast between column types and the PODArray swap.
/** For columns. Padding is enough to read and write xmm-register at the address of the last element.
* TODO, Adapt internal data structures to 512-bit era https://github.com/ClickHouse/ClickHouse/pull/42564
 * Padding in internal data structures increased to 64 bytes, supporting AVX-512 SIMD.
*/
template <typename T, size_t initial_bytes = 4096, typename TAllocator = Allocator<false>>
using PaddedPODArray = PODArray<T, initial_bytes, TAllocator, 16, 16>;
using PaddedPODArray = PODArray<T, initial_bytes, TAllocator, 16, 15>;

/** A helper for declaring PODArray that uses inline memory.
* The initial size is set to use all the inline bytes, since using less would
Expand Down
Loading

0 comments on commit 610418d

Please sign in to comment.