Skip to content

Commit

Permalink
Support SIMD
Browse files Browse the repository at this point in the history
  • Loading branch information
frozenca committed Jul 31, 2022
1 parent debd48b commit 3498a53
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 50 deletions.
144 changes: 99 additions & 45 deletions fc_btree.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
#ifndef __FC_BTREE_H__
#define __FC_BTREE_H__

#define FC_USE_SIMD 0
#define FC_PREFER_BINARY_SEARCH 0

#if FC_USE_SIMD
#include "fc_comp.h"
#endif // FC_USE_SIMD
#include <algorithm>
#include <array>
#include <cassert>
Expand All @@ -12,6 +17,7 @@
#include <initializer_list>
#include <iostream>
#include <iterator>
#include <limits>
#include <memory>
#include <ranges>
#include <span>
Expand All @@ -20,8 +26,6 @@
#include <utility>
#include <vector>

#define FC_PREFER_BINARY_SEARCH 0

namespace frozenca {

template <typename T>
Expand Down Expand Up @@ -137,7 +141,7 @@ requires(Fanout >= 2) class BTreeBase {
Deleter(const Alloc &alloc) : alloc_{alloc} {}

/// Returns the storage of a single node to the stored allocator.
///
/// @param node pointer to the node whose storage is handed back to `alloc_`.
///
/// Allocator-style `deallocate(p, n)` takes an element COUNT, not a byte
/// count, so exactly one object is released here. The storage is released
/// exactly once: a second `deallocate` on the same pointer would be a double
/// free, and passing `sizeof(T)` as the count would overstate the size of the
/// allocation by a factor of `sizeof(T)`.
template <typename T> void operator()(T *node) noexcept {
  alloc_.deallocate(node, 1);
}
};

Expand All @@ -146,8 +150,7 @@ requires(Fanout >= 2) class BTreeBase {

static constexpr bool is_disk_ = DiskAllocable<V>;

static constexpr auto disk_max_nkeys =
static_cast<std::size_t>(2 * Fanout - 1);
static constexpr auto disk_max_nkeys = static_cast<std::size_t>(2 * Fanout);

static constexpr bool use_linsearch_ =
#if FC_PREFER_BINARY_SEARCH
Expand All @@ -156,6 +159,20 @@ requires(Fanout >= 2) class BTreeBase {
std::is_arithmetic_v<K> && (Fanout <= 128);
#endif // FC_PREFER_BINARY_SEARCH

// True when Comp is exactly std::ranges::less or std::less<K>.
// Other "less-like" comparators (for example a transparent std::less<>) do
// not match these two checks.
static constexpr bool CompIsLess = std::is_same_v<Comp, std::ranges::less> ||
std::is_same_v<Comp, std::less<K>>;
// True when Comp is exactly std::ranges::greater or std::greater<K>.
static constexpr bool CompIsGreater =
std::is_same_v<Comp, std::ranges::greater> ||
std::is_same_v<Comp, std::greater<K>>;

// Compile-time switch for the SIMD key search.
// With FC_USE_SIMD set to 0 this is unconditionally false. Otherwise ALL of
// the following must hold:
//   - is_set_ is true;
//   - CanUseSimd<K> is true;
//   - Fanout is a multiple of 8 when sizeof(K) == 4, and a multiple of 4 for
//     every other key size;
//   - Fanout <= 128;
//   - Comp is one of the comparators recognised by CompIsLess / CompIsGreater.
// NOTE(review): 8 and 4 are presumably the number of 32-bit / 64-bit lanes in
// one 256-bit vector register - confirm against fc_comp.h.
static constexpr bool use_simd_ =
#if FC_USE_SIMD
is_set_ && CanUseSimd<K> && (Fanout % (sizeof(K) == 4 ? 8 : 4) == 0) &&
(Fanout <= 128) && (CompIsLess || CompIsGreater);
#else
false;
#endif // FC_USE_SIMD

struct alignas(64) Node {
using keys_type =
std::conditional_t<is_disk_, std::array<V, disk_max_nkeys>,
Expand All @@ -168,21 +185,25 @@ requires(Fanout >= 2) class BTreeBase {
// invariant: for root, 0 <= #(child) == (#(key) + 1) <= 2 * t
// invariant: for leaves, 0 == #(child)
// invariant: child_0 <= key_0 <= child_1 <= ... <= key_(N - 1) <= child_N
keys_type keys_;
Node *parent_ = nullptr;
attr_t size_ = 0; // number of keys in the subtree (not keys in this node)
attr_t index_ = 0;
attr_t height_ = 0;
attr_t num_keys_ =
0; // number of keys in this node, used only for disk variant
keys_type keys_;
std::vector<std::conditional_t<is_disk_, std::unique_ptr<Node, Deleter>,
std::unique_ptr<Node>>>
children_;

// can throw bad_alloc
Node() requires(is_disk_) {}
Node() { keys_.reserve(disk_max_nkeys); }

Node() requires(!is_disk_) { keys_.reserve(disk_max_nkeys); }
// can throw bad_alloc
Node() requires(is_disk_) {
if constexpr (use_simd_) {
keys_.fill(std::numeric_limits<K>::max());
}
}

Node(const Node &node) = delete;
Node &operator=(const Node &node) = delete;
Expand Down Expand Up @@ -653,6 +674,9 @@ requires(Fanout >= 2) class BTreeBase {
std::memmove(sibling->keys_.data(), sibling->keys_.data() + 1,
(sibling->num_keys_ - 1) * sizeof(V));
sibling->num_keys_--;
if constexpr (use_simd_) {
sibling->keys_[sibling->num_keys_] = std::numeric_limits<K>::max();
}
} else {
node->keys_.push_back(std::move(parent->keys_[node->index_]));
parent->keys_[node->index_] = std::move(sibling->keys_.front());
Expand Down Expand Up @@ -706,6 +730,12 @@ requires(Fanout >= 2) class BTreeBase {
std::memmove(sibling->keys_.data(), sibling->keys_.data() + n,
(sibling->num_keys_ - n) * sizeof(V));
sibling->num_keys_ -= n;
if constexpr (use_simd_) {
for (attr_t k = 0; k < n; ++k) {
sibling->keys_[sibling->num_keys_ + k] =
std::numeric_limits<K>::max();
}
}
} else {
// brings one key from parent
node->keys_.push_back(std::move(parent->keys_[node->index_]));
Expand Down Expand Up @@ -763,6 +793,9 @@ requires(Fanout >= 2) class BTreeBase {
node->keys_[0] = parent->keys_[node->index_ - 1];
parent->keys_[node->index_ - 1] = sibling->keys_[sibling->num_keys_ - 1];
sibling->num_keys_--;
if constexpr (use_simd_) {
sibling->keys_[sibling->num_keys_] = std::numeric_limits<K>::max();
}
} else {
node->keys_.insert(node->keys_.begin(),
std::move(parent->keys_[node->index_ - 1]));
Expand Down Expand Up @@ -818,6 +851,9 @@ requires(Fanout >= 2) class BTreeBase {
std::make_reverse_iterator(node->keys_.begin() + node->num_keys_ - n),
node->keys_.rend());
sibling->num_keys_ -= n;
if constexpr (use_simd_) {
sibling->keys_[sibling->num_keys_] = std::numeric_limits<K>::max();
}
} else {
// brings n - 1 keys from sibling
std::ranges::move(sibling->keys_ |
Expand Down Expand Up @@ -867,35 +903,50 @@ requires(Fanout >= 2) class BTreeBase {
}
}

auto get_lb_location(const K &key, const V *first,
const V *last) const noexcept {
auto lbcomp = [&key](const V &other) { return Comp{}(Proj{}(other), key); };
if constexpr (use_linsearch_) {
auto get_lb(const K &key, const Node *x) const noexcept {
if constexpr (use_simd_) {
return get_lb_simd<K, CompIsLess>(key, x->keys_.data(),
x->keys_.data() + 2 * Fanout);
} else if constexpr (use_linsearch_) {
auto lbcomp = [&key](const V &other) {
return Comp{}(Proj{}(other), key);
};
return std::distance(
first, std::ranges::find_if_not(first, last, lbcomp, Proj{}));
x->keys_.begin(),
std::ranges::find_if_not(
x->keys_.begin(), x->keys_.begin() + x->nkeys(), lbcomp, Proj{}));
} else {
return std::distance(
first, std::ranges::lower_bound(first, last, key, Comp{}, Proj{}));
}
}

auto get_ub_location(const K &key, const V *first,
const V *last) const noexcept {
auto ubcomp = [&key](const V &other) { return Comp{}(key, Proj{}(other)); };
if constexpr (use_linsearch_) {
return std::distance(first,
std::ranges::find_if(first, last, ubcomp, Proj{}));
return std::distance(x->keys_.begin(),
std::ranges::lower_bound(
x->keys_.begin(), x->keys_.begin() + x->nkeys(),
key, Comp{}, Proj{}));
}
}

auto get_ub(const K &key, const Node *x) const noexcept {
if constexpr (use_simd_) {
return get_ub_simd<K, CompIsLess>(key, x->keys_.data(),
x->keys_.data() + 2 * Fanout);
} else if constexpr (use_linsearch_) {
auto ubcomp = [&key](const V &other) {
return Comp{}(key, Proj{}(other));
};
return std::distance(x->keys_.begin(),
std::ranges::find_if(x->keys_.begin(),
x->keys_.begin() + x->nkeys(),
ubcomp, Proj{}));
} else {
return std::distance(
first, std::ranges::upper_bound(first, last, key, Comp{}, Proj{}));
return std::distance(x->keys_.begin(),
std::ranges::upper_bound(
x->keys_.begin(), x->keys_.begin() + x->nkeys(),
key, Comp{}, Proj{}));
}
}

const_iterator_type search(const K &key) const {
auto x = root_.get();
while (x) {
auto i =
get_lb_location(key, x->keys_.data(), x->keys_.data() + x->nkeys());
auto i = get_lb(key, x);
if (i < x->nkeys() && key == Proj{}(x->keys_[i])) { // equal? key found
return const_iterator_type(x, static_cast<attr_t>(i));
} else if (x->is_leaf()) { // no child, key is not in the tree
Expand All @@ -910,8 +961,7 @@ requires(Fanout >= 2) class BTreeBase {
nonconst_iterator_type find_lower_bound(const K &key, bool climb = true) {
auto x = root_.get();
while (x) {
auto i =
get_lb_location(key, x->keys_.data(), x->keys_.data() + x->nkeys());
auto i = get_lb(key, x);
if (x->is_leaf()) {
auto it = nonconst_iterator_type(x, static_cast<attr_t>(i));
if (climb) {
Expand All @@ -928,8 +978,7 @@ requires(Fanout >= 2) class BTreeBase {
const_iterator_type find_lower_bound(const K &key, bool climb = true) const {
auto x = root_.get();
while (x) {
auto i =
get_lb_location(key, x->keys_.data(), x->keys_.data() + x->nkeys());
auto i = get_lb(key, x);
if (x->is_leaf()) {
auto it = const_iterator_type(x, static_cast<attr_t>(i));
if (climb) {
Expand All @@ -946,8 +995,7 @@ requires(Fanout >= 2) class BTreeBase {
nonconst_iterator_type find_upper_bound(const K &key, bool climb = true) {
auto x = root_.get();
while (x) {
auto i =
get_ub_location(key, x->keys_.data(), x->keys_.data() + x->nkeys());
auto i = get_ub(key, x);
if (x->is_leaf()) {
auto it = nonconst_iterator_type(x, static_cast<attr_t>(i));
if (climb) {
Expand All @@ -964,8 +1012,7 @@ requires(Fanout >= 2) class BTreeBase {
const_iterator_type find_upper_bound(const K &key, bool climb = true) const {
auto x = root_.get();
while (x) {
auto i =
get_ub_location(key, x->keys_.data(), x->keys_.data() + x->nkeys());
auto i = get_ub(key, x);
if (x->is_leaf()) {
auto it = const_iterator_type(x, static_cast<attr_t>(i));
if (climb) {
Expand Down Expand Up @@ -1034,6 +1081,11 @@ requires(Fanout >= 2) class BTreeBase {
x->num_keys_++;
x->keys_[i] = y->keys_[Fanout - 1];
y->num_keys_ = Fanout - 1;
if constexpr (use_simd_) {
for (attr_t k = Fanout - 1; k < 2 * Fanout; ++k) {
y->keys_[k] = std::numeric_limits<K>::max();
}
}
} else {
x->keys_.insert(x->keys_.begin() + i, std::move(y->keys_[Fanout - 1]));
y->keys_.resize(Fanout - 1);
Expand Down Expand Up @@ -1083,6 +1135,9 @@ requires(Fanout >= 2) class BTreeBase {
std::memmove(x->keys_.data() + i, x->keys_.data() + i + 1,
(x->num_keys_ - (i + 1)) * sizeof(V));
x->num_keys_--;
if constexpr (use_simd_) {
x->keys_[x->num_keys_] = std::numeric_limits<K>::max();
}
} else {
// shift keys from i left by 1 (because key[i] is merged)
std::shift_left(x->keys_.begin() + i, x->keys_.end(), 1);
Expand Down Expand Up @@ -1175,8 +1230,7 @@ requires(Fanout >= 2) class BTreeBase {
AllowDup &&std::is_same_v<std::remove_cvref_t<T>, V>) {
auto x = root_.get();
while (true) {
auto i = get_ub_location(Proj{}(key), x->keys_.data(),
x->keys_.data() + x->nkeys());
auto i = get_ub(Proj{}(key), x);
if (x->is_leaf()) {
return insert_leaf(x, static_cast<attr_t>(i), std::forward<T>(key));
} else {
Expand All @@ -1197,8 +1251,7 @@ requires(Fanout >= 2) class BTreeBase {
std::is_same_v<std::remove_cvref_t<T>, V>) {
auto x = root_.get();
while (true) {
auto i = get_lb_location(Proj{}(key), x->keys_.data(),
x->keys_.data() + x->nkeys());
auto i = get_lb(Proj{}(key), x);
if (i < x->nkeys() && Proj{}(key) == Proj{}(x->keys_[i])) {
return {iterator_type(x, static_cast<attr_t>(i)), false};
} else if (x->is_leaf()) {
Expand Down Expand Up @@ -1226,6 +1279,9 @@ requires(Fanout >= 2) class BTreeBase {
std::memmove(node->keys_.data() + i, node->keys_.data() + i + 1,
(node->num_keys_ - (i + 1)) * sizeof(V));
node->num_keys_--;
if constexpr (use_simd_) {
node->keys_[node->num_keys_] = std::numeric_limits<K>::max();
}
} else {
std::shift_left(node->keys_.begin() + i, node->keys_.end(), 1);
node->keys_.pop_back();
Expand All @@ -1246,8 +1302,7 @@ requires(Fanout >= 2) class BTreeBase {

size_t erase_lb(Node *x, const K &key) requires(!AllowDup) {
while (true) {
auto i =
get_lb_location(key, x->keys_.data(), x->keys_.data() + x->nkeys());
auto i = get_lb(key, x);
if (i < x->nkeys() && key == Proj{}(x->keys_[i])) {
// key found
assert(x->is_leaf() || i + 1 < std::ssize(x->children_));
Expand Down Expand Up @@ -1602,8 +1657,7 @@ requires(Fanout >= 2) class BTreeBase {
K key{std::forward<T>(raw_key)};
auto x = root_.get();
while (true) {
auto i =
get_lb_location(key, x->keys_.data(), x->keys_.data() + x->nkeys());
auto i = get_lb(key, x);
if (i < x->nkeys() && key == Proj{}(x->keys_[i])) {
return iterator_type(x, static_cast<attr_t>(i))->second;
} else if (x->is_leaf()) {
Expand Down
3 changes: 2 additions & 1 deletion fc_disk_fixed_alloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@ template <typename T> class AllocatorFixed {
}

/// Returns storage for `n` objects of type T to the underlying memory
/// resource.
///
/// @param ptr pointer to the storage being released.
/// @param n   number of T objects the storage was sized for; the byte count
///            handed to the resource is sizeof(T) * n.
///
/// The block is released exactly once - releasing the same pointer a second
/// time would be a double free. The alignment is passed explicitly because
/// std::pmr::memory_resource::deallocate otherwise assumes
/// alignof(std::max_align_t), which is wrong for over-aligned T.
/// `alignof(T)` is the same value as `std::alignment_of_v<T>` and needs no
/// header.
/// NOTE(review): the alignment must equal the one used by the matching
/// allocate() call - confirm against allocate() in this class.
void deallocate(T *ptr, size_t n) {
  mem_res_->deallocate(static_cast<void *>(ptr), sizeof(T) * n, alignof(T));
}

[[nodiscard]] std::pmr::memory_resource *
Expand Down
23 changes: 19 additions & 4 deletions test/perftest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,23 +128,38 @@ int main() {

std::cout << "Balanced tree test\n";
{
std::set<std::int64_t> rbtree;
fc::BTreeSet<std::int64_t> btree;
// warm up for benchmark
tree_perf_test(rbtree, true);
tree_perf_test(btree, true);
}
std::cout << "Warming up complete...\n";

{
std::cout << "frozenca::BTreeSet test\n";
std::cout << "frozenca::BTreeSet test (fanout 64 - default)\n";
fc::BTreeSet<std::int64_t> btree;
tree_perf_test(btree);
}
{
std::cout << "frozenca::DiskBTreeSet test\n";
std::cout << "frozenca::BTreeSet test (fanout 96)\n";
fc::BTreeSet<std::int64_t, 96> btree;
tree_perf_test(btree);
}
{
std::cout << "frozenca::DiskBTreeSet test (fanout 128)\n";
fc::DiskBTreeSet<std::int64_t, 128> btree("database.bin", 1UL << 25UL,
true);
tree_perf_test(btree);
}
{
std::cout << "frozenca::BTreeSet test (fanout 128)\n";
fc::BTreeSet<std::int64_t, 128> btree;
tree_perf_test(btree);
}
{
std::cout << "frozenca::BTreeSet test (don't use SIMD) \n";
fc::BTreeSet<std::uint64_t> btree;
tree_perf_test(btree);
}
{
std::cout << "std::set test\n";
std::set<std::int64_t> rbtree;
Expand Down

0 comments on commit 3498a53

Please sign in to comment.