-
Notifications
You must be signed in to change notification settings - Fork 1
/
btree.h
371 lines (291 loc) · 9.71 KB
/
btree.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
// Copyright (C) 2011 Red Hat, Inc. All rights reserved.
//
// This file is part of the thin-provisioning-tools source.
//
// thin-provisioning-tools is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// thin-provisioning-tools is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with thin-provisioning-tools. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef BTREE_H
#define BTREE_H
#include "endian_utils.h"
#include "transaction_manager.h"
#include <boost/noncopyable.hpp>
#include <boost/optional.hpp>
#include <list>
//----------------------------------------------------------------
namespace persistent_data {
template <typename ValueType>
class NoOpRefCounter {
public:
void inc(ValueType const &v) {}
void dec(ValueType const &v) {}
};
struct uint64_traits {
typedef base::__le64 disk_type;
typedef uint64_t value_type;
typedef NoOpRefCounter<uint64_t> ref_counter;
static void unpack(disk_type const &disk, value_type &value) {
value = base::to_cpu<uint64_t>(disk);
}
static void pack(value_type const &value, disk_type &disk) {
disk = base::to_disk<base::__le64>(value);
}
};
namespace btree_detail {
using namespace base;
using namespace std;
using namespace boost;
uint32_t const BTREE_CSUM_XOR = 121107;
//------------------------------------------------
// On disk data layout for btree nodes
enum node_flags {
INTERNAL_NODE = 1,
LEAF_NODE = 1 << 1
};
struct node_header {
__le32 csum;
__le32 flags;
__le64 blocknr; /* which block this node is supposed to live in */
__le32 nr_entries;
__le32 max_entries;
__le32 value_size;
__le32 padding;
} __attribute__((packed));
struct disk_node {
struct node_header header;
__le64 keys[0];
} __attribute__((packed));
enum node_type {
INTERNAL,
LEAF
};
//------------------------------------------------
// Class that acts as an interface over the raw little endian btree
// node data.
template <typename ValueTraits>
class node_ref {
public:
explicit node_ref(block_address b, disk_node *raw);
uint32_t get_checksum() const;
block_address get_location() const {
return location_;
}
block_address get_block_nr() const;
node_type get_type() const;
void set_type(node_type t);
unsigned get_nr_entries() const;
void set_nr_entries(unsigned n);
unsigned get_max_entries() const;
void set_max_entries(unsigned n);
// FIXME: remove this, and get the constructor to do it.
void set_max_entries(); // calculates the max for you.
size_t get_value_size() const;
void set_value_size(size_t);
uint64_t key_at(unsigned i) const;
void set_key(unsigned i, uint64_t k);
typename ValueTraits::value_type value_at(unsigned i) const;
void set_value(unsigned i,
typename ValueTraits::value_type const &v);
// Increments the nr_entries field
void insert_at(unsigned i,
uint64_t key,
typename ValueTraits::value_type const &v);
// Does not increment nr_entries
void overwrite_at(unsigned i,
uint64_t key,
typename ValueTraits::value_type const &v);
// Copies entries from another node, appends them
// to the back of this node. Adjusts nr_entries.
void copy_entries(node_ref const &rhs,
unsigned begin,
unsigned end);
// Various searches
int bsearch(uint64_t key, int want_hi) const;
optional<unsigned> exact_search(uint64_t key) const;
int lower_bound(uint64_t key) const;
template <typename RefCounter>
void inc_children(RefCounter &rc);
disk_node *raw() {
return raw_;
}
disk_node const *raw() const {
return raw_;
}
private:
static unsigned calc_max_entries(void);
void *key_ptr(unsigned i) const;
void *value_ptr(unsigned i) const;
block_address location_;
disk_node *raw_;
};
//------------------------------------------------
//
template <typename ValueTraits>
node_ref<ValueTraits>
to_node(typename block_manager<>::read_ref &b)
{
// FIXME: this should return a const read_ref somehow.
return node_ref<ValueTraits>(
b.get_location(),
reinterpret_cast<disk_node *>(
const_cast<unsigned char *>(b.data().raw())));
}
template <typename ValueTraits>
node_ref<ValueTraits>
to_node(typename block_manager<>::write_ref &b)
{
return node_ref<ValueTraits>(
b.get_location(),
reinterpret_cast<disk_node *>(
const_cast<unsigned char *>(b.data().raw())));
}
class ro_spine : private noncopyable {
public:
ro_spine(transaction_manager::ptr tm)
: tm_(tm) {
}
void step(block_address b);
template <typename ValueTraits>
node_ref<ValueTraits> get_node() {
return to_node<ValueTraits>(spine_.back());
}
private:
transaction_manager::ptr tm_;
std::list<block_manager<>::read_ref> spine_;
};
class shadow_spine : private noncopyable {
public:
typedef transaction_manager::read_ref read_ref;
typedef transaction_manager::write_ref write_ref;
shadow_spine(transaction_manager::ptr tm)
: tm_(tm) {
}
// true if the children of the shadow need incrementing
bool step(block_address b);
void step(transaction_manager::write_ref b) {
spine_.push_back(b);
if (spine_.size() == 1)
root_ = spine_.front().get_location();
else if (spine_.size() > 2)
spine_.pop_front();
}
void pop() {
spine_.pop_back();
}
template <typename ValueTraits>
node_ref<ValueTraits> get_node() {
return to_node<ValueTraits>(spine_.back());
}
block_address get_block() const {
return spine_.back().get_location();
}
bool has_parent() const {
return spine_.size() > 1;
}
node_ref<uint64_traits> get_parent() {
if (spine_.size() < 2)
throw std::runtime_error("no parent");
return to_node<uint64_traits>(spine_.front());
}
block_address get_parent_location() const {
return spine_.front().get_location();
}
block_address get_root() const {
return root_;
}
private:
transaction_manager::ptr tm_;
std::list<block_manager<>::write_ref> spine_;
block_address root_;
};
}
template <unsigned Levels, typename ValueTraits>
class btree {
public:
typedef boost::shared_ptr<btree<Levels, ValueTraits> > ptr;
typedef uint64_t key[Levels];
typedef typename ValueTraits::value_type value_type;
typedef boost::optional<value_type> maybe_value;
typedef boost::optional<std::pair<unsigned, value_type> > maybe_pair;
typedef typename block_manager<>::read_ref read_ref;
typedef typename block_manager<>::write_ref write_ref;
typedef typename btree_detail::node_ref<ValueTraits> leaf_node;
typedef typename btree_detail::node_ref<uint64_traits> internal_node;
btree(typename persistent_data::transaction_manager::ptr tm,
typename ValueTraits::ref_counter rc);
btree(typename transaction_manager::ptr tm,
block_address root,
typename ValueTraits::ref_counter rc);
~btree();
maybe_value lookup(key const &key) const;
maybe_pair lookup_le(key const &key) const;
maybe_pair lookup_ge(key const &key) const;
void insert(key const &key, typename ValueTraits::value_type const &value);
void remove(key const &key);
void set_root(block_address root);
block_address get_root() const;
ptr clone() const;
// free the on disk btree when the destructor is called
void destroy();
// Derive a class from this base class if you need to
// inspect the individual nodes that make up a btree.
class visitor {
public:
virtual ~visitor() {}
typedef boost::shared_ptr<visitor> ptr;
// The bool return values indicate whether the walk
// should be continued into sub trees of the node (true == continue).
virtual bool visit_internal(unsigned level, bool sub_root, boost::optional<uint64_t> key,
internal_node const &n) = 0;
virtual bool visit_internal_leaf(unsigned level, bool sub_root, boost::optional<uint64_t> key,
internal_node const &n) = 0;
virtual bool visit_leaf(unsigned level, bool sub_root, boost::optional<uint64_t> key,
leaf_node const &n) = 0;
virtual void visit_complete() {}
};
// Walks the tree in depth first order
void visit(typename visitor::ptr visitor) const;
private:
template <typename ValueTraits2, typename Search>
optional<typename ValueTraits2::value_type>
lookup_raw(btree_detail::ro_spine &spine, block_address block, uint64_t key) const;
template <typename ValueTraits2>
void split_node(btree_detail::shadow_spine &spine,
block_address parent_index,
uint64_t key,
bool top);
template <typename ValueTraits2>
void split_beneath(btree_detail::shadow_spine &spine, uint64_t key);
template <typename ValueTraits2>
void split_sibling(btree_detail::shadow_spine &spine,
block_address parent_index,
uint64_t key);
template <typename ValueTraits2>
bool
insert_location(btree_detail::shadow_spine &spine,
block_address block,
uint64_t key,
int *index);
void walk_tree(typename visitor::ptr visitor,
unsigned level, bool root, boost::optional<uint64_t> key,
block_address b) const;
typename persistent_data::transaction_manager::ptr tm_;
bool destroy_;
block_address root_;
NoOpRefCounter<uint64_t> internal_rc_;
typename ValueTraits::ref_counter rc_;
};
};
#include "btree.tcc"
//----------------------------------------------------------------
#endif