-
Notifications
You must be signed in to change notification settings - Fork 99
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
BTrees in Motoko. #396
base: master
Are you sure you want to change the base?
BTrees in Motoko. #396
Changes from all commits
c35b3d9
b95320f
d81a1c3
142a0d0
ffd99ec
61334ec
0d06506
273fb6f
9db84ca
4393a81
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
/// Imperative sequences as B-Trees. | ||
|
||
import A "Array"; | ||
import I "Iter"; | ||
import List "List"; | ||
import Text "Text"; | ||
import Option "Option"; | ||
import Order "Order"; | ||
import P "Prelude"; | ||
import Debug "Debug"; | ||
import Prim "mo:⛔"; | ||
|
||
module { | ||
|
||
/// Constants we use to shape the tree. | ||
/// See https://en.wikipedia.org/wiki/B-tree#Definition | ||
module Constants { | ||
let MAX_CHILDREN = 4; | ||
}; | ||
|
||
public type Compare<K> = { | ||
show : K -> Text; | ||
compare : (K, K) -> Order.Order | ||
}; | ||
|
||
public type Data<K, V> = [(K, V)]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Curious, what's the benefit of the
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's a slight space savings to the tuple (1 word per instance). Otherwise, I'd prefer the record with labeled fields. I actually had that same record definition initially, but recalled this recent review from Claudio for the HashMap improvements. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, why is
I'm probably missing something as it's early and I definitely didn't get enough sleep last night 😅 Is there a reference implementation you used for this that I can peek through? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
||
public type Internal<K, V> = { | ||
data : Data<K, V>; | ||
trees : [Tree<K, V>]; | ||
}; | ||
|
||
public type Tree<K, V> = { | ||
#internal : Internal<K, V>; | ||
#leaf : Data<K, V>; | ||
}; | ||
|
||
func find_data<K, V>(data : Data<K, V>, find_k : K, c : (K, K) -> Order.Order) : ?V { | ||
for ((k, v) in data.vals()) { | ||
if (c(k, find_k) == #equal) { return ?v }; | ||
}; | ||
return null | ||
}; | ||
|
||
public func find<K, V>(t : Tree<K, V>, k : K, c : (K, K) -> Order.Order) : ?V { | ||
switch t { | ||
case (#leaf(d)) { return find_data<K, V>(d, k, c) }; | ||
case (#internal(i)) { | ||
for (j in I.range(0, i.data.size() - 1)) { | ||
switch (c(k, i.data[j].0)) { | ||
case (#equal) { return ?i.data[j].1 }; | ||
case (#less) { return find<K, V>(i.trees[j], k, c) }; | ||
case _ { } | ||
} | ||
}; | ||
find<K, V>(i.trees[i.data.size()], k, c) | ||
}; | ||
}; | ||
}; | ||
|
||
public module Insert { | ||
|
||
|
||
|
||
}; | ||
|
||
// Assert that the given B-Tree instance observes all relevant invariants. | ||
// Used for unit tests. Show function helps debug failing tests. | ||
// | ||
// Note: These checks-as-assertions can be refactored into value-producing checks, | ||
// if that seems useful. Then, they can be individual matchers tests. Again, if useful. | ||
public func assertIsValid<K, V>( | ||
t : Tree<K, V>, | ||
compare : (K, K) -> Order.Order, | ||
show : K -> Text) | ||
{ | ||
Check.root<K, V>({compare; show}, t) | ||
}; | ||
|
||
public func assertIsValidTextKeys<V>(t : Tree<Text, V>){ | ||
Check.root<Text, V>({compare=Text.compare; show=func (t:Text) : Text { t }}, t) | ||
}; | ||
Comment on lines
+72
to
+82
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it help if |
||
|
||
/// Check that a B-Tree instance observes invariants of B-Trees. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If this module is just for testing/debugging, should When I import the default BTree module like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This module is static. The compiler will not compile static code that you import but do not use. (However, class instances will always contain all methods, regardless of usage. Not applicable here.) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Why? It's more enclosed and encapsulated here. There is no way to "hide" a top-level module in |
||
/// Invariants ensure performance is what we expect. | ||
/// For testing and debugging. | ||
/// | ||
/// Future refactoring --- Eventually, we can return Result or | ||
/// Option so that both valid and invalid inputs can be inspected in | ||
/// test cases. Doing assertions directly here is easier, for now. | ||
module Check { | ||
|
||
type Inf<K> = {#infmax; #infmin; #finite : K}; | ||
|
||
type InfCompare<K> = { | ||
compare : (Inf<K>, Inf<K>) -> Order.Order; | ||
show : Inf<K> -> Text | ||
}; | ||
|
||
func infCompare<K>(c : Compare<K>) : InfCompare<K> = { | ||
show = func (k : Inf<K>) : Text { | ||
switch k { | ||
case (#infmax) "#infmax"; | ||
case (#infmin) "#infmin"; | ||
case (#finite k) "#finite(" # c.show k # ")"; | ||
} | ||
}; | ||
compare = func (k1 : Inf<K>, k2 : Inf<K>) : Order.Order { | ||
switch (k1, k2) { | ||
case (#infmin, _) #less; | ||
case (_, #infmin) { /* nonsense case. */ assert false; loop { } }; | ||
case (_, #infmax) #less; | ||
case (#infmax, _) { /* nonsense case. */ assert false; loop { } }; | ||
case (#finite(k1), #finite(k2)) c.compare(k1, k2); | ||
} | ||
} | ||
}; | ||
|
||
public func root<K, V>(compare : Compare<K>, t : Tree<K, V>) { | ||
switch t { | ||
case (#leaf _) { rec(#infmin, infCompare(compare), t, #infmax) }; | ||
case (#internal i) { | ||
if (i.data.size() == 0) { assert i.trees.size() == 0; return }; | ||
if (i.trees.size() < 2) { assert false }; | ||
Comment on lines
+123
to
+124
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a benefit to having all of the invalid cases trap vs. have be this be more like a I guess the question from a usability standpoint is do you see developers using it like
or
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Using Indeed, using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
My intentions are time-variant:
|
||
rec(#infmin, infCompare(compare), t, #infmax) | ||
}; | ||
} | ||
}; | ||
|
||
func rec<K, V>(lower : Inf<K>, c : InfCompare<K>, t : Tree<K, V>, upper : Inf<K>) { | ||
switch t { | ||
case (#leaf(d)) { data(lower, c, d, upper) }; | ||
case (#internal(i)) { internal(lower, c, i, upper) }; | ||
} | ||
}; | ||
|
||
func data<K, V>(lower : Inf<K>, c : InfCompare<K>, d : Data<K, V>, upper : Inf<K>) { | ||
var prev_k : Inf<K> = #infmin; | ||
for ((k, _) in d.vals()) { | ||
if false { | ||
Debug.print (c.show (#finite k)); | ||
}; | ||
assert (c.compare(prev_k, #finite k) == #less); | ||
assert (c.compare(lower, #finite k) == #less); | ||
assert (c.compare(#finite k, upper) == #less); | ||
prev_k := #finite k; | ||
}; | ||
}; | ||
|
||
func internal<K, V>(lower : Inf<K>, c : InfCompare<K>, i : Internal<K, V>, upper : Inf<K>) { | ||
// counts make sense when there is one tree between each pair of | ||
// consecutive key-value pairs; no key-value pairs on the end. | ||
assert (i.trees.size() == i.data.size() + 1); | ||
for (j in I.range(0, i.trees.size() - 1)) { | ||
let lower_ = if (j == 0) { lower } else { #finite(i.data[j - 1].0) }; | ||
let upper_ = if (j + 1 == i.trees.size()) { upper } else { #finite((i.data[j]).0) | ||
}; | ||
rec<K, V>(lower_, c, i.trees[j], upper_) | ||
} | ||
}; | ||
}; | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
import Debug "mo:base/Debug"; | ||
import BT "mo:base/BTree"; | ||
import Nat "mo:base/Nat"; | ||
import Text "mo:base/Text"; | ||
|
||
import Suite "mo:matchers/Suite"; | ||
import M "mo:matchers/Matchers"; | ||
import T "mo:matchers/Testable"; | ||
|
||
Debug.print "BTree tests: Begin."; | ||
|
||
let empty1 = #internal({data=[]; trees=[]}); | ||
let empty2 = #leaf([]); | ||
let leaf_of_one = #leaf([("oak", 1)]); | ||
let leaf_of_two = #leaf([("ash", 1), ("oak", 2)]); | ||
let leaf_of_three_a_c = #leaf([("apple", 1), ("ash", 4), ("crab apple", 3)]); | ||
let leaf_of_three_s_w = #leaf([("salix", 11), ("sallows", 44), ("willow", 33)]); | ||
let binary_internal = #internal( | ||
{ | ||
data=[("pine", 42)]; | ||
trees=[leaf_of_three_a_c, leaf_of_three_s_w] | ||
}); | ||
|
||
/* | ||
let _ = Suite.suite( | ||
"constructions and checks.", | ||
[ // These checks-as-assertions can be refactored into value-producing checks, | ||
// if that seems useful. Then, they can be individual matchers tests. Again, if useful. | ||
Suite.test("assertions.", try { | ||
*/ | ||
Debug.print "empty1."; | ||
BT.assertIsValidTextKeys(empty1); | ||
|
||
Debug.print "empty2."; | ||
BT.assertIsValidTextKeys(empty2); | ||
|
||
Debug.print "leaf of one."; | ||
BT.assertIsValidTextKeys(leaf_of_one); | ||
|
||
Debug.print "leaf of two."; | ||
BT.assertIsValidTextKeys(leaf_of_two); | ||
|
||
Debug.print "leaf of three. A-C"; | ||
BT.assertIsValidTextKeys(leaf_of_three_a_c); | ||
|
||
Debug.print "leaf of three. S-W"; | ||
BT.assertIsValidTextKeys(leaf_of_three_s_w); | ||
|
||
Debug.print "binary internal."; | ||
BT.assertIsValidTextKeys(binary_internal); | ||
|
||
/* | ||
true | ||
} catch _ { false }, | ||
M.equals(T.bool(true)) | ||
)]); | ||
*/ | ||
|
||
let _ = Suite.suite("find", [ | ||
Suite.test("pine", | ||
BT.find<Text, Nat>(binary_internal, "pine", Text.compare), | ||
M.equals(T.optional<Nat>(T.natTestable, ?42)) | ||
), | ||
Suite.test("apple", | ||
BT.find<Text, Nat>(binary_internal, "apple", Text.compare), | ||
M.equals(T.optional<Nat>(T.natTestable, ?1)) | ||
), | ||
Suite.test("willow", | ||
BT.find<Text, Nat>(binary_internal, "willow", Text.compare), | ||
M.equals(T.optional<Nat>(T.natTestable, ?33)) | ||
), | ||
]); | ||
|
||
Debug.print "BTree tests: End."; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is the max 4? https://panthema.net/2007/stx-btree/stx-btree-0.8.3/doxygen-html/speedtest.html shows that 32-128 perform considerably better at large n.
I think it therefore might make sense to allow the developer to configure larger child values (i.e. 4, 16, 32, 64, 128, 256). I'd be curious to run some performance tests inserting a running counter or batch of elements (ordered or unordered) to see what difference this might make as the tree grows in size.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I want to write simple tests before adjusting this number into something that varies, which I agree is desirable.