Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimized deletion for Trie/TrieMap #525

Merged
merged 24 commits into from
Feb 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
f048519
Merge branch 'master' of https://github.com/dfinity/motoko-base
luc-blaeser Dec 19, 2022
f9c141c
Merge branch 'master' of https://github.com/dfinity/motoko-base
luc-blaeser Dec 20, 2022
e3db654
Merge branch 'master' of https://github.com/dfinity/motoko-base
luc-blaeser Dec 22, 2022
daffba5
Merge branch 'master' of https://github.com/dfinity/motoko-base
luc-blaeser Jan 5, 2023
fc5c20f
Merge branch 'master' of https://github.com/dfinity/motoko-base
luc-blaeser Jan 5, 2023
3a1da3b
Merge branch 'master' of https://github.com/dfinity/motoko-base
luc-blaeser Jan 23, 2023
2779e0b
Merge branch 'master' of https://github.com/dfinity/motoko-base
luc-blaeser Feb 10, 2023
ca4ada8
Merge branch 'master' of https://github.com/dfinity/motoko-base
luc-blaeser Feb 13, 2023
dfd9b31
Delete functionality for Trie and TrieMap
luc-blaeser Feb 13, 2023
0f326e3
Code refactoring
luc-blaeser Feb 14, 2023
7b638c9
Update src/Trie.mo
luc-blaeser Feb 14, 2023
dda937d
Simplifying implememtation
luc-blaeser Feb 14, 2023
550e675
Also collapse two empty sibling nodes
luc-blaeser Feb 15, 2023
96c58ae
Apply reduction to other functions
luc-blaeser Feb 20, 2023
88deb57
Merge branch 'master' of https://github.com/dfinity/motoko-base
luc-blaeser Feb 20, 2023
452eb31
Merge branch 'master' into luc/trie-delete
luc-blaeser Feb 20, 2023
45dc1e5
Merge branch 'master' into luc/trie-delete
luc-blaeser Feb 28, 2023
2d6e0c3
Optimize replace function (second return value)
luc-blaeser Feb 28, 2023
0c41a03
Merge branch 'master' into luc/trie-delete
luc-blaeser Feb 28, 2023
bf9e3cf
parens removal
crusso Feb 28, 2023
3383061
Merge branch 'luc/trie-delete' of github.com:dfinity/motoko-base into…
crusso Feb 28, 2023
fb0ffb3
remove parens
crusso Feb 28, 2023
76ebaaf
Remove redundant null assignment
luc-blaeser Feb 28, 2023
ddd404a
Merge branch 'master' into luc/trie-delete
crusso Feb 28, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 64 additions & 26 deletions src/Trie.mo
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,6 @@ module {
/// ```
public func empty<K, V>() : Trie<K, V> { #empty };


/// Get the size in O(1) time.
///
/// For a more detailed overview of how to use a `Trie`,
Expand Down Expand Up @@ -354,36 +353,79 @@ module {
/// Purely-functional representation permits _O(1)_ copy, via persistent sharing.
public func clone<K, V>(t : Trie<K, V>) : Trie<K, V> = t;

/// Replace the given key's value option with the given one, returning the previous one
/// Rebuild the parent of two sibling subtries after an entry deletion may have shrunk one of them.
///
/// * two empty siblings collapse to `#empty`;
/// * a leaf next to an empty sibling is returned unchanged;
/// * two leaves are fused into a single leaf when their combined size is at most `MAX_LEAF_SIZE`;
/// * every other combination is rebuilt with `branch`.
func combineReducedNodes<K, V>(left : Trie<K, V>, right : Trie<K, V>) : Trie<K, V> {
  switch (left, right) {
    case (#empty, #empty) { #empty };
    // exactly one side is a leaf, the other is empty: the leaf stands in for the branch
    case (#leaf(_), #empty) { left };
    case (#empty, #leaf(_)) { right };
    case (#leaf(l), #leaf(r)) {
      let total = l.size + r.size;
      if (total > MAX_LEAF_SIZE) {
        branch(left, right)
      } else {
        // small enough to live in one leaf: left entries first, then right entries
        #leaf({ size = total; keyvals = List.append(l.keyvals, r.keyvals) })
      }
    };
    case _ { branch(left, right) }
  }
};

/// Replace the given key's value option with the given value, returning the modified trie.
/// Also returns the replaced value if the key existed and `null` otherwise.
/// Compares keys using the provided function `k_eq`.
///
/// Note: Replacing a key's value by `null` removes the key and also shrinks the trie.
///
/// For a more detailed overview of how to use a `Trie`,
/// see the [User's Overview](#overview).
///
/// Example:
/// ```motoko include=initialize
/// trie := Trie.put(trie, key "test", Text.equal, 1).0;
/// trie := Trie.replace(trie, key "test", Text.equal, ?42).0;
/// assert (Trie.get(trie, key "test", Text.equal) == ?42);
/// ```
public func replace<K, V>(t : Trie<K, V>, k : Key<K>, k_eq : (K, K) -> Bool, v : ?V) : (Trie<K, V>, ?V) {
Copy link
Contributor

@crusso crusso Feb 27, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if this would produce a little less garbage by communicating the previous value using private state and having rec simply return a trie, not a pair, as in RedBlackTree.insert/replace (or what ever it is called). Maybe not worth it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the suggestion. I implemented this.

let key_eq = equalKey(k_eq);
var replacedValue: ?V = null;

func rec(t : Trie<K, V>, bitpos : Nat) : (Trie<K, V>, ?V) {
func recursiveReplace(t : Trie<K, V>, bitpos : Nat) : Trie<K, V> {
switch t {
case (#empty) {
let (kvs, _) = AssocList.replace(null, k, key_eq, v);
(leaf(kvs, bitpos), null)
leaf(kvs, bitpos)
};
case (#branch(b)) {
let bit = Hash.bit(k.hash, bitpos);
// rebuild either the left or right path with the (k, v) pair
if (not bit) {
let (l, v_) = rec(b.left, bitpos + 1);
(branch(l, b.right), v_)
let l = recursiveReplace(b.left, bitpos + 1);
combineReducedNodes(l, b.right)
} else {
let (r, v_) = rec(b.right, bitpos + 1);
(branch(b.left, r), v_)
let r = recursiveReplace(b.right, bitpos + 1);
combineReducedNodes(b.left, r)
}
};
case (#leaf(l)) {
let (kvs2, old_val) = AssocList.replace(l.keyvals, k, key_eq, v);
(leaf(kvs2, bitpos), old_val)
let (kvs2, oldValue) = AssocList.replace(l.keyvals, k, key_eq, v);
replacedValue := oldValue;
leaf(kvs2, bitpos)
}
}
};
let (to, vo) = rec(t, 0);
//assert(isValid<K, V>(to, false));
(to, vo)
let newTrie = recursiveReplace(t, 0);
//assert(isValid<K, V>(newTrie, false));
(newTrie, replacedValue)
};

/// Put the given key's value in the trie; return the new trie, and the previous value associated with the key, if any.
Expand Down Expand Up @@ -577,7 +619,7 @@ module {
switch (x, y) {
case (null, ?v) { v };
case (?v, null) { v };
case (_, _) { Debug.trap "Trie.mergeDisjoint"}
case (_, _) { Debug.trap "Trie.mergeDisjoint" }
}
}
),
Expand Down Expand Up @@ -1287,11 +1329,7 @@ module {
case (#branch(b)) {
let fl = rec(b.left, bitpos + 1);
let fr = rec(b.right, bitpos + 1);
if (isEmpty(fl) and isEmpty(fr)) {
#empty
} else {
branch(fl, fr)
}
combineReducedNodes(fl, fr)
}
}
};
Expand Down Expand Up @@ -1339,11 +1377,7 @@ module {
case (#branch(b)) {
let fl = rec(b.left, bitpos + 1);
let fr = rec(b.right, bitpos + 1);
if (isEmpty(fl) and isEmpty(fr)) {
#empty
} else {
branch(fl, fr)
}
combineReducedNodes(fl, fr)
}
}
};
Expand Down Expand Up @@ -1508,7 +1542,11 @@ module {
updated_outer
};

/// Remove the given key's value in the trie; return the new trie
/// Remove the entry for the given key from the trie, by returning the reduced trie.
/// Also returns the removed value if the key existed and `null` otherwise.
/// Compares keys using the provided function `k_eq`.
///
/// Note: The removal of an existing key shrinks the trie.
///
/// For a more detailed overview of how to use a `Trie`,
/// see the [User's Overview](#overview).
Expand All @@ -1517,7 +1555,7 @@ module {
/// ```motoko include=initialize
/// trie := Trie.put(trie, key "hello", Text.equal, 42).0;
/// trie := Trie.put(trie, key "bye", Text.equal, 32).0;
/// // remove the value associated with "hello"
/// // remove the entry associated with "hello"
/// trie := Trie.remove(trie, key "hello", Text.equal).0;
/// assert (Trie.get(trie, key "hello", Text.equal) == null);
/// ```
Expand Down
4 changes: 4 additions & 0 deletions src/TrieMap.mo
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ module {
/// Delete the entry associated with key `key`, if it exists. If the key is
/// absent, there is no effect.
///
/// Note: The deletion of an existing key shrinks the trie map.
///
/// Example:
/// ```motoko include=initialize
/// map.put(0, 10);
Expand All @@ -121,6 +123,8 @@ module {
/// Delete the entry associated with key `key`. Return the deleted value
/// as an option if it exists, and `null` otherwise.
///
/// Note: The deletion of an existing key shrinks the trie map.
///
/// Example:
/// ```motoko include=initialize
/// map.put(0, 10);
Expand Down
152 changes: 152 additions & 0 deletions test/trieMapTest.mo
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import Iter "mo:base/Iter";
import Hash "mo:base/Hash";
import Text "mo:base/Text";
import Nat "mo:base/Nat";
import Array "mo:base/Array";
import Order "mo:base/Order";

import Suite "mo:matchers/Suite";
import T "mo:matchers/Testable";
Expand Down Expand Up @@ -356,6 +358,156 @@ let suite = Suite.suite(

Suite.run(suite);

/* --------------------------------------- */

/// Deterministic pseudo-random number source for the randomized tests.
/// Each call to `next()` advances the private state with
/// `state := (multiplier * state + increment) % modulus` and returns it,
/// so every result is below `modulus` (9999).
object Random {
  let multiplier = 123138118391;
  let increment = 133489131;
  let modulus = 9999;

  // fixed seed: the generated sequence is the same on every run
  var state = 4711;

  public func next() : Nat {
    state := (multiplier * state + increment) % modulus;
    state
  }
};

/// Returns the elements of `array` in pseudo-random order.
/// Every element is paired with a tag drawn from `Random.next()`, the pairs are
/// sorted by tag, and the tags are dropped again.
func shuffle(array : [Nat]) : [Nat] {
  // one Random.next() call per element
  let tagged = Array.map<Nat, (Nat, Nat)>(array, func(element) { (element, Random.next()) });

  // order by the random tag (second component) only
  let ordered = Array.sort<(Nat, Nat)>(tagged, func(a, b) { Nat.compare(a.1, b.1) });

  Array.map<(Nat, Nat), Nat>(ordered, func(pair) { pair.0 })
};

// Number of distinct keys used by the randomized TrieMap tests.
let testSize = 1_000;

// The keys 0 .. testSize - 1, reordered by `shuffle`.
let testKeys = shuffle(Array.tabulate<Nat>(testSize, func(index) { index }));

/// Creates a fresh `TrieMap` containing every key of `testKeys`, inserted in
/// `testKeys` order, each mapped to its `debug_show` text.
func buildTestTrie() : TrieMap.TrieMap<Nat, Text> {
  let map = TrieMap.TrieMap<Nat, Text>(Nat.equal, Hash.hash);
  for (index in testKeys.keys()) {
    let key = testKeys[index];
    map.put(key, debug_show (key))
  };
  map
};

/// Pairs every key in `keys` with its `debug_show` text, preserving the order of `keys`.
func expectedKeyValuePairs(keys : [Nat]) : [(Nat, Text)] {
  Array.map<Nat, (Nat, Text)>(keys, func(key) { (key, debug_show (key)) })
};

// Test keys in ascending order. Sorted once and reused for the derived
// expectations below instead of sorting `testKeys` a second time.
let expectedKeys = Array.sort(testKeys, Nat.compare);

// (key, debug_show key) pairs in ascending key order.
let expectedEntries = expectedKeyValuePairs(expectedKeys);

// The value texts, ordered by `Text.compare` (not by numeric key order).
let expectedValues = Array.sort(Array.map<Nat, Text>(expectedKeys, func(key) { debug_show (key) }), Text.compare);

// Testable used to compare and display `(Nat, Text)` entries in matcher output.
let entryTestable = T.tuple2Testable(T.natTestable, T.textTestable);

/// Orders two `(key, value)` entries by their key; the values are ignored.
func compareByKey(first : (Nat, Text), second : (Nat, Text)) : Order.Order {
  let (firstKey, _) = first;
  let (secondKey, _) = second;
  Nat.compare(firstKey, secondKey)
};

/// Collects all entries of `trie` into an array sorted by key (see `compareByKey`).
func sortedEntries(trie : TrieMap.TrieMap<Nat, Text>) : [(Nat, Text)] {
  let unsorted = Iter.toArray(trie.entries());
  Array.sort(unsorted, compareByKey)
};

/// Matcher that accepts a `TrieMap<Nat, Text>` whose entries, sorted by key,
/// are equal to `expected`.
class TrieMatcher(expected : [(Nat, Text)]) : M.Matcher<TrieMap.TrieMap<Nat, Text>> {
  /// Prints the sorted entries of `actual` next to the expected entries.
  /// NOTE(review): `description` is not used here; the message is emitted through
  /// `Prim.debugPrint` instead. Confirm this is intended.
  public func describeMismatch(actual : TrieMap.TrieMap<Nat, Text>, description : M.Description) {
    let found = debug_show (sortedEntries(actual));
    let wanted = debug_show (expected);
    Prim.debugPrint(found # " should be " # wanted)
  };

  /// True when the key-sorted entries of `actual` equal `expected`.
  public func matches(actual : TrieMap.TrieMap<Nat, Text>) : Bool {
    let entries = sortedEntries(actual);
    entries == expected
  }
};

// Suite exercising a TrieMap filled with the `testSize` shuffled keys of `testKeys`:
// size, iteration over entries/keys/values, lookup, replacement and removal.
// Every test builds its own map through `buildTestTrie()`.
let randomTestSuite = Suite.suite(
"random trie",
[
Suite.test(
"size",
buildTestTrie().size(),
M.equals(T.nat(testSize))
),
// The three iteration tests sort the iterator output before comparing it
// with the sorted expectations.
Suite.test(
"iterate entries",
sortedEntries(buildTestTrie()),
M.equals(T.array<(Nat, Text)>(entryTestable, expectedEntries))
),
Suite.test(
"iterate keys",
Array.sort(Iter.toArray(buildTestTrie().keys()), Nat.compare),
M.equals(T.array<Nat>(T.natTestable, expectedKeys))
),
Suite.test(
"iterate values",
Array.sort(Iter.toArray(buildTestTrie().vals()), Text.compare),
M.equals(T.array<Text>(T.textTestable, expectedValues))
),
// Looks up every key, then checks that the map still holds the expected entries.
Suite.test(
"get all",
do {
let trie = buildTestTrie();
for (key in testKeys.vals()) {
let value = trie.get(key);
assert (value == ?debug_show (key))
};
trie
},
TrieMatcher(expectedEntries)
),
// Replaces every value by "TEST-<key>"; `replace` must return the previous value.
Suite.test(
"replace all",
do {
let trie = buildTestTrie();
for (key in testKeys.vals()) {
let value = trie.replace(key, "TEST-" # debug_show (key));
assert (value == ?debug_show (key))
};
trie
},
TrieMatcher(Array.map<Nat, (Nat, Text)>(expectedKeys, func(key) { (key, "TEST-" # debug_show (key)) }))
),
// Removes a key whenever `Random.next()` is even and counts the removals;
// the remaining size must equal the number of keys minus that count.
Suite.test(
"remove randomized",
do {
let trie = buildTestTrie();
var count = 0;
for (key in testKeys.vals()) {
if (Random.next() % 2 == 0) {
let result = trie.remove(key);
assert (result == ?debug_show (key));
count += 1
}
};
// NOTE(review): the unary `+` presumably promotes the Nat size to Int so the
// subtraction is carried out on Int — confirm.
trie.size() == +testKeys.size() - count
},
M.equals(T.bool(true))
),
// Removes every entry while iterating over the map's own `entries()`;
// the matcher expects the map to be empty afterwards.
Suite.test(
"clear",
do {
let trie = buildTestTrie();
for ((key, value) in trie.entries()) {
// stable iteration
assert (debug_show (key) == value);
let result = trie.remove(key);
assert (result == ?debug_show (key))
};
trie
},
TrieMatcher([])
)
]
);

Suite.run(randomTestSuite);

/* --------------------------------------- */

debug {
let a = TrieMap.TrieMap<Text, Nat>(Text.equal, Text.hash);

Expand Down