Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hashconsed maps nodes #1

Merged
merged 33 commits into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
1aec8e0
WIP on hashconsed maps
dlesbre Apr 25, 2024
68a6d31
Restrictg hash-consed values for better type safety
dlesbre Apr 26, 2024
b98c1b2
Hashconsed polymorphic variants
dlesbre Apr 26, 2024
0b4baea
Document name change
dlesbre Apr 26, 2024
136fbb1
MakeCustom renames for homogeneous version
dlesbre Apr 26, 2024
edc3e51
More doc + hashconsed homogeneous set
dlesbre Apr 26, 2024
9e44c67
Don't upload artifact on pull request
dlesbre Apr 26, 2024
129b2f1
Change MAP interface and add Hashcons maps
dlesbre Apr 26, 2024
2896643
Some documentation
dlesbre Apr 26, 2024
e01056a
More doc and @since comments
dlesbre Apr 26, 2024
3a3a3a5
Version number+stop printing PatriciaTree all over the doc
dlesbre Apr 26, 2024
f46029f
Add MAP_WITH_VALUE interface
dlesbre Apr 26, 2024
1fcc4c1
Generic test on hash consed maps
dlesbre Apr 26, 2024
a3d4312
Test for patricia tree
dlesbre Apr 26, 2024
8d618d9
Test remove preserve physical eq
dlesbre Apr 26, 2024
1cbc296
Merge branch 'main' into hashconsed-maps
dlesbre Apr 30, 2024
d1cb995
Test with small nat as well
dlesbre Apr 30, 2024
f7f530a
Changelog
dlesbre Apr 30, 2024
9726947
Revert to fully generic hashconsed trees
dlesbre May 2, 2024
2096d88
Some doc changes
dlesbre May 3, 2024
7e0dd10
Lots of changes to reintroduce values in hashconsed maps
dlesbre May 3, 2024
f35a5d6
Doc doc doc
dlesbre May 6, 2024
526cf51
More doc
dlesbre May 6, 2024
90e6075
CHANGELOG
dlesbre May 6, 2024
ffa55fd
Newlines before assumes
dlesbre May 6, 2024
50eacb8
Merge branch 'main' into hashconsed-maps
dlesbre May 11, 2024
d1c5b45
Use type = instead of :=
dlesbre May 11, 2024
b92ee75
doc stuff
dlesbre May 11, 2024
36bc560
Update patriciaTree.mli (typo)
mlemerre May 12, 2024
cb595e3
Update README.md
mlemerre May 12, 2024
ebbebed
Switch to physical equality for hash-consed equal
dlesbre May 15, 2024
ab51329
Rename `get_id` to `to_int`
dlesbre May 15, 2024
7a92256
Explain generative functors more
dlesbre May 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 166 additions & 3 deletions patriciaTree.ml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,12 @@ module type NODE_WITH_ID = sig
val get_id: 'a t -> int
end

module type HASH_CONSED_NODE = sig
include NODE_WITH_ID
val fast_equal : 'a t -> 'a t -> bool
val fast_compare : 'a t -> 'a t -> int
end

module type BASE_MAP = sig
include NODE

Expand Down Expand Up @@ -501,7 +507,7 @@ module NodeWithId(Key:sig type 'a t end)(Value:VALUE):NODE_WITH_ID
| NBranch{id;_} -> id
| NLeaf{id;_} -> id

let count = ref 0;;
let count = ref 0

let empty = NEmpty
let is_empty x = x == NEmpty
Expand Down Expand Up @@ -588,6 +594,163 @@ module WeakSetNode(Key:sig type 'a t end)(* :NODE *) = struct

end

let sdbm x y = y + (x lsl 16) + (x lsl 6) - x
(** Combine two numbers into a new hash *)

module HashconsedNode(Key:HETEROGENEOUS_KEY)(Value:VALUE): HASH_CONSED_NODE
with type 'key key = 'key Key.t
and type ('key,'map) value = ('key,'map) Value.t
= struct

type 'a key = 'a Key.t
type ('key,'map) value = ('key,'map) Value.t

type 'map view =
| Empty: 'map view
| Branch: { prefix:intkey; branching_bit:mask; tree0:'map t; tree1:'map t } -> 'map view
| Leaf: { key:'key key; value:('key,'map) value } -> 'map view
and 'map t =
| NEmpty: 'map t
| NBranch: { prefix:intkey; branching_bit:mask; tree0:'map t; tree1:'map t; id:int } -> 'map t
| NLeaf: { key:'key key; value:('key,'map) value; id:int } -> 'map t

let view = function
| NEmpty -> Empty
| NBranch{prefix;branching_bit;tree0;tree1;_} -> Branch{prefix;branching_bit;tree0;tree1}
| NLeaf{key;value;_} -> Leaf{key;value}

let get_id = function
| NEmpty -> 0
| NBranch{ id; _ } -> id
| NLeaf{ id; _ } -> id

let count = ref 1 (** Start at 1 as we increment in post *)

module HashArg = struct
type 'a map = 'a t
type t = Exi : 'a map -> t [@@unboxed]
let equal (Exi a) (Exi b) = match a, b with
| NEmpty, NEmpty -> true
| NLeaf{key=key1;value=value1;_}, NLeaf{key=key2;value=value2;_} ->
begin match Key.polyeq key1 key2 with
| Eq -> value1 == Obj.magic value2
| Diff -> false
end
| NBranch{prefix=prefixa;branching_bit=branching_bita;tree0=tree0a;tree1=tree1a;_},
NBranch{prefix=prefixb;branching_bit=branching_bitb;tree0=tree0b;tree1=tree1b;_} ->
prefixa == prefixb && branching_bita == branching_bitb &&
get_id tree0a = get_id tree0b && get_id tree1a = get_id tree1b
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can use == instead of get_id here

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately not, because these values don't have the same type... This is also why I need an Obj.magic in the leaf case.

However, the more I think about it, the more I believe we should get rid of arbitrary value maps for hash-consing. It makes little sense to hashcons maps from 'a key to ('a, 'b) value together with map from 'a key to ('a, 'c) value. So I'll probably change it to use a 'a value type instead of the more generic ('a, 'b) value type.

Otherwise we might have problems with different types having equal values.

| _ -> false

let hash (Exi a) = match a with
| NEmpty -> 0
| NLeaf{key; _} -> (Key.to_int key lsl 1) lor 1 (* All leaf hashes are odd *)
| NBranch{prefix; branching_bit; tree0; tree1; _} -> (* All branch hashes are even *)
(sdbm (prefix lor branching_bit) @@ sdbm (get_id tree0) (get_id tree1)) lsl 1
end

module WeakHash = Weak.Make(HashArg)

let weakh = WeakHash.create 120

let empty = NEmpty
let is_empty x = x == NEmpty

let try_find tentative =
let Exi x = WeakHash.merge weakh (Exi tentative) in
let x = Obj.magic x in
if x == tentative then incr count;
x

let leaf key value = try_find (NLeaf{key;value;id= !count})

let branch ~prefix ~branching_bit ~tree0 ~tree1 =
match tree0,tree1 with
| NEmpty, x -> x
| x, NEmpty -> x
| _ -> try_find (NBranch{prefix;branching_bit;tree0;tree1;id=(!count)})

let fast_equal x y = Int.equal (get_id x) (get_id y)
let fast_compare x y = Int.compare (get_id x) (get_id y)
end

module HashconsedSetNode(Key:HETEROGENEOUS_KEY): HASH_CONSED_NODE
with type 'key key = 'key Key.t
and type ('key,'map) value = unit
= struct

type 'a key = 'a Key.t
type ('key,'map) value = unit

type 'map view =
| Empty: 'map view
| Branch: { prefix:intkey; branching_bit:mask; tree0:'map t; tree1:'map t } -> 'map view
| Leaf: { key:'key key; value:unit } -> 'map view
and 'map t =
| NEmpty: 'map t
| NBranch: { prefix:intkey; branching_bit:mask; tree0:'map t; tree1:'map t; id:int } -> 'map t
| NLeaf: { key:'key key; id:int } -> 'map t

let view = function
| NEmpty -> Empty
| NBranch{prefix;branching_bit;tree0;tree1;_} -> Branch{prefix;branching_bit;tree0;tree1}
| NLeaf{ key; _ } -> Leaf{ key; value=() }

let get_id = function
| NEmpty -> 0
| NBranch{ id; _ } -> id
| NLeaf{ id; _ } -> id

let count = ref 1 (** Start at 1 as we increment in post *)

module HashArg = struct
type 'a map = 'a t
type t = Exi : 'a map -> t [@@unboxed]
let equal (Exi a) (Exi b) = match a, b with
| NEmpty, NEmpty -> true
| NLeaf{key=key1;_}, NLeaf{key=key2;_} ->
begin match Key.polyeq key1 key2 with
| Eq -> true
| Diff -> false
end
| NBranch{prefix=prefixa;branching_bit=branching_bita;tree0=tree0a;tree1=tree1a;_},
NBranch{prefix=prefixb;branching_bit=branching_bitb;tree0=tree0b;tree1=tree1b;_} ->
prefixa == prefixb && branching_bita == branching_bitb &&
get_id tree0a = get_id tree0b && get_id tree1a = get_id tree1b
| _ -> false

let hash (Exi a) = match a with
| NEmpty -> 0
| NLeaf{key; _} -> ((Key.to_int key) lsl 1) lor 1 (* All leaf hashes are odd *)
| NBranch{prefix; branching_bit; tree0; tree1; _} -> (* All branch hashes are even *)
(sdbm (prefix lor branching_bit) @@ sdbm (get_id tree0) (get_id tree1)) lsl 1
end

module WeakHash = Weak.Make(HashArg)

let weakh = WeakHash.create 120

let empty = NEmpty
let is_empty x = x == NEmpty

let try_find tentative =
let Exi x = WeakHash.merge weakh (Exi tentative) in
let x = Obj.magic x in
if x == tentative then incr count;
x

let leaf key () = try_find (NLeaf{ key; id = !count })

let branch ~prefix ~branching_bit ~tree0 ~tree1 =
match tree0,tree1 with
| NEmpty, x -> x
| x, NEmpty -> x
| _ -> try_find (NBranch{prefix;branching_bit;tree0;tree1;id=(!count)})

let fast_equal x y = Int.equal (get_id x) (get_id y)
let fast_compare x y = Int.compare (get_id x) (get_id y)
end

module MakeCustomHeterogeneous
(Key:HETEROGENEOUS_KEY)
(Value:VALUE)
Expand Down Expand Up @@ -1375,6 +1538,8 @@ module MakeCustomHeterogeneous
end




(* TODO: We should make it a functor, so that we can simplify the
interface for set independently from how it is constructed. *)
module MakeHeterogeneousSet(Key:HETEROGENEOUS_KEY) : HETEROGENEOUS_SET
Expand Down Expand Up @@ -1449,8 +1614,6 @@ end
module MakeHeterogeneousMap(Key:HETEROGENEOUS_KEY)(Value:VALUE) =
MakeCustomHeterogeneous(Key)(Value)(SimpleNode(Key)(Value))



module HomogeneousValue = struct
type ('a,'map) t = 'map
end
Expand Down
34 changes: 33 additions & 1 deletion patriciaTree.mli
Original file line number Diff line number Diff line change
Expand Up @@ -136,10 +136,26 @@ module type NODE = sig
(** Convert the map to a view. Should be constant time. *)
end

(** Associate a unique number to each node. *)
(** Associate a unique number to each node, so they can be used as keys in sets or maps. *)
module type NODE_WITH_ID = sig
include NODE
val get_id: 'a t -> int
(** Unique number for each node *)
end

(** Hash-consed nodes also associates a unique number to each node,
Unlike {!NODE_WITH_ID}, they also check before instanciating the node wether
a similar node already exists. This results in slightly slower constructors
(they perform an extra hash-table lookup), but allows for constant time
equality and comparison. *)
module type HASH_CONSED_NODE = sig
include NODE_WITH_ID

val fast_equal : 'a t -> 'a t -> bool
(** Constant time equality. *)

val fast_compare : 'a t -> 'a t -> int
(** Constant time comparison using the node ids. *)
end

(** {1 Map signatures} *)
Expand Down Expand Up @@ -1183,6 +1199,22 @@ module WeakSetNode(Key : sig type 'k t end):NODE
with type 'a key = 'a Key.t
and type ('key,'map) value = unit

(** Gives a unique number to each node like {!NodeWithId},
but also performs hash-consing. So two maps with the same bindings will
always be physically equal.

This makes constructors a bit slower, but comparison much faster.
It can also speed up quite a few operations on map pairs, as these use
physical equality test to skip uneccessary work. *)
module HashconsedNode(Key : HETEROGENEOUS_KEY)(Value : VALUE) : HASH_CONSED_NODE
with type 'a key = 'a Key.t
and type ('key,'map) value = ('key,'map) Value.t

(** Both a {!HashconsedNode} and a {!SetNode}. *)
module HashconsedSetNode(Key : HETEROGENEOUS_KEY) : HASH_CONSED_NODE
with type 'a key = 'a Key.t
and type ('key,'map) value = unit

(* TODO: Functor to make sets from maps. *)
(* TODO: consider the "shape" of a map, and use this to have functions
that filter a map, or update several elements. Maybe this just
Expand Down
Loading