diff --git a/CHANGELOG.md b/CHANGELOG.md index 98d2db6..85b0fad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,25 +9,32 @@ ## Detailed changes +**Breaking changes:** - Renamed `MakeCustom` to `MakeCustomMap`, added new functor `MakeCustomSet`. + `MakeCustomMap` changed to take a new argument to specify the `'a value` type. - Renamed `MakeCustomHeterogeneous` to `MakeCustomHeterogeneousMap`, added new functor `MakeCustomHeterogeneousSet`. +- Renamed `NODE_WITH_ID.get_id` to `NODE_WITH_ID.to_int`, this allows using + instances of `NODE_WITH_ID` directly as a `KEY`. - Renamed `VALUE` to `HETEROGENEOUS_VALUE`, added a `VALUE` module type (previously unnamed). +- Renamed `min_binding`, `max_binding`, `pop_minimum`, `pop_maximum`, `min_elt` + and `max_elt` to `unsigned_min_binding`, `unsigned_max_binding`, + `pop_unsigned_minimum`, `pop_unsigned_maximum`, `unsigned_min_elt` + and `unsigned_max_elt` respectively, to clarify that these functions consider + negative numbers as larger than positive ones. + +**New features:** - Added new interface `MAP_WITH_VALUE` which is the same as `MAP` but with a custom type `'a value` instead of just `'a`. -- `MakeCustomMap` changed to take a new argument to specify the `'a value` type. - Added `HashconsedNode`, `HashconsedSetNode` as well as four functors to create hash-consed heterogeneous/homogeneous maps/sets: `MakeHashconsedMap`, `MakeHashconsedSet`, `MakeHashconsedHeterogeneousMap` and `MakeHashconsedHeterogeneousSet`. - Now support using negative keys. Trees are built using the bitwise representation of integer, meaning they effectively use an unsigned order. Negative keys are considered bigger than positive keys, `0` is the minimal number and `-1` the maximal one. 
-- Renamed `min_binding`, `max_binding`, `pop_minimum`, `pop_maximum`, `min_elt` - and `max_elt` to `unsigned_min_binding`, `unsigned_max_binding`, - `pop_unsigned_minimum`, `pop_unsigned_maximum`, `unsigned_min_elt` - and `unsigned_max_elt` respectively, to clarify that these functions consider - negative numbers as larger than positive ones. - Added new functions `fold_on_nonequal_inter` and `fold_on_nonequal_union` to maps. + +**Bug fixes:** - Fixed a bug where `NodeWithId` wasn't incrementing ids properly - `zarith` is no longer a dependency, used GCC's `__builtin_clz` as a faster method of finding an integer's highest bit. diff --git a/README.md b/README.md index 4278dcc..438f282 100644 --- a/README.md +++ b/README.md @@ -96,7 +96,8 @@ dune build @doc - hash-consed versions of heterogeneous/homogeneous maps/sets are available. These provide constant time equality and comparison, and ensure maps/set with the same constants are always physically equal. It comes at the cost - of more memory usage a slightly slower constructors. + of a constant overhead in memory usage (at worst, as hash-consing may allow memory gains) and constant time overhead + when calling constructors. ## Quick overview @@ -137,6 +138,10 @@ These uniquely number their nodes, which means: the documentation of `HASHED_VALUE.polyeq` for details. Note that this is the case in the default implementations `HashedValue` and `HeterogeneousHashedValue`. +- All hash-consing functors are **generative**, since each functor call will + create a new hash-table to store the created nodes. Calling a functor + twice with the same arguments will lead to two numbering systems for identifiers, + and thus the types should not be considered compatible. ### Interfaces diff --git a/index.mld b/index.mld index c94d94e..88d67e0 100644 --- a/index.mld +++ b/index.mld @@ -86,7 +86,8 @@ dune build @doc {li Additionally, hashconsed versions of heterogeneous/homogeneous maps/sets are available. 
These provide constant time equality and comparison, and ensure maps/set with the same constants are always physically equal. It comes at the cost - of more memory usage a slightly slower constructors.}} + of a constant overhead in memory usage (at worst, as hash-consing may allow + memory gains) and constant time overhead when calling constructors.}} {1 Quick overview} @@ -130,6 +131,10 @@ The functors used to build maps and sets are the following: Note that this is the case in the default implementations {{!PatriciaTree.HashedValue}[HashedValue]} and {{!PatriciaTree.HeterogeneousHashedValue}[HeterogeneousHashedValue]}. + - All hash-consing functors are {b generative}, since each functor call will + create a new hash-table to store the created nodes. Calling a functor + twice with the same arguments will lead to two numbering systems for identifiers, + and thus the types should not be considered compatible. }} {2 Interfaces} diff --git a/patriciaTree.ml b/patriciaTree.ml index 82f02fd..872a1eb 100644 --- a/patriciaTree.ml +++ b/patriciaTree.ml @@ -50,7 +50,7 @@ end module type NODE_WITH_ID = sig include NODE - val get_id: 'a t -> int + val to_int: 'a t -> int end module type HASH_CONSED_NODE = sig @@ -495,7 +495,7 @@ module NodeWithId(Key:sig type 'a t end)(Value:HETEROGENEOUS_VALUE):NODE_WITH_ID | NBranch{prefix;branching_bit;tree0;tree1;_} -> Branch{prefix;branching_bit;tree0;tree1} | NLeaf{key;value;_} -> Leaf{key;value} - let get_id = function + let to_int = function | NEmpty -> 0 | NBranch{id;_} -> id | NLeaf{id;_} -> id @@ -633,7 +633,7 @@ module HashconsedNode(Key:HETEROGENEOUS_KEY)(Value:HETEROGENEOUS_HASHED_VALUE)() | NBranch{prefix;branching_bit;tree0;tree1;_} -> Branch{prefix;branching_bit;tree0;tree1} | NLeaf{key;value;_} -> Leaf{key;value} - let get_id = function + let to_int = function | NEmpty -> 0 | NBranch{ id; _ } -> id | NLeaf{ id; _ } -> id @@ -654,7 +654,7 @@ module HashconsedNode(Key:HETEROGENEOUS_KEY)(Value:HETEROGENEOUS_HASHED_VALUE)() | 
NBranch{prefix=prefixa;branching_bit=branching_bita;tree0=tree0a;tree1=tree1a;_}, NBranch{prefix=prefixb;branching_bit=branching_bitb;tree0=tree0b;tree1=tree1b;_} -> prefixa == prefixb && branching_bita == branching_bitb && - get_id tree0a = get_id tree0b && get_id tree1a = get_id tree1b + to_int tree0a = to_int tree0b && to_int tree1a = to_int tree1b | _ -> false let hash (AnyMap x) = match x with @@ -664,7 +664,7 @@ module HashconsedNode(Key:HETEROGENEOUS_KEY)(Value:HETEROGENEOUS_HASHED_VALUE)() (hash lsl 1) lor 1 (* All leaf hashes are odd *) | NBranch{prefix; branching_bit; tree0; tree1; _} -> (* All branch hashes are even *) - (sdbm (prefix lor branching_bit) @@ sdbm (get_id tree0) (get_id tree1)) lsl 1 + (sdbm (prefix lor branching_bit) @@ sdbm (to_int tree0) (to_int tree1)) lsl 1 end module WeakHash = Weak.Make(HashArg) @@ -688,8 +688,8 @@ module HashconsedNode(Key:HETEROGENEOUS_KEY)(Value:HETEROGENEOUS_HASHED_VALUE)() | x, NEmpty -> x | _ -> try_find (NBranch{prefix;branching_bit;tree0;tree1;id=(!count)}) - let equal x y = Int.equal (get_id x) (get_id y) - let compare x y = Int.compare (get_id x) (get_id y) + let equal x y = x == y + let compare x y = Int.compare (to_int x) (to_int y) end module HashconsedSetNode(Key:HETEROGENEOUS_KEY)(): HASH_CONSED_NODE @@ -715,7 +715,7 @@ module HashconsedSetNode(Key:HETEROGENEOUS_KEY)(): HASH_CONSED_NODE | NBranch{prefix;branching_bit;tree0;tree1;_} -> Branch{prefix;branching_bit;tree0;tree1} | NLeaf{ key; _ } -> Leaf{ key; value=() } - let get_id = function + let to_int = function | NEmpty -> 0 | NBranch{ id; _ } -> id | NLeaf{ id; _ } -> id @@ -741,7 +741,7 @@ module HashconsedSetNode(Key:HETEROGENEOUS_KEY)(): HASH_CONSED_NODE | NEmpty -> 0 | NLeaf{key; _} -> ((Key.to_int key) lsl 1) lor 1 (* All leaf hashes are odd *) | NBranch{prefix; branching_bit; tree0; tree1; _} -> (* All branch hashes are even *) - (sdbm (prefix lor branching_bit) @@ sdbm (get_id tree0) (get_id tree1)) lsl 1 + (sdbm (prefix lor branching_bit) 
@@ sdbm (to_int tree0) (to_int tree1)) lsl 1 end module WeakHash = Weak.Make(HashArg) @@ -764,8 +764,8 @@ module HashconsedSetNode(Key:HETEROGENEOUS_KEY)(): HASH_CONSED_NODE | x, NEmpty -> x | _ -> try_find (NBranch{prefix;branching_bit;tree0;tree1;id=(!count)}) - let equal x y = Int.equal (get_id x) (get_id y) - let compare x y = Int.compare (get_id x) (get_id y) + let equal x y = x == y + let compare x y = Int.compare (to_int x) (to_int y) end (** {1 Keys and values} *) @@ -1952,7 +1952,7 @@ module MakeHashconsedHeterogeneousMap(Key:HETEROGENEOUS_KEY)(Value:HETEROGENEOUS let equal = Node.equal let compare = Node.compare - let get_id = Node.get_id + let to_int = Node.to_int end module MakeHashconsedHeterogeneousSet(Key:HETEROGENEOUS_KEY)() = struct @@ -1961,7 +1961,7 @@ module MakeHashconsedHeterogeneousSet(Key:HETEROGENEOUS_KEY)() = struct let equal = Node.equal let compare = Node.compare - let get_id = Node.get_id + let to_int = Node.to_int end module MakeHashconsedSet(Key : KEY)() = struct @@ -1969,7 +1969,7 @@ module MakeHashconsedSet(Key : KEY)() = struct include MakeCustomSet(Key)(Node) let equal = Node.equal let compare = Node.compare - let get_id = Node.get_id + let to_int = Node.to_int end module MakeHashconsedMap(Key: KEY)(Value: HASHED_VALUE)() = struct @@ -1979,5 +1979,5 @@ module MakeHashconsedMap(Key: KEY)(Value: HASHED_VALUE)() = struct let equal = Node.equal let compare = Node.compare - let get_id = Node.get_id + let to_int = Node.to_int end diff --git a/patriciaTree.mli b/patriciaTree.mli index 01ab7e5..27af88b 100644 --- a/patriciaTree.mli +++ b/patriciaTree.mli @@ -31,7 +31,7 @@ {- The required signature for keys is different, in that we require each key to be mapped to a unique integer identifier.} - {- The implementation uses Patricia Tree, as described in Oksasaki + {- The implementation uses Patricia Tree, as described in Okasaki and Gill's 1998 paper {{: 
https://www.semanticscholar.org/paper/Fast-Mergeable-Integer-Maps-Okasaki-Gill/23003be706e5f586f23dd7fa5b2a410cc91b659d}{i Fast mergeable integer maps}}, i.e. it is a space-efficient prefix trie over the big-endian representation of @@ -228,7 +228,7 @@ end module type NODE_WITH_ID = sig include NODE (** @closed *) - val get_id: 'a t -> int + val to_int: 'a t -> int (** Unique number for each node. This is not {{!hash_consed}hash-consing}. @@ -249,13 +249,16 @@ end module type HASH_CONSED_NODE = sig include NODE (** @closed *) - val get_id : 'a t -> int - (** Returns the {{!hash_consed}hash-consed} id of the map. - Unlike {!NODE_WITH_ID.get_id}, hash-consing ensures that maps + val to_int : 'a t -> int + (** Returns a unique number for each map, the {{!hash_consed}hash-consed} identifier of the map. + Unlike {!NODE_WITH_ID.to_int}, hash-consing ensures that maps which contain the same keys (compared by {!KEY.to_int}) and values (compared by {!HASHED_VALUE.polyeq}) will always be physically equal and have the same identifier. + Maps with the same identifier are also physically equal: + [to_int m1 = to_int m2] implies [m1 == m2]. + Note that when using physical equality as {!HASHED_VALUE.polyeq}, some maps of different types [a t] and [b t] may be given the same identifier. See the end of the documentation of {!HASHED_VALUE.polyeq} for details. 
*) @@ -1463,7 +1466,7 @@ module type HASHED_VALUE = sig val m1 : int HMap.t = # let m2 = HMap.singleton 5 'a';; val m2 : char HMap.t = - # HMap.get_id m1 = HMap.get_id m2;; + # HMap.to_int m1 = HMap.to_int m2;; - : bool = true ]} This can cause problems if you wish to use identifiers of different map @@ -1471,8 +1474,8 @@ module type HASHED_VALUE = sig {[ type any = Any : 'a HMap.t -> any module MapOfMaps = MakeMap(struct - type t = any - let to_int (Any x) = HMap.get_id x + type t = Any : 'a HMap.t -> t + let to_int (Any x) = HMap.to_int x end) ]} Using this can lead to unexpected behaviors: @@ -1559,7 +1562,7 @@ module HashedValue : HASHED_VALUE with type 'a t = 'a Uses {{: https://ocaml.org/api/Hashtbl.html#VALhash}[Hashtbl.hash]} for hashing and physical equality for equality. Note that this may lead to maps of different types having the same identifier - ({!MakeHashconsedMap.get_id}), see the documentation of {!HASHED_VALUE.polyeq} + ({!MakeHashconsedMap.to_int}), see the documentation of {!HASHED_VALUE.polyeq} for details on this. *) module HeterogeneousHashedValue : HETEROGENEOUS_HASHED_VALUE with type ('k, 'm) t = 'm @@ -1567,7 +1570,7 @@ module HeterogeneousHashedValue : HETEROGENEOUS_HASHED_VALUE with type ('k, 'm) Uses {{: https://ocaml.org/api/Hashtbl.html#VALhash}[Hashtbl.hash]} for hashing and physical equality for equality. Note that this may lead to maps of different types having the same identifier - ({!MakeHashconsedHeterogeneousMap.get_id}), see the documentation of + ({!MakeHashconsedHeterogeneousMap.to_int}), see the documentation of {!HASHED_VALUE.polyeq} for details on this. *) @@ -1668,20 +1671,26 @@ module MakeCustomHeterogeneousSet and {!HeterogeneousHashedValue}. All hash-consing functors are {b generative}, since each functor call will - create a new hashtable to store the created nodes. Calling a functor + create a new hash-table to store the created nodes. 
Calling a functor twice with same arguments will lead to two numbering systems for identifiers, and thus the types should not be considered compatible. *) (** Hash-consed version of {!MAP}. See {!hash_consed} for the differences between hash-consed and non hash-consed maps. + This is a generative functor, as calling it creates a new hash-table to store + the created nodes, and a reference to store the next unallocated identifier. + Maps/sets from different hash-consing functors (even if these functors have + the same arguments) will have different (incompatible) numbering systems and + be stored in different hash-tables (thus they will never be physically equal). + @since v0.10.0 *) module MakeHashconsedMap(Key: KEY)(Value: HASHED_VALUE)() : sig include MAP_WITH_VALUE with type key = Key.t and type 'a value = 'a Value.t (** @closed *) - val get_id : 'a t -> int + val to_int : 'a t -> int (** Returns the {{!hash_consed}hash-consed} id of the map. - Unlike {!NODE_WITH_ID.get_id}, hash-consing ensures that maps + Unlike {!NODE_WITH_ID.to_int}, hash-consing ensures that maps which contain the same keys (compared by {!KEY.to_int}) and values (compared by {!HASHED_VALUE.polyeq}) will always be physically equal and have the same identifier. @@ -1710,13 +1719,19 @@ end (** Hash-consed version of {!SET}. See {!hash_consed} for the differences between hash-consed and non hash-consed sets. + This is a generative functor, as calling it creates a new hash-table to store + the created nodes, and a reference to store the next unallocated identifier. + Maps/sets from different hash-consing functors (even if these functors have + the same arguments) will have different (incompatible) numbering systems and + be stored in different hash-tables (thus they will never be physically equal). 
+ @since v0.10.0 *) module MakeHashconsedSet(Key: KEY)() : sig include SET with type elt = Key.t (** @closed *) - val get_id : t -> int + val to_int : t -> int (** Returns the {{!hash_consed}hash-consed} id of the map. - Unlike {!NODE_WITH_ID.get_id}, hash-consing ensures that maps + Unlike {!NODE_WITH_ID.to_int}, hash-consing ensures that maps which contain the same keys (compared by {!KEY.to_int}) and values (compared by {!HASHED_VALUE.polyeq}) will always be physically equal and have the same identifier. @@ -1745,13 +1760,19 @@ end (** Hash-consed version of {!HETEROGENEOUS_SET}. See {!hash_consed} for the differences between hash-consed and non hash-consed sets. + This is a generative functor, as calling it creates a new hash-table to store + the created nodes, and a reference to store the next unallocated identifier. + Maps/sets from different hash-consing functors (even if these functors have + the same arguments) will have different (incompatible) numbering systems and + be stored in different hash-tables (thus they will never be physically equal). + @since v0.10.0 *) module MakeHashconsedHeterogeneousSet(Key: HETEROGENEOUS_KEY)() : sig include HETEROGENEOUS_SET with type 'a elt = 'a Key.t (** @closed *) - val get_id : t -> int + val to_int : t -> int (** Returns the {{!hash_consed}hash-consed} id of the map. - Unlike {!NODE_WITH_ID.get_id}, hash-consing ensures that maps + Unlike {!NODE_WITH_ID.to_int}, hash-consing ensures that maps which contain the same keys (compared by {!KEY.to_int}) and values (compared by {!HASHED_VALUE.polyeq}) will always be physically equal and have the same identifier. @@ -1780,15 +1801,21 @@ end (** Hash-consed version of {!HETEROGENEOUS_MAP}. See {!hash_consed} for the differences between hash-consed and non hash-consed maps. + This is a generative functor, as calling it creates a new hash-table to store + the created nodes, and a reference to store the next unallocated identifier. 
+ Maps/sets from different hash-consing functors (even if these functors have + the same arguments) will have different (incompatible) numbering systems and + be stored in different hash-tables (thus they will never be physically equal). + @since v0.10.0 *) module MakeHashconsedHeterogeneousMap(Key: HETEROGENEOUS_KEY)(Value: HETEROGENEOUS_HASHED_VALUE)() : sig include HETEROGENEOUS_MAP with type 'a key = 'a Key.t and type ('k,'m) value = ('k, 'm) Value.t (** @closed *) - val get_id : 'a t -> int + val to_int : 'a t -> int (** Returns the {{!hash_consed}hash-consed} id of the map. - Unlike {!NODE_WITH_ID.get_id}, hash-consing ensures that maps + Unlike {!NODE_WITH_ID.to_int}, hash-consing ensures that maps which contain the same keys (compared by {!KEY.to_int}) and values (compared by {!HASHED_VALUE.polyeq}) will always be physically equal and have the same identifier. @@ -1829,7 +1856,7 @@ module SimpleNode(Key: sig type 'k t end)(Value: HETEROGENEOUS_VALUE) : NODE and type ('key,'map) value = ('key,'map) Value.t (** Here, nodes also contain a unique id, e.g. so that they can be - used as keys of maps or hashtables. *) + used as keys of maps or hash-tables. *) module NodeWithId(Key: sig type 'k t end)(Value: HETEROGENEOUS_VALUE) : NODE_WITH_ID with type 'a key = 'a Key.t and type ('key,'map) value = ('key,'map) Value.t @@ -1868,8 +1895,15 @@ module WeakSetNode(Key: sig type 'k t end) : NODE but also performs hash-consing. So two maps with the same bindings will always be physically equal. See {!hash_consed} for more details on this. - Using these nodes with multiple {!MakeCustomMap} functors will result in - all those maps being hash-consed (stored in the same hash table, same numbering system). + This is a generative functor, as calling it creates a new hash-table to store + the created nodes, and a reference to store the next unallocated identifier. 
+ Maps/sets from different hash-consing functors (even if these functors have + the same arguments) will have different (incompatible) numbering systems and + be stored in different hash-tables (thus they will never be physically equal). + + Using a single {!HashconsedNode} in multiple {!MakeCustomMap} functors will result in + all those maps being hash-consed together (stored in the same hash-table, + same numbering system). @since v0.10.0 *) module HashconsedNode(Key: HETEROGENEOUS_KEY)(Value: HETEROGENEOUS_HASHED_VALUE)() : HASH_CONSED_NODE diff --git a/patriciaTreeTest.ml b/patriciaTreeTest.ml index 556e571..4fce639 100644 --- a/patriciaTreeTest.ml +++ b/patriciaTreeTest.ml @@ -292,7 +292,7 @@ end module TestImpl(MyMap : MAP with type key = int)(Param : sig val test_id : bool val number_gen : int QCheck.arbitrary - (* val get_id : 'a MyMap.t -> int option *) + (* val to_int : 'a MyMap.t -> int option *) end) = struct (* Add a list of pair of ints to a map. *)