Skip to content

Commit

Permalink
examples in doc
Browse files Browse the repository at this point in the history
  • Loading branch information
Mike Hamburg committed Apr 9, 2022
1 parent cd382f4 commit fb897d3
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 10 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ include = ["/src", "/benches", "LICENSE.txt"]
build = "build.rs" # for cbindgen

[lib]
crate-type = ["cdylib"]
crate-type = ["cdylib","lib"]
bench = false

[badges]
Expand Down
2 changes: 1 addition & 1 deletion TODO.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Release items

* C interface / dylib: deserialize, save/load file, map with bytes output
* Test CFFI
* Demo apps
* Examples in doc

# Post 0.2 quality items

Expand Down
2 changes: 1 addition & 1 deletion examples/ffi.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ int main(int argc, char **argv) {
double end = now();
size_t hashmap_len = cmap_hashmap_u64_u64_len(hash);
assert(hashmap_len <= nitems);
assert(hashmap_len >= nitems * 0.99);
assert(hashmap_len >= nitems * 0.9);
printf("Build hashmap of %lld / %lld items: %0.3fs = %0.1f ns/item\n",
(long long)hashmap_len, (long long)nitems, end-start, (end-start)*1e9/nitems);

Expand Down
54 changes: 53 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,59 @@ construct a `CompressedMap` from an empty map.)
This compressed map implementation is most efficient for maps containing
hundreds to hundreds of millions of keys, but only a few distinct values.
## Code example
This toy example shows the compression that [`CompressedMap<u64,bool>`] provides
compared to a serialized [`HashMap`](std::collections::HashMap).
```
// Import relevant libraries
use rand::{Rng,thread_rng,distributions::{Bernoulli, Distribution}};
use compressed_map::{
    CompressedMap,BuildOptions,
    serialized_size,STD_BINCODE_CONFIG
};
use std::collections::HashMap;

// Set up the RNG
let distribution = Bernoulli::new(0.05).unwrap();
let mut rng = thread_rng();

// Create a map with 100k items, about 95% "no" : 5% "yes"
let nitems = 100000;
let mut map = HashMap::new();
for _ in 0..nitems {
    map.insert(rng.gen::<u64>(),distribution.sample(&mut rng));
}

// Compress the map
let cmap = CompressedMap::<'_,u64,bool>::build(&map,
    &mut BuildOptions::default()).unwrap();

// Query the compressed map: the answer is the same as for the hashmap
// Also count the true proportion so we can compute the Shannon limit
let mut nyes = 0;
for (key,value) in &map {
    assert_eq!(value,cmap.query(&key));
    nyes += *value as u64;
}
let p = nyes as f64 / nitems as f64;

// How big is the map?
let hash_sersize = serialized_size(&map,STD_BINCODE_CONFIG).unwrap();
let sersize = serialized_size(&cmap,STD_BINCODE_CONFIG).unwrap();
let shannon = nitems as f64 * -(p*p.log2() + (1.-p)*(1.-p).log2());
// Report the compressed size per key as well, so that the printed line has
// the same fields as the example output shown below.
println!("hashmap={} bytes, cmap={} bytes, ratio={:0.1}, {:0.2} bits/key",
    hash_sersize, sersize, hash_sersize as f64/sersize as f64,
    sersize as f64 * 8. / nitems as f64);
println!("Shannon limit for {:0.2}%={} bytes, overhead={:0.2}%",
    p*100., (shannon/8.) as u64, (sersize as f64*8. / shannon - 1.) * 100.);

// Example output:
// hashmap=900008 bytes, cmap=3952 bytes, ratio=227.7, 0.32 bits/key
// Shannon limit for 5.03%=3596 bytes, overhead=9.87%
```
# Random compressed maps
A lower-level building block is [`CompressedRandomMap<K,V>`]. These work much
Expand Down
63 changes: 57 additions & 6 deletions src/nonuniform.rs
Original file line number Diff line number Diff line change
Expand Up @@ -657,15 +657,17 @@ impl <'a,'b,T> IntoIterator for &'a FilteredVec<'b,T> {

#[cfg(test)]
mod tests {
use rand::{Rng,SeedableRng};
use rand::rngs::StdRng;
use crate::nonuniform::{CompressedMap,BuildOptions};
use crate::STD_BINCODE_CONFIG;
use std::collections::HashMap;
use bincode::{encode_to_vec,decode_from_slice};
use crate as compressed_map;

#[test]
fn test_nonuniform_map() {
use rand::{Rng,SeedableRng};
use rand::rngs::StdRng;
use crate::nonuniform::{CompressedMap,BuildOptions};
use crate::STD_BINCODE_CONFIG;
use std::collections::HashMap;
use bincode::{encode_to_vec,decode_from_slice};

assert!(CompressedMap::<_,_>::build(&HashMap::<u32,u32>::new(), &mut BuildOptions::default()).is_none());
for i in 0u32..100 {
let mut seed = [0u8;32];
Expand Down Expand Up @@ -702,4 +704,53 @@ mod tests {
assert_eq!(compressed_map, deser.unwrap().0);
}
}

#[test]
fn simple_test_nonuniform_map() {
    // `compressed_map` resolves through the `use crate as compressed_map`
    // alias declared at the top of this test module.
    use std::collections::HashMap;
    use rand::{Rng,thread_rng,distributions::{Bernoulli, Distribution}};
    use compressed_map::{
        BuildOptions,CompressedMap,
        STD_BINCODE_CONFIG,serialized_size
    };

    // Number of random entries to generate.
    let nitems = 100000;

    // Each value is "yes" with probability 5%, so about 95% of entries are "no".
    let yes_dist = Bernoulli::new(0.05).unwrap();
    let mut rng = thread_rng();

    // Draw the key first and then the value for every entry.
    let map: HashMap<u64,bool> = (0..nitems)
        .map(|_| {
            let key = rng.gen::<u64>();
            let value = yes_dist.sample(&mut rng);
            (key, value)
        })
        .collect();

    // Build the compressed representation with default options.
    let mut options = BuildOptions::default();
    let cmap = CompressedMap::<'_,u64,bool>::build(&map, &mut options).unwrap();

    // Every key must map to the same value in both structures.  While
    // walking the map, tally the "yes" entries to get the measured proportion
    // needed for the Shannon limit.
    let mut nyes = 0u64;
    for (k, expected) in map.iter() {
        assert_eq!(expected, cmap.query(&k));
        nyes += u64::from(*expected);
    }
    let p = nyes as f64 / nitems as f64;

    // Serialized sizes of the plain and the compressed map, in bytes.
    let hash_sersize = serialized_size(&map,STD_BINCODE_CONFIG).unwrap();
    let sersize = serialized_size(&cmap,STD_BINCODE_CONFIG).unwrap();

    // Binary entropy of the measured proportion, scaled to all items (in bits).
    let q = 1. - p;
    let shannon = nitems as f64 * -(p*p.log2() + q*q.log2());

    // Compressed size expressed in bits.
    let cmap_bits = sersize as f64 * 8.;

    println!(
        "hashmap={} bytes, cmap={} bytes, ratio={:0.1}, {:0.2} bits/key",
        hash_sersize,
        sersize,
        hash_sersize as f64 / sersize as f64,
        cmap_bits / nitems as f64
    );
    println!(
        "Shannon limit for {:0.2}%={} bytes, overhead={:0.2}%",
        p * 100.,
        (shannon / 8.) as u64,
        (cmap_bits / shannon - 1.) * 100.
    );

    // Example output:
    // hashmap=900008 bytes, cmap=3952 bytes, ratio=227.7, 0.32 bits/key
    // Shannon limit for 5.03%=3596 bytes, overhead=9.87%
}
}

0 comments on commit fb897d3

Please sign in to comment.