Skip to content

Commit

Permalink
tentative fix for underflow/overflow issue
Browse files Browse the repository at this point in the history
  • Loading branch information
Mike Hamburg committed Nov 14, 2022
1 parent ef19a6c commit 85e081c
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 15 deletions.
6 changes: 4 additions & 2 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@

# Other v0.2 release items

* Test on Armv7 and x86
* Test on Armv7 and x86, RISC-V and some big-endian system
* Demo apps
* Optimize results for small maps? Here storing the hash key and rounding up to one block (=32 bits) are costly.

# Post 0.2 quality items

* Distinguish between "out of memory", "can't create thread" etc, and "matrix is not invertible"
* Deal with overflow cases with billions of items in nonuniform maps, where 0 rounds up to 1.
* Enable mmap?
* Allow other implementations such as binary fuse filters?

# Performance

Expand All @@ -31,6 +32,7 @@

# Longer term

* Whitepaper
* Make production-quality (1.0).
* no_std core for embedded systems
* Add SSSE3 version? ARM SEV??
Expand Down
54 changes: 41 additions & 13 deletions src/nonuniform.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@ fn floor_power_of_2(x:Locator) -> Locator {
* ResponseMap: locator interval -> V
*/
fn formulate_plan<'a, V:Ord+Clone+Hash>(counts: HashMap<&'a V,usize>)
-> (Plan, HashMap<&'a V, usize>, Vec<(usize,Locator,Locator)>, ResponseMap<V>)
-> Option<
(Plan, HashMap<&'a V, usize>, Vec<(usize,Locator,Locator)>, ResponseMap<V>)
>
{
/* Deal with special cases */
let nitems = counts.len();
Expand All @@ -73,22 +75,47 @@ fn formulate_plan<'a, V:Ord+Clone+Hash>(counts: HashMap<&'a V,usize>)
interval_vec.push((*c,0,Locator::MAX));
resp.push((0 as Locator,(*x).clone()));
}
return (0,value_map,interval_vec,resp);
return Some((0,value_map,interval_vec,resp));
}

/* Count the weighted total number of items */
let mut total = 0;
for v in counts.values() { total += v; }
debug_assert!(total > 0);

let mut total_width = 0 as Locator;
let mut items = Vec::new();
for (k,v) in counts.iter() {
let count = *v as u128;
let ratio = ((count << Locator::BITS) as u128) / (total as u128);
let width = floor_power_of_2(ratio as Locator);
items.push((k,width,*v));
total_width = total_width.wrapping_add(width);
/* Assign an initial interval size, which is its count/total, as a
* 32-bit binary fraction, rounded down to the next power of 2.
*
* Because we always round down, the sum will always be in the interval
* [2^31, 2^32] and the following code depends on this.
*
* Except: the fraction "0" (indicating < 1/2^32, since all counts are
* actually nonzero) must round up to 1 instead. In that case, the sum
* could in fact overflow. To fix this, we can "fudge" by halving all
* nonzero interval sizes.
*
* TODO: test this fix.
*/
let mut total_width;
let mut items;
let mut fudge = 0;
loop {
items = Vec::new();
total_width = 0u64;
for (k,v) in counts.iter() {
let count = *v as u128;
let ratio = (count << (Locator::BITS-fudge)) / (total as u128);
let width = floor_power_of_2(ratio as Locator);
items.push((k,width,*v));
total_width += width as u64;
}
if Locator::BITS >= 64 || total_width <= (1u64 << Locator::BITS) {
break;
}
fudge += 1;
if fudge > 16 {
return None;
}
}

/* Sort them into priority order by "fit". This is used to expand them
Expand All @@ -105,7 +132,8 @@ fn formulate_plan<'a, V:Ord+Clone+Hash>(counts: HashMap<&'a V,usize>)
items.sort_by(compare_fit);

/* Extend the intervals to the next power of 2 in priority order */
let mut remaining_width = total_width.wrapping_neg();
/* 2u64 << bits-1 = 1u64<<bits, unless bits==64 in which case it's 0 (but without error) */
let mut remaining_width = (2u64 << (Locator::BITS-1)).wrapping_sub(total_width) as Locator;
for i in 0..items.len() {
if remaining_width == 0 { break; }
let expand = min(remaining_width, items[i].1);
Expand Down Expand Up @@ -133,7 +161,7 @@ fn formulate_plan<'a, V:Ord+Clone+Hash>(counts: HashMap<&'a V,usize>)
}

/* Done */
(plan, value_map, interval_vec, resp)
Some((plan, value_map, interval_vec, resp))
}

/// Compressed static functions.
Expand Down Expand Up @@ -229,7 +257,7 @@ impl <'a,K:Hash,V,H:KeyedHasher128> CompressedMap<'a,K,V,H> {
});
}

let (plan, value_map, interval_vec, response_map) = formulate_plan(counts);
let (plan, value_map, interval_vec, response_map) = formulate_plan(counts)?;
let nphases = plan.count_ones() as usize;

/* Record which bits are to be determined in each phase */
Expand Down

0 comments on commit 85e081c

Please sign in to comment.