From 483b29a0d1cf9f4dadf344457c6398e1e541a51b Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 4 Jan 2019 19:26:13 +0000 Subject: [PATCH 01/41] Add function declaration for pairwise_sum --- src/numeric_util.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/numeric_util.rs b/src/numeric_util.rs index b36d11605..29b380341 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -6,9 +6,17 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. use std::cmp; - +use std::ops::Add; +use num_traits::{self, Zero}; use crate::LinalgScalar; +pub(crate) fn pairwise_sum(v: &[A]) -> A +where + A: Clone + Add + Zero, +{ + unimplemented!() +} + /// Fold over the manually unrolled `xs` with `f` pub fn unrolled_fold(mut xs: &[A], init: I, f: F) -> A where A: Clone, From 75860b60e10d515c104f345b32097d87dbcbdb31 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 4 Jan 2019 19:26:59 +0000 Subject: [PATCH 02/41] Base case: array with 512 elements --- src/numeric_util.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/numeric_util.rs b/src/numeric_util.rs index 29b380341..db83f7a7a 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -14,7 +14,11 @@ pub(crate) fn pairwise_sum(v: &[A]) -> A where A: Clone + Add + Zero, { - unimplemented!() + if v.len() <= 512 { + unimplemented!() + } else { + unimplemented!() + } } /// Fold over the manually unrolled `xs` with `f` From b8304c0a80aebc3e21ff5841074de354ec993a82 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 4 Jan 2019 19:27:36 +0000 Subject: [PATCH 03/41] Base case: use unrolled_fold --- src/numeric_util.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/numeric_util.rs b/src/numeric_util.rs index db83f7a7a..00108c4e1 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -15,7 +15,7 @@ where A: Clone + Add + Zero, { if v.len() <= 512 { - unimplemented!() + return unrolled_fold(v, A::zero, A::add); } else { unimplemented!() } From ec3722a93d07dade4eccc17b80ccedbfd741c15b Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 4 Jan 2019 19:30:25 +0000 Subject: [PATCH 04/41] Implemented algorithm for not-base case branch --- src/numeric_util.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/numeric_util.rs b/src/numeric_util.rs index 00108c4e1..6ef6d900b 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -14,10 +14,13 @@ pub(crate) fn pairwise_sum(v: &[A]) -> A where A: Clone + Add + Zero, { - if v.len() <= 512 { + let n = v.len(); + if n <= 512 { return unrolled_fold(v, A::zero, A::add); } else { - unimplemented!() + let mid_index = n / 2; + let (v1, v2) = v.split_at(mid_index); + pairwise_sum(v1) + pairwise_sum(v2) } } From 70d32b742bb2f6727b705a2a56223f4e3c4d3407 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 4 Jan 2019 20:52:14 +0000 Subject: [PATCH 05/41] Implemented pairwise summation algorithm for an iterator parameter --- src/numeric/impl_numeric.rs | 6 +++--- src/numeric_util.rs | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/src/numeric/impl_numeric.rs b/src/numeric/impl_numeric.rs index 0016e21c5..179e0f852 100644 --- a/src/numeric/impl_numeric.rs +++ b/src/numeric/impl_numeric.rs @@ -33,14 +33,14 @@ impl ArrayBase where A: Clone + Add + num_traits::Zero, { if let Some(slc) = self.as_slice_memory_order() { - return numeric_util::unrolled_fold(slc, A::zero, A::add); + return numeric_util::pairwise_sum(&slc) } let mut sum = A::zero(); for row in self.inner_rows() { if let Some(slc) = row.as_slice() { - sum = sum + numeric_util::unrolled_fold(slc, A::zero, A::add); + sum = sum + numeric_util::pairwise_sum(&slc); } else { - sum = sum + row.iter().fold(A::zero(), |acc, elt| acc + elt.clone()); + sum = sum + numeric_util::iterator_pairwise_sum(row.iter()); } } sum diff --git a/src/numeric_util.rs b/src/numeric_util.rs index 6ef6d900b..d6beca9cb 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -24,6 +24,23 @@ where } } +pub(crate) fn iterator_pairwise_sum<'a, I, A: 'a>(iter: I) -> A + where + I: Iterator, + A: Clone + Add + Zero, +{ + let mut partial_sums = vec![]; + let mut partial_sum = A::zero(); + for (i, x) in iter.enumerate() { + partial_sum = partial_sum + x.clone(); + if i % 512 == 511 { + partial_sums.push(partial_sum); + partial_sum = A::zero(); + } + } + pairwise_sum(&partial_sums) +} + /// Fold over the manually unrolled `xs` with `f` pub fn unrolled_fold(mut xs: &[A], init: I, f: F) -> A where A: Clone, From c66dc98ef42038fb75fc2c70279b3c0ece03a5ec Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 4 Jan 2019 21:46:09 +0000 Subject: [PATCH 06/41] Implemented pairwise summation algorithm for an iterator parameter with array items --- src/numeric/impl_numeric.rs | 12 ++++++------ src/numeric_util.rs | 29 +++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/src/numeric/impl_numeric.rs b/src/numeric/impl_numeric.rs index 179e0f852..9551a1d30 100644 --- a/src/numeric/impl_numeric.rs +++ b/src/numeric/impl_numeric.rs @@ -104,21 +104,21 @@ impl ArrayBase D: RemoveAxis, { let n = self.len_of(axis); - let mut res = Array::zeros(self.raw_dim().remove_axis(axis)); let stride = self.strides()[axis.index()]; + let mut res = Array::zeros(self.raw_dim().remove_axis(axis)); if self.ndim() == 2 && stride == 1 { // contiguous along the axis we are summing let ax = axis.index(); for (i, elt) in enumerate(&mut res) { *elt = self.index_axis(Axis(1 - ax), i).sum(); } + res } else { - for i in 0..n { - let view = self.index_axis(axis, i); - res = res + &view; - } + numeric_util::array_pairwise_sum( + (0..n).map(|i| self.index_axis(axis, i)), + || res.clone() + ) } - res } /// Return mean along `axis`. diff --git a/src/numeric_util.rs b/src/numeric_util.rs index d6beca9cb..5c4cebf66 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -8,6 +8,7 @@ use std::cmp; use std::ops::Add; use num_traits::{self, Zero}; +use super::{ArrayBase, Array, Data, Dimension}; use crate::LinalgScalar; pub(crate) fn pairwise_sum(v: &[A]) -> A @@ -41,6 +42,34 @@ pub(crate) fn iterator_pairwise_sum<'a, I, A: 'a>(iter: I) -> A pairwise_sum(&partial_sums) } +pub(crate) fn array_pairwise_sum(iter: I, zero: F) -> Array + where + I: Iterator>, + S: Data, + D: Dimension, + A: Clone + Add, + F: Fn() -> Array, +{ + let mut partial_sums = vec![]; + let mut partial_sum = zero(); + for (i, x) in iter.enumerate() { + partial_sum = partial_sum + x; + if i % 512 == 511 { + partial_sums.push(partial_sum); + partial_sum = zero(); + } + } + if partial_sums.len() > 512 { + array_pairwise_sum(partial_sums.into_iter(), zero) + } else { + let mut res = zero(); + for x in partial_sums.iter() { + res = res + x; + } + res + } +} + /// Fold over the manually unrolled `xs` with `f` pub fn unrolled_fold(mut xs: &[A], init: I, f: F) -> A where A: Clone, From 175f9a2c2c2416291a895ecff17e36d0ac316e12 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 4 Jan 2019 21:49:27 +0000 Subject: [PATCH 07/41] Refactored: use fold to sum --- src/numeric_util.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/numeric_util.rs b/src/numeric_util.rs index 5c4cebf66..fd36bd769 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -59,14 +59,16 @@ pub(crate) fn array_pairwise_sum(iter: I, zero: F) -> Array partial_sum = zero(); } } - if partial_sums.len() > 512 { - array_pairwise_sum(partial_sums.into_iter(), zero) + + if partial_sums.len() <= 512 { + partial_sums + .iter() + .fold( + zero(), + |acc, elem| acc + elem + ) } else { - let mut res = zero(); - for x in partial_sums.iter() { - res = res + x; - } - res + array_pairwise_sum(partial_sums.into_iter(), zero) } } From 8e403af379ea64420682e4d16fc5bec43d72516c Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 4 Jan 2019 21:57:39 +0000 Subject: [PATCH 08/41] Refactored: use a constant to reuse size value of recursion base case across implementations --- src/numeric_util.rs | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/src/numeric_util.rs b/src/numeric_util.rs index fd36bd769..8e3482177 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -11,12 +11,14 @@ use num_traits::{self, Zero}; use super::{ArrayBase, Array, Data, Dimension}; use crate::LinalgScalar; +const NAIVE_SUM_THRESHOLD: usize = 512; + pub(crate) fn pairwise_sum(v: &[A]) -> A where A: Clone + Add + Zero, { let n = v.len(); - if n <= 512 { + if n <= NAIVE_SUM_THRESHOLD { return unrolled_fold(v, A::zero, A::add); } else { let mid_index = n / 2; @@ -26,15 +28,15 @@ where } pub(crate) fn iterator_pairwise_sum<'a, I, A: 'a>(iter: I) -> A - where - I: Iterator, - A: Clone + Add + Zero, +where + I: Iterator, + A: Clone + Add + Zero, { let mut partial_sums = vec![]; let mut partial_sum = A::zero(); for (i, x) in iter.enumerate() { partial_sum = partial_sum + x.clone(); - if i % 512 == 511 { + if i % NAIVE_SUM_THRESHOLD == NAIVE_SUM_THRESHOLD - 1 { partial_sums.push(partial_sum); partial_sum = A::zero(); } @@ -43,30 +45,25 @@ pub(crate) fn iterator_pairwise_sum<'a, I, A: 'a>(iter: I) -> A } pub(crate) fn array_pairwise_sum(iter: I, zero: F) -> Array - where - I: Iterator>, - S: Data, - D: Dimension, - A: Clone + Add, - F: Fn() -> Array, +where + I: Iterator>, + S: Data, + D: Dimension, + A: Clone + Add, + F: Fn() -> Array, { let mut partial_sums = vec![]; let mut partial_sum = zero(); for (i, x) in iter.enumerate() { partial_sum = partial_sum + x; - if i % 512 == 511 { + if i % NAIVE_SUM_THRESHOLD == NAIVE_SUM_THRESHOLD - 1 { partial_sums.push(partial_sum); partial_sum = zero(); } } - if partial_sums.len() <= 512 { - partial_sums - .iter() - .fold( - zero(), - |acc, elem| acc + elem - ) + if partial_sums.len() <= NAIVE_SUM_THRESHOLD { + partial_sums.iter().fold(zero(), |acc, elem| acc + elem) } else { array_pairwise_sum(partial_sums.into_iter(), zero) } From 465063f7b794d8ae87897bb5b646fefcc807ddf1 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 4 Jan 2019 22:16:39 +0000 Subject: [PATCH 09/41] Added documentation --- src/numeric_util.rs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/numeric_util.rs b/src/numeric_util.rs index 8e3482177..e45e38b51 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -11,8 +11,25 @@ use num_traits::{self, Zero}; use super::{ArrayBase, Array, Data, Dimension}; use crate::LinalgScalar; +/// Size threshold to switch to naive summation in all implementations of pairwise summation. const NAIVE_SUM_THRESHOLD: usize = 512; +/// An implementation of pairwise summation for a vector slice. +/// +/// Pairwise summation compute the sum of a set of *n* numbers by splitting +/// it recursively in two halves, summing their elements and then adding the respective +/// sums. +/// It switches to the naive sum algorithm once the size of the set to be summed +/// is below a certain pre-defined threshold ([`threshold`]). +/// +/// Pairwise summation is useful to reduce the accumulated round-off error +/// when summing floating point numbers. +/// Pairwise summation provides an asymptotic error bound of *O(eps log n)*, where +/// *eps* is machine precision, compared to *O(eps n)* of the naive summation algorithm. +/// For more details, see [`paper`]. +/// +/// [`paper`]: https://epubs.siam.org/doi/10.1137/0914050 +/// [`threshold`]: constant.NAIVE_SUM_THRESHOLD.html pub(crate) fn pairwise_sum(v: &[A]) -> A where A: Clone + Add + Zero, @@ -27,6 +44,11 @@ where } } +/// An implementation of pairwise summation for an iterator. +/// +/// See [`pairwise_sum`] for details on the algorithm. +/// +/// [`pairwise_sum`]: fn.pairwise_sum.html pub(crate) fn iterator_pairwise_sum<'a, I, A: 'a>(iter: I) -> A where I: Iterator, @@ -44,6 +66,11 @@ where pairwise_sum(&partial_sums) } +/// An implementation of pairwise summation for an iterator over *n*-dimensional arrays. +/// +/// See [`pairwise_sum`] for details on the algorithm. +/// +/// [`pairwise_sum`]: fn.pairwise_sum.html pub(crate) fn array_pairwise_sum(iter: I, zero: F) -> Array where I: Iterator>, From 6427d45cdfd6ccda9530e229351605d2f6208e04 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 4 Jan 2019 22:22:42 +0000 Subject: [PATCH 10/41] Minor edits to the docs --- src/numeric_util.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/numeric_util.rs b/src/numeric_util.rs index e45e38b51..bae732438 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -24,11 +24,12 @@ const NAIVE_SUM_THRESHOLD: usize = 512; /// /// Pairwise summation is useful to reduce the accumulated round-off error /// when summing floating point numbers. -/// Pairwise summation provides an asymptotic error bound of *O(eps log n)*, where -/// *eps* is machine precision, compared to *O(eps n)* of the naive summation algorithm. -/// For more details, see [`paper`]. +/// Pairwise summation provides an asymptotic error bound of *O(ε* log *n)*, where +/// *ε* is machine precision, compared to *O(εn)* of the naive summation algorithm. +/// For more details, see [`paper`] or [`Wikipedia`]. /// /// [`paper`]: https://epubs.siam.org/doi/10.1137/0914050 +/// [`Wikipedia`]: https://en.wikipedia.org/wiki/Pairwise_summation /// [`threshold`]: constant.NAIVE_SUM_THRESHOLD.html pub(crate) fn pairwise_sum(v: &[A]) -> A where From 4414450a30641fcd4999c43e5775b6947a9134cc Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 4 Jan 2019 22:51:30 +0000 Subject: [PATCH 11/41] Don't forget to add the sum of the last elements (<512 ending block). --- src/numeric_util.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/numeric_util.rs b/src/numeric_util.rs index bae732438..fdf61360e 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -64,6 +64,8 @@ where partial_sum = A::zero(); } } + partial_sums.push(partial_sum); + pairwise_sum(&partial_sums) } @@ -89,6 +91,7 @@ where partial_sum = zero(); } } + partial_sums.push(partial_sum); if partial_sums.len() <= NAIVE_SUM_THRESHOLD { partial_sums.iter().fold(zero(), |acc, elem| acc + elem) From aeaad0ee8cdd58119fb90064fdf2cc7c96260250 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 5 Jan 2019 12:24:26 +0000 Subject: [PATCH 12/41] Add a benchmark for summing a contiguous array --- benches/numeric.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/benches/numeric.rs b/benches/numeric.rs index db0252c8a..1686d3874 100644 --- a/benches/numeric.rs +++ b/benches/numeric.rs @@ -1,6 +1,5 @@ #![feature(test)] - extern crate test; use test::Bencher; @@ -25,3 +24,13 @@ fn clip(bench: &mut Bencher) }) }); } + +#[bench] +fn contiguous_sum(bench: &mut Bencher) +{ + let n = 100000; + let a = Array::linspace(-1e6, 1e6, n); + bench.iter(|| { + a.sum() + }); +} From 75109b1e66548f3c9fa246f2d6a32d251ede5814 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 5 Jan 2019 12:30:13 +0000 Subject: [PATCH 13/41] Benchmarks for arrays of different length --- benches/numeric.rs | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/benches/numeric.rs b/benches/numeric.rs index 1686d3874..02c43aa53 100644 --- a/benches/numeric.rs +++ b/benches/numeric.rs @@ -26,11 +26,32 @@ fn clip(bench: &mut Bencher) } #[bench] -fn contiguous_sum(bench: &mut Bencher) +fn contiguous_sum_1e7(bench: &mut Bencher) { - let n = 100000; + let n = 1e7 as usize; let a = Array::linspace(-1e6, 1e6, n); bench.iter(|| { a.sum() }); } + +#[bench] +fn contiguous_sum_1e4(bench: &mut Bencher) +{ + let n = 1e4 as usize; + let a = Array::linspace(-1e6, 1e6, n); + bench.iter(|| { + a.sum() + }); +} + +#[bench] +fn contiguous_sum_1e2(bench: &mut Bencher) +{ + let n = 1e2 as usize; + let a = Array::linspace(-1e6, 1e6, n); + bench.iter(|| { + a.sum() + }); +} + From 30851943b6f0a5f752d0aa0a5419235b297ce633 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 5 Jan 2019 12:56:01 +0000 Subject: [PATCH 14/41] Don't split midpoint, saving one operation --- src/numeric_util.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/numeric_util.rs b/src/numeric_util.rs index fdf61360e..f7a3f96bc 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -37,10 +37,9 @@ where { let n = v.len(); if n <= NAIVE_SUM_THRESHOLD { - return unrolled_fold(v, A::zero, A::add); + unrolled_fold(v, A::zero, A::add) } else { - let mid_index = n / 2; - let (v1, v2) = v.split_at(mid_index); + let (v1, v2) = v.split_at(NAIVE_SUM_THRESHOLD); pairwise_sum(v1) + pairwise_sum(v2) } } From 797e212c23e8c0569ca419b191071b93468b19af Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 5 Jan 2019 12:56:57 +0000 Subject: [PATCH 15/41] Revert "Don't split midpoint, saving one operation" This reverts commit 30851943b6f0a5f752d0aa0a5419235b297ce633. --- src/numeric_util.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/numeric_util.rs b/src/numeric_util.rs index f7a3f96bc..fdf61360e 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -37,9 +37,10 @@ where { let n = v.len(); if n <= NAIVE_SUM_THRESHOLD { - unrolled_fold(v, A::zero, A::add) + return unrolled_fold(v, A::zero, A::add); } else { - let (v1, v2) = v.split_at(NAIVE_SUM_THRESHOLD); + let mid_index = n / 2; + let (v1, v2) = v.split_at(mid_index); pairwise_sum(v1) + pairwise_sum(v2) } } From d2b636b635b5790fd31ca4ff0acde9ba25b173eb Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 5 Jan 2019 13:13:16 +0000 Subject: [PATCH 16/41] Benches for sum_axis --- benches/numeric.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/benches/numeric.rs b/benches/numeric.rs index 02c43aa53..4239652bb 100644 --- a/benches/numeric.rs +++ b/benches/numeric.rs @@ -55,3 +55,26 @@ fn contiguous_sum_1e2(bench: &mut Bencher) }); } +#[bench] +fn sum_by_row_1e4(bench: &mut Bencher) +{ + let n = 1e3 as usize; + let a = Array::linspace(-1e6, 1e6, n * n) + .into_shape([n, n]) + .unwrap(); + bench.iter(|| { + a.sum_axis(Axis(0)) + }); +} + +#[bench] +fn sum_by_col_1e4(bench: &mut Bencher) +{ + let n = 1e3 as usize; + let a = Array::linspace(-1e6, 1e6, n * n) + .into_shape([n, n]) + .unwrap(); + bench.iter(|| { + a.sum_axis(Axis(1)) + }); +} From b3d2b42325f0b78fe3f1dcbd8bc327bde2f87db8 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 5 Jan 2019 13:25:53 +0000 Subject: [PATCH 17/41] Bench for contiguous sum with integer values --- benches/numeric.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/benches/numeric.rs b/benches/numeric.rs index 4239652bb..76d07f1e4 100644 --- a/benches/numeric.rs +++ b/benches/numeric.rs @@ -25,6 +25,16 @@ fn clip(bench: &mut Bencher) }); } +#[bench] +fn contiguous_sum_int_1e4(bench: &mut Bencher) +{ + let n = 1e4 as usize; + let a = Array::from_vec((0..n).collect()); + bench.iter(|| { + a.sum() + }); +} + #[bench] fn contiguous_sum_1e7(bench: &mut Bencher) { From 8f95705c4ab045e87a077e12e67edcb5f530177f Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Wed, 9 Jan 2019 08:48:45 +0000 Subject: [PATCH 18/41] Alternative implementation for sum_axis --- src/numeric/impl_numeric.rs | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/src/numeric/impl_numeric.rs b/src/numeric/impl_numeric.rs index 9551a1d30..bed2ae78c 100644 --- a/src/numeric/impl_numeric.rs +++ b/src/numeric/impl_numeric.rs @@ -103,22 +103,11 @@ impl ArrayBase where A: Clone + Zero + Add, D: RemoveAxis, { - let n = self.len_of(axis); - let stride = self.strides()[axis.index()]; - let mut res = Array::zeros(self.raw_dim().remove_axis(axis)); - if self.ndim() == 2 && stride == 1 { - // contiguous along the axis we are summing - let ax = axis.index(); - for (i, elt) in enumerate(&mut res) { - *elt = self.index_axis(Axis(1 - ax), i).sum(); - } - res - } else { - numeric_util::array_pairwise_sum( - (0..n).map(|i| self.index_axis(axis, i)), - || res.clone() - ) - } + let mut out = Array::zeros(self.dim.remove_axis(axis)); + Zip::from(&mut out) + .and(self.lanes(axis)) + .apply(|out, lane| *out = lane.sum()); + out } /// Return mean along `axis`. From 74a74ae2a745b77eee467f90dc18009a53b4fa59 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Wed, 9 Jan 2019 08:55:25 +0000 Subject: [PATCH 19/41] Revert "Alternative implementation for sum_axis" This reverts commit 8f95705c4ab045e87a077e12e67edcb5f530177f. --- src/numeric/impl_numeric.rs | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/numeric/impl_numeric.rs b/src/numeric/impl_numeric.rs index bed2ae78c..9551a1d30 100644 --- a/src/numeric/impl_numeric.rs +++ b/src/numeric/impl_numeric.rs @@ -103,11 +103,22 @@ impl ArrayBase where A: Clone + Zero + Add, D: RemoveAxis, { - let mut out = Array::zeros(self.dim.remove_axis(axis)); - Zip::from(&mut out) - .and(self.lanes(axis)) - .apply(|out, lane| *out = lane.sum()); - out + let n = self.len_of(axis); + let stride = self.strides()[axis.index()]; + let mut res = Array::zeros(self.raw_dim().remove_axis(axis)); + if self.ndim() == 2 && stride == 1 { + // contiguous along the axis we are summing + let ax = axis.index(); + for (i, elt) in enumerate(&mut res) { + *elt = self.index_axis(Axis(1 - ax), i).sum(); + } + res + } else { + numeric_util::array_pairwise_sum( + (0..n).map(|i| self.index_axis(axis, i)), + || res.clone() + ) + } } /// Return mean along `axis`. From a592a7dc537072a174fa6c4affdef7c483a122f8 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Wed, 9 Jan 2019 08:57:23 +0000 Subject: [PATCH 20/41] Ensure equal block size independently of underlying implementation --- src/numeric_util.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/numeric_util.rs b/src/numeric_util.rs index fdf61360e..fe41aa87f 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -12,7 +12,8 @@ use super::{ArrayBase, Array, Data, Dimension}; use crate::LinalgScalar; /// Size threshold to switch to naive summation in all implementations of pairwise summation. -const NAIVE_SUM_THRESHOLD: usize = 512; +const SIMD_NAIVE_SUM_THRESHOLD: usize = 512; +const NO_SIMD_NAIVE_SUM_THRESHOLD: usize = SIMD_NAIVE_SUM_THRESHOLD / 8; /// An implementation of pairwise summation for a vector slice. /// @@ -36,7 +37,7 @@ where A: Clone + Add + Zero, { let n = v.len(); - if n <= NAIVE_SUM_THRESHOLD { + if n <= SIMD_NAIVE_SUM_THRESHOLD { return unrolled_fold(v, A::zero, A::add); } else { let mid_index = n / 2; @@ -59,7 +60,7 @@ where let mut partial_sum = A::zero(); for (i, x) in iter.enumerate() { partial_sum = partial_sum + x.clone(); - if i % NAIVE_SUM_THRESHOLD == NAIVE_SUM_THRESHOLD - 1 { + if i % NO_SIMD_NAIVE_SUM_THRESHOLD == NO_SIMD_NAIVE_SUM_THRESHOLD - 1 { partial_sums.push(partial_sum); partial_sum = A::zero(); } @@ -86,14 +87,14 @@ where let mut partial_sum = zero(); for (i, x) in iter.enumerate() { partial_sum = partial_sum + x; - if i % NAIVE_SUM_THRESHOLD == NAIVE_SUM_THRESHOLD - 1 { + if i % NO_SIMD_NAIVE_SUM_THRESHOLD == NO_SIMD_NAIVE_SUM_THRESHOLD - 1 { partial_sums.push(partial_sum); partial_sum = zero(); } } partial_sums.push(partial_sum); - if partial_sums.len() <= NAIVE_SUM_THRESHOLD { + if partial_sums.len() <= NO_SIMD_NAIVE_SUM_THRESHOLD { partial_sums.iter().fold(zero(), |acc, elem| acc + elem) } else { array_pairwise_sum(partial_sums.into_iter(), zero) From f73fb2d449c604f2ab0bd57d3f8f5b2680a76bf2 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 22 Jan 2019 07:47:05 +0000 Subject: [PATCH 21/41] Change threshold names --- src/numeric_util.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/numeric_util.rs b/src/numeric_util.rs index fe41aa87f..80047c171 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -12,8 +12,9 @@ use super::{ArrayBase, Array, Data, Dimension}; use crate::LinalgScalar; /// Size threshold to switch to naive summation in all implementations of pairwise summation. -const SIMD_NAIVE_SUM_THRESHOLD: usize = 512; -const NO_SIMD_NAIVE_SUM_THRESHOLD: usize = SIMD_NAIVE_SUM_THRESHOLD / 8; +const NAIVE_SUM_THRESHOLD: usize = 64; +/// Number of elements processed by unrolled operators (to leverage SIMD instructions). +const UNROLL_SIZE: usize = 8; /// An implementation of pairwise summation for a vector slice. /// @@ -37,7 +38,7 @@ where A: Clone + Add + Zero, { let n = v.len(); - if n <= SIMD_NAIVE_SUM_THRESHOLD { + if n <= NAIVE_SUM_THRESHOLD * UNROLL_SIZE { return unrolled_fold(v, A::zero, A::add); } else { let mid_index = n / 2; @@ -60,7 +61,7 @@ where let mut partial_sum = A::zero(); for (i, x) in iter.enumerate() { partial_sum = partial_sum + x.clone(); - if i % NO_SIMD_NAIVE_SUM_THRESHOLD == NO_SIMD_NAIVE_SUM_THRESHOLD - 1 { + if i % NAIVE_SUM_THRESHOLD == NAIVE_SUM_THRESHOLD - 1 { partial_sums.push(partial_sum); partial_sum = A::zero(); } @@ -87,14 +88,14 @@ where let mut partial_sum = zero(); for (i, x) in iter.enumerate() { partial_sum = partial_sum + x; - if i % NO_SIMD_NAIVE_SUM_THRESHOLD == NO_SIMD_NAIVE_SUM_THRESHOLD - 1 { + if i % NAIVE_SUM_THRESHOLD == NAIVE_SUM_THRESHOLD - 1 { partial_sums.push(partial_sum); partial_sum = zero(); } } partial_sums.push(partial_sum); - if partial_sums.len() <= NO_SIMD_NAIVE_SUM_THRESHOLD { + if partial_sums.len() <= NAIVE_SUM_THRESHOLD { partial_sums.iter().fold(zero(), |acc, elem| acc + elem) } else { array_pairwise_sum(partial_sums.into_iter(), zero) From c7fa091c9d160ec5e173783cb350aacf0c5dc368 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 22 Jan 2019 07:55:18 +0000 Subject: [PATCH 22/41] Change sum_axis implementation --- src/numeric/impl_numeric.rs | 10 +++++----- src/numeric_util.rs | 33 +-------------------------------- 2 files changed, 6 insertions(+), 37 deletions(-) diff --git a/src/numeric/impl_numeric.rs b/src/numeric/impl_numeric.rs index 9551a1d30..15ddd789e 100644 --- a/src/numeric/impl_numeric.rs +++ b/src/numeric/impl_numeric.rs @@ -105,19 +105,19 @@ impl ArrayBase { let n = self.len_of(axis); let stride = self.strides()[axis.index()]; - let mut res = Array::zeros(self.raw_dim().remove_axis(axis)); if self.ndim() == 2 && stride == 1 { // contiguous along the axis we are summing + let mut res = Array::zeros(self.raw_dim().remove_axis(axis)); let ax = axis.index(); for (i, elt) in enumerate(&mut res) { *elt = self.index_axis(Axis(1 - ax), i).sum(); } res + } else if self.len_of(axis) <= numeric_util::NAIVE_SUM_THRESHOLD { + self.fold_axis(axis, A::zero(), |acc, x| acc.clone() + x.clone()) } else { - numeric_util::array_pairwise_sum( - (0..n).map(|i| self.index_axis(axis, i)), - || res.clone() - ) + let (v1, v2) = self.view().split_at(axis, n / 2); + v1.sum_axis(axis) + v2.sum_axis(axis) } } diff --git a/src/numeric_util.rs b/src/numeric_util.rs index 80047c171..f36e81e97 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -12,7 +12,7 @@ use super::{ArrayBase, Array, Data, Dimension}; use crate::LinalgScalar; /// Size threshold to switch to naive summation in all implementations of pairwise summation. -const NAIVE_SUM_THRESHOLD: usize = 64; +pub(crate) const NAIVE_SUM_THRESHOLD: usize = 64; /// Number of elements processed by unrolled operators (to leverage SIMD instructions). const UNROLL_SIZE: usize = 8; @@ -71,37 +71,6 @@ where pairwise_sum(&partial_sums) } -/// An implementation of pairwise summation for an iterator over *n*-dimensional arrays. -/// -/// See [`pairwise_sum`] for details on the algorithm. -/// -/// [`pairwise_sum`]: fn.pairwise_sum.html -pub(crate) fn array_pairwise_sum(iter: I, zero: F) -> Array -where - I: Iterator>, - S: Data, - D: Dimension, - A: Clone + Add, - F: Fn() -> Array, -{ - let mut partial_sums = vec![]; - let mut partial_sum = zero(); - for (i, x) in iter.enumerate() { - partial_sum = partial_sum + x; - if i % NAIVE_SUM_THRESHOLD == NAIVE_SUM_THRESHOLD - 1 { - partial_sums.push(partial_sum); - partial_sum = zero(); - } - } - partial_sums.push(partial_sum); - - if partial_sums.len() <= NAIVE_SUM_THRESHOLD { - partial_sums.iter().fold(zero(), |acc, elem| acc + elem) - } else { - array_pairwise_sum(partial_sums.into_iter(), zero) - } -} - /// Fold over the manually unrolled `xs` with `f` pub fn unrolled_fold(mut xs: &[A], init: I, f: F) -> A where A: Clone, From f72164ab26b31a62c03ae80de2ddc51e31a2bf62 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 22 Jan 2019 07:59:29 +0000 Subject: [PATCH 23/41] Reduce partial accumulators pairwise in unrolled_fold --- src/numeric_util.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/numeric_util.rs b/src/numeric_util.rs index f36e81e97..ea838b7e4 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -79,7 +79,6 @@ pub fn unrolled_fold(mut xs: &[A], init: I, f: F) -> A { // eightfold unrolled so that floating point can be vectorized // (even with strict floating point accuracy semantics) - let mut acc = init(); let (mut p0, mut p1, mut p2, mut p3, mut p4, mut p5, mut p6, mut p7) = (init(), init(), init(), init(), @@ -96,18 +95,19 @@ pub fn unrolled_fold(mut xs: &[A], init: I, f: F) -> A xs = &xs[8..]; } - acc = f(acc.clone(), f(p0, p4)); - acc = f(acc.clone(), f(p1, p5)); - acc = f(acc.clone(), f(p2, p6)); - acc = f(acc.clone(), f(p3, p7)); + let (q0, q1, q2, q3) = (f(p0, p4), f(p1, p5), f(p2, p6), f(p3, p7)); + let (r0, r1) = (f(q0, q2), f(q1, q3)); + let unrolled = f(r0, r1); // make it clear to the optimizer that this loop is short // and can not be autovectorized. + let mut partial = init(); for i in 0..xs.len() { if i >= 7 { break; } - acc = f(acc.clone(), xs[i].clone()) + partial = f(partial.clone(), xs[i].clone()) } - acc + + f(unrolled, partial) } /// Compute the dot product. From 9f1c4d261cf23106a117d1ac31b4bf53b7652dd0 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 22 Jan 2019 08:00:44 +0000 Subject: [PATCH 24/41] Remove unused imports --- src/numeric_util.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/numeric_util.rs b/src/numeric_util.rs index ea838b7e4..17470446a 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -8,7 +8,6 @@ use std::cmp; use std::ops::Add; use num_traits::{self, Zero}; -use super::{ArrayBase, Array, Data, Dimension}; use crate::LinalgScalar; /// Size threshold to switch to naive summation in all implementations of pairwise summation. From bbc4a756947e5b4bcbb39e5fefa0e3628de860a9 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 22 Jan 2019 08:09:41 +0000 Subject: [PATCH 25/41] Get uniform behaviour across all pairwise_sum implementations --- src/numeric_util.rs | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/numeric_util.rs b/src/numeric_util.rs index 17470446a..bcd1080e8 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -67,7 +67,25 @@ where } partial_sums.push(partial_sum); - pairwise_sum(&partial_sums) + pure_pairwise_sum(&partial_sums) +} + +/// An implementation of pairwise summation for a vector slice that never +/// switches to the naive sum algorithm. +fn pure_pairwise_sum(v: &[A]) -> A + where + A: Clone + Add + Zero, +{ + let n = v.len(); + match n { + 0 => A::zero(), + 1 => v[0].clone(), + n => { + let mid_index = n / 2; + let (v1, v2) = v.split_at(mid_index); + pure_pairwise_sum(v1) + pure_pairwise_sum(v2) + } + } } /// Fold over the manually unrolled `xs` with `f` From b98e30b2d59d30df7de7238bc29ba36903026e32 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sat, 2 Feb 2019 22:31:07 -0500 Subject: [PATCH 26/41] Add more benchmarks of sum/sum_axis --- benches/numeric.rs | 46 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/benches/numeric.rs b/benches/numeric.rs index 76d07f1e4..5a7c111a5 100644 --- a/benches/numeric.rs +++ b/benches/numeric.rs @@ -1,7 +1,7 @@ #![feature(test)] extern crate test; -use test::Bencher; +use test::{black_box, Bencher}; extern crate ndarray; use ndarray::prelude::*; @@ -65,6 +65,38 @@ fn contiguous_sum_1e2(bench: &mut Bencher) }); } +#[bench] +fn contiguous_sum_ix3_1e2(bench: &mut Bencher) +{ + let n = 1e2 as usize; + let a = Array::linspace(-1e6, 1e6, n * n * n) + .into_shape([n, n, n]) + .unwrap(); + bench.iter(|| black_box(&a).sum()); +} + +#[bench] +fn inner_discontiguous_sum_ix3_1e2(bench: &mut Bencher) +{ + let n = 1e2 as usize; + let a = Array::linspace(-1e6, 1e6, n * n * 2*n) + .into_shape([n, n, 2*n]) + .unwrap(); + let v = a.slice(s![.., .., ..;2]); + bench.iter(|| black_box(&v).sum()); +} + +#[bench] +fn middle_discontiguous_sum_ix3_1e2(bench: &mut Bencher) +{ + let n = 1e2 as usize; + let a = Array::linspace(-1e6, 1e6, n * 2*n * n) + .into_shape([n, 2*n, n]) + .unwrap(); + let v = a.slice(s![.., ..;2, ..]); + bench.iter(|| black_box(&v).sum()); +} + #[bench] fn sum_by_row_1e4(bench: &mut Bencher) { @@ -88,3 +120,15 @@ fn sum_by_col_1e4(bench: &mut Bencher) a.sum_axis(Axis(1)) }); } + +#[bench] +fn sum_by_middle_1e2(bench: &mut Bencher) +{ + let n = 1e2 as usize; + let a = Array::linspace(-1e6, 1e6, n * n * n) + .into_shape([n, n, n]) + .unwrap(); + bench.iter(|| { + a.sum_axis(Axis(1)) + }); +} From ed88e2e9a8445c1f9dccf1d408f6a79ea1ace17b Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sat, 2 Feb 2019 23:32:56 -0500 Subject: [PATCH 27/41] Improve performance of iterator_pairwise_sum --- src/numeric_util.rs | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/numeric_util.rs b/src/numeric_util.rs index bcd1080e8..23966b9a9 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -56,16 +56,18 @@ where I: Iterator, A: Clone + Add + Zero, { - let mut partial_sums = vec![]; - let mut partial_sum = A::zero(); - for (i, x) in iter.enumerate() { - partial_sum = partial_sum + x.clone(); - if i % NAIVE_SUM_THRESHOLD == NAIVE_SUM_THRESHOLD - 1 { + let (len, _) = iter.size_hint(); + let cap = len.saturating_sub(1) / NAIVE_SUM_THRESHOLD + 1; // ceiling of division + let mut partial_sums = Vec::with_capacity(cap); + let (_, last_sum) = iter.fold((0, A::zero()), |(count, partial_sum), x| { + if count < NAIVE_SUM_THRESHOLD { + (count + 1, partial_sum + x.clone()) + } else { partial_sums.push(partial_sum); - partial_sum = A::zero(); + (1, x.clone()) } - } - partial_sums.push(partial_sum); + }); + partial_sums.push(last_sum); pure_pairwise_sum(&partial_sums) } @@ -205,3 +207,17 @@ pub fn unrolled_eq(xs: &[A], ys: &[A]) -> bool true } + +#[cfg(test)] +mod tests { + use quickcheck::quickcheck; + use std::num::Wrapping; + use super::iterator_pairwise_sum; + + quickcheck! { + fn iterator_pairwise_sum_is_correct(xs: Vec) -> bool { + let xs: Vec<_> = xs.into_iter().map(|x| Wrapping(x)).collect(); + iterator_pairwise_sum(xs.iter()) == xs.iter().sum() + } + } +} From e7835ee671eb547bab955251eeda9bcc6a75f38f Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sat, 2 Feb 2019 23:40:02 -0500 Subject: [PATCH 28/41] Make sum pairwise over all dimensions --- src/numeric/impl_numeric.rs | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/numeric/impl_numeric.rs b/src/numeric/impl_numeric.rs index 15ddd789e..7c417a1aa 100644 --- a/src/numeric/impl_numeric.rs +++ b/src/numeric/impl_numeric.rs @@ -33,17 +33,10 @@ impl ArrayBase where A: Clone + Add + num_traits::Zero, { if let Some(slc) = self.as_slice_memory_order() { - return numeric_util::pairwise_sum(&slc) - } - let mut sum = A::zero(); - for row in self.inner_rows() { - if let Some(slc) = row.as_slice() { - sum = sum + numeric_util::pairwise_sum(&slc); - } else { - sum = sum + numeric_util::iterator_pairwise_sum(row.iter()); - } + numeric_util::pairwise_sum(&slc) + } else { + numeric_util::iterator_pairwise_sum(self.iter()) } - sum } /// Return the sum of all elements in the array. From 8301c25bb242bd788bbaf6359a30d2e286c3753a Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sat, 2 Feb 2019 23:59:30 -0500 Subject: [PATCH 29/41] Implement contiguous sum_axis in terms of Zip This is slightly faster and is easier to understand. --- src/numeric/impl_numeric.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/numeric/impl_numeric.rs b/src/numeric/impl_numeric.rs index 7c417a1aa..0dd273525 100644 --- a/src/numeric/impl_numeric.rs +++ b/src/numeric/impl_numeric.rs @@ -8,7 +8,6 @@ use std::ops::{Add, Div, Mul}; use num_traits::{self, Zero, Float, FromPrimitive}; -use itertools::free::enumerate; use crate::imp_prelude::*; use crate::numeric_util; @@ -97,14 +96,12 @@ impl ArrayBase D: RemoveAxis, { let n = self.len_of(axis); - let stride = self.strides()[axis.index()]; - if self.ndim() == 2 && stride == 1 { + if self.stride_of(axis) == 1 { // contiguous along the axis we are summing let mut res = Array::zeros(self.raw_dim().remove_axis(axis)); - let ax = axis.index(); - for (i, elt) in enumerate(&mut res) { - *elt = self.index_axis(Axis(1 - ax), i).sum(); - } + Zip::from(&mut res) + .and(self.lanes(axis)) + .apply(|sum, lane| *sum = lane.sum()); res } else if self.len_of(axis) <= numeric_util::NAIVE_SUM_THRESHOLD { self.fold_axis(axis, A::zero(), |acc, x| acc.clone() + x.clone()) From 82453dfc73a217acc24fa85aa7f9108e28da5713 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 3 Feb 2019 00:02:24 -0500 Subject: [PATCH 30/41] Remove redundant len_of call --- src/numeric/impl_numeric.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/numeric/impl_numeric.rs b/src/numeric/impl_numeric.rs index 0dd273525..0be4a0ce0 100644 --- a/src/numeric/impl_numeric.rs +++ b/src/numeric/impl_numeric.rs @@ -103,7 +103,7 @@ impl ArrayBase .and(self.lanes(axis)) .apply(|sum, lane| *sum = lane.sum()); res - } else if self.len_of(axis) <= numeric_util::NAIVE_SUM_THRESHOLD { + } else if n <= numeric_util::NAIVE_SUM_THRESHOLD { self.fold_axis(axis, A::zero(), |acc, x| acc.clone() + x.clone()) } else { let (v1, v2) = self.view().split_at(axis, n / 2); From 978f45a29a11608c542994c6b029c893a3ff4afb Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 3 Feb 2019 16:30:04 +0000 Subject: [PATCH 31/41] Added test for axis independence --- src/numeric/impl_numeric.rs | 60 +++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/src/numeric/impl_numeric.rs b/src/numeric/impl_numeric.rs index 0be4a0ce0..061b574db 100644 --- a/src/numeric/impl_numeric.rs +++ b/src/numeric/impl_numeric.rs @@ -277,3 +277,63 @@ impl ArrayBase } } +#[cfg(test)] +mod tests { + use super::*; + use self::Array; + use quickcheck::quickcheck; + + quickcheck! { + fn sum_value_does_not_depend_on_axis(xs: Vec) -> bool { + // We want three axis of equal length - we drop some elements + // to get the right number + let axis_length = (xs.len() as f64).cbrt().floor() as usize; + let xs = &xs[..axis_length.pow(3)]; + + // We want to check that summing with respect to an axis + // is independent from the specific underlying implementation of + // pairwise sum, which is itself conditional on the arrangement + // in memory of the array elements. + // We will thus swap axes and compute the sum, in turn, with respect to + // axes 0, 1 and 2, while making sure that mathematically the same + // number should be spit out (because we are properly transposing before summing). + if axis_length > 0 { + let a = Array::from_vec(xs.to_vec()) + .into_shape((axis_length, axis_length, axis_length)) + .unwrap(); + assert!(a.is_standard_layout()); + let sum1 = a.sum_axis(Axis(0)); + + let mut b = Array::zeros(a.raw_dim()); + assert!(b.is_standard_layout()); + for i in 0..axis_length { + for j in 0..axis_length { + for k in 0..axis_length { + b[(i, j, k)] = a[(j, i, k)].clone(); + } + } + } + let sum2 = b.sum_axis(Axis(1)); + + let mut c = Array::zeros(a.raw_dim()); + assert!(c.is_standard_layout()); + for i in 0..axis_length { + for j in 0..axis_length { + for k in 0..axis_length { + c[(i, j, k)] = a[(k, i, j)].clone(); + } + } + } + let sum3 = c.sum_axis(Axis(2)); + + let tol = 1e-10; + let first = (sum2.clone() - sum1.clone()).iter().all(|x| x.abs() < tol); + let second = (sum3.clone() - sum1.clone()).iter().all(|x| x.abs() < tol); + let third = (sum3.clone() - sum2.clone()).iter().all(|x| x.abs() < tol); + first && second && third + } else { + true + } + } + } +} \ No newline at end of file From fa0ba30c25619c69efeaad9e30218ba84bde4ad7 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 3 Feb 2019 16:57:36 +0000 Subject: [PATCH 32/41] Make sure we actually exercise the pairwise technique --- Cargo.toml | 1 + src/lib.rs | 2 + src/numeric/impl_numeric.rs | 105 +++++++++++++++++++++--------------- src/numeric_util.rs | 7 ++- 4 files changed, 72 insertions(+), 43 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2f4f8bb0a..bdab7df52 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,6 +47,7 @@ serde = { version = "1.0", optional = true } defmac = "0.2" quickcheck = { version = "0.7.2", default-features = false } rawpointer = "0.1" +rand = "0.5.5" [features] # Enable blas usage diff --git a/src/lib.rs b/src/lib.rs index d998be49f..39f102e6f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -104,6 +104,8 @@ extern crate num_integer; #[cfg(test)] extern crate quickcheck; +#[cfg(test)] +extern crate rand; #[cfg(feature = "docs")] pub mod doc; diff --git a/src/numeric/impl_numeric.rs b/src/numeric/impl_numeric.rs index 061b574db..261429651 100644 --- a/src/numeric/impl_numeric.rs +++ b/src/numeric/impl_numeric.rs @@ -280,60 +280,81 @@ impl ArrayBase #[cfg(test)] mod tests { use super::*; + use super::numeric_util::{NAIVE_SUM_THRESHOLD, UNROLL_SIZE}; use self::Array; - use quickcheck::quickcheck; + use quickcheck::{QuickCheck, StdGen, TestResult}; - quickcheck! { - fn sum_value_does_not_depend_on_axis(xs: Vec) -> bool { - // We want three axis of equal length - we drop some elements - // to get the right number - let axis_length = (xs.len() as f64).cbrt().floor() as usize; - let xs = &xs[..axis_length.pow(3)]; + #[test] + fn test_sum_value_does_not_depend_on_axis() { + // `size` controls the length of the array of data + // We set it to be randomly drawn between 0 and + // a number larger than NAIVE_SUM_THRESHOLD * UNROLL_SIZE + let rng = StdGen::new( + rand::thread_rng(), + 5* (NAIVE_SUM_THRESHOLD * UNROLL_SIZE).pow(3) + ); + let mut quickcheck = QuickCheck::new().gen(rng).tests(100); + quickcheck.quickcheck( + _sum_value_does_not_depend_on_axis + as fn( + Vec + ) -> TestResult, + ); + } + + fn _sum_value_does_not_depend_on_axis(xs: Vec) -> TestResult { + // We want three axis of equal length - we drop some elements + // to get the right number + let axis_length = (xs.len() as f64).cbrt().floor() as usize; + let xs = &xs[..axis_length.pow(3)]; - // We want to check that summing with respect to an axis - // is independent from the specific underlying implementation of - // pairwise sum, which is itself conditional on the arrangement - // in memory of the array elements. - // We will thus swap axes and compute the sum, in turn, with respect to - // axes 0, 1 and 2, while making sure that mathematically the same - // number should be spit out (because we are properly transposing before summing). - if axis_length > 0 { - let a = Array::from_vec(xs.to_vec()) - .into_shape((axis_length, axis_length, axis_length)) - .unwrap(); - assert!(a.is_standard_layout()); - let sum1 = a.sum_axis(Axis(0)); + // We want to check that summing with respect to an axis + // is independent from the specific underlying implementation of + // pairwise sum, which is itself conditional on the arrangement + // in memory of the array elements. + // We will thus swap axes and compute the sum, in turn, with respect to + // axes 0, 1 and 2, while making sure that mathematically the same + // number should be spit out (because we are properly transposing before summing). + if axis_length > 0 { + let a = Array::from_vec(xs.to_vec()) + .into_shape((axis_length, axis_length, axis_length)) + .unwrap(); + assert!(a.is_standard_layout()); + let sum1 = a.sum_axis(Axis(0)); - let mut b = Array::zeros(a.raw_dim()); - assert!(b.is_standard_layout()); - for i in 0..axis_length { - for j in 0..axis_length { - for k in 0..axis_length { - b[(i, j, k)] = a[(j, i, k)].clone(); - } + let mut b = Array::zeros(a.raw_dim()); + assert!(b.is_standard_layout()); + for i in 0..axis_length { + for j in 0..axis_length { + for k in 0..axis_length { + b[(i, j, k)] = a[(j, i, k)].clone(); } } - let sum2 = b.sum_axis(Axis(1)); + } + let sum2 = b.sum_axis(Axis(1)); - let mut c = Array::zeros(a.raw_dim()); - assert!(c.is_standard_layout()); - for i in 0..axis_length { - for j in 0..axis_length { - for k in 0..axis_length { - c[(i, j, k)] = a[(k, i, j)].clone(); - } + let mut c = Array::zeros(a.raw_dim()); + assert!(c.is_standard_layout()); + for i in 0..axis_length { + for j in 0..axis_length { + for k in 0..axis_length { + c[(i, j, k)] = a[(k, i, j)].clone(); } } - let sum3 = c.sum_axis(Axis(2)); + } + let sum3 = c.sum_axis(Axis(2)); - let tol = 1e-10; - let first = (sum2.clone() - sum1.clone()).iter().all(|x| x.abs() < tol); - let second = (sum3.clone() - sum1.clone()).iter().all(|x| x.abs() < tol); - let third = (sum3.clone() - sum2.clone()).iter().all(|x| x.abs() < tol); - first && second && third + let tol = 1e-10; + let first = (sum2.clone() - sum1.clone()).iter().all(|x| x.abs() < tol); + let second = (sum3.clone() - sum1.clone()).iter().all(|x| x.abs() < tol); + let third = (sum3.clone() - sum2.clone()).iter().all(|x| x.abs() < tol); + if first && second && third { + TestResult::passed() } else { - true + TestResult::failed() } + } else { + TestResult::passed() } } } \ No newline at end of file diff --git a/src/numeric_util.rs b/src/numeric_util.rs index 23966b9a9..2557f2c68 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -11,9 +11,14 @@ use num_traits::{self, Zero}; use crate::LinalgScalar; /// Size threshold to switch to naive summation in all implementations of pairwise summation. +#[cfg(not(test))] pub(crate) const NAIVE_SUM_THRESHOLD: usize = 64; +// Set it to a smaller number for testing purposes +#[cfg(test)] +pub(crate) const NAIVE_SUM_THRESHOLD: usize = 2; + /// Number of elements processed by unrolled operators (to leverage SIMD instructions). -const UNROLL_SIZE: usize = 8; +pub(crate) const UNROLL_SIZE: usize = 8; /// An implementation of pairwise summation for a vector slice. /// From b4136d77a80110f71dc85ee0f2cd06228573c394 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 3 Feb 2019 17:15:31 +0000 Subject: [PATCH 33/41] Test discontinuous arrays --- src/numeric/impl_numeric.rs | 114 ++++++++++++++++++++++++++++-------- 1 file changed, 90 insertions(+), 24 deletions(-) diff --git a/src/numeric/impl_numeric.rs b/src/numeric/impl_numeric.rs index 261429651..b8c393372 100644 --- a/src/numeric/impl_numeric.rs +++ b/src/numeric/impl_numeric.rs @@ -281,7 +281,7 @@ impl ArrayBase mod tests { use super::*; use super::numeric_util::{NAIVE_SUM_THRESHOLD, UNROLL_SIZE}; - use self::Array; + use self::{Array, s}; use quickcheck::{QuickCheck, StdGen, TestResult}; #[test] @@ -316,38 +316,70 @@ mod tests { // axes 0, 1 and 2, while making sure that mathematically the same // number should be spit out (because we are properly transposing before summing). if axis_length > 0 { - let a = Array::from_vec(xs.to_vec()) - .into_shape((axis_length, axis_length, axis_length)) - .unwrap(); - assert!(a.is_standard_layout()); - let sum1 = a.sum_axis(Axis(0)); + let (a, b, c) = equivalent_arrays(xs.to_vec(), axis_length); - let mut b = Array::zeros(a.raw_dim()); - assert!(b.is_standard_layout()); - for i in 0..axis_length { - for j in 0..axis_length { - for k in 0..axis_length { - b[(i, j, k)] = a[(j, i, k)].clone(); - } - } - } + let sum1 = a.sum_axis(Axis(0)); let sum2 = b.sum_axis(Axis(1)); + let sum3 = c.sum_axis(Axis(2)); - let mut c = Array::zeros(a.raw_dim()); - assert!(c.is_standard_layout()); - for i in 0..axis_length { - for j in 0..axis_length { - for k in 0..axis_length { - c[(i, j, k)] = a[(k, i, j)].clone(); - } - } + let tol = 1e-10; + let first = (sum2.clone() - sum1.clone()).iter().all(|x| x.abs() < tol); + let second = (sum3.clone() - sum1.clone()).iter().all(|x| x.abs() < tol); + let third = (sum3.clone() - sum2.clone()).iter().all(|x| x.abs() < tol); + + if first && second && third { + TestResult::passed() + } else { + TestResult::failed() } - let sum3 = c.sum_axis(Axis(2)); + } else { + TestResult::passed() + } + } + + #[test] + fn test_sum_value_does_not_depend_on_axis_with_discontinuous_array() { + // `size` controls the length of the array of data + // We set it to be randomly drawn between 0 and + // a number larger than NAIVE_SUM_THRESHOLD * UNROLL_SIZE + let rng = StdGen::new( + rand::thread_rng(), + 5* (NAIVE_SUM_THRESHOLD * UNROLL_SIZE).pow(3) + ); + let mut quickcheck = QuickCheck::new().gen(rng).tests(100); + quickcheck.quickcheck( + _sum_value_does_not_depend_on_axis_w_discontinuous_array + as fn( + Vec + ) -> TestResult, + ); + } + + fn _sum_value_does_not_depend_on_axis_w_discontinuous_array(xs: Vec) -> TestResult { + // We want three axis of equal length - we drop some elements + // to get the right number + let axis_length = (xs.len() as f64).cbrt().floor() as usize; + let xs = &xs[..axis_length.pow(3)]; + + // We want to check that summing with respect to an axis + // is independent from the specific underlying implementation of + // pairwise sum, which is itself conditional on the arrangement + // in memory of the array elements. + // We will thus swap axes and compute the sum, in turn, with respect to + // axes 0, 1 and 2, while making sure that mathematically the same + // number should be spit out (because we are properly transposing before summing). + if axis_length > 0 { + let (a, b, c) = equivalent_arrays(xs.to_vec(), axis_length); + + let sum1 = a.slice(s![..;2, .., ..]).sum_axis(Axis(0)); + let sum2 = b.slice(s![.., ..;2, ..]).sum_axis(Axis(1)); + let sum3 = c.slice(s![.., .., ..;2]).sum_axis(Axis(2)); let tol = 1e-10; let first = (sum2.clone() - sum1.clone()).iter().all(|x| x.abs() < tol); let second = (sum3.clone() - sum1.clone()).iter().all(|x| x.abs() < tol); let third = (sum3.clone() - sum2.clone()).iter().all(|x| x.abs() < tol); + if first && second && third { TestResult::passed() } else { @@ -357,4 +389,38 @@ mod tests { TestResult::passed() } } + + // Given a vector with axis_length^3 elements, it returns three arrays, + // built using the vector elements, such that (mathematically): + // a.sum_axis(Axis(0) == b.sum_axis(Axis(1)) == c.sum_axis(Axis(2)) + fn equivalent_arrays(xs: Vec, axis_length: usize) -> (Array3, Array3, Array3) { + assert!(xs.len() == axis_length.pow(3)); + + let a = Array::from_vec(xs) + .into_shape((axis_length, axis_length, axis_length)) + .unwrap(); + assert!(a.is_standard_layout()); + + let mut b = Array::zeros(a.raw_dim()); + assert!(b.is_standard_layout()); + for i in 0..axis_length { + for j in 0..axis_length { + for k in 0..axis_length { + b[(i, j, k)] = a[(j, i, k)].clone(); + } + } + } + + let mut c = Array::zeros(a.raw_dim()); + assert!(c.is_standard_layout()); + for i in 0..axis_length { + for j in 0..axis_length { + for k in 0..axis_length { + c[(i, j, k)] = a[(k, i, j)].clone(); + } + } + } + return (a, b, c) + } + } \ No newline at end of file From 4a63cb3c5669c4463e45b302a983e5266e7a1691 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 3 Feb 2019 17:26:52 +0000 Subject: [PATCH 34/41] Add more integer benchmark equivalents --- benches/numeric.rs | 103 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 96 insertions(+), 7 deletions(-) diff --git a/benches/numeric.rs b/benches/numeric.rs index 5a7c111a5..d94ad0835 100644 --- a/benches/numeric.rs +++ b/benches/numeric.rs @@ -25,21 +25,22 @@ fn clip(bench: &mut Bencher) }); } + #[bench] -fn contiguous_sum_int_1e4(bench: &mut Bencher) +fn contiguous_sum_1e7(bench: &mut Bencher) { - let n = 1e4 as usize; - let a = Array::from_vec((0..n).collect()); + let n = 1e7 as usize; + let a = Array::linspace(-1e6, 1e6, n); bench.iter(|| { a.sum() }); } #[bench] -fn contiguous_sum_1e7(bench: &mut Bencher) +fn contiguous_sum_int_1e7(bench: &mut Bencher) { let n = 1e7 as usize; - let a = Array::linspace(-1e6, 1e6, n); + let a = Array::from_vec((0..n).collect()); bench.iter(|| { a.sum() }); @@ -55,6 +56,16 @@ fn contiguous_sum_1e4(bench: &mut Bencher) }); } +#[bench] +fn contiguous_sum_int_1e4(bench: &mut Bencher) +{ + let n = 1e4 as usize; + let a = Array::from_vec((0..n).collect()); + bench.iter(|| { + a.sum() + }); +} + #[bench] fn contiguous_sum_1e2(bench: &mut Bencher) { @@ -65,6 +76,16 @@ fn contiguous_sum_1e2(bench: &mut Bencher) }); } +#[bench] +fn contiguous_sum_int_1e2(bench: &mut Bencher) +{ + let n = 1e2 as usize; + let a = Array::from_vec((0..n).collect()); + bench.iter(|| { + a.sum() + }); +} + #[bench] fn contiguous_sum_ix3_1e2(bench: &mut Bencher) { @@ -75,6 +96,16 @@ fn contiguous_sum_ix3_1e2(bench: &mut Bencher) bench.iter(|| black_box(&a).sum()); } +#[bench] +fn contiguous_sum_int_ix3_1e2(bench: &mut Bencher) +{ + let n = 1e2 as usize; + let a = Array::from_vec((0..n.pow(3)).collect()) + .into_shape([n, n, n]) + .unwrap(); + bench.iter(|| black_box(&a).sum()); +} + #[bench] fn inner_discontiguous_sum_ix3_1e2(bench: &mut Bencher) { @@ -86,6 +117,17 @@ fn inner_discontiguous_sum_ix3_1e2(bench: &mut Bencher) bench.iter(|| black_box(&v).sum()); } +#[bench] +fn inner_discontiguous_sum_int_ix3_1e2(bench: &mut Bencher) +{ + let n = 1e2 as usize; + let a = Array::from_vec((0..(n.pow(3) * 2)).collect()) + .into_shape([n, n, 2*n]) + .unwrap(); + let v = a.slice(s![.., .., ..;2]); + bench.iter(|| black_box(&v).sum()); +} + #[bench] fn middle_discontiguous_sum_ix3_1e2(bench: &mut Bencher) { @@ -97,10 +139,21 @@ fn middle_discontiguous_sum_ix3_1e2(bench: &mut Bencher) bench.iter(|| black_box(&v).sum()); } +#[bench] +fn middle_discontiguous_sum_int_ix3_1e2(bench: &mut Bencher) +{ + let n = 1e2 as usize; + let a = Array::from_vec((0..(n.pow(3) * 2)).collect()) + .into_shape([n, 2*n, n]) + .unwrap(); + let v = a.slice(s![.., ..;2, ..]); + bench.iter(|| black_box(&v).sum()); +} + #[bench] fn sum_by_row_1e4(bench: &mut Bencher) { - let n = 1e3 as usize; + let n = 1e4 as usize; let a = Array::linspace(-1e6, 1e6, n * n) .into_shape([n, n]) .unwrap(); @@ -109,10 +162,22 @@ fn sum_by_row_1e4(bench: &mut Bencher) }); } +#[bench] +fn sum_by_row_int_1e4(bench: &mut Bencher) +{ + let n = 1e4 as usize; + let a = Array::from_vec((0..n.pow(2)).collect()) + .into_shape([n, n]) + .unwrap(); + bench.iter(|| { + a.sum_axis(Axis(0)) + }); +} + #[bench] fn sum_by_col_1e4(bench: &mut Bencher) { - let n = 1e3 as usize; + let n = 1e4 as usize; let a = Array::linspace(-1e6, 1e6, n * n) .into_shape([n, n]) .unwrap(); @@ -121,6 +186,18 @@ fn sum_by_col_1e4(bench: &mut Bencher) }); } +#[bench] +fn sum_by_col_int_1e4(bench: &mut Bencher) +{ + let n = 1e4 as usize; + let a = Array::from_vec((0..n.pow(2)).collect()) + .into_shape([n, n]) + .unwrap(); + bench.iter(|| { + a.sum_axis(Axis(1)) + }); +} + #[bench] fn sum_by_middle_1e2(bench: &mut Bencher) { @@ -132,3 +209,15 @@ fn sum_by_middle_1e2(bench: &mut Bencher) a.sum_axis(Axis(1)) }); } + +#[bench] +fn sum_by_middle_int_1e2(bench: &mut Bencher) +{ + let n = 1e2 as usize; + let a = Array::from_vec((0..n.pow(3)).collect()) + .into_shape([n, n, n]) + .unwrap(); + bench.iter(|| { + a.sum_axis(Axis(1)) + }); +} From f306b5f2812652a2763ae86520cce18328454366 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 3 Feb 2019 18:11:37 -0500 Subject: [PATCH 35/41] Fix min_stride_axis to prefer axes with length > 1 --- src/dimension/dimension_trait.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/dimension/dimension_trait.rs b/src/dimension/dimension_trait.rs index f8357f069..ba42ead07 100644 --- a/src/dimension/dimension_trait.rs +++ b/src/dimension/dimension_trait.rs @@ -291,8 +291,8 @@ pub trait Dimension : Clone + Eq + Debug + Send + Sync + Default + indices } - /// Compute the minimum stride axis (absolute value), under the constraint - /// that the length of the axis is > 1; + /// Compute the minimum stride axis (absolute value), preferring axes with + /// length > 1. #[doc(hidden)] fn min_stride_axis(&self, strides: &Self) -> Axis { let n = match self.ndim() { @@ -301,7 +301,7 @@ pub trait Dimension : Clone + Eq + Debug + Send + Sync + Default + n => n, }; axes_of(self, strides) - .rev() + .filter(|ax| ax.len() > 1) .min_by_key(|ax| ax.stride().abs()) .map_or(Axis(n - 1), |ax| ax.axis()) } @@ -588,9 +588,9 @@ impl Dimension for Dim<[Ix; 2]> { #[inline] fn min_stride_axis(&self, strides: &Self) -> Axis { - let s = get!(strides, 0) as Ixs; - let t = get!(strides, 1) as Ixs; - if s.abs() < t.abs() { + let s = (get!(strides, 0) as isize).abs(); + let t = (get!(strides, 1) as isize).abs(); + if s < t && get!(self, 0) > 1 { Axis(0) } else { Axis(1) From b7951df25e396e3d8cfd9b1b674331314c937c45 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 3 Feb 2019 18:34:13 -0500 Subject: [PATCH 36/41] Specialize min_stride_axis for Ix3 --- src/dimension/dimension_trait.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/dimension/dimension_trait.rs b/src/dimension/dimension_trait.rs index ba42ead07..038e5473c 100644 --- a/src/dimension/dimension_trait.rs +++ b/src/dimension/dimension_trait.rs @@ -697,6 +697,23 @@ impl Dimension for Dim<[Ix; 3]> { Some(Ix3(i, j, k)) } + #[inline] + fn min_stride_axis(&self, strides: &Self) -> Axis { + let s = (get!(strides, 0) as isize).abs(); + let t = (get!(strides, 1) as isize).abs(); + let u = (get!(strides, 2) as isize).abs(); + let (argmin, min) = if t < u && get!(self, 1) > 1 { + (Axis(1), t) + } else { + (Axis(2), u) + }; + if s < min && get!(self, 0) > 1 { + Axis(0) + } else { + argmin + } + } + /// Self is an index, return the stride offset #[inline] fn stride_offset(index: &Self, strides: &Self) -> isize { From 3326de4b8c80aaa4cbccc9068ede3dcc92d6036d Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 3 Feb 2019 18:09:09 -0500 Subject: [PATCH 37/41] Enable min_stride_axis as pub(crate) method --- src/impl_methods.rs | 46 +++++++++++++++++++++++++++++++++++++++++---- tests/dimension.rs | 31 ------------------------------ 2 files changed, 42 insertions(+), 35 deletions(-) diff --git a/src/impl_methods.rs b/src/impl_methods.rs index de74af795..02e81736a 100644 --- a/src/impl_methods.rs +++ b/src/impl_methods.rs @@ -1619,12 +1619,11 @@ where axes_of(&self.dim, &self.strides) } - /* - /// Return the axis with the least stride (by absolute value) - pub fn min_stride_axis(&self) -> Axis { + /// Return the axis with the least stride (by absolute value), + /// preferring axes with len > 1. + pub(crate) fn min_stride_axis(&self) -> Axis { self.dim.min_stride_axis(&self.strides) } - */ /// Return the axis with the greatest stride (by absolute value), /// preferring axes with len > 1. @@ -2103,3 +2102,42 @@ where }) } } + +#[cfg(test)] +mod tests { + use crate::prelude::*; + + #[test] + fn min_stride_axis() { + let a = Array1::::zeros(10); + assert_eq!(a.min_stride_axis(), Axis(0)); + + let a = Array2::::zeros((3, 3)); + assert_eq!(a.min_stride_axis(), Axis(1)); + assert_eq!(a.t().min_stride_axis(), Axis(0)); + + let a = ArrayD::::zeros(vec![3, 3]); + assert_eq!(a.min_stride_axis(), Axis(1)); + assert_eq!(a.t().min_stride_axis(), Axis(0)); + + let min_axis = a.axes().min_by_key(|t| t.2.abs()).unwrap().axis(); + assert_eq!(min_axis, Axis(1)); + + let mut b = ArrayD::::zeros(vec![2, 3, 4, 5]); + assert_eq!(b.min_stride_axis(), Axis(3)); + for ax in 0..3 { + b.swap_axes(3, ax); + assert_eq!(b.min_stride_axis(), Axis(ax)); + b.swap_axes(3, ax); + } + let mut v = b.view(); + v.collapse_axis(Axis(3), 0); + assert_eq!(v.min_stride_axis(), Axis(2)); + + let a = Array2::::zeros((3, 3)); + let v = a.broadcast((8, 3, 3)).unwrap(); + assert_eq!(v.min_stride_axis(), Axis(0)); + let v2 = a.broadcast((1, 3, 3)).unwrap(); + assert_eq!(v2.min_stride_axis(), Axis(2)); + } +} diff --git a/tests/dimension.rs b/tests/dimension.rs index c76b8d7ad..11d6b3e0f 100644 --- a/tests/dimension.rs +++ b/tests/dimension.rs @@ -132,37 +132,6 @@ fn fastest_varying_order() { type ArrayF32 = Array; -/* -#[test] -fn min_stride_axis() { - let a = ArrayF32::zeros(10); - assert_eq!(a.min_stride_axis(), Axis(0)); - - let a = ArrayF32::zeros((3, 3)); - assert_eq!(a.min_stride_axis(), Axis(1)); - assert_eq!(a.t().min_stride_axis(), Axis(0)); - - let a = ArrayF32::zeros(vec![3, 3]); - assert_eq!(a.min_stride_axis(), Axis(1)); - assert_eq!(a.t().min_stride_axis(), Axis(0)); - - let min_axis = a.axes().min_by_key(|t| t.2.abs()).unwrap().axis(); - assert_eq!(min_axis, Axis(1)); - - let mut b = ArrayF32::zeros(vec![2, 3, 4, 5]); - assert_eq!(b.min_stride_axis(), Axis(3)); - for ax in 0..3 { - b.swap_axes(3, ax); - assert_eq!(b.min_stride_axis(), Axis(ax)); - b.swap_axes(3, ax); - } - - let a = ArrayF32::zeros((3, 3)); - let v = a.broadcast((8, 3, 3)).unwrap(); - assert_eq!(v.min_stride_axis(), Axis(0)); -} -*/ - #[test] fn max_stride_axis() { let a = ArrayF32::zeros(10); From 65b6046b0d7e5e1060b81b04d564decde8dc5e66 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 3 Feb 2019 18:20:12 -0500 Subject: [PATCH 38/41] Simplify fold to use min_stride_axis This causes no change in performance according to the relevant benchmarks in `bench1`. --- src/impl_methods.rs | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/src/impl_methods.rs b/src/impl_methods.rs index 02e81736a..2e2c632e9 100644 --- a/src/impl_methods.rs +++ b/src/impl_methods.rs @@ -1853,25 +1853,11 @@ where } else { let mut v = self.view(); // put the narrowest axis at the last position - match v.ndim() { - 0 | 1 => {} - 2 => { - if self.len_of(Axis(1)) <= 1 - || self.len_of(Axis(0)) > 1 - && self.stride_of(Axis(0)).abs() < self.stride_of(Axis(1)).abs() - { - v.swap_axes(0, 1); - } - } - n => { - let last = n - 1; - let narrow_axis = v - .axes() - .filter(|ax| ax.len() > 1) - .min_by_key(|ax| ax.stride().abs()) - .map_or(last, |ax| ax.axis().index()); - v.swap_axes(last, narrow_axis); - } + let n = v.ndim(); + if n > 1 { + let last = n - 1; + let narrow_axis = self.min_stride_axis(); + v.swap_axes(last, narrow_axis.index()); } v.into_elements_base().fold(init, f) } From b0b391aa878a2839a8952fe1d5c6bb8fff6ced2d Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 3 Feb 2019 18:31:30 -0500 Subject: [PATCH 39/41] Improve performance of sum in certain cases This improves the performance of `sum` when an axis is contiguous but the array as a whole is not contiguous. --- src/numeric/impl_numeric.rs | 13 ++++++++++--- src/numeric_util.rs | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/numeric/impl_numeric.rs b/src/numeric/impl_numeric.rs index b8c393372..cf2c0eafd 100644 --- a/src/numeric/impl_numeric.rs +++ b/src/numeric/impl_numeric.rs @@ -32,10 +32,17 @@ impl ArrayBase where A: Clone + Add + num_traits::Zero, { if let Some(slc) = self.as_slice_memory_order() { - numeric_util::pairwise_sum(&slc) - } else { - numeric_util::iterator_pairwise_sum(self.iter()) + return numeric_util::pairwise_sum(&slc); + } + if self.ndim() > 1 { + let ax = self.dim.min_stride_axis(&self.strides); + if self.len_of(ax) >= numeric_util::UNROLL_SIZE && self.stride_of(ax) == 1 { + let partial_sums: Vec<_> = + self.lanes(ax).into_iter().map(|lane| lane.sum()).collect(); + return numeric_util::pure_pairwise_sum(&partial_sums); + } } + numeric_util::iterator_pairwise_sum(self.iter()) } /// Return the sum of all elements in the array. diff --git a/src/numeric_util.rs b/src/numeric_util.rs index 2557f2c68..855a1b803 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -79,7 +79,7 @@ where /// An implementation of pairwise summation for a vector slice that never /// switches to the naive sum algorithm. -fn pure_pairwise_sum(v: &[A]) -> A +pub(crate) fn pure_pairwise_sum(v: &[A]) -> A where A: Clone + Add + Zero, { From 7f04e6fa56b945330278b3e9332b05611165d378 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 3 Feb 2019 13:40:39 -0500 Subject: [PATCH 40/41] Update quickcheck and use quickcheck_macros --- Cargo.toml | 3 +- src/dimension/mod.rs | 157 +++++++++++++++++++++---------------------- src/lib.rs | 2 + src/numeric_util.rs | 10 ++- 4 files changed, 86 insertions(+), 86 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index bdab7df52..ce3b563c7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,7 +45,8 @@ serde = { version = "1.0", optional = true } [dev-dependencies] defmac = "0.2" -quickcheck = { version = "0.7.2", default-features = false } +quickcheck = { version = "0.8.1", default-features = false } +quickcheck_macros = "0.8" rawpointer = "0.1" rand = "0.5.5" diff --git a/src/dimension/mod.rs b/src/dimension/mod.rs index aebcfc662..719acc678 100644 --- a/src/dimension/mod.rs +++ b/src/dimension/mod.rs @@ -629,7 +629,8 @@ mod test { use crate::error::{from_kind, ErrorKind}; use crate::slice::Slice; use num_integer::gcd; - use quickcheck::{quickcheck, TestResult}; + use quickcheck::TestResult; + use quickcheck_macros::quickcheck; #[test] fn slice_indexing_uncommon_strides() { @@ -738,30 +739,29 @@ mod test { can_index_slice::<(), _>(&[], &Ix2(0, 2), &Ix2(2, 1)).unwrap_err(); } - quickcheck! { - fn can_index_slice_not_custom_same_as_can_index_slice(data: Vec, dim: Vec) -> bool { - let dim = IxDyn(&dim); - let result = can_index_slice_not_custom(&data, &dim); - if dim.size_checked().is_none() { - // Avoid overflow `dim.default_strides()` or `dim.fortran_strides()`. - result.is_err() - } else { - result == can_index_slice(&data, &dim, &dim.default_strides()) && - result == can_index_slice(&data, &dim, &dim.fortran_strides()) - } + #[quickcheck] + fn can_index_slice_not_custom_same_as_can_index_slice(data: Vec, dim: Vec) -> bool { + let dim = IxDyn(&dim); + let result = can_index_slice_not_custom(&data, &dim); + if dim.size_checked().is_none() { + // Avoid overflow `dim.default_strides()` or `dim.fortran_strides()`. + result.is_err() + } else { + result == can_index_slice(&data, &dim, &dim.default_strides()) && + result == can_index_slice(&data, &dim, &dim.fortran_strides()) } } - quickcheck! { - fn extended_gcd_solves_eq(a: isize, b: isize) -> bool { - let (g, (x, y)) = extended_gcd(a, b); - a * x + b * y == g - } + #[quickcheck] + fn extended_gcd_solves_eq(a: isize, b: isize) -> bool { + let (g, (x, y)) = extended_gcd(a, b); + a * x + b * y == g + } - fn extended_gcd_correct_gcd(a: isize, b: isize) -> bool { - let (g, _) = extended_gcd(a, b); - g == gcd(a, b) - } + #[quickcheck] + fn extended_gcd_correct_gcd(a: isize, b: isize) -> bool { + let (g, _) = extended_gcd(a, b); + g == gcd(a, b) } #[test] @@ -773,73 +773,72 @@ mod test { assert_eq!(extended_gcd(-5, 0), (5, (-1, 0))); } - quickcheck! { - fn solve_linear_diophantine_eq_solution_existence( - a: isize, b: isize, c: isize - ) -> TestResult { - if a == 0 || b == 0 { - TestResult::discard() - } else { - TestResult::from_bool( - (c % gcd(a, b) == 0) == solve_linear_diophantine_eq(a, b, c).is_some() - ) - } + #[quickcheck] + fn solve_linear_diophantine_eq_solution_existence( + a: isize, b: isize, c: isize + ) -> TestResult { + if a == 0 || b == 0 { + TestResult::discard() + } else { + TestResult::from_bool( + (c % gcd(a, b) == 0) == solve_linear_diophantine_eq(a, b, c).is_some() + ) } + } - fn solve_linear_diophantine_eq_correct_solution( - a: isize, b: isize, c: isize, t: isize - ) -> TestResult { - if a == 0 || b == 0 { - TestResult::discard() - } else { - match solve_linear_diophantine_eq(a, b, c) { - Some((x0, xd)) => { - let x = x0 + xd * t; - let y = (c - a * x) / b; - TestResult::from_bool(a * x + b * y == c) - } - None => TestResult::discard(), + #[quickcheck] + fn solve_linear_diophantine_eq_correct_solution( + a: isize, b: isize, c: isize, t: isize + ) -> TestResult { + if a == 0 || b == 0 { + TestResult::discard() + } else { + match solve_linear_diophantine_eq(a, b, c) { + Some((x0, xd)) => { + let x = x0 + xd * t; + let y = (c - a * x) / b; + TestResult::from_bool(a * x + b * y == c) } + None => TestResult::discard(), } } } - quickcheck! { - fn arith_seq_intersect_correct( - first1: isize, len1: isize, step1: isize, - first2: isize, len2: isize, step2: isize - ) -> TestResult { - use std::cmp; + #[quickcheck] + fn arith_seq_intersect_correct( + first1: isize, len1: isize, step1: isize, + first2: isize, len2: isize, step2: isize + ) -> TestResult { + use std::cmp; - if len1 == 0 || len2 == 0 { - // This case is impossible to reach in `arith_seq_intersect()` - // because the `min*` and `max*` arguments are inclusive. - return TestResult::discard(); - } - let len1 = len1.abs(); - let len2 = len2.abs(); - - // Convert to `min*` and `max*` arguments for `arith_seq_intersect()`. - let last1 = first1 + step1 * (len1 - 1); - let (min1, max1) = (cmp::min(first1, last1), cmp::max(first1, last1)); - let last2 = first2 + step2 * (len2 - 1); - let (min2, max2) = (cmp::min(first2, last2), cmp::max(first2, last2)); - - // Naively determine if the sequences intersect. - let seq1: Vec<_> = (0..len1) - .map(|n| first1 + step1 * n) - .collect(); - let intersects = (0..len2) - .map(|n| first2 + step2 * n) - .any(|elem2| seq1.contains(&elem2)); - - TestResult::from_bool( - arith_seq_intersect( - (min1, max1, if step1 == 0 { 1 } else { step1 }), - (min2, max2, if step2 == 0 { 1 } else { step2 }) - ) == intersects - ) + if len1 == 0 || len2 == 0 { + // This case is impossible to reach in `arith_seq_intersect()` + // because the `min*` and `max*` arguments are inclusive. + return TestResult::discard(); } + let len1 = len1.abs(); + let len2 = len2.abs(); + + // Convert to `min*` and `max*` arguments for `arith_seq_intersect()`. + let last1 = first1 + step1 * (len1 - 1); + let (min1, max1) = (cmp::min(first1, last1), cmp::max(first1, last1)); + let last2 = first2 + step2 * (len2 - 1); + let (min2, max2) = (cmp::min(first2, last2), cmp::max(first2, last2)); + + // Naively determine if the sequences intersect. + let seq1: Vec<_> = (0..len1) + .map(|n| first1 + step1 * n) + .collect(); + let intersects = (0..len2) + .map(|n| first2 + step2 * n) + .any(|elem2| seq1.contains(&elem2)); + + TestResult::from_bool( + arith_seq_intersect( + (min1, max1, if step1 == 0 { 1 } else { step1 }), + (min2, max2, if step2 == 0 { 1 } else { step2 }) + ) == intersects + ) } #[test] diff --git a/src/lib.rs b/src/lib.rs index 39f102e6f..39402bf56 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -105,6 +105,8 @@ extern crate num_integer; #[cfg(test)] extern crate quickcheck; #[cfg(test)] +extern crate quickcheck_macros; +#[cfg(test)] extern crate rand; #[cfg(feature = "docs")] diff --git a/src/numeric_util.rs b/src/numeric_util.rs index 855a1b803..68be6fa14 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -215,14 +215,12 @@ pub fn unrolled_eq(xs: &[A], ys: &[A]) -> bool #[cfg(test)] mod tests { - use quickcheck::quickcheck; + use quickcheck_macros::quickcheck; use std::num::Wrapping; use super::iterator_pairwise_sum; - quickcheck! { - fn iterator_pairwise_sum_is_correct(xs: Vec) -> bool { - let xs: Vec<_> = xs.into_iter().map(|x| Wrapping(x)).collect(); - iterator_pairwise_sum(xs.iter()) == xs.iter().sum() - } + #[quickcheck] + fn iterator_pairwise_sum_is_correct(xs: Vec>) -> bool { + iterator_pairwise_sum(xs.iter()) == xs.iter().sum() } } From 1ed1a638e7ec86c37de36a30c21bdd12f92940c5 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 3 Feb 2019 19:07:03 -0500 Subject: [PATCH 41/41] Clarify capacity calculation in iterator_pairwise_sum --- src/numeric_util.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/numeric_util.rs b/src/numeric_util.rs index 68be6fa14..f6540fb92 100644 --- a/src/numeric_util.rs +++ b/src/numeric_util.rs @@ -62,7 +62,7 @@ where A: Clone + Add + Zero, { let (len, _) = iter.size_hint(); - let cap = len.saturating_sub(1) / NAIVE_SUM_THRESHOLD + 1; // ceiling of division + let cap = len / NAIVE_SUM_THRESHOLD + if len % NAIVE_SUM_THRESHOLD != 0 { 1 } else { 0 }; let mut partial_sums = Vec::with_capacity(cap); let (_, last_sum) = iter.fold((0, A::zero()), |(count, partial_sum), x| { if count < NAIVE_SUM_THRESHOLD {