From edd44e533556fe1913d276a0c01381cd3a652349 Mon Sep 17 00:00:00 2001 From: ljeub-pometry <97447091+ljeub-pometry@users.noreply.github.com> Date: Tue, 30 Apr 2024 18:19:09 +0200 Subject: [PATCH] fix property aggregation methods (#1578) * fix property aggregation methods * clean up the implementations and try to fix lists of lengths one --- python/tests/test_iterables.py | 36 +- raphtory/src/core/mod.rs | 82 +++- .../python/graph/properties/temporal_props.rs | 445 +++++------------- 3 files changed, 223 insertions(+), 340 deletions(-) diff --git a/python/tests/test_iterables.py b/python/tests/test_iterables.py index 6e50d32c69..8492ff8ad8 100644 --- a/python/tests/test_iterables.py +++ b/python/tests/test_iterables.py @@ -88,20 +88,20 @@ def test_empty_lists(): for src, dst, val, time in edges_str: g.add_edge(time, src, dst, {"value_dec": val}) assert ( - g.nodes.out_edges.properties.temporal.get("value_dec") - .values() - .median() - .median() - .median() - == 5 + g.nodes.out_edges.properties.temporal.get("value_dec") + .values() + .median() + .median() + .median() + == 5 ) assert ( - g.nodes.out_edges.properties.temporal.get("value_dec") - .values() - .mean() - .mean() - .mean() - == 1.3333333333333335 + int(g.nodes.out_edges.properties.temporal.get("value_dec") + .values() + .mean() + .mean() + .mean() * 100) + == 616 ) @@ -157,13 +157,13 @@ def test_propiterable(): assert sorted(total) == [3, 5, 15, 32] total = g.nodes.out_edges.properties.get("value_dec").median() - assert list(total) == [10, 5, 10, 2, None] + assert list(total) == [10, 5, 5, 1, None] total = g.node("1").in_edges.properties.get("value_dec").sum() assert total == 6 total = g.node("1").in_edges.properties.get("value_dec").median() - assert total == 5 + assert total == 1 def test_pypropvalue_list_listlist(): @@ -197,7 +197,7 @@ def test_pypropvalue_list_listlist(): assert res.median() == 5 assert res_v.median() == 5 - assert res_ll.median() == [5, 5, 10, 5, 5] + assert res_ll.median() == [5, 5, 10, 2, 5] assert res.min() == 1 assert res_v.min() == 1 @@ -214,9 +214,9 @@ def test_pypropvalue_list_listlist(): assert res.mean() == res.average() == 18.5 assert res_v.mean() == res_v.average() == 26.2 assert ( - res_ll.mean() - == res_ll.average() - == [26.2, 35.666666666666664, 11.666666666666666, 4.5, 5.0] + res_ll.mean() + == res_ll.average() + == [26.2, 35.666666666666664, 11.666666666666666, 4.5, 5.0] ) diff --git a/raphtory/src/core/mod.rs b/raphtory/src/core/mod.rs index b0049ecfc1..35ff1f802c 100644 --- a/raphtory/src/core/mod.rs +++ b/raphtory/src/core/mod.rs @@ -37,6 +37,7 @@ use std::{ ops::Deref, sync::Arc, }; +use thiserror::Error; #[cfg(test)] extern crate core; @@ -172,6 +173,46 @@ pub enum PropType { DTime, } +impl PropType { + pub fn is_numeric(&self) -> bool { + matches!( + self, + PropType::U8 + | PropType::U16 + | PropType::U32 + | PropType::U64 + | PropType::I32 + | PropType::I64 + | PropType::F32 + | PropType::F64 + ) + } + + pub fn is_str(&self) -> bool { + matches!(self, PropType::Str) + } + + pub fn is_bool(&self) -> bool { + matches!(self, PropType::Bool) + } + + pub fn is_date(&self) -> bool { + matches!(self, PropType::DTime | PropType::NDTime) + } + + pub fn has_add(&self) -> bool { + self.is_numeric() || self.is_str() + } + + pub fn has_divide(&self) -> bool { + self.is_numeric() + } + + pub fn has_cmp(&self) -> bool { + self.is_bool() || self.is_numeric() || self.is_str() || self.is_date() + } +} + /// Denotes the types of properties allowed to be stored in the graph. #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)] pub enum Prop { @@ -207,6 +248,7 @@ impl PartialOrd for Prop { (Prop::F64(a), Prop::F64(b)) => a.partial_cmp(b), (Prop::Bool(a), Prop::Bool(b)) => a.partial_cmp(b), (Prop::NDTime(a), Prop::NDTime(b)) => a.partial_cmp(b), + (Prop::DTime(a), Prop::DTime(b)) => a.partial_cmp(b), _ => None, } } @@ -283,6 +325,22 @@ impl Prop { } } + pub fn min(self, other: Prop) -> Option { + self.partial_cmp(&other).map(|ord| match ord { + Ordering::Less => self, + Ordering::Equal => self, + Ordering::Greater => other, + }) + } + + pub fn max(self, other: Prop) -> Option { + self.partial_cmp(&other).map(|ord| match ord { + Ordering::Less => other, + Ordering::Equal => self, + Ordering::Greater => self, + }) + } + pub fn divide(self, other: Prop) -> Option { match (self, other) { (Prop::U8(a), Prop::U8(b)) if b != 0 => Some(Prop::U8(a / b)), @@ -296,6 +354,20 @@ impl Prop { _ => None, } } + + pub fn as_f64(&self) -> Option { + match self { + Prop::U8(v) => Some(*v as f64), + Prop::U16(v) => Some(*v as f64), + Prop::I32(v) => Some(*v as f64), + Prop::I64(v) => Some(*v as f64), + Prop::U32(v) => Some(*v as f64), + Prop::U64(v) => Some(*v as f64), + Prop::F32(v) => Some(*v as f64), + Prop::F64(v) => Some(*v as f64), + _ => None, + } + } } pub trait PropUnwrap: Sized { @@ -786,7 +858,7 @@ mod serde_value_into_prop { #[cfg(test)] mod test_arc_str { - use crate::core::{ArcStr, OptionAsStr}; + use crate::core::{ArcStr, OptionAsStr, Prop}; use std::sync::Arc; #[test] @@ -814,4 +886,12 @@ mod test_arc_str { assert_eq!(opt_str_2, Some("test")); assert_eq!(opt_str3, Some("test")); } + + #[test] + fn test_prop_min_max() { + let v1 = Prop::I64(4); + let v2 = Prop::I64(2); + assert_eq!(v1.clone().max(v2.clone()), Some(Prop::I64(4))); + assert_eq!((v1.min(v2)), Some(Prop::I64(2))); + } } diff --git a/raphtory/src/python/graph/properties/temporal_props.rs b/raphtory/src/python/graph/properties/temporal_props.rs index 9a347447e1..ad6444455c 100644 --- a/raphtory/src/python/graph/properties/temporal_props.rs +++ b/raphtory/src/python/graph/properties/temporal_props.rs @@ -1,5 +1,5 @@ use crate::{ - core::{utils::time::IntoTime, ArcStr, Prop}, + core::{utils::time::IntoTime, ArcStr, Prop, PropType}, db::api::{ properties::{ dyn_props::{DynTemporalProperties, DynTemporalProperty}, @@ -262,30 +262,44 @@ impl PyTemporalProp { /// /// Returns: /// Prop: The sum of all property values. - pub fn sum(&self) -> Prop { - let mut it_iter = self.prop.iter(); - let first = it_iter.next().unwrap(); - it_iter.fold(first.1, |acc, elem| acc.add(elem.1).unwrap()) + pub fn sum(&self) -> Option { + compute_generalised_sum(self.prop.values(), |a, b| a.add(b), |d| d.dtype().has_add()) } /// Find the minimum property value and its associated time. /// /// Returns: /// (i64, Prop): A tuple containing the time and the minimum property value. - pub fn min(&self) -> (i64, Prop) { - let mut it_iter = self.prop.iter(); - let first = it_iter.next().unwrap(); - it_iter.fold(first, |acc, elem| if acc.1 <= elem.1 { acc } else { elem }) + pub fn min(&self) -> Option<(i64, Prop)> { + compute_generalised_sum( + self.prop.iter(), + |a, b| { + if a.1.partial_cmp(&b.1)?.is_le() { + Some(a) + } else { + Some(b) + } + }, + |d| d.1.dtype().has_cmp(), + ) } /// Find the maximum property value and its associated time. /// /// Returns: /// (i64, Prop): A tuple containing the time and the maximum property value. - pub fn max(&self) -> (i64, Prop) { - let mut it_iter = self.prop.iter(); - let first = it_iter.next().unwrap(); - it_iter.fold(first, |acc, elem| if acc.1 >= elem.1 { acc } else { elem }) + pub fn max(&self) -> Option<(i64, Prop)> { + compute_generalised_sum( + self.prop.iter(), + |a, b| { + if a.1.partial_cmp(&b.1)?.is_ge() { + Some(a) + } else { + Some(b) + } + }, + |d| d.1.dtype().has_cmp(), + ) } /// Count the number of properties. @@ -309,22 +323,7 @@ impl PyTemporalProp { /// Returns: /// Prop: The mean of each property values, or None if count is zero. pub fn mean(&self) -> Option { - let sum: Prop = self.sum(); - let count: usize = self.count(); - if count == 0 { - return None; - } - match sum { - Prop::I32(s) => Some(Prop::F32(s as f32 / count as f32)), - Prop::I64(s) => Some(Prop::F64(s as f64 / count as f64)), - Prop::U32(s) => Some(Prop::F32(s as f32 / count as f32)), - Prop::U8(s) => Some(Prop::F64(s as f64 / count as f64)), // needs a test - Prop::U16(s) => Some(Prop::F64(s as f64 / count as f64)), // needs a test - Prop::U64(s) => Some(Prop::F64(s as f64 / count as f64)), - Prop::F32(s) => Some(Prop::F32(s / count as f32)), - Prop::F64(s) => Some(Prop::F64(s / count as f64)), - _ => None, - } + compute_mean(self.prop.values()) } /// Compute the median of all property values. @@ -332,18 +331,17 @@ impl PyTemporalProp { /// Returns: /// (i64, Prop): A tuple containing the time and the median property value, or None if empty pub fn median(&self) -> Option<(i64, Prop)> { - let it_iter = self.prop.iter(); - let mut vec: Vec<(i64, Prop)> = it_iter.collect_vec(); - // let mut vec: Vec<(i64, Prop)> = it_iter.map(|(t, v)| (t, v.clone())).collect(); - vec.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap()); - let len = vec.len(); - if len == 0 { + let mut sorted: Vec<(i64, Prop)> = self.prop.iter().collect(); + if !sorted.get(0)?.1.dtype().has_cmp() { return None; } - if len % 2 == 0 { - return Some(vec[len / 2 - 1].clone()); + sorted.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal)); + let len = sorted.len(); + if len == 0 { + None + } else { + Some(sorted[(len - 1) / 2].clone()) } - Some(vec[len / 2].clone()) } pub fn __repr__(&self) -> String { @@ -813,97 +811,60 @@ impl PyPropHistValueListList { } pub fn median(&self) -> PyPropValueListList { + let builder = self.builder.clone(); + (move || builder().map(|it| it.map(|data| compute_median(data)))).into() + } + + pub fn max(&self) -> PyPropValueListList { let builder = self.builder.clone(); (move || { builder().map(|it| { - it.map(|itit| { - let mut sorted: Vec = itit.into_iter().collect(); - sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); - let len = sorted.len(); - match len { - 0 => None, - 1 => Some(sorted[0].clone()), - _ => { - let a = &sorted[len / 2]; - Some(a.clone()) - } - } + it.map(|data| { + compute_generalised_sum(data, |a, b| a.max(b), |d| d.dtype().has_cmp()) }) }) }) .into() } - pub fn sum(&self) -> PyPropValueListList { + pub fn min(&self) -> PyPropValueListList { let builder = self.builder.clone(); (move || { builder().map(|it| { - it.map(|itit| { - let mut itit_iter = itit.into_iter(); - let first = itit_iter.next(); - itit_iter.clone().fold(first, |acc, elem| match acc { - Some(a) => a.add(elem), - _ => None, - }) + it.map(|data| { + compute_generalised_sum(data, |a, b| a.min(b), |d| d.dtype().has_cmp()) }) }) }) .into() } - pub fn mean(&self) -> PyPropValueListList { + pub fn sum(&self) -> PyPropValueListList { let builder = self.builder.clone(); (move || { builder().map(|it| { - it.map(|itit| { - let mut itit_iter = itit.into_iter(); - let first = itit_iter.next(); - let sum = itit_iter.clone().fold(first, |acc, elem| match acc { - Some(a) => a.add(elem), - _ => Some(elem), - }); - let count = itit_iter.count(); - if count == 0 { - return None; - } - match sum { - Some(Prop::U8(s)) => Some(Prop::U8(s / count as u8)), - Some(Prop::U16(s)) => Some(Prop::U16(s / count as u16)), - Some(Prop::I32(s)) => Some(Prop::I32(s / count as i32)), - Some(Prop::I64(s)) => Some(Prop::I64(s / count as i64)), - Some(Prop::U32(s)) => Some(Prop::U32(s / count as u32)), - Some(Prop::U64(s)) => Some(Prop::U64(s / count as u64)), - Some(Prop::F32(s)) => Some(Prop::F32(s / count as f32)), - Some(Prop::F64(s)) => Some(Prop::F64(s / count as f64)), - _ => None, - } + it.map(|data| { + compute_generalised_sum(data, |a, b| a.add(b), |d| d.dtype().has_add()) }) }) }) .into() } + + pub fn mean(&self) -> PyPropValueListList { + let builder = self.builder.clone(); + (move || builder().map(|it| it.map(|data| compute_mean(data)))).into() + } } #[pymethods] impl PropIterable { pub fn sum(&self) -> PropValue { - let mut it_iter = self.iter(); - let first = it_iter.next(); - it_iter.fold(first, |acc, elem| acc.and_then(|val| val.add(elem))) + compute_generalised_sum(self.iter(), |a, b| a.add(b), |d| d.dtype().has_add()) } pub fn median(&self) -> PropValue { - let mut sorted: Vec = self.iter().collect(); - sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); - let len = sorted.len(); - match len { - 0 => None, - 1 => Some(sorted[0].clone()), - _ => { - let a = &sorted[len / 2]; - Some(a.clone()) - } - } + compute_median(self.iter().collect()) } pub fn count(&self) -> usize { @@ -911,29 +872,11 @@ impl PropIterable { } pub fn min(&self) -> PropValue { - let mut sorted: Vec = self.iter().collect(); - sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); - let len = sorted.len(); - match len { - 0 => None, - _ => { - let a = &sorted[0]; - Some(a.clone()) - } - } + compute_generalised_sum(self.iter(), |a, b| a.min(b), |d| d.dtype().has_cmp()) } pub fn max(&self) -> PropValue { - let mut sorted: Vec = self.iter().collect(); - sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); - let len = sorted.len(); - match len { - 0 => None, - _ => { - let a = &sorted[len - 1]; - Some(a.clone()) - } - } + compute_generalised_sum(self.iter(), |a, b| a.max(b), |d| d.dtype().has_cmp()) } pub fn average(&self) -> PropValue { @@ -941,22 +884,7 @@ impl PropIterable { } pub fn mean(&self) -> PropValue { - let sum: PropValue = self.sum(); - let count: usize = self.iter().collect::>().len(); - if count == 0 { - return None; - } - match sum { - Some(Prop::U8(s)) => Some(Prop::F64(s as f64 / count as f64)), - Some(Prop::U16(s)) => Some(Prop::F64(s as f64 / count as f64)), - Some(Prop::I32(s)) => Some(Prop::F32(s as f32 / count as f32)), - Some(Prop::I64(s)) => Some(Prop::F64(s as f64 / count as f64)), - Some(Prop::U32(s)) => Some(Prop::F32(s as f32 / count as f32)), - Some(Prop::U64(s)) => Some(Prop::F64(s as f64 / count as f64)), - Some(Prop::F32(s)) => Some(Prop::F32(s / count as f32)), - Some(Prop::F64(s)) => Some(Prop::F64(s / count as f64)), - _ => None, - } + compute_mean(self.iter()) } } @@ -965,11 +893,8 @@ impl PyPropHistValueList { pub fn sum(&self) -> PyPropValueList { let builder = self.builder.clone(); (move || { - builder().map(|it| { - let mut it_iter = it.into_iter(); - let first = it_iter.next(); - it_iter.fold(first, |acc, elem| acc.and_then(|val| val.add(elem))) - }) + builder() + .map(|data| compute_generalised_sum(data, |a, b| a.add(b), |d| d.dtype().has_add())) }) .into() } @@ -977,16 +902,8 @@ impl PyPropHistValueList { pub fn min(&self) -> PyPropValueList { let builder = self.builder.clone(); (move || { - builder().map(|it| { - let mut it_iter = it.into_iter(); - let first = it_iter.next(); - it_iter.fold(first, |a, b| { - match PartialOrd::partial_cmp(&a, &Some(b.clone())) { - Some(std::cmp::Ordering::Less) => a, - _ => Some(b), - } - }) - }) + builder() + .map(|data| compute_generalised_sum(data, |a, b| a.min(b), |d| d.dtype().has_cmp())) }) .into() } @@ -994,38 +911,15 @@ impl PyPropHistValueList { pub fn max(&self) -> PyPropValueList { let builder = self.builder.clone(); (move || { - builder().map(|it| { - let mut it_iter = it.into_iter(); - let first = it_iter.next(); - it_iter.fold(first, |a, b| { - match PartialOrd::partial_cmp(&a, &Some(b.clone())) { - Some(std::cmp::Ordering::Greater) => a, - _ => Some(b), - } - }) - }) + builder() + .map(|data| compute_generalised_sum(data, |a, b| a.max(b), |d| d.dtype().has_cmp())) }) .into() } pub fn median(&self) -> PyPropValueList { let builder = self.builder.clone(); - (move || { - builder().map(|it| { - let mut sorted: Vec = it.clone(); - sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); - let len = sorted.len(); - match len { - 0 => None, - 1 => Some(sorted[0].clone()), - _ => { - let a = &sorted[len / 2]; - Some(a.clone()) - } - } - }) - }) - .into() + (move || builder().map(|data| compute_median(data))).into() } pub fn average(&self) -> PyPropValueList { @@ -1034,29 +928,7 @@ impl PyPropHistValueList { pub fn mean(&self) -> PyPropValueList { let builder = self.builder.clone(); - (move || { - builder().map(|it| { - let mut it_iter = it.clone().into_iter(); - let first = it_iter.next(); - let sum = it_iter.fold(first, |acc, elem| acc.and_then(|val| val.add(elem))); - let count = it.len(); - if count == 0 { - return None; - } - match sum { - Some(Prop::U8(s)) => Some(Prop::F64(s as f64 / count as f64)), - Some(Prop::U16(s)) => Some(Prop::F64(s as f64 / count as f64)), - Some(Prop::I32(s)) => Some(Prop::F32(s as f32 / count as f32)), - Some(Prop::I64(s)) => Some(Prop::F64(s as f64 / count as f64)), - Some(Prop::U32(s)) => Some(Prop::F32(s as f32 / count as f32)), - Some(Prop::U64(s)) => Some(Prop::F64(s as f64 / count as f64)), - Some(Prop::F32(s)) => Some(Prop::F32(s / count as f32)), - Some(Prop::F64(s)) => Some(Prop::F64(s / count as f64)), - _ => None, - } - }) - }) - .into() + (move || builder().map(|data| compute_mean(data))).into() } pub fn count(&self) -> UsizeIterable { @@ -1073,14 +945,11 @@ impl PyPropHistValueList { #[pymethods] impl PyPropValueList { pub fn sum(&self) -> Option { - self.iter() - .reduce(|acc, elem| match (acc, elem) { - (Some(a), Some(b)) => a.add(b), - (Some(a), None) => Some(a), - (None, Some(b)) => Some(b), - _ => None, - }) - .flatten() + compute_generalised_sum( + self.iter().flatten(), + |a, b| a.add(b), + |d| d.dtype().has_add(), + ) } pub fn count(&self) -> usize { @@ -1088,67 +957,32 @@ impl PyPropValueList { } pub fn min(&self) -> PropValue { - let mut sorted: Vec = self.iter().collect(); - sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); - let len = sorted.len(); - match len { - 0 => None, - _ => { - let a = &sorted[0]; - a.clone() - } - } + compute_generalised_sum( + self.iter().flatten(), + |a, b| a.min(b), + |d| d.dtype().has_cmp(), + ) } pub fn max(&self) -> PropValue { - let mut sorted: Vec = self.iter().collect(); - sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); - let len = sorted.len(); - match len { - 0 => None, - _ => { - let a = &sorted[len - 1]; - a.clone() - } - } + compute_generalised_sum( + self.iter().flatten(), + |a, b| a.max(b), + |d| d.dtype().has_cmp(), + ) } pub fn drop_none(&self) -> PyPropValueList { let builder = self.builder.clone(); - (move || builder().filter(|x| x.is_some())).into() + (move || builder().flatten()).into() } pub fn median(&self) -> PropValue { - let mut sorted: Vec = self.iter().collect(); - sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); - let len = sorted.len(); - match len { - 0 => None, - 1 => sorted[0].clone(), - _ => { - let a = &sorted[len / 2]; - a.clone() - } - } + compute_median(self.iter().flatten().collect()) } pub fn mean(&self) -> PropValue { - let sum: PropValue = self.sum(); - let count: usize = self.iter().collect::>().len(); - if count == 0 { - return None; - } - match sum { - Some(Prop::U8(s)) => Some(Prop::F64(s as f64 / count as f64)), - Some(Prop::U16(s)) => Some(Prop::F64(s as f64 / count as f64)), - Some(Prop::I32(s)) => Some(Prop::F32(s as f32 / count as f32)), - Some(Prop::I64(s)) => Some(Prop::F64(s as f64 / count as f64)), - Some(Prop::U32(s)) => Some(Prop::F32(s as f32 / count as f32)), - Some(Prop::U64(s)) => Some(Prop::F64(s as f64 / count as f64)), - Some(Prop::F32(s)) => Some(Prop::F32(s / count as f32)), - Some(Prop::F64(s)) => Some(Prop::F64(s / count as f64)), - _ => None, - } + compute_mean(self.iter().flatten()) } pub fn average(&self) -> PropValue { @@ -1162,14 +996,7 @@ impl PyPropValueListList { let builder = self.builder.clone(); (move || { builder().map(|it| { - let mut it_iter = it.into_iter(); - let first = it_iter.next().flatten(); - it_iter.fold(first, |acc, elem| match (acc, elem) { - (Some(a), Some(b)) => a.add(b), - (Some(a), None) => Some(a), - (None, Some(b)) => Some(b), - _ => None, - }) + compute_generalised_sum(it.flatten(), |a, b| a.add(b), |d| d.dtype().has_add()) }) }) .into() @@ -1179,14 +1006,7 @@ impl PyPropValueListList { let builder = self.builder.clone(); (move || { builder().map(|it| { - let mut it_iter = it.into_iter(); - let first = it_iter.next().unwrap(); - it_iter.fold(first, |a, b| { - match PartialOrd::partial_cmp(&a, &Some(b.clone().unwrap())) { - Some(std::cmp::Ordering::Less) => a, - _ => Some(b.clone().unwrap()), - } - }) + compute_generalised_sum(it.flatten(), |a, b| a.min(b), |d| d.dtype().has_cmp()) }) }) .into() @@ -1196,14 +1016,7 @@ impl PyPropValueListList { let builder = self.builder.clone(); (move || { builder().map(|it| { - let mut it_iter = it.into_iter(); - let first = it_iter.next().unwrap(); - it_iter.fold(first, |a, b| { - match PartialOrd::partial_cmp(&a, &Some(b.clone().unwrap())) { - Some(std::cmp::Ordering::Greater) => a, - _ => Some(b.clone().unwrap()), - } - }) + compute_generalised_sum(it.flatten(), |a, b| a.max(b), |d| d.dtype().has_cmp()) }) }) .into() @@ -1215,56 +1028,13 @@ impl PyPropValueListList { pub fn mean(&self) -> PyPropValueList { let builder = self.builder.clone(); - (move || { - builder().map(|mut it| { - let mut count: usize = 1; - let first = it.next().flatten(); - let sum = it.fold(first, |acc, elem| { - count += 1; - match (acc, elem) { - (Some(a), Some(b)) => a.add(b), - (Some(a), None) => Some(a), - (None, Some(b)) => Some(b), - _ => None, - } - }); - if count == 0 { - return None; - } - match sum { - Some(Prop::U8(s)) => Some(Prop::F64(s as f64 / count as f64)), - Some(Prop::U16(s)) => Some(Prop::F64(s as f64 / count as f64)), - Some(Prop::I32(s)) => Some(Prop::F32(s as f32 / count as f32)), - Some(Prop::I64(s)) => Some(Prop::F64(s as f64 / count as f64)), - Some(Prop::U32(s)) => Some(Prop::F32(s as f32 / count as f32)), - Some(Prop::U64(s)) => Some(Prop::F64(s as f64 / count as f64)), - Some(Prop::F32(s)) => Some(Prop::F32(s / count as f32)), - Some(Prop::F64(s)) => Some(Prop::F64(s / count as f64)), - _ => None, - } - }) - }) - .into() + (move || builder().map(|it| compute_mean(it.flatten()))).into() } pub fn median(&self) -> PyPropValueList { let builder = self.builder.clone(); - (move || { - builder().map(|it| { - let mut sorted: Vec = it.into_iter().collect(); - sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); - let len = sorted.len(); - match len { - 0 => None, - 1 => sorted[0].clone(), - _ => { - let a = &sorted[len / 2]; - a.clone() - } - } - }) - }) - .into() + + (move || builder().map(|it| compute_median(it.flatten().collect()))).into() } pub fn flatten(&self) -> PyPropValueList { @@ -1300,3 +1070,36 @@ py_iterable_comp!( PyPropHistItemsListCmp, PyPropHistItemsListListCmp ); + +fn compute_median(mut data: Vec) -> Option { + if data.is_empty() || !data[0].dtype().has_cmp() { + return None; + } + data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); + Some(data[(data.len() - 1) / 2].clone()) +} + +fn compute_mean(data: impl IntoIterator) -> Option { + let mut iter = data.into_iter(); + let first_value = iter.next()?; + let mut sum = first_value.as_f64()?; + let mut count = 1usize; + for value in iter { + sum += value.as_f64()?; + count += 1; + } + Some(Prop::F64(sum / count as f64)) +} + +fn compute_generalised_sum( + data: impl IntoIterator, + op: impl Fn(V, V) -> Option, + check: impl Fn(&V) -> bool, +) -> Option { + let mut iter = data.into_iter(); + let first_value = iter.next()?; + if !check(&first_value) { + return None; + } + iter.try_fold(first_value, |acc, v| op(acc, v)) +}