diff --git a/server/algorithms/src/clustering/greedy.rs b/server/algorithms/src/clustering/greedy.rs index 15095c9e..e99b27c4 100644 --- a/server/algorithms/src/clustering/greedy.rs +++ b/server/algorithms/src/clustering/greedy.rs @@ -14,7 +14,7 @@ use sysinfo::{System, SystemExt}; use crate::{ clustering::rtree::{cluster::Cluster, point::Point}, - rtree::{self, point::ToPoint}, + rtree::{self, point::ToPoint, SortDedupe}, s2, utils::info_log, }; @@ -106,34 +106,34 @@ impl<'a> Greedy { } fn generate_clusters(&self, point: &Point, neighbors: Vec<&Point>) -> HashSet { - let mut set = HashSet::::new(); + let mut clusters = HashSet::new(); for neighbor in neighbors.iter() { for i in 0..=7 { let ratio = i as Precision / 8 as Precision; let new_point = point.interpolate(neighbor, ratio, 0., 0.); - set.insert(new_point); + clusters.insert(new_point); if self.cluster_mode == ClusterMode::Balanced { for wiggle in vec![0.00025, 0.0001] { let wiggle_lat: Precision = wiggle / 2.; let wiggle_lon = wiggle; let random_point = point.interpolate(neighbor, ratio, wiggle_lat, wiggle_lon); - set.insert(random_point); + clusters.insert(random_point); let random_point = point.interpolate(neighbor, ratio, wiggle_lat, -wiggle_lon); - set.insert(random_point); + clusters.insert(random_point); let random_point = point.interpolate(neighbor, ratio, -wiggle_lat, wiggle_lon); - set.insert(random_point); + clusters.insert(random_point); let random_point = point.interpolate(neighbor, ratio, -wiggle_lat, -wiggle_lon); - set.insert(random_point); + clusters.insert(random_point); } } } } - set.insert(point.to_owned()); - set + clusters.insert(point.to_owned()); + clusters } fn gen_estimated_clusters(&self, tree: &RTree) -> Vec { @@ -150,7 +150,7 @@ impl<'a> Greedy { }) .reduce(HashSet::new, |a, b| a.union(&b).cloned().collect()); - clusters.into_iter().collect::>() + clusters.into_iter().collect() } fn flat_map_cells(&self, cell: CellID) -> Vec { @@ -222,20 +222,7 @@ impl<'a> Greedy { let size = (clusters_with_data .par_iter() - .map(|cluster| { - let mut start = std::mem::size_of_val(&cluster); - - for point in cluster.point.center { - start += std::mem::size_of_val(&point); - } - for point in cluster.points.iter() { - start += std::mem::size_of_val(point); - } - for point in cluster.all.iter() { - start += std::mem::size_of_val(point); - } - start - }) + .map(|cluster| cluster.get_size()) .sum::() / 1024 / 1024) @@ -362,8 +349,7 @@ impl<'a> Greedy { if points.len() < highest { None } else { - points.sort_by(|a, b| a.cell_id.cmp(&b.cell_id)); - points.dedup_by(|a, b| a.cell_id == b.cell_id); + points.sort_dedupe(); Some(Cluster { point: cluster.point, @@ -452,23 +438,10 @@ impl<'a> Greedy { self.radius, &clusters.iter().map(|c| c.point.center).collect(), ); - clusters.par_iter_mut().for_each(|cluster| { - cluster.points = cluster - .all - .iter() - // .collect::>() - // .into_par_iter() - .filter_map(|p| { - if cluster_tree.locate_all_at_point(&p.center).count() == 1 { - Some(*p) - } else { - None - } - }) - .collect::>() - .into_iter() - .collect(); - }); + clusters + .par_iter_mut() + .for_each(|cluster| cluster.update_unique(&cluster_tree)); + clusters.retain(|cluster| cluster.points.len() >= self.min_points); log::info!( diff --git a/server/algorithms/src/rtree/cluster.rs b/server/algorithms/src/rtree/cluster.rs index b5962edf..e2533064 100644 --- a/server/algorithms/src/rtree/cluster.rs +++ b/server/algorithms/src/rtree/cluster.rs @@ -3,7 +3,9 @@ use std::{ hash::{Hash, Hasher}, }; -use super::point::Point; +use rstar::RTree; + +use super::{point::Point, SortDedupe}; #[derive(Debug, Clone)] pub struct Cluster<'a> { @@ -24,6 +26,37 @@ impl<'a> Cluster<'a> { points: points.collect(), } } + + pub fn get_size(&self) -> usize { + let mut size = std::mem::size_of_val(&self); + + for point in self.point.center { + size += std::mem::size_of_val(&point); + } + for point in self.points.iter() { + size += std::mem::size_of_val(point); + } + for point in self.all.iter() { + size += std::mem::size_of_val(point); + } + size + } + + pub fn update_unique(&mut self, tree: &RTree) { + let mut points: Vec<_> = self + .all + .iter() + .filter_map(|p| { + if tree.locate_all_at_point(&p.center).count() == 1 { + Some(*p) + } else { + None + } + }) + .collect(); + points.sort_dedupe(); + self.points = points; + } } impl PartialEq for Cluster<'_> { diff --git a/server/algorithms/src/rtree/mod.rs b/server/algorithms/src/rtree/mod.rs index d539d478..b633b840 100644 --- a/server/algorithms/src/rtree/mod.rs +++ b/server/algorithms/src/rtree/mod.rs @@ -7,6 +7,10 @@ use point::Point; use rayon::prelude::{IntoParallelRefIterator, ParallelIterator}; use rstar::RTree; +pub trait SortDedupe { + fn sort_dedupe(&mut self); +} + pub fn spawn(radius: Precision, points: &SingleVec) -> RTree { let points = points .iter() diff --git a/server/algorithms/src/rtree/point.rs b/server/algorithms/src/rtree/point.rs index 1971ae00..f4be1453 100644 --- a/server/algorithms/src/rtree/point.rs +++ b/server/algorithms/src/rtree/point.rs @@ -7,10 +7,11 @@ use geo::Coord; use geohash::encode; use map_3d::EARTH_RADIUS; use model::api::Precision; +use rayon::slice::ParallelSliceMut; use rstar::{PointDistance, RTreeObject, AABB}; use s2::{cell::Cell, cellid::CellID, latlng::LatLng}; -use super::cluster::Cluster; +use super::{cluster::Cluster, SortDedupe}; #[derive(Debug, Clone, Copy)] pub struct Point { @@ -149,3 +150,10 @@ impl<'a> From> for Point { cluster.point } } + +impl SortDedupe for Vec<&Point> { + fn sort_dedupe(&mut self) { + self.par_sort_by(|a, b| a.cell_id.cmp(&b.cell_id)); + self.dedup_by(|a, b| a.cell_id == b.cell_id); + } +}