From 8c762f5e61d41404ba6592b942695284e2563f0d Mon Sep 17 00:00:00 2001 From: Derick M <58572875+TurtIeSocks@users.noreply.github.com> Date: Wed, 24 Jul 2024 19:57:40 -0400 Subject: [PATCH] feat: honeycomb clustering algo --- client/src/assets/constants.ts | 1 + server/algorithms/src/clustering/greedy.rs | 56 ++++++++++++++++++---- server/algorithms/src/clustering/mod.rs | 6 ++- server/algorithms/src/rtree/cluster.rs | 21 ++++---- server/algorithms/src/rtree/point.rs | 2 +- server/algorithms/src/utils.rs | 2 +- server/model/src/api/cluster_mode.rs | 6 ++- server/model/src/api/poracle.rs | 8 ++-- server/model/src/api/single_vec.rs | 2 +- 9 files changed, 76 insertions(+), 28 deletions(-) diff --git a/client/src/assets/constants.ts b/client/src/assets/constants.ts index ab3f7ca7..26ab10d3 100644 --- a/client/src/assets/constants.ts +++ b/client/src/assets/constants.ts @@ -195,6 +195,7 @@ export const TTH = ['All', 'Known', 'Unknown'] as const export const CALC_MODE = ['Radius', 'S2'] as const export const CLUSTERING_MODES = [ + 'Honeycomb', 'Fastest', 'Fast', 'Balanced', diff --git a/server/algorithms/src/clustering/greedy.rs b/server/algorithms/src/clustering/greedy.rs index da02d729..d5c09230 100644 --- a/server/algorithms/src/clustering/greedy.rs +++ b/server/algorithms/src/clustering/greedy.rs @@ -1,3 +1,4 @@ +use geojson::{Feature, Geometry}; use hashbrown::HashSet; use model::api::{cluster_mode::ClusterMode, single_vec::SingleVec, GetBbox, Precision}; @@ -13,6 +14,7 @@ use std::{io::Write, time::Instant}; use sysinfo::{System, SystemExt}; use crate::{ + bootstrap::radius, clustering::rtree::{cluster::Cluster, point::Point}, rtree::{self, point::ToPoint, SortDedupe}, s2, @@ -179,6 +181,32 @@ impl<'a> Greedy { .collect() } + fn get_honeycomb_clusters(&self, points: &SingleVec) -> Vec { + let bbox = points.get_bbox(); + let bbox_unwrap = bbox.clone().unwrap(); + + let feat = Feature { + bbox: bbox.clone(), + geometry: Some(Geometry { + bbox, + foreign_members: None, + value: geojson::Value::Polygon(vec![vec![ + vec![bbox_unwrap[0], bbox_unwrap[1]], + vec![bbox_unwrap[2], bbox_unwrap[1]], + vec![bbox_unwrap[2], bbox_unwrap[3]], + vec![bbox_unwrap[0], bbox_unwrap[3]], + vec![bbox_unwrap[0], bbox_unwrap[1]], + ]]), + }), + ..Default::default() + }; + radius::BootstrapRadius::new(&feat, self.radius) + .result() + .into_iter() + .map(|p| Point::new(self.radius, 20, p)) + .collect() + } + fn associate_clusters( &'a self, points: &'a SingleVec, @@ -189,6 +217,7 @@ impl<'a> Greedy { let time = Instant::now(); let clusters_with_data: Vec = match self.cluster_mode { + ClusterMode::Honeycomb => self.get_honeycomb_clusters(points), ClusterMode::Better | ClusterMode::Best => self.get_s2_clusters(points, point_tree), ClusterMode::Fast => self.gen_estimated_clusters(point_tree), _ => { @@ -207,7 +236,7 @@ impl<'a> Greedy { points.push(point); } if points.len() < self.min_points { - log::debug!("Empty"); + // log::debug!("Empty"); None } else { Some(Cluster::new(cluster, points, vec![])) @@ -330,7 +359,7 @@ impl<'a> Greedy { Some(Cluster { point: cluster.point, - points: points.into_iter().collect(), + unique: points.into_iter().collect(), all: cluster.all.iter().map(|p| *p).collect(), }) } @@ -345,10 +374,10 @@ impl<'a> Greedy { let time = Instant::now(); local_clusters.par_sort_by(|a, b| { - if a.points.len() == b.points.len() { + if a.unique.len() == b.unique.len() { b.all.len().cmp(&a.all.len()) } else { - b.points.len().cmp(&a.points.len()) + b.unique.len().cmp(&a.unique.len()) } }); sorting_time += time.elapsed().as_secs_f32(); @@ -358,13 +387,13 @@ impl<'a> Greedy { if new_clusters.len() >= self.max_clusters { break 'greedy; } - if cluster.points.len() >= current { - for point in cluster.points.iter() { + if cluster.unique.len() >= current { + for point in cluster.unique.iter() { if blocked_points.contains(point) { continue 'cluster; } } - for point in cluster.points.iter() { + for point in cluster.unique.iter() { blocked_points.insert(point); } new_clusters.insert(cluster); @@ -421,9 +450,9 @@ impl<'a> Greedy { clusters .par_iter_mut() - .for_each(|cluster| cluster.update_unique(&cluster_tree)); + .for_each(|cluster| cluster.set_unique(&cluster_tree)); - clusters.retain(|cluster| cluster.points.len() >= self.min_points); + clusters.retain(|cluster| cluster.unique.len() >= self.min_points); log::info!( "finished updating unique in {:.2}s", @@ -435,6 +464,9 @@ impl<'a> Greedy { } fn check_missing(&self, clusters: Vec, points: &SingleVec) -> HashSet { + let time = Instant::now(); + log::info!("checking coverage"); + let missing = { let seen_points = clusters .par_iter() @@ -462,6 +494,12 @@ impl<'a> Greedy { clusters.extend(missing); + log::info!( + "finished checking coverage in {:.2}s", + time.elapsed().as_secs_f32() + ); + log::info!("final solution size: {}", clusters.len()); + clusters } } diff --git a/server/algorithms/src/clustering/mod.rs b/server/algorithms/src/clustering/mod.rs index d2e9d1aa..35ebf3e8 100644 --- a/server/algorithms/src/clustering/mod.rs +++ b/server/algorithms/src/clustering/mod.rs @@ -45,7 +45,11 @@ pub fn main( let clusters = fastest::main(&data_points, radius, min_points); clusters } - ClusterMode::Balanced | ClusterMode::Fast | ClusterMode::Better | ClusterMode::Best => { + ClusterMode::Honeycomb + | ClusterMode::Balanced + | ClusterMode::Fast + | ClusterMode::Better + | ClusterMode::Best => { let mut greedy = Greedy::default(); greedy .set_cluster_mode(cluster_mode) diff --git a/server/algorithms/src/rtree/cluster.rs b/server/algorithms/src/rtree/cluster.rs index 028075f1..cdfe32c0 100644 --- a/server/algorithms/src/rtree/cluster.rs +++ b/server/algorithms/src/rtree/cluster.rs @@ -11,13 +11,13 @@ use super::{point::Point, SortDedupe}; #[derive(Debug, Clone)] pub struct Cluster<'a> { pub point: Point, - pub points: Vec<&'a Point>, + pub unique: Vec<&'a Point>, pub all: Vec<&'a Point>, } impl<'a> Cluster<'a> { - pub fn new(point: Point, all: Vec<&'a Point>, points: Vec<&'a Point>) -> Cluster<'a> { - Cluster { point, all, points } + pub fn new(point: Point, all: Vec<&'a Point>, unique: Vec<&'a Point>) -> Cluster<'a> { + Cluster { point, all, unique } } pub fn get_size(&self) -> usize { @@ -26,7 +26,7 @@ impl<'a> Cluster<'a> { for point in self.point.center { size += std::mem::size_of_val(&point); } - for point in self.points.iter() { + for point in self.unique.iter() { size += std::mem::size_of_val(point); } for point in self.all.iter() { @@ -35,7 +35,7 @@ impl<'a> Cluster<'a> { size } - pub fn update_all(&mut self, tree: &'a RTree) { + pub fn set_all(&mut self, tree: &'a RTree) { let mut points: Vec<_> = tree .locate_all_at_point(&self.point.center) .into_iter() @@ -44,12 +44,13 @@ impl<'a> Cluster<'a> { self.all = points; } - pub fn update_unique(&mut self, tree: &RTree) { + pub fn set_unique(&mut self, tree: &RTree) { let mut points: Vec<_> = self .all .par_iter() .filter_map(|p| { - if tree.locate_all_at_point(&p.center).count() == 1 { + let points = tree.locate_all_at_point(&p.center).count(); + if points == 1 { Some(*p) } else { None @@ -57,7 +58,7 @@ impl<'a> Cluster<'a> { }) .collect(); points.sort_dedupe(); - self.points = points; + self.unique = points; } } @@ -85,8 +86,8 @@ impl Display for Cluster<'_> { if i == self.all.len() - 1 { "" } else { ", " } )); } - display.push_str(&format!(")\nPoints: {} (", self.points.len())); - for (i, point) in self.points.iter().enumerate() { + display.push_str(&format!(")\nPoints: {} (", self.unique.len())); + for (i, point) in self.unique.iter().enumerate() { display.push_str(&format!( "{}{}", point._get_geohash(), diff --git a/server/algorithms/src/rtree/point.rs b/server/algorithms/src/rtree/point.rs index f4be1453..bbefc3e1 100644 --- a/server/algorithms/src/rtree/point.rs +++ b/server/algorithms/src/rtree/point.rs @@ -59,7 +59,7 @@ impl Point { [lat.to_degrees(), lng.to_degrees()] } - fn haversine_distance(&self, other: &[Precision; 2]) -> Precision { + pub fn haversine_distance(&self, other: &[Precision; 2]) -> Precision { let theta1 = self.center[0].to_radians(); let theta2 = other[0].to_radians(); let delta_theta = (other[0] - self.center[0]).to_radians(); diff --git a/server/algorithms/src/utils.rs b/server/algorithms/src/utils.rs index 5df424f4..bf65cbbe 100644 --- a/server/algorithms/src/utils.rs +++ b/server/algorithms/src/utils.rs @@ -101,7 +101,7 @@ pub fn _debug_clusters(clusters: &HashSet, file_suffix: &str) { ); unique_map.insert( cluster.point._get_geohash(), - cluster.points.iter().map(|p| p._get_geohash()).collect(), + cluster.unique.iter().map(|p| p._get_geohash()).collect(), ); for point in cluster.all.iter() { point_map diff --git a/server/model/src/api/cluster_mode.rs b/server/model/src/api/cluster_mode.rs index 0ed487d2..95bb9140 100644 --- a/server/model/src/api/cluster_mode.rs +++ b/server/model/src/api/cluster_mode.rs @@ -2,6 +2,7 @@ use serde::Deserialize; #[derive(Debug, Clone)] pub enum ClusterMode { + Honeycomb, Fastest, Fast, Balanced, @@ -17,6 +18,7 @@ impl<'de> Deserialize<'de> for ClusterMode { { let s: String = serde::Deserialize::deserialize(deserializer)?; match s.to_lowercase().as_str() { + "honeycomb" => Ok(ClusterMode::Honeycomb), "fastest" => Ok(ClusterMode::Fastest), "fast" => Ok(ClusterMode::Fast), "balanced" => Ok(ClusterMode::Balanced), @@ -38,7 +40,8 @@ impl<'de> Deserialize<'de> for ClusterMode { impl PartialEq for ClusterMode { fn eq(&self, other: &Self) -> bool { match (self, other) { - (ClusterMode::Fastest, ClusterMode::Fastest) + (ClusterMode::Honeycomb, ClusterMode::Honeycomb) + | (ClusterMode::Fastest, ClusterMode::Fastest) | (ClusterMode::Fast, ClusterMode::Fast) | (ClusterMode::Balanced, ClusterMode::Balanced) | (ClusterMode::Better, ClusterMode::Better) @@ -53,6 +56,7 @@ impl Eq for ClusterMode {} impl ToString for ClusterMode { fn to_string(&self) -> String { match self { + ClusterMode::Honeycomb => "Honeycomb", ClusterMode::Fastest => "Fastest", ClusterMode::Fast => "Fast", ClusterMode::Balanced => "Balanced", diff --git a/server/model/src/api/poracle.rs b/server/model/src/api/poracle.rs index a4589e14..186de6de 100644 --- a/server/model/src/api/poracle.rs +++ b/server/model/src/api/poracle.rs @@ -65,10 +65,10 @@ impl ToSingleVec for Poracle { impl ToMultiVec for Poracle { fn to_multi_vec(self) -> multi_vec::MultiVec { - if let Some(multipath) = self.multipath.as_ref() { - multipath.to_vec() - } else if let Some(path) = self.path.as_ref() { - vec![path.to_vec()] + if let Some(multipath) = self.multipath { + multipath + } else if let Some(path) = self.path { + vec![path] } else { vec![] } diff --git a/server/model/src/api/single_vec.rs b/server/model/src/api/single_vec.rs index 39392bb5..2694cf53 100644 --- a/server/model/src/api/single_vec.rs +++ b/server/model/src/api/single_vec.rs @@ -22,7 +22,7 @@ impl GetBbox for SingleVec { /// \[min_lon, min_lat, max_lon, max_lat\] fn get_bbox(&self) -> Option> { let mut bbox = if self.is_empty() { - vec![] + vec![0., 0., 0., 0.] } else { vec![ Precision::INFINITY,