Skip to content

Commit

Permalink
feat: honeycomb clustering algo
Browse files Browse the repository at this point in the history
  • Loading branch information
TurtIeSocks committed Jul 24, 2024
1 parent 679b8de commit 8c762f5
Show file tree
Hide file tree
Showing 9 changed files with 76 additions and 28 deletions.
1 change: 1 addition & 0 deletions client/src/assets/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ export const TTH = ['All', 'Known', 'Unknown'] as const
export const CALC_MODE = ['Radius', 'S2'] as const

export const CLUSTERING_MODES = [
'Honeycomb',
'Fastest',
'Fast',
'Balanced',
Expand Down
56 changes: 47 additions & 9 deletions server/algorithms/src/clustering/greedy.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use geojson::{Feature, Geometry};
use hashbrown::HashSet;
use model::api::{cluster_mode::ClusterMode, single_vec::SingleVec, GetBbox, Precision};

Expand All @@ -13,6 +14,7 @@ use std::{io::Write, time::Instant};
use sysinfo::{System, SystemExt};

use crate::{
bootstrap::radius,
clustering::rtree::{cluster::Cluster, point::Point},
rtree::{self, point::ToPoint, SortDedupe},
s2,
Expand Down Expand Up @@ -179,6 +181,32 @@ impl<'a> Greedy {
.collect()
}

/// Builds candidate cluster centers by bootstrapping a radius-based grid
/// over the bounding box of `points`.
///
/// The bounding box is turned into a closed rectangular GeoJSON polygon,
/// handed to `radius::BootstrapRadius` together with `self.radius`, and
/// every coordinate it yields is wrapped in a `Point`.
///
/// Returns an empty vector when `points` has no usable bounding box
/// (`get_bbox` returned `None` or fewer than four values) instead of
/// panicking on `unwrap`/indexing.
fn get_honeycomb_clusters(&self, points: &SingleVec) -> Vec<Point> {
    // `get_bbox` documents its layout as [min_lon, min_lat, max_lon, max_lat].
    // Validate once up front so the corner lookups below cannot panic.
    let bbox = match points.get_bbox() {
        Some(bbox) if bbox.len() >= 4 => bbox,
        _ => return Vec::new(),
    };
    let (min_lon, min_lat, max_lon, max_lat) = (bbox[0], bbox[1], bbox[2], bbox[3]);

    // Rectangle ring; the first corner is repeated at the end to close it.
    let ring = vec![
        vec![min_lon, min_lat],
        vec![max_lon, min_lat],
        vec![max_lon, max_lat],
        vec![min_lon, max_lat],
        vec![min_lon, min_lat],
    ];

    let feat = Feature {
        // The feature and its geometry carry the same bbox; one clone suffices.
        bbox: Some(bbox.clone()),
        geometry: Some(Geometry {
            bbox: Some(bbox),
            foreign_members: None,
            value: geojson::Value::Polygon(vec![ring]),
        }),
        ..Default::default()
    };

    // NOTE(review): the literal 20 is passed through unchanged; it is
    // presumably the S2 cell level used by `Point::new` — confirm.
    radius::BootstrapRadius::new(&feat, self.radius)
        .result()
        .into_iter()
        .map(|p| Point::new(self.radius, 20, p))
        .collect()
}

fn associate_clusters(
&'a self,
points: &'a SingleVec,
Expand All @@ -189,6 +217,7 @@ impl<'a> Greedy {

let time = Instant::now();
let clusters_with_data: Vec<Cluster> = match self.cluster_mode {
ClusterMode::Honeycomb => self.get_honeycomb_clusters(points),
ClusterMode::Better | ClusterMode::Best => self.get_s2_clusters(points, point_tree),
ClusterMode::Fast => self.gen_estimated_clusters(point_tree),
_ => {
Expand All @@ -207,7 +236,7 @@ impl<'a> Greedy {
points.push(point);
}
if points.len() < self.min_points {
log::debug!("Empty");
// log::debug!("Empty");
None
} else {
Some(Cluster::new(cluster, points, vec![]))
Expand Down Expand Up @@ -330,7 +359,7 @@ impl<'a> Greedy {

Some(Cluster {
point: cluster.point,
points: points.into_iter().collect(),
unique: points.into_iter().collect(),
all: cluster.all.iter().map(|p| *p).collect(),
})
}
Expand All @@ -345,10 +374,10 @@ impl<'a> Greedy {

let time = Instant::now();
local_clusters.par_sort_by(|a, b| {
if a.points.len() == b.points.len() {
if a.unique.len() == b.unique.len() {
b.all.len().cmp(&a.all.len())
} else {
b.points.len().cmp(&a.points.len())
b.unique.len().cmp(&a.unique.len())
}
});
sorting_time += time.elapsed().as_secs_f32();
Expand All @@ -358,13 +387,13 @@ impl<'a> Greedy {
if new_clusters.len() >= self.max_clusters {
break 'greedy;
}
if cluster.points.len() >= current {
for point in cluster.points.iter() {
if cluster.unique.len() >= current {
for point in cluster.unique.iter() {
if blocked_points.contains(point) {
continue 'cluster;
}
}
for point in cluster.points.iter() {
for point in cluster.unique.iter() {
blocked_points.insert(point);
}
new_clusters.insert(cluster);
Expand Down Expand Up @@ -421,9 +450,9 @@ impl<'a> Greedy {

clusters
.par_iter_mut()
.for_each(|cluster| cluster.update_unique(&cluster_tree));
.for_each(|cluster| cluster.set_unique(&cluster_tree));

clusters.retain(|cluster| cluster.points.len() >= self.min_points);
clusters.retain(|cluster| cluster.unique.len() >= self.min_points);

log::info!(
"finished updating unique in {:.2}s",
Expand All @@ -435,6 +464,9 @@ impl<'a> Greedy {
}

fn check_missing(&self, clusters: Vec<Cluster>, points: &SingleVec) -> HashSet<Point> {
let time = Instant::now();
log::info!("checking coverage");

let missing = {
let seen_points = clusters
.par_iter()
Expand Down Expand Up @@ -462,6 +494,12 @@ impl<'a> Greedy {

clusters.extend(missing);

log::info!(
"finished checking coverage in {:.2}s",
time.elapsed().as_secs_f32()
);
log::info!("final solution size: {}", clusters.len());

clusters
}
}
6 changes: 5 additions & 1 deletion server/algorithms/src/clustering/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,11 @@ pub fn main(
let clusters = fastest::main(&data_points, radius, min_points);
clusters
}
ClusterMode::Balanced | ClusterMode::Fast | ClusterMode::Better | ClusterMode::Best => {
ClusterMode::Honeycomb
| ClusterMode::Balanced
| ClusterMode::Fast
| ClusterMode::Better
| ClusterMode::Best => {
let mut greedy = Greedy::default();
greedy
.set_cluster_mode(cluster_mode)
Expand Down
21 changes: 11 additions & 10 deletions server/algorithms/src/rtree/cluster.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ use super::{point::Point, SortDedupe};
#[derive(Debug, Clone)]
pub struct Cluster<'a> {
pub point: Point,
pub points: Vec<&'a Point>,
pub unique: Vec<&'a Point>,
pub all: Vec<&'a Point>,
}

impl<'a> Cluster<'a> {
pub fn new(point: Point, all: Vec<&'a Point>, points: Vec<&'a Point>) -> Cluster<'a> {
Cluster { point, all, points }
pub fn new(point: Point, all: Vec<&'a Point>, unique: Vec<&'a Point>) -> Cluster<'a> {
Cluster { point, all, unique }
}

pub fn get_size(&self) -> usize {
Expand All @@ -26,7 +26,7 @@ impl<'a> Cluster<'a> {
for point in self.point.center {
size += std::mem::size_of_val(&point);
}
for point in self.points.iter() {
for point in self.unique.iter() {
size += std::mem::size_of_val(point);
}
for point in self.all.iter() {
Expand All @@ -35,7 +35,7 @@ impl<'a> Cluster<'a> {
size
}

pub fn update_all(&mut self, tree: &'a RTree<Point>) {
pub fn set_all(&mut self, tree: &'a RTree<Point>) {
let mut points: Vec<_> = tree
.locate_all_at_point(&self.point.center)
.into_iter()
Expand All @@ -44,20 +44,21 @@ impl<'a> Cluster<'a> {
self.all = points;
}

pub fn update_unique(&mut self, tree: &RTree<Point>) {
pub fn set_unique(&mut self, tree: &RTree<Point>) {
let mut points: Vec<_> = self
.all
.par_iter()
.filter_map(|p| {
if tree.locate_all_at_point(&p.center).count() == 1 {
let points = tree.locate_all_at_point(&p.center).count();
if points == 1 {
Some(*p)
} else {
None
}
})
.collect();
points.sort_dedupe();
self.points = points;
self.unique = points;
}
}

Expand Down Expand Up @@ -85,8 +86,8 @@ impl Display for Cluster<'_> {
if i == self.all.len() - 1 { "" } else { ", " }
));
}
display.push_str(&format!(")\nPoints: {} (", self.points.len()));
for (i, point) in self.points.iter().enumerate() {
display.push_str(&format!(")\nPoints: {} (", self.unique.len()));
for (i, point) in self.unique.iter().enumerate() {
display.push_str(&format!(
"{}{}",
point._get_geohash(),
Expand Down
2 changes: 1 addition & 1 deletion server/algorithms/src/rtree/point.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ impl Point {
[lat.to_degrees(), lng.to_degrees()]
}

fn haversine_distance(&self, other: &[Precision; 2]) -> Precision {
pub fn haversine_distance(&self, other: &[Precision; 2]) -> Precision {
let theta1 = self.center[0].to_radians();
let theta2 = other[0].to_radians();
let delta_theta = (other[0] - self.center[0]).to_radians();
Expand Down
2 changes: 1 addition & 1 deletion server/algorithms/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ pub fn _debug_clusters(clusters: &HashSet<Cluster>, file_suffix: &str) {
);
unique_map.insert(
cluster.point._get_geohash(),
cluster.points.iter().map(|p| p._get_geohash()).collect(),
cluster.unique.iter().map(|p| p._get_geohash()).collect(),
);
for point in cluster.all.iter() {
point_map
Expand Down
6 changes: 5 additions & 1 deletion server/model/src/api/cluster_mode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use serde::Deserialize;

#[derive(Debug, Clone)]
pub enum ClusterMode {
Honeycomb,
Fastest,
Fast,
Balanced,
Expand All @@ -17,6 +18,7 @@ impl<'de> Deserialize<'de> for ClusterMode {
{
let s: String = serde::Deserialize::deserialize(deserializer)?;
match s.to_lowercase().as_str() {
"honeycomb" => Ok(ClusterMode::Honeycomb),
"fastest" => Ok(ClusterMode::Fastest),
"fast" => Ok(ClusterMode::Fast),
"balanced" => Ok(ClusterMode::Balanced),
Expand All @@ -38,7 +40,8 @@ impl<'de> Deserialize<'de> for ClusterMode {
impl PartialEq for ClusterMode {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
(ClusterMode::Fastest, ClusterMode::Fastest)
(ClusterMode::Honeycomb, ClusterMode::Honeycomb)
| (ClusterMode::Fastest, ClusterMode::Fastest)
| (ClusterMode::Fast, ClusterMode::Fast)
| (ClusterMode::Balanced, ClusterMode::Balanced)
| (ClusterMode::Better, ClusterMode::Better)
Expand All @@ -53,6 +56,7 @@ impl Eq for ClusterMode {}
impl ToString for ClusterMode {
fn to_string(&self) -> String {
match self {
ClusterMode::Honeycomb => "Honeycomb",
ClusterMode::Fastest => "Fastest",
ClusterMode::Fast => "Fast",
ClusterMode::Balanced => "Balanced",
Expand Down
8 changes: 4 additions & 4 deletions server/model/src/api/poracle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ impl ToSingleVec for Poracle {

impl ToMultiVec for Poracle {
fn to_multi_vec(self) -> multi_vec::MultiVec {
if let Some(multipath) = self.multipath.as_ref() {
multipath.to_vec()
} else if let Some(path) = self.path.as_ref() {
vec![path.to_vec()]
if let Some(multipath) = self.multipath {
multipath
} else if let Some(path) = self.path {
vec![path]
} else {
vec![]
}
Expand Down
2 changes: 1 addition & 1 deletion server/model/src/api/single_vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ impl GetBbox for SingleVec {
/// \[min_lon, min_lat, max_lon, max_lat\]
fn get_bbox(&self) -> Option<Vec<Precision>> {
let mut bbox = if self.is_empty() {
vec![]
vec![0., 0., 0., 0.]
} else {
vec![
Precision::INFINITY,
Expand Down

0 comments on commit 8c762f5

Please sign in to comment.