Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: honeycomb clustering algo #227

Merged
merged 1 commit into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions client/src/assets/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ export const TTH = ['All', 'Known', 'Unknown'] as const
export const CALC_MODE = ['Radius', 'S2'] as const

export const CLUSTERING_MODES = [
'Honeycomb',
'Fastest',
'Fast',
'Balanced',
Expand Down
56 changes: 47 additions & 9 deletions server/algorithms/src/clustering/greedy.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use geojson::{Feature, Geometry};
use hashbrown::HashSet;
use model::api::{cluster_mode::ClusterMode, single_vec::SingleVec, GetBbox, Precision};

Expand All @@ -13,6 +14,7 @@ use std::{io::Write, time::Instant};
use sysinfo::{System, SystemExt};

use crate::{
bootstrap::radius,
clustering::rtree::{cluster::Cluster, point::Point},
rtree::{self, point::ToPoint, SortDedupe},
s2,
Expand Down Expand Up @@ -179,6 +181,32 @@ impl<'a> Greedy {
.collect()
}

/// Builds the candidate cluster centers for `ClusterMode::Honeycomb`.
///
/// The bounding box of `points` is turned into a rectangular GeoJSON polygon
/// feature, which is handed to `radius::BootstrapRadius` together with
/// `self.radius`. Every coordinate in the bootstrap result is wrapped in a
/// `Point`.
///
/// Returns an empty vector when no usable bounding box is available
/// (`get_bbox` yields `None` or fewer than four values) instead of panicking.
fn get_honeycomb_clusters(&self, points: &SingleVec) -> Vec<Point> {
    let bbox = match points.get_bbox() {
        Some(bbox) => bbox,
        // Without a bounding box there is no area to cover.
        None => return Vec::new(),
    };

    // `get_bbox` documents its layout as [min_lon, min_lat, max_lon, max_lat].
    // Any trailing values are ignored, matching the previous index-based access.
    let (min_lon, min_lat, max_lon, max_lat) = match bbox.as_slice() {
        &[min_lon, min_lat, max_lon, max_lat, ..] => (min_lon, min_lat, max_lon, max_lat),
        // A malformed bbox cannot describe a rectangle; bail out rather than
        // panic on an out-of-bounds index.
        _ => return Vec::new(),
    };

    // Closed ring around the bbox: the first corner is repeated at the end.
    let ring = vec![
        vec![min_lon, min_lat],
        vec![max_lon, min_lat],
        vec![max_lon, max_lat],
        vec![min_lon, max_lat],
        vec![min_lon, min_lat],
    ];

    let feat = Feature {
        bbox: Some(bbox.clone()),
        geometry: Some(Geometry {
            bbox: Some(bbox),
            foreign_members: None,
            value: geojson::Value::Polygon(vec![ring]),
        }),
        ..Default::default()
    };

    // NOTE(review): the `20` passed to `Point::new` is unexplained here;
    // presumably an S2 cell level — confirm against `Point::new`.
    radius::BootstrapRadius::new(&feat, self.radius)
        .result()
        .into_iter()
        .map(|p| Point::new(self.radius, 20, p))
        .collect()
}

fn associate_clusters(
&'a self,
points: &'a SingleVec,
Expand All @@ -189,6 +217,7 @@ impl<'a> Greedy {

let time = Instant::now();
let clusters_with_data: Vec<Cluster> = match self.cluster_mode {
ClusterMode::Honeycomb => self.get_honeycomb_clusters(points),
ClusterMode::Better | ClusterMode::Best => self.get_s2_clusters(points, point_tree),
ClusterMode::Fast => self.gen_estimated_clusters(point_tree),
_ => {
Expand All @@ -207,7 +236,7 @@ impl<'a> Greedy {
points.push(point);
}
if points.len() < self.min_points {
log::debug!("Empty");
// log::debug!("Empty");
None
} else {
Some(Cluster::new(cluster, points, vec![]))
Expand Down Expand Up @@ -330,7 +359,7 @@ impl<'a> Greedy {

Some(Cluster {
point: cluster.point,
points: points.into_iter().collect(),
unique: points.into_iter().collect(),
all: cluster.all.iter().map(|p| *p).collect(),
})
}
Expand All @@ -345,10 +374,10 @@ impl<'a> Greedy {

let time = Instant::now();
local_clusters.par_sort_by(|a, b| {
if a.points.len() == b.points.len() {
if a.unique.len() == b.unique.len() {
b.all.len().cmp(&a.all.len())
} else {
b.points.len().cmp(&a.points.len())
b.unique.len().cmp(&a.unique.len())
}
});
sorting_time += time.elapsed().as_secs_f32();
Expand All @@ -358,13 +387,13 @@ impl<'a> Greedy {
if new_clusters.len() >= self.max_clusters {
break 'greedy;
}
if cluster.points.len() >= current {
for point in cluster.points.iter() {
if cluster.unique.len() >= current {
for point in cluster.unique.iter() {
if blocked_points.contains(point) {
continue 'cluster;
}
}
for point in cluster.points.iter() {
for point in cluster.unique.iter() {
blocked_points.insert(point);
}
new_clusters.insert(cluster);
Expand Down Expand Up @@ -421,9 +450,9 @@ impl<'a> Greedy {

clusters
.par_iter_mut()
.for_each(|cluster| cluster.update_unique(&cluster_tree));
.for_each(|cluster| cluster.set_unique(&cluster_tree));

clusters.retain(|cluster| cluster.points.len() >= self.min_points);
clusters.retain(|cluster| cluster.unique.len() >= self.min_points);

log::info!(
"finished updating unique in {:.2}s",
Expand All @@ -435,6 +464,9 @@ impl<'a> Greedy {
}

fn check_missing(&self, clusters: Vec<Cluster>, points: &SingleVec) -> HashSet<Point> {
let time = Instant::now();
log::info!("checking coverage");

let missing = {
let seen_points = clusters
.par_iter()
Expand Down Expand Up @@ -462,6 +494,12 @@ impl<'a> Greedy {

clusters.extend(missing);

log::info!(
"finished checking coverage in {:.2}s",
time.elapsed().as_secs_f32()
);
log::info!("final solution size: {}", clusters.len());

clusters
}
}
6 changes: 5 additions & 1 deletion server/algorithms/src/clustering/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,11 @@ pub fn main(
let clusters = fastest::main(&data_points, radius, min_points);
clusters
}
ClusterMode::Balanced | ClusterMode::Fast | ClusterMode::Better | ClusterMode::Best => {
ClusterMode::Honeycomb
| ClusterMode::Balanced
| ClusterMode::Fast
| ClusterMode::Better
| ClusterMode::Best => {
let mut greedy = Greedy::default();
greedy
.set_cluster_mode(cluster_mode)
Expand Down
21 changes: 11 additions & 10 deletions server/algorithms/src/rtree/cluster.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ use super::{point::Point, SortDedupe};
#[derive(Debug, Clone)]
pub struct Cluster<'a> {
pub point: Point,
pub points: Vec<&'a Point>,
pub unique: Vec<&'a Point>,
pub all: Vec<&'a Point>,
}

impl<'a> Cluster<'a> {
pub fn new(point: Point, all: Vec<&'a Point>, points: Vec<&'a Point>) -> Cluster<'a> {
Cluster { point, all, points }
pub fn new(point: Point, all: Vec<&'a Point>, unique: Vec<&'a Point>) -> Cluster<'a> {
Cluster { point, all, unique }
}

pub fn get_size(&self) -> usize {
Expand All @@ -26,7 +26,7 @@ impl<'a> Cluster<'a> {
for point in self.point.center {
size += std::mem::size_of_val(&point);
}
for point in self.points.iter() {
for point in self.unique.iter() {
size += std::mem::size_of_val(point);
}
for point in self.all.iter() {
Expand All @@ -35,7 +35,7 @@ impl<'a> Cluster<'a> {
size
}

pub fn update_all(&mut self, tree: &'a RTree<Point>) {
pub fn set_all(&mut self, tree: &'a RTree<Point>) {
let mut points: Vec<_> = tree
.locate_all_at_point(&self.point.center)
.into_iter()
Expand All @@ -44,20 +44,21 @@ impl<'a> Cluster<'a> {
self.all = points;
}

pub fn update_unique(&mut self, tree: &RTree<Point>) {
pub fn set_unique(&mut self, tree: &RTree<Point>) {
let mut points: Vec<_> = self
.all
.par_iter()
.filter_map(|p| {
if tree.locate_all_at_point(&p.center).count() == 1 {
let points = tree.locate_all_at_point(&p.center).count();
if points == 1 {
Some(*p)
} else {
None
}
})
.collect();
points.sort_dedupe();
self.points = points;
self.unique = points;
}
}

Expand Down Expand Up @@ -85,8 +86,8 @@ impl Display for Cluster<'_> {
if i == self.all.len() - 1 { "" } else { ", " }
));
}
display.push_str(&format!(")\nPoints: {} (", self.points.len()));
for (i, point) in self.points.iter().enumerate() {
display.push_str(&format!(")\nPoints: {} (", self.unique.len()));
for (i, point) in self.unique.iter().enumerate() {
display.push_str(&format!(
"{}{}",
point._get_geohash(),
Expand Down
2 changes: 1 addition & 1 deletion server/algorithms/src/rtree/point.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ impl Point {
[lat.to_degrees(), lng.to_degrees()]
}

fn haversine_distance(&self, other: &[Precision; 2]) -> Precision {
pub fn haversine_distance(&self, other: &[Precision; 2]) -> Precision {
let theta1 = self.center[0].to_radians();
let theta2 = other[0].to_radians();
let delta_theta = (other[0] - self.center[0]).to_radians();
Expand Down
2 changes: 1 addition & 1 deletion server/algorithms/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ pub fn _debug_clusters(clusters: &HashSet<Cluster>, file_suffix: &str) {
);
unique_map.insert(
cluster.point._get_geohash(),
cluster.points.iter().map(|p| p._get_geohash()).collect(),
cluster.unique.iter().map(|p| p._get_geohash()).collect(),
);
for point in cluster.all.iter() {
point_map
Expand Down
6 changes: 5 additions & 1 deletion server/model/src/api/cluster_mode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use serde::Deserialize;

#[derive(Debug, Clone)]
pub enum ClusterMode {
Honeycomb,
Fastest,
Fast,
Balanced,
Expand All @@ -17,6 +18,7 @@ impl<'de> Deserialize<'de> for ClusterMode {
{
let s: String = serde::Deserialize::deserialize(deserializer)?;
match s.to_lowercase().as_str() {
"honeycomb" => Ok(ClusterMode::Honeycomb),
"fastest" => Ok(ClusterMode::Fastest),
"fast" => Ok(ClusterMode::Fast),
"balanced" => Ok(ClusterMode::Balanced),
Expand All @@ -38,7 +40,8 @@ impl<'de> Deserialize<'de> for ClusterMode {
impl PartialEq for ClusterMode {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
(ClusterMode::Fastest, ClusterMode::Fastest)
(ClusterMode::Honeycomb, ClusterMode::Honeycomb)
| (ClusterMode::Fastest, ClusterMode::Fastest)
| (ClusterMode::Fast, ClusterMode::Fast)
| (ClusterMode::Balanced, ClusterMode::Balanced)
| (ClusterMode::Better, ClusterMode::Better)
Expand All @@ -53,6 +56,7 @@ impl Eq for ClusterMode {}
impl ToString for ClusterMode {
fn to_string(&self) -> String {
match self {
ClusterMode::Honeycomb => "Honeycomb",
ClusterMode::Fastest => "Fastest",
ClusterMode::Fast => "Fast",
ClusterMode::Balanced => "Balanced",
Expand Down
8 changes: 4 additions & 4 deletions server/model/src/api/poracle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ impl ToSingleVec for Poracle {

impl ToMultiVec for Poracle {
fn to_multi_vec(self) -> multi_vec::MultiVec {
if let Some(multipath) = self.multipath.as_ref() {
multipath.to_vec()
} else if let Some(path) = self.path.as_ref() {
vec![path.to_vec()]
if let Some(multipath) = self.multipath {
multipath
} else if let Some(path) = self.path {
vec![path]
} else {
vec![]
}
Expand Down
2 changes: 1 addition & 1 deletion server/model/src/api/single_vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ impl GetBbox for SingleVec {
/// \[min_lon, min_lat, max_lon, max_lat\]
fn get_bbox(&self) -> Option<Vec<Precision>> {
let mut bbox = if self.is_empty() {
vec![]
vec![0., 0., 0., 0.]
} else {
vec![
Precision::INFINITY,
Expand Down
Loading