Skip to content

Commit

Permalink
refactor: cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
TurtIeSocks committed Oct 27, 2023
1 parent 8df0d64 commit d425caa
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 45 deletions.
59 changes: 16 additions & 43 deletions server/algorithms/src/clustering/greedy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use sysinfo::{System, SystemExt};

use crate::{
clustering::rtree::{cluster::Cluster, point::Point},
rtree::{self, point::ToPoint},
rtree::{self, point::ToPoint, SortDedupe},
s2,
utils::info_log,
};
Expand Down Expand Up @@ -106,34 +106,34 @@ impl<'a> Greedy {
}

/// Builds the set of candidate cluster positions derived from `point` and
/// each of its `neighbors`.
///
/// For every neighbor, 8 evenly spaced interpolations between `point` and
/// that neighbor are inserted. In `ClusterMode::Balanced`, each interpolated
/// position is additionally "wiggled" by two magnitudes in all four lat/lon
/// sign combinations to densify the candidate space. `point` itself is
/// always part of the returned set.
fn generate_clusters(&self, point: &Point, neighbors: Vec<&Point>) -> HashSet<Point> {
    let mut clusters = HashSet::new();
    for neighbor in neighbors.iter() {
        for i in 0..=7 {
            let ratio = i as Precision / 8.;
            clusters.insert(point.interpolate(neighbor, ratio, 0., 0.));
            if self.cluster_mode == ClusterMode::Balanced {
                // Array instead of `vec!` — avoids a heap allocation on
                // every iteration of this hot loop.
                for wiggle in [0.00025, 0.0001] {
                    let wiggle_lat: Precision = wiggle / 2.;
                    let wiggle_lon: Precision = wiggle;
                    // All four lat/lon sign combinations, replacing the
                    // previous fourfold copy/paste of the same call.
                    for (lat_sign, lon_sign) in [(1., 1.), (1., -1.), (-1., 1.), (-1., -1.)] {
                        clusters.insert(point.interpolate(
                            neighbor,
                            ratio,
                            lat_sign * wiggle_lat,
                            lon_sign * wiggle_lon,
                        ));
                    }
                }
            }
        }
    }
    clusters.insert(point.to_owned());
    clusters
}

fn gen_estimated_clusters(&self, tree: &RTree<Point>) -> Vec<Point> {
Expand All @@ -150,7 +150,7 @@ impl<'a> Greedy {
})
.reduce(HashSet::new, |a, b| a.union(&b).cloned().collect());

clusters.into_iter().collect::<Vec<Point>>()
clusters.into_iter().collect()
}

fn flat_map_cells(&self, cell: CellID) -> Vec<CellID> {
Expand Down Expand Up @@ -222,20 +222,7 @@ impl<'a> Greedy {

let size = (clusters_with_data
.par_iter()
.map(|cluster| {
let mut start = std::mem::size_of_val(&cluster);

for point in cluster.point.center {
start += std::mem::size_of_val(&point);
}
for point in cluster.points.iter() {
start += std::mem::size_of_val(point);
}
for point in cluster.all.iter() {
start += std::mem::size_of_val(point);
}
start
})
.map(|cluster| cluster.get_size())
.sum::<usize>()
/ 1024
/ 1024)
Expand Down Expand Up @@ -362,8 +349,7 @@ impl<'a> Greedy {
if points.len() < highest {
None
} else {
points.sort_by(|a, b| a.cell_id.cmp(&b.cell_id));
points.dedup_by(|a, b| a.cell_id == b.cell_id);
points.sort_dedupe();

Some(Cluster {
point: cluster.point,
Expand Down Expand Up @@ -452,23 +438,10 @@ impl<'a> Greedy {
self.radius,
&clusters.iter().map(|c| c.point.center).collect(),
);
clusters.par_iter_mut().for_each(|cluster| {
cluster.points = cluster
.all
.iter()
// .collect::<Vec<&&Point>>()
// .into_par_iter()
.filter_map(|p| {
if cluster_tree.locate_all_at_point(&p.center).count() == 1 {
Some(*p)
} else {
None
}
})
.collect::<HashSet<&Point>>()
.into_iter()
.collect();
});
clusters
.par_iter_mut()
.for_each(|cluster| cluster.update_unique(&cluster_tree));

clusters.retain(|cluster| cluster.points.len() >= self.min_points);

log::info!(
Expand Down
35 changes: 34 additions & 1 deletion server/algorithms/src/rtree/cluster.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ use std::{
hash::{Hash, Hasher},
};

use super::point::Point;
use rstar::RTree;

use super::{point::Point, SortDedupe};

#[derive(Debug, Clone)]
pub struct Cluster<'a> {
Expand All @@ -24,6 +26,37 @@ impl<'a> Cluster<'a> {
points: points.collect(),
}
}

/// Approximates this cluster's memory footprint in bytes; callers sum the
/// result across clusters for memory-usage logging.
///
/// Fix: the previous code used `std::mem::size_of_val(&self)`, which is the
/// size of a pointer to `self` (a `&&Cluster`), not of the `Cluster` struct
/// itself — this now counts the struct's own size.
pub fn get_size(&self) -> usize {
    // Size of the Cluster struct itself.
    let mut size = std::mem::size_of_val(self);

    // `center` is iterated by value, so this adds each coordinate's size.
    for coord in self.point.center {
        size += std::mem::size_of_val(&coord);
    }
    // The sets store references, so each entry contributes pointer size —
    // this estimates the containers' footprint, not the pointed-to Points.
    for point in self.points.iter() {
        size += std::mem::size_of_val(point);
    }
    for point in self.all.iter() {
        size += std::mem::size_of_val(point);
    }
    size
}

/// Recomputes `self.points` as the subset of `self.all` whose center is
/// covered by exactly one entry in `tree`, sorted and deduped by cell id.
pub fn update_unique(&mut self, tree: &RTree<Point>) {
    let mut unique: Vec<&Point> = self
        .all
        .iter()
        .copied()
        .filter(|point| tree.locate_all_at_point(&point.center).count() == 1)
        .collect();
    unique.sort_dedupe();
    self.points = unique;
}
}

impl PartialEq for Cluster<'_> {
Expand Down
4 changes: 4 additions & 0 deletions server/algorithms/src/rtree/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ use point::Point;
use rayon::prelude::{IntoParallelRefIterator, ParallelIterator};
use rstar::RTree;

/// Sorts a collection in place and then removes adjacent duplicates.
///
/// Implementors choose the ordering/equality key (e.g. the `Vec<&Point>`
/// impl sorts and dedupes by `cell_id`).
pub trait SortDedupe {
    /// Sort the collection, then drop consecutive duplicate entries.
    fn sort_dedupe(&mut self);
}

pub fn spawn(radius: Precision, points: &SingleVec) -> RTree<Point> {
let points = points
.iter()
Expand Down
10 changes: 9 additions & 1 deletion server/algorithms/src/rtree/point.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ use geo::Coord;
use geohash::encode;
use map_3d::EARTH_RADIUS;
use model::api::Precision;
use rayon::slice::ParallelSliceMut;
use rstar::{PointDistance, RTreeObject, AABB};
use s2::{cell::Cell, cellid::CellID, latlng::LatLng};

use super::cluster::Cluster;
use super::{cluster::Cluster, SortDedupe};

#[derive(Debug, Clone, Copy)]
pub struct Point {
Expand Down Expand Up @@ -149,3 +150,10 @@ impl<'a> From<Cluster<'a>> for Point {
cluster.point
}
}

/// Orders points by their S2 cell id (in parallel via rayon), then drops
/// adjacent entries that share the same cell id.
impl SortDedupe for Vec<&Point> {
    fn sort_dedupe(&mut self) {
        // `par_sort_by_key` is a stable sort, equivalent to the original
        // comparator form `par_sort_by(|a, b| a.cell_id.cmp(&b.cell_id))`.
        self.par_sort_by_key(|point| point.cell_id);
        // `dedup_by_key` keeps the first of each run of equal cell ids,
        // matching `dedup_by(|a, b| a.cell_id == b.cell_id)`.
        self.dedup_by_key(|point| point.cell_id);
    }
}

0 comments on commit d425caa

Please sign in to comment.