From 8178d50e1b1201cd2d8bacf17b3ffe3a0fc1cf52 Mon Sep 17 00:00:00 2001 From: Derick M <58572875+TurtIeSocks@users.noreply.github.com> Date: Wed, 18 Oct 2023 15:03:00 -0400 Subject: [PATCH] accuracy improvements --- client/src/pages/map/interface/Drawing.tsx | 9 + client/src/pages/map/markers/Geohash.tsx | 40 +++ client/src/pages/map/markers/index.tsx | 93 +++--- .../src/clustering/rtree/cluster.rs | 40 +++ server/algorithms/src/clustering/rtree/mod.rs | 314 ++++++++---------- .../algorithms/src/clustering/rtree/point.rs | 44 +-- server/api/src/lib.rs | 2 +- server/model/src/db/spawnpoint.rs | 8 +- 8 files changed, 306 insertions(+), 244 deletions(-) create mode 100644 client/src/pages/map/markers/Geohash.tsx create mode 100644 server/algorithms/src/clustering/rtree/cluster.rs diff --git a/client/src/pages/map/interface/Drawing.tsx b/client/src/pages/map/interface/Drawing.tsx index 0bdf2c90..e5be7795 100644 --- a/client/src/pages/map/interface/Drawing.tsx +++ b/client/src/pages/map/interface/Drawing.tsx @@ -171,6 +171,15 @@ export function Drawing() { }) } }} + // onDrawStart={({ workingLayer, shape }) => { + // if (shape === 'Circle') { + // workingLayer.on('move', ({ target }) => { + // const latlng = target.getLatLng() + + // console.log(geohash.encode(latlng.lat, latlng.lng, 12)) + // }) + // } + // }} onCreate={async ({ layer, shape }) => { if (ref.current && ref.current.hasLayer(layer)) { const id = ref.current.getLayerId(layer) diff --git a/client/src/pages/map/markers/Geohash.tsx b/client/src/pages/map/markers/Geohash.tsx new file mode 100644 index 00000000..4be9decd --- /dev/null +++ b/client/src/pages/map/markers/Geohash.tsx @@ -0,0 +1,40 @@ +import * as React from 'react' +import geohash from 'ngeohash' +import { Circle } from 'react-leaflet' +import { usePersist } from '@hooks/usePersist' + +import StyledPopup from '../popups/Styled' + +export function GeohashMarker({ hash }: { hash: string }) { + const x = geohash.decode(hash) + const radius = usePersist((s) => s.radius) + return ( + <> + + +
Hash: {hash}
+
+
+ + + ) +} diff --git a/client/src/pages/map/markers/index.tsx b/client/src/pages/map/markers/index.tsx index 3c6fe79f..3b55cb9f 100644 --- a/client/src/pages/map/markers/index.tsx +++ b/client/src/pages/map/markers/index.tsx @@ -11,6 +11,7 @@ import useDeepCompareEffect from 'use-deep-compare-effect' import { getMarkers } from '@services/fetches' import { Category, PixiMarker } from '@assets/types' import StyledPopup from '../popups/Styled' +// import { GeohashMarker } from './Geohash' export default function Markers({ category }: { category: Category }) { const enabled = usePersist((s) => s[category], shallow) @@ -85,51 +86,61 @@ export default function Markers({ category }: { category: Category }) { return nativeLeaflet ? ( <> - {markers.map((i) => ( - - {pokestopRange && ( + {/* */} + {markers.map((i) => { + const hash = geohash.encode(...i.p, 12) + return ( + + {pokestopRange && ( + + )} + +
+ Lat: {i.p[0]} +
+ Lng: {i.p[1]} +
+ Hash: {geohash.encode(...i.p, 9)} +
+ Hash: {geohash.encode(...i.p, 12)} +
+
+
+ - )} - - -
- Lat: {i.p[0]} -
- Lng: {i.p[1]} -
- Hash: {geohash.encode(...i.p, 9)} -
- Hash: {geohash.encode(...i.p, 12)} -
-
-
- -
- ))} +
+ ) + })} ) : null } diff --git a/server/algorithms/src/clustering/rtree/cluster.rs b/server/algorithms/src/clustering/rtree/cluster.rs new file mode 100644 index 00000000..78e929b2 --- /dev/null +++ b/server/algorithms/src/clustering/rtree/cluster.rs @@ -0,0 +1,40 @@ +use std::hash::{Hash, Hasher}; + +use hashbrown::HashSet; + +use super::point::Point; + +#[derive(Debug, Clone)] +pub struct Cluster<'a> { + pub point: &'a Point, + pub points: HashSet<&'a Point>, + pub all: HashSet<&'a Point>, +} + +impl<'a> Cluster<'_> { + pub fn new(point: &'a Point, all: T, points: U) -> Cluster<'a> + where + T: Iterator, + U: Iterator, + { + Cluster { + point, + all: all.collect(), + points: points.collect(), + } + } +} + +impl PartialEq for Cluster<'_> { + fn eq(&self, other: &Self) -> bool { + self.point.cell_id == other.point.cell_id + } +} + +impl Eq for Cluster<'_> {} + +impl Hash for Cluster<'_> { + fn hash(&self, state: &mut H) { + self.point.cell_id.hash(state); + } +} diff --git a/server/algorithms/src/clustering/rtree/mod.rs b/server/algorithms/src/clustering/rtree/mod.rs index 6b464f83..6ff6c464 100644 --- a/server/algorithms/src/clustering/rtree/mod.rs +++ b/server/algorithms/src/clustering/rtree/mod.rs @@ -1,37 +1,16 @@ +mod cluster; mod point; use hashbrown::HashSet; use model::api::{single_vec::SingleVec, stats::Stats, Precision}; use point::Point; -use rand::Rng; + use rayon::prelude::{IntoParallelRefIterator, ParallelIterator}; use rstar::RTree; -use std::sync::{Arc, Mutex}; -use std::thread; use std::time::Instant; use crate::s2::create_cell_map; - -struct Comparer<'a> { - clusters: HashSet<&'a Point>, - missed: usize, - score: usize, -} - -#[derive(Debug, Clone)] -struct Cluster<'a> { - point: Point, - points: Vec<&'a Point>, -} - -impl Default for Cluster<'_> { - fn default() -> Self { - Self { - point: Point::default(), - points: vec![], - } - } -} +use cluster::Cluster; pub fn main( points: SingleVec, @@ -87,13 +66,25 @@ pub fn main( return_set.into_iter().map(|p| p.center).collect() } -fn get_clusters(point: &Point, neighbors: Vec<&Point>, segments: usize) -> HashSet { +fn generate_clusters(point: &Point, neighbors: Vec<&Point>, segments: usize) -> HashSet { let mut set = HashSet::::new(); for neighbor in neighbors { for i in 0..=(segments - 1) { let ratio = i as Precision / segments as Precision; - let new_point = point.interpolate(neighbor, ratio); + let new_point = point.interpolate(neighbor, ratio, 0., 0.); set.insert(new_point); + for wiggle in vec![0.00025, 0.0001] { + let wiggle_lat: f64 = wiggle / 2.; + let wiggle_lon = wiggle; + let random_point = point.interpolate(neighbor, ratio, wiggle_lat, wiggle_lon); + set.insert(random_point); + let random_point = point.interpolate(neighbor, ratio, wiggle_lat, -wiggle_lon); + set.insert(random_point); + let random_point = point.interpolate(neighbor, ratio, -wiggle_lat, wiggle_lon); + set.insert(random_point); + let random_point = point.interpolate(neighbor, ratio, -wiggle_lat, -wiggle_lon); + set.insert(random_point); + } } } set.insert(point.to_owned()); @@ -101,19 +92,13 @@ fn get_clusters(point: &Point, neighbors: Vec<&Point>, segments: usize) -> HashS } fn get_initial_clusters(tree: &RTree, time: Instant) -> Vec { - // let tree = point::main(radius * 2., points); - // log::info!( - // "[RTREE] Generated second tree with double radius: {}s", - // time.elapsed().as_secs_f32() - // ); - let tree_points: Vec<&Point> = tree.iter().map(|p| p).collect(); let clusters = tree_points .par_iter() .map(|point| { let neighbors = tree.locate_all_at_point(&point.center); - get_clusters(point, neighbors.into_iter().collect(), 8) + generate_clusters(point, neighbors.into_iter().collect(), 8) }) .reduce(HashSet::new, |a, b| a.union(&b).cloned().collect()); @@ -122,6 +107,13 @@ fn get_initial_clusters(tree: &RTree, time: Instant) -> Vec { clusters.len(), time.elapsed().as_secs_f32() ); + if clusters + .iter() + .find(|p| p._get_geohash() == "u14cu1dtdx2s") + .is_some() + { + log::error!("Found point {}", clusters.len()); + } clusters.into_iter().collect::>() } @@ -131,46 +123,46 @@ fn setup( min_points: usize, time: Instant, ) -> (HashSet, usize) { - let tree = point::main(radius, &points); - log::info!( - "[RTREE] made primary tree: {}s", - time.elapsed().as_secs_f32() - ); + let point_tree: RTree = point::main(radius, &points); + log::info!("[RTREE] made point tree: {}s", time.elapsed().as_secs_f32()); - let initial_clusters = get_initial_clusters(&tree, time); + let neighbor_tree: RTree = point::main(radius * 2., &points); + let initial_clusters = get_initial_clusters(&neighbor_tree, time); let clusters_with_data: Vec = initial_clusters .par_iter() .map(|cluster| { - let points = tree + let mut points: Vec<&Point> = point_tree .locate_all_at_point(&cluster.center) .collect::>(); - Cluster { - point: *cluster, - points, + if point_tree.contains(cluster) && points.is_empty() { + points.push(cluster) } + Cluster::new(cluster, points.into_iter(), vec![].into_iter()) }) .collect(); log::info!( "[RTREE] added data to cluster structs: {}s", time.elapsed().as_secs_f32() ); + let (solution, _) = initial_solution(min_points, clusters_with_data, time); + log::info!("[RTREE] Initial solution size: {}", solution.len()); + let (solution, seen) = dedupe(solution, min_points, time); + log::info!("[RTREE] Deduped solution size: {}", solution.len()); - clustering(min_points, points.len(), &clusters_with_data, time) - // (comparison.cluster, comparison.missed) + (solution, points.len() - seen) } fn initial_solution( min_points: usize, - clusters_with_data: &Vec, + clusters_with_data: Vec, time: Instant, -) -> (HashSet, usize) { +) -> (HashSet, usize) { log::info!( - "Starting initial solution: {}s", + "[RTREE] Starting initial solution: {}s", time.elapsed().as_secs_f32() ); - let mut new_clusters = HashSet::<&Point>::new(); - let mut blocked_clusters = HashSet::<&Point>::new(); + let mut new_clusters = HashSet::::new(); let mut blocked_points = HashSet::<&Point>::new(); let mut highest = 100; @@ -178,161 +170,141 @@ fn initial_solution( let local_clusters = clusters_with_data .par_iter() .filter_map(|cluster| { - if blocked_clusters.contains(&cluster.point) { + if new_clusters.contains(cluster) { None } else { - Some(( - &cluster.point, - cluster - .points - .iter() - .filter_map(|p| { - if blocked_points.contains(p) { - None - } else { - Some(*p) - } - }) - .collect::>(), + Some(Cluster::new( + cluster.point, + cluster.all.clone().into_iter(), + cluster.all.iter().filter_map(|p| { + if blocked_points.contains(p) { + None + } else { + Some(*p) + } + }), )) } }) - .collect::)>>(); + .collect::>(); let mut best = 0; - for (cluster, points) in local_clusters.iter() { - let length = points.len() + 1; - + for cluster in local_clusters.into_iter() { + let length = cluster.points.len() + 1; if length > best { best = length; } if length >= highest { - if blocked_clusters.contains(*cluster) || length == 0 { + if new_clusters.contains(&cluster) || length == 0 { continue; } let mut count = 0; - for point in points { - if !blocked_points.contains(*point) { + for point in cluster.points.iter() { + if !blocked_points.contains(point) { count += 1; + if count >= min_points { + break; + } } } if count >= min_points { - for point in points { + for point in cluster.points.iter() { blocked_points.insert(point); } - blocked_clusters.insert(cluster); - new_clusters.insert(*cluster); + new_clusters.insert(cluster); } } } highest = best; } + log::warn!("Blocked: {} | highest {}", blocked_points.len(), highest); log::info!( - "Finished initial solution: {}s", + "[RTREE] Finished initial solution: {}s", time.elapsed().as_secs_f32() ); - ( - new_clusters.into_iter().map(|p| *p).collect(), - blocked_points.len(), - ) + (new_clusters, blocked_points.len()) } -fn clustering( +fn dedupe( + initial_solution: HashSet, min_points: usize, - total_points: usize, - clusters_with_data: &Vec, time: Instant, ) -> (HashSet, usize) { - log::info!("Starting clustering: {}s", time.elapsed().as_secs_f32()); - - let (clusters, covered) = initial_solution(min_points, clusters_with_data, time); - - let comparison = Comparer { - clusters: clusters.iter().collect(), - missed: total_points - covered, - score: clusters.len() * min_points + (total_points - covered), - }; - let arc = Arc::new(Mutex::new(comparison)); - let length = clusters_with_data.len(); - - thread::scope(|scope| { - for i in 0..num_cpus::get() { - let arc_clone = Arc::clone(&arc); - scope.spawn(move || { - let mut rng = rand::thread_rng(); - let mut stats = Stats::new(); - stats.total_points = total_points; - - let mut iteration = 0; - - while iteration <= 100_000 { - let mut fails = 0; - let mut new_clusters = HashSet::<&Point>::new(); - let mut blocked_clusters = HashSet::::new(); - let mut blocked_points = HashSet::<&Point>::new(); - - log::info!("Thread: {}, Iteration: {}", i, iteration); - while fails < 100 { - let random_index = rng.gen_range(0..length); - if blocked_clusters.contains(&random_index) { - continue; - } - blocked_clusters.insert(random_index); - - let cluster = &clusters_with_data[random_index]; - let valid_points: Vec<&&Point> = cluster - .points - .iter() - .filter(|p| !blocked_points.contains(*p)) - .collect(); - if valid_points.len() >= min_points { - for point in valid_points.iter() { - blocked_points.insert(*point); - } - new_clusters.insert(&cluster.point); - continue; - } - fails += 1; - } - let missed = total_points - blocked_points.len(); - stats.total_clusters = new_clusters.len(); - stats.points_covered = total_points - missed; - let current_score = stats.get_score(min_points); + // let mut point_map: HashMap> = HashMap::new(); + // let mut cluster_map: HashMap> = HashMap::new(); + + // for cluster in initial_solution.iter() { + // cluster_map.insert( + // cluster.point._get_geohash(), + // cluster.points.iter().map(|p| p._get_geohash()).collect(), + // ); + // for point in cluster.points.iter() { + // point_map + // .entry(point._get_geohash()) + // .and_modify(|f| { + // f.insert(cluster.point._get_geohash()); + // }) + // .or_insert_with(|| { + // let mut set: HashSet = HashSet::new(); + // set.insert(cluster.point._get_geohash()); + // set + // }); + // } + // } + + // debug_hashmap("point_map.txt", &point_map).unwrap(); + // debug_hashmap("cluster_map.txt", &cluster_map).unwrap(); + log::info!( + "[RTREE] Starting deduping: {}s", + time.elapsed().as_secs_f32() + ); - let mut comparison = arc_clone.lock().unwrap(); - let is_better = if current_score == comparison.score { - new_clusters.len() < comparison.clusters.len() - } else { - current_score < comparison.score - }; - if is_better { - log::info!( - "Old Score: {} | New Score: {}| Iteration {}", - comparison.score, - current_score, - iteration, - ); - log::info!( - "Covered: {} | Clusters: {}", - stats.points_covered, - stats.total_clusters - ); - comparison.clusters = new_clusters.clone(); - comparison.missed = missed; - comparison.score = current_score; - } - fails = 0; - iteration += 1; - } - }); + let mut seen_points: HashSet<&Point> = HashSet::new(); + let mut solution: HashSet = initial_solution + .iter() + .filter_map(|cluster| { + let unique_points = cluster + .points + .iter() + .filter(|p| { + initial_solution + .iter() + .find(|c| c.point != cluster.point && c.all.contains(*p)) + .is_none() + }) + .count(); + + if unique_points == 0 { + None + } else { + seen_points.extend(cluster.points.iter()); + Some(*cluster.point) + } + }) + .collect(); + log::info!("[RTREE] Deduped solution size: {}", solution.len(),); + + if min_points == 1 { + let mut count = 0; + for cluster in initial_solution { + let valid = cluster + .points + .iter() + .find(|p| !seen_points.contains(*p)) + .is_some(); + if valid { + solution.insert(*cluster.point); + seen_points.extend(cluster.points.iter()); + count += 1; + } } - }); - - let final_result = arc.lock().unwrap(); + log::info!("Extra clusters: {}", count); + } + log::info!( + "[RTREE] Finished deduping: {}s", + time.elapsed().as_secs_f32() + ); - log::info!("Finished clustering: {}s", time.elapsed().as_secs_f32()); - ( - final_result.clusters.iter().map(|p| **p).collect(), - final_result.missed, - ) + log::warn!("Seen: {}", seen_points.len()); + (solution, seen_points.len()) } diff --git a/server/algorithms/src/clustering/rtree/point.rs b/server/algorithms/src/clustering/rtree/point.rs index 105f04aa..26100b5a 100644 --- a/server/algorithms/src/clustering/rtree/point.rs +++ b/server/algorithms/src/clustering/rtree/point.rs @@ -1,11 +1,12 @@ use std::hash::{Hash, Hasher}; -use map_3d::EARTH_RADIUS; +use geo::Coord; +use geohash::encode; use model::api::{single_vec::SingleVec, Precision}; use rstar::{PointDistance, RTree, RTreeObject, AABB}; use s2::{cellid::CellID, latlng::LatLng}; -const R: Precision = 6378137.0; +const EARTH_RADIUS: Precision = 6378137.0; const X: Precision = std::f64::consts::PI / 180.0; #[derive(Debug, Clone, Copy)] @@ -24,9 +25,9 @@ impl Point { } } - pub fn interpolate(&self, next: &Point, ratio: f64) -> Self { - let lat = self.center[0] * (1. - ratio) + next.center[0] * ratio; - let lon = self.center[1] * (1. - ratio) + next.center[1] * ratio; + pub fn interpolate(&self, next: &Self, ratio: f64, wiggle_lat: f64, wiggle_lon: f64) -> Self { + let lat = self.center[0] * (1. - ratio) + (next.center[0] + wiggle_lat) * ratio; + let lon = self.center[1] * (1. - ratio) + (next.center[1] + wiggle_lon) * ratio; let new_point = Self::new(self.radius, [lat, lon]); new_point } @@ -48,11 +49,16 @@ impl Point { [lat.to_degrees(), lng.to_degrees()] } - // pub fn midpoint(&self, other: &Point) -> Self { - // let lat = (self.center[0] + other.center[0]) / 2.0; - // let lon = (self.center[1] + other.center[1]) / 2.0; - // Self::new(self.radius, [lat, lon]) - // } + pub fn _get_geohash(&self) -> String { + encode( + Coord { + x: self.center[1], + y: self.center[0], + }, + 12, + ) + .unwrap() + } } impl PartialEq for Point { @@ -75,12 +81,6 @@ impl RTreeObject for Point { fn envelope(&self) -> Self::Envelope { let corner_1 = self.haversine_destination(225.); let corner_2 = self.haversine_destination(45.); - // let corner_1 = [self.center[0] - self.radius, self.center[1] - self.radius]; - // let corner_2 = [self.center[0] + self.radius, self.center[1] + self.radius]; - // println!( - // "{},{}\n{},{}\n{},{}\n", - // self.center[0], self.center[1], corner_1[0], corner_1[1], corner_2[0], corner_2[1] - // ); AABB::from_corners(corner_1, corner_2) } } @@ -92,7 +92,7 @@ impl PointDistance for Point { let lat2 = other[0] * X; let lon2 = other[1] * X; let a = lat1.sin() * lat2.sin() + lat1.cos() * lat2.cos() * (lon2 - lon1).cos(); - a.acos() * R + a.acos() * EARTH_RADIUS } fn contains_point(&self, point: &::Point) -> bool { @@ -100,16 +100,6 @@ impl PointDistance for Point { } } -impl Default for Point { - fn default() -> Self { - Self { - cell_id: CellID(0), - center: [0., 0.], - radius: 70., - } - } -} - pub fn main(radius: f64, points: &SingleVec) -> RTree { let spawnpoints = points .iter() diff --git a/server/api/src/lib.rs b/server/api/src/lib.rs index 3aba5969..48921c8d 100644 --- a/server/api/src/lib.rs +++ b/server/api/src/lib.rs @@ -65,7 +65,7 @@ pub async fn start() -> io::Result<()> { .app_data(web::Data::new(scanner_type)) .app_data(web::Data::new(client)) // increase max payload size to 20MB - .app_data(web::JsonConfig::default().limit(20_971_520)) + .app_data(web::JsonConfig::default().limit(1024 * 1024 * 50)) .wrap(middleware::Logger::new("%s | %r - %b bytes in %D ms (%a)")) .wrap(middleware::Compress::default()) .wrap( diff --git a/server/model/src/db/spawnpoint.rs b/server/model/src/db/spawnpoint.rs index fe8b0e0f..5a991b24 100644 --- a/server/model/src/db/spawnpoint.rs +++ b/server/model/src/db/spawnpoint.rs @@ -84,12 +84,12 @@ impl Query { .from_raw_sql(Statement::from_sql_and_values( DbBackend::MySql, format!( - "SELECT lat, lon, despawn_sec FROM spawnpoint WHERE last_seen >= {} AND {} AND ({}) LIMIT 2000000", + "SELECT lat, lon, despawn_sec FROM spawnpoint WHERE last_seen >= {} {} AND ({}) LIMIT 2000000", last_seen, match tth { - SpawnpointTth::All => "1=1".to_string(), - SpawnpointTth::Known => "despawn_sec IS NOT NULL".to_string(), - SpawnpointTth::Unknown => "despawn_sec IS NULL".to_string(), + SpawnpointTth::All => "".to_string(), + SpawnpointTth::Known => "AND despawn_sec IS NOT NULL".to_string(), + SpawnpointTth::Unknown => "AND despawn_sec IS NULL".to_string(), }, utils::sql_raw(area) )