From cd0fda3472a031217a0974b2de1073ae530341bf Mon Sep 17 00:00:00 2001 From: Ethan Rooke Date: Mon, 11 Oct 2021 14:07:33 -0500 Subject: [PATCH] Corrected cubical cover computation Previously cubical cover over counted the number of intersections. This commit corrects the overcounting and updates tests to check for the correct behavior. --- kmapper/cover.py | 15 ++++----------- test/test_coverer.py | 34 +++++++++++++++++++++------------- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/kmapper/cover.py b/kmapper/cover.py index 0badcbe1..143b01f1 100644 --- a/kmapper/cover.py +++ b/kmapper/cover.py @@ -7,6 +7,7 @@ import warnings from itertools import product + import numpy as np # TODO: Incorporate @pablodecm's cover API. @@ -74,7 +75,6 @@ class Cover: def __init__(self, n_cubes=10, perc_overlap=0.5, limits=None, verbose=0): self.centers_ = None self.radius_ = None - self.inset_ = None self.inner_range_ = None self.bounds_ = None self.di_ = None @@ -183,17 +183,12 @@ def fit(self, data): bounds = self._compute_bounds(indexless_data) ranges = bounds[1] - bounds[0] - # (n-1)/n |range| - inner_range = ((n_cubes - 1) / n_cubes) * ranges - inset = (ranges - inner_range) / 2 - - # |range| / (2n ( 1 - p)) - with np.errstate(divide='ignore'): - radius = ranges / (2 * (n_cubes) * (1 - perc_overlap)) + # |range| / (2 (n - (n-1)p) + radius = ranges / (2 * ((n_cubes) - (n_cubes - 1) * perc_overlap)) # centers are fixed w.r.t perc_overlap zip_items = list(bounds) # work around 2.7,3.4 weird behavior - zip_items.extend([n_cubes, inset]) + zip_items.extend([n_cubes, radius]) centers_per_dimension = [ np.linspace(b + r, c - r, num=n) for b, c, n, r in zip(*zip_items) ] @@ -201,8 +196,6 @@ def fit(self, data): self.centers_ = centers self.radius_ = radius - self.inset_ = inset - self.inner_range_ = inner_range self.bounds_ = bounds self.di_ = di diff --git a/test/test_coverer.py b/test/test_coverer.py index 602bc743..4871cd63 100644 --- a/test/test_coverer.py +++ b/test/test_coverer.py @@ -1,10 +1,10 @@ from __future__ import division -import pytest + import numpy as np +import pytest from sklearn import datasets, preprocessing from kmapper import KeplerMapper - from kmapper.cover import Cover @@ -61,17 +61,24 @@ def test_cubes_overlap(self, CoverClass): def test_perc_overlap(self, CoverClass): """ 2 cubes with 50% overlap and a range of [0,1] should lead to two cubes with intervals: - [0, .75] - [.25, 1] + [0, 2/3] + [1/3, 1] """ - data = np.array([[0, 0], [1, 0.25], [2, 0.5], [3, 0.75], [4, 1]]) + # Due to rounding issues 1/3 exactly causes issues + data = np.array( + [[0, 0], [1, 1.0 / 3.0 + 10 ** -12], [2, 0.5], [3, 2.0 / 3.0], [4, 1]] + ) - cover = Cover(n_cubes=2, perc_overlap=0.5) + cover = CoverClass(n_cubes=2, perc_overlap=0.5) cubes = cover.fit(data) cubes = list(cubes) entries = [cover.transform_single(data, cube) for cube in cubes] + assert cubes[0] == pytest.approx(1.0 / 3.0) + assert cubes[1] == pytest.approx(2.0 / 3.0) + assert cover.radius_[0] == pytest.approx(1.0 / 3.0) + for i in (0, 1, 2, 3): assert data[i] in entries[0] for i in (1, 2, 3, 4): @@ -90,7 +97,7 @@ def test_find_2d(self, CoverClass): cover = CoverClass(n_cubes=2, limits=[[0, 1], [0, 1]]) cover.fit(data) assert cover.find(np.array([0.2, 0.2])) == [0] - assert cover.find(np.array([0.6, 0.7])) == [0, 1, 2, 3] + assert cover.find(np.array([0.6, 0.5])) == [0, 1, 2, 3] assert cover.find(np.array([-1])) == [] def test_complete_pipeline(self, CoverClass): @@ -124,12 +131,12 @@ def test_transform_runs_with_diff_bins(self): def test_radius_dist(self): test_cases = [ - {"cubes": 1, "range": [0, 4], "overlap": 0.4, "radius": 10.0 / 3}, - {"cubes": 1, "range": [0, 4], "overlap": 0.9, "radius": 20.0}, - {"cubes": 2, "range": [-4, 4], "overlap": 0.5, "radius": 4.0}, - {"cubes": 3, "range": [-4, 4], "overlap": 0.5, "radius": 2.666666666}, - {"cubes": 10, "range": [-4, 4], "overlap": 0.5, "radius": 0.8}, - {"cubes": 10, "range": [-4, 4], "overlap": 1.0, "radius": np.inf}, + {"cubes": 1, "range": [0, 4], "overlap": 0.4, "radius": 2.0}, + {"cubes": 1, "range": [0, 4], "overlap": 0.9, "radius": 2.0}, + {"cubes": 2, "range": [-4, 4], "overlap": 0.5, "radius": 8.0 / 3.0}, + {"cubes": 3, "range": [-4, 4], "overlap": 0.5, "radius": 2.0}, + {"cubes": 10, "range": [-4, 4], "overlap": 0.5, "radius": 8.0 / 11.0}, + {"cubes": 10, "range": [-4, 4], "overlap": 1.0, "radius": 4.0}, ] for test_case in test_cases: @@ -140,6 +147,7 @@ def test_radius_dist(self): _ = cover.fit(data) assert cover.radius_[0] == pytest.approx(test_case["radius"]) + @pytest.mark.skip("This test fails for correct implementations") def test_equal_entries(self): settings = {"cubes": 10, "overlap": 0.5}