Skip to content

Commit

Permalink
refactor: use factories for common operations
Browse files Browse the repository at this point in the history
  • Loading branch information
prokolyvakis committed Oct 3, 2023
1 parent 2a7b14a commit 90827f5
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 133 deletions.
84 changes: 84 additions & 0 deletions experiments/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@
import numpy as np
import plotly.graph_objects as go

from mudpod.clustering import DipMeans
from mudpod.projections import IdentityProjector
from mudpod.projections import JohnsonLindenstrauss
from mudpod.observer import PercentileObserver
from mudpod.observer import RandomObserver
from mudpod.projections import View
from mudpod.unimodality import UnimodalityTest
from mudpod.unimodality import MonteCarloUnimodalityTest


def plot_clustered_data(data: np.ndarray, labels: np.ndarray) -> None:
"""Plots clustered data.
Expand Down Expand Up @@ -61,3 +70,78 @@ def group_data_points(data: np.ndarray, clusters: np.ndarray) -> list[np.ndarray
m = m[m[:, -1].argsort()]
m = np.split(m[:, :-1], np.unique(m[:, -1], return_index=True)[1][1:])
return m


def get_view(arguments: dict) -> View:
    """Assemble a View from the configuration held in ``arguments``.

    The keys ``'<pj>'``, ``'--obs'`` and ``'--dist'`` are read, each coerced
    to ``str`` before use.

    Args:
        arguments: a dict containing the config parameters.

    Returns:
        A View built from the selected projector, the selected observer and
        the ``'--dist'`` value.

    Raises:
        ValueError: if ``'<pj>'`` is neither ``'jl'`` nor ``'i'``, or if
            ``'--obs'`` is neither ``'percentile'`` nor ``'random'``.
    """
    projection_type = str(arguments['<pj>'])
    # Reject unknown projection names up front, before anything is built.
    if projection_type not in ('jl', 'i'):
        raise ValueError(
            f'The projection type: {projection_type} is not supported!'
        )
    if projection_type == 'jl':
        projector = JohnsonLindenstrauss()
    else:
        projector = IdentityProjector()

    distance = str(arguments['--dist'])
    observer_type = str(arguments['--obs'])
    if observer_type not in ('percentile', 'random'):
        raise ValueError(
            f'The observer type: {observer_type} is not supported!'
        )
    if observer_type == 'percentile':
        # NOTE(review): 0.99 is hard-coded here; presumably the percentile
        # level of the observer -- confirm against PercentileObserver.
        observer = PercentileObserver(0.99, distance)
    else:
        observer = RandomObserver()

    return View(projector, observer, distance)


def get_monte_carlo_test(arguments: dict, workers_num: int = 1) -> MonteCarloUnimodalityTest:
    """Create a Monte Carlo unimodality test from the config parameters.

    A view is obtained through ``get_view(arguments)`` and wrapped in a
    ``UnimodalityTest`` together with ``float(arguments['<pv>'])``; that test
    is then handed to ``MonteCarloUnimodalityTest``.

    Args:
        arguments: a dict containing the config parameters; besides the keys
            consumed by ``get_view``, ``'<pv>'`` and ``'<sims>'`` are read.
        workers_num: an integer indicating the number of workers.

    Returns:
        A parametrized Monte Carlo unimodality test.
    """
    base_test = UnimodalityTest(get_view(arguments), float(arguments['<pv>']))
    return MonteCarloUnimodalityTest(
        base_test,
        sim_num=int(arguments['<sims>']),
        workers_num=workers_num,
    )


def get_dip_means(arguments: dict, seed: int, workers_num: int = 1) -> DipMeans:
    """Create a DipMeans clustering instance from the config parameters.

    Args:
        arguments: a dict containing the config parameters; besides the keys
            consumed by ``get_view``, ``'<pv>'`` and ``'<sims>'`` are read.
        seed: a random seed, forwarded as ``random_state``.
        workers_num: an integer indicating the number of workers.

    Returns:
        A parametrized DipMeans instance.
    """
    # Collect the constructor keywords first; the view is built before the
    # numeric options are parsed.
    settings = {
        'view': get_view(arguments),
        'pval': float(arguments['<pv>']),
        'sim_num': int(arguments['<sims>']),
        'workers_num': workers_num,
        'random_state': seed,
    }
    return DipMeans(**settings)
42 changes: 7 additions & 35 deletions experiments/real/pre_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,9 @@
from sklearn.metrics import silhouette_score
from umap import UMAP

from experiments.common import get_dip_means
from experiments.common import plot_clustered_data
from mudpod.misc import set_seed
from mudpod.clustering import DipMeans
from mudpod.projections import IdentityProjector
from mudpod.projections import JohnsonLindenstrauss
from mudpod.observer import PercentileObserver
from mudpod.observer import RandomObserver
from mudpod.projections import View


logger.remove()
Expand Down Expand Up @@ -72,40 +67,17 @@ def get_data(
if __name__ == "__main__":
arguments = docopt(__doc__)

SEED = int(arguments['--seed'])
set_seed(SEED)

n_samples = arguments['--samples'] or None
if n_samples is not None:
n_samples = int(n_samples)
x, y = get_data(Path(arguments['<p>']), samples=n_samples)

SEED = int(arguments['--seed'])
set_seed(SEED)

pt = str(arguments['<pj>'])
if pt == 'jl':
p = JohnsonLindenstrauss()
elif pt == 'i':
p = IdentityProjector()
else:
raise ValueError(f'The projection type: {pt} is not supported!')


dt = str(arguments['--dist'])
ot = str(arguments['--obs'])
if ot == 'percentile':
o = PercentileObserver(0.99, dt)
elif ot == 'random':
o = RandomObserver()
else:
raise ValueError(f'The observer type: {ot} is not supported!')

v = View(p, o, dt)

dm = DipMeans(
view=v,
pval=float(arguments['<pv>']),
sim_num=int(arguments['<sims>']),
workers_num=1,
random_state=SEED
dm = get_dip_means(
arguments=arguments,
seed=SEED
)

clusters = dm.fit(x).labels_
Expand Down
39 changes: 6 additions & 33 deletions experiments/synthetic/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,9 @@
from loguru import logger
from sklearn.metrics import normalized_mutual_info_score

from experiments.common import get_dip_means
from experiments.common import plot_clustered_data
from experiments.synthetic.misc import load
from mudpod.clustering import DipMeans
from mudpod.projections import IdentityProjector
from mudpod.projections import JohnsonLindenstrauss
from mudpod.observer import PercentileObserver
from mudpod.observer import RandomObserver
from mudpod.projections import View
from mudpod.misc import set_seed


Expand All @@ -39,37 +34,15 @@
if __name__ == "__main__":
arguments = docopt(__doc__)

x, y = load(str(arguments['<d>']))

SEED = int(arguments['--seed'])
set_seed(SEED)

pt = str(arguments['<pj>'])
if pt == 'jl':
p = JohnsonLindenstrauss()
elif pt == 'i':
p = IdentityProjector()
else:
raise ValueError(f'The projection type: {pt} is not supported!')


dt = str(arguments['--dist'])
ot = str(arguments['--obs'])
if ot == 'percentile':
o = PercentileObserver(0.99, dt)
elif ot == 'random':
o = RandomObserver()
else:
raise ValueError(f'The observer type: {ot} is not supported!')

v = View(p, o, dt)
x, y = load(str(arguments['<d>']))

dm = DipMeans(
view=v,
pval=float(arguments['<pv>']),
sim_num=int(arguments['<sims>']),
workers_num=1,
random_state=SEED

dm = get_dip_means(
arguments=arguments,
seed=SEED
)

clusters = dm.fit(x).labels_
Expand Down
35 changes: 3 additions & 32 deletions experiments/synthetic/two_gaussians_mix.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,10 @@
from docopt import docopt
from loguru import logger

from experiments.common import get_monte_carlo_test
from experiments.common import plot_clustered_data
from experiments.synthetic.misc import TwoDimGaussianSumGenerator
from mudpod.misc import set_seed
from mudpod.projections import IdentityProjector
from mudpod.projections import JohnsonLindenstrauss
from mudpod.observer import PercentileObserver
from mudpod.observer import RandomObserver
from mudpod.projections import View
from mudpod.unimodality import UnimodalityTest
from mudpod.unimodality import MonteCarloUnimodalityTest


logger.remove()
Expand All @@ -43,31 +37,6 @@
SEED = int(arguments['--seed'])
set_seed(SEED)

pt = str(arguments['<pj>'])
if pt == 'jl':
p = JohnsonLindenstrauss()
elif pt == 'i':
p = IdentityProjector()
else:
raise ValueError(f'The projection type: {pt} is not supported!')

dt = str(arguments['--dist'])
ot = str(arguments['--obs'])
if ot == 'percentile':
o = PercentileObserver(0.99, dt)
elif ot == 'random':
o = RandomObserver()
else:
raise ValueError(f'The observer type: {ot} is not supported!')

v = View(p, o, dt)
t = UnimodalityTest(v, float(arguments['<pv>']))
mct = MonteCarloUnimodalityTest(
t,
sim_num=int(arguments['<sims>']),
workers_num=1
)

n_samples = int(arguments['--samples'])
std = float(arguments['--noise'])
g = TwoDimGaussianSumGenerator(
Expand All @@ -76,6 +45,8 @@
random_state=SEED
)

mct = get_monte_carlo_test(arguments=arguments, workers_num=1)

tr = 'unimodal' if mct.test(g.x) else 'bimodal'
msg = dict(arguments)
msg['groundtruth'] = g.t
Expand Down
36 changes: 3 additions & 33 deletions experiments/synthetic/unimodality.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,9 @@
from sklearn.datasets import load_digits
from sklearn.datasets import load_iris

from experiments.common import get_monte_carlo_test
from experiments.common import plot_clustered_data
from mudpod.misc import set_seed
from mudpod.projections import IdentityProjector
from mudpod.projections import JohnsonLindenstrauss
from mudpod.observer import PercentileObserver
from mudpod.observer import RandomObserver
from mudpod.projections import View
from mudpod.unimodality import UnimodalityTest
from mudpod.unimodality import MonteCarloUnimodalityTest

logger.remove()
# add a new handler with level set to INFO
Expand Down Expand Up @@ -63,37 +57,13 @@ def get_dataset(name: str) -> Callable:
SEED = int(arguments['--seed'])
set_seed(SEED)

pt = str(arguments['<pj>'])
if pt == 'jl':
p = JohnsonLindenstrauss()
elif pt == 'i':
p = IdentityProjector()
else:
raise ValueError(f'The projection type: {pt} is not supported!')


dt = str(arguments['--dist'])
ot = str(arguments['--obs'])
if ot == 'percentile':
o = PercentileObserver(0.99, dt)
elif ot == 'random':
o = RandomObserver()
else:
raise ValueError(f'The observer type: {ot} is not supported!')

v = View(p, o, dt)
t = UnimodalityTest(v, float(arguments['<pv>']))
mct = MonteCarloUnimodalityTest(
t,
sim_num=int(arguments['<sims>']),
workers_num=1
)

data_func = get_dataset(str(arguments['<d>']))
n_samples = int(arguments['--samples'])
noise = float(arguments['--noise'])
x, y = data_func(n_samples=n_samples, noise=noise, random_state=SEED)

mct = get_monte_carlo_test(arguments=arguments, workers_num=1)

msg = dict(arguments)
msg['result'] = 'unimodal' if mct.test(x) else 'multimodal'
msg.pop('--help')
Expand Down

0 comments on commit 90827f5

Please sign in to comment.