-
Notifications
You must be signed in to change notification settings - Fork 0
/
two-sample-power-dimension.py
104 lines (80 loc) · 2.28 KB
/
two-sample-power-dimension.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/env python
# coding: utf-8
# Power vs. Dimension for 15 Relationships
import os
import sys
import numpy as np
from joblib import Parallel, delayed
from hyppo.independence import MGC, Dcorr, Hsic, HHG, CCA, RV
from kmerf import KMERF
from simulations import make_marron_wand_classification, MARRON_WAND_SIMS
sys.path.append(os.path.realpath(".."))
DIMENSIONS = range(3, 11)
SAMP_SIZE = 100
REPS = range(1000)
SAVE_PATH = "two-sample-n-{}_p-{}_{}".format(
int(SAMP_SIZE), int(DIMENSIONS[0]), int(DIMENSIONS[-1])
)
TESTS = {
"KMERF": KMERF(forest="classifier"),
"MGC": MGC(),
"Dcorr": Dcorr(),
"Hsic": Hsic(),
"HHG": HHG(),
"CCA": CCA(),
"RV": RV(),
}
def _sim_slice(X, p):
"""
Generate x, y from each sim
"""
X_t = X[:, :p]
y_t = np.concatenate((np.zeros(SAMP_SIZE // 2), np.ones(SAMP_SIZE // 2)))
return X_t, y_t
def _perm_stat(est, X, p):
"""
Generates null and alternate distributions
"""
X, y = _sim_slice(X, p)
y = y.reshape(-1, 1)
obs_stat = est.statistic(X, y)
permy = np.random.permutation(y)
perm_stat = est.statistic(X, permy)
return obs_stat, perm_stat
def _nonperm_pval(est, X, p):
"""
Generates fast permutation pvalues
"""
X, y = _sim_slice(X, p)
pvalue = est.test(X, y)[1]
return pvalue
def compute_null(rep, est, est_name, sim, p=1):
"""
Calculates empirical null and alternate distribution for each test.
"""
X, _ = make_marron_wand_classification(
n_samples=SAMP_SIZE,
n_dim=DIMENSIONS[-1],
n_informative=1,
simulation=sim,
seed=rep,
)
if est_name in ["Dcorr", "Hsic"]:
pval = _nonperm_pval(est, X, p)
save_kwargs = {"X": [pval]}
else:
alt_dist, null_dist = _perm_stat(est, X, p)
save_kwargs = {"X": [alt_dist, null_dist], "delimiter": ","}
np.savetxt(
f"{SAVE_PATH}/{sim}_{est_name}_{p}_{rep}.txt", **save_kwargs
)
# Run this block to regenerate power curves. Note that this takes a very long time!
_ = Parallel(n_jobs=-1, verbose=100)(
[
delayed(compute_null)(rep, est, est_name, sim, p=dim)
for rep in REPS
for est_name, est in TESTS.items()
for sim in MARRON_WAND_SIMS.keys()
for dim in DIMENSIONS
]
)