Added AlphaPrecisionNaive (#142)
* Added AlphaPrecisionNaive

* Added test for AlphaPrecisionNaive metric

* Removed assert from metric

* Extended AlphaPrecision with Naive method and removed specific class.

* Update version
robsdavis authored Mar 6, 2023
1 parent a82d9e4 commit 7bb36fb
Showing 6 changed files with 86 additions and 6 deletions.
1 change: 0 additions & 1 deletion src/synthcity/benchmark/__init__.py
@@ -169,7 +169,6 @@ def evaluate(

        if synthetic_cache:
            save_to_file(cache_file, X_syn)

        evaluation = Metrics.evaluate(
            X_test if X_test is not None else X,
            X_syn,
1 change: 0 additions & 1 deletion src/synthcity/metrics/eval.py
@@ -170,7 +170,6 @@ def evaluate(
        scores = ScoreEvaluator()

        eval_cnt = min(len(X_gt), len(X_syn))

        for metric in standard_metrics:
            if metric.type() not in metrics:
                continue
72 changes: 70 additions & 2 deletions src/synthcity/metrics/eval_statistical.py
@@ -574,10 +574,10 @@ def metrics(
        emb_center: Optional[np.ndarray] = None,
    ) -> Tuple:
        if len(X) != len(X_syn):
            raise RuntimeError("The real and synthetic data mush have the same length")
            raise RuntimeError("The real and synthetic data must have the same length")

        if emb_center is None:
            emb_center = np.mean(X.numpy(), axis=0)
            emb_center = np.mean(X, axis=0)

        n_steps = 30
        alphas = np.linspace(0, 1, n_steps)
@@ -651,6 +651,60 @@ def metrics(
            authenticity,
        )

    def _normalize_covariates(
        self,
        X: DataLoader,
        X_syn: DataLoader,
    ) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """_normalize_covariates
        This is an internal method to replicate the old, naive method for evaluating
        AlphaPrecision.
        Args:
            X (DataLoader): The ground truth dataset.
            X_syn (DataLoader): The synthetic dataset.
        Returns:
            Tuple[pd.DataFrame, pd.DataFrame]: normalised version of the datasets
        """
        X_gt_norm = X.dataframe().copy()
        X_syn_norm = X_syn.dataframe().copy()
        if self._task_type != "survival_analysis":
            if hasattr(X, "target_column"):
                X_gt_norm = X_gt_norm.drop(columns=[X.target_column])
            if hasattr(X_syn, "target_column"):
                X_syn_norm = X_syn_norm.drop(columns=[X_syn.target_column])
        scaler = MinMaxScaler().fit(X_gt_norm)
        if hasattr(X, "target_column"):
            X_gt_norm_df = pd.DataFrame(
                scaler.transform(X_gt_norm),
                columns=[
                    col
                    for col in X.train().dataframe().columns
                    if col != X.target_column
                ],
            )
        else:
            X_gt_norm_df = pd.DataFrame(
                scaler.transform(X_gt_norm), columns=X.train().dataframe().columns
            )

        if hasattr(X_syn, "target_column"):
            X_syn_norm_df = pd.DataFrame(
                scaler.transform(X_syn_norm),
                columns=[
                    col
                    for col in X_syn.dataframe().columns
                    if col != X_syn.target_column
                ],
            )
        else:
            X_syn_norm_df = pd.DataFrame(
                scaler.transform(X_syn_norm), columns=X_syn.dataframe().columns
            )

        return (X_gt_norm_df, X_syn_norm_df)
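
For context, a minimal standalone sketch of the normalisation step this helper performs: fit a MinMaxScaler on the real covariates only, reuse the fitted scaler on the synthetic covariates, and drop the label column first. The column names and toy data below are illustrative and not part of this commit.

import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Toy real and synthetic frames; "target" stands in for the label column.
real = pd.DataFrame({"age": [20, 40, 60], "bmi": [18.0, 25.0, 32.0], "target": [0, 1, 0]})
synth = pd.DataFrame({"age": [25, 35, 55], "bmi": [19.0, 27.0, 30.0], "target": [1, 0, 1]})

# Fit on the real covariates only, then apply the same scaling to both frames.
covariates = [col for col in real.columns if col != "target"]
scaler = MinMaxScaler().fit(real[covariates])

real_norm = pd.DataFrame(scaler.transform(real[covariates]), columns=covariates)
synth_norm = pd.DataFrame(scaler.transform(synth[covariates]), columns=covariates)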

    @validate_arguments(config=dict(arbitrary_types_allowed=True))
    def _evaluate(
        self,
@@ -683,6 +737,20 @@ def _evaluate(
results[f"delta_coverage_beta{emb}"] = Delta_coverage_beta
results[f"authenticity{emb}"] = authenticity

X_df, X_syn_df = self._normalize_covariates(X, X_syn)
(
alphas_naive,
alpha_precision_curve_naive,
beta_coverage_curve_naive,
Delta_precision_alpha_naive,
Delta_coverage_beta_naive,
authenticity_naive,
) = self.metrics(X_df.to_numpy(), X_syn_df.to_numpy(), emb_center=None)

results["delta_precision_alpha_naive"] = Delta_precision_alpha_naive
results["delta_coverage_beta_naive"] = Delta_coverage_beta_naive
results["authenticity_naive"] = authenticity_naive

return results
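
For context, a hedged usage sketch of how the new naive scores appear next to the existing OC ones. It assumes synthcity's GenericDataLoader (from synthcity.plugins.core.dataloader) and the evaluator's evaluate() entry point behave as exercised in the test file changed below; the random data is purely illustrative and not part of this commit.

import numpy as np
import pandas as pd

from synthcity.metrics.eval_statistical import AlphaPrecision
from synthcity.plugins.core.dataloader import GenericDataLoader

# Real and "synthetic" frames of equal length (the metric requires matching lengths).
rng = np.random.default_rng(0)
real = pd.DataFrame(rng.normal(size=(200, 4)), columns=["a", "b", "c", "d"])
synth = pd.DataFrame(rng.normal(size=(200, 4)), columns=["a", "b", "c", "d"])

scores = AlphaPrecision().evaluate(GenericDataLoader(real), GenericDataLoader(synth))

# The naive variants are reported alongside the embedded (*_OC) variants.
for key in ["delta_precision_alpha_naive", "delta_coverage_beta_naive", "authenticity_naive"]:
    print(key, scores[key])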


2 changes: 1 addition & 1 deletion src/synthcity/metrics/scores.py
@@ -42,7 +42,7 @@ def _safe_evaluate(
log.debug(f" >> Evaluating metric {evaluator.fqdn()} done. Duration: {duration} s")

if err is not None:
log.error(f" >> Rvaluator {evaluator.fqdn()} failed: {err}")
log.error(f" >> Evaluator {evaluator.fqdn()} failed: {err}")

return evaluator.fqdn(), result, failed, duration, evaluator.direction()

2 changes: 1 addition & 1 deletion src/synthcity/version.py
@@ -1,4 +1,4 @@
__version__ = "0.2.1"
__version__ = "0.2.2"

MAJOR_VERSION = ".".join(__version__.split(".")[:-1])
MINOR_VERSION = __version__.split(".")[-1]
14 changes: 14 additions & 0 deletions tests/metrics/test_statistical.py
@@ -227,13 +227,27 @@ def test_evaluate_alpha_precision(test_plugin: Plugin) -> None:
"delta_precision_alpha_OC",
"delta_coverage_beta_OC",
"authenticity_OC",
"delta_precision_alpha_naive",
"delta_coverage_beta_naive",
"authenticity_naive",
]:
assert key in syn_score
assert key in rnd_score

    # For best method
    assert syn_score["delta_precision_alpha_OC"] > rnd_score["delta_precision_alpha_OC"]
    assert syn_score["authenticity_OC"] < rnd_score["authenticity_OC"]

    # For naive method
    assert (
        syn_score["delta_precision_alpha_naive"]
        > rnd_score["delta_precision_alpha_naive"]
    )
    assert (
        syn_score["delta_coverage_beta_naive"] > rnd_score["delta_coverage_beta_naive"]
    )
    assert syn_score["authenticity_naive"] < rnd_score["authenticity_naive"]

    assert AlphaPrecision.name() == "alpha_precision"
    assert AlphaPrecision.type() == "stats"
    assert AlphaPrecision.direction() == "maximize"