Implementation of early stopping feature.
Adding the following parameters to input.yaml::fit:
  min_relative_train_loss_per_iter: 5e-5
  min_relative_test_loss_per_iter: 1e-5
  early_stopping_patience: 10
Yury Lysogorskiy committed Feb 2, 2024
1 parent e9b9755 commit d2785c1
Showing 7 changed files with 123 additions and 12 deletions.
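For orientation, the stopping rule introduced by this commit works as follows: record the loss at every iteration, compute the relative loss change per iteration over the last early_stopping_patience iterations, and stop once even the best (most negative) recent change is slower than the configured threshold. Below is a simplified, self-contained Python sketch of that criterion; the function name and defaults are illustrative, the actual implementation is in the generalfit.py diff further down.

import numpy as np

def should_stop(loss_history, min_relative_loss_per_iter=5e-5, patience=200, iter_step=1):
    """Return True when the loss improved too slowly over the last `patience` iterations.

    loss_history: recorded losses; iter_step: optimizer iterations between records
    (e.g. the test loss is only recorded every display_step iterations).
    """
    window = int(np.ceil(patience / iter_step))
    if len(loss_history) - 1 < window:
        return False  # trajectory not long enough yet
    losses = np.asarray(loss_history[-window:], dtype=float)
    # relative loss change per iteration; large negative values mean fast improvement
    d_rel = (losses[1:] - losses[:-1]) / losses[:-1] / iter_step
    # stop if even the best (most negative) recent change is above the (negative) threshold
    return d_rel.min() > -abs(min_relative_loss_per_iter)

# a fully plateaued loss triggers the stop
print(should_stop([1.0] * 201, patience=200))  # True
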
8 changes: 6 additions & 2 deletions docs/pacemaker/inputfile.md
@@ -332,8 +332,12 @@ fit:
## will not be added until the list of functions of the previous body-order is exhausted
## power_order - the order of adding new basis functions is defined by the "power rank" p of a function.
## p = len(ns) + sum(ns) + sum(ls). Functions with the smallest p are added first
#ladder_type: body_order

#ladder_type: body_order

# early stopping
## min_relative_train_loss_per_iter: 5e-5
## min_relative_test_loss_per_iter: 1e-5
## early_stopping_patience: 200

## callbacks during the fitting. Module quick_validation.py should be available for import
## see example/pacemaker_with_callback for more details and examples
2 changes: 1 addition & 1 deletion lib/pybind11/CMakeLists.txt
@@ -5,7 +5,7 @@
# All rights reserved. Use of this source code is governed by a
# BSD-style license that can be found in the LICENSE file.

cmake_minimum_required(VERSION 3.4)
cmake_minimum_required(VERSION 3.7)

# The `cmake_minimum_required(VERSION 3.4...3.22)` syntax does not work with
# some versions of VS that have a patched CMake 3.11. This forces us to emulate
5 changes: 5 additions & 0 deletions src/pyace/data/input_template.yaml
@@ -70,6 +70,11 @@ fit:
# ladder_step: 100
# ladder_type: power_order

# Early stopping
# min_relative_train_loss_per_iter: 5e-5
# min_relative_test_loss_per_iter: 1e-5
# early_stopping_patience: 200

#################################################################
## Backend specification section
#################################################################
95 changes: 92 additions & 3 deletions src/pyace/generalfit.py
@@ -126,6 +126,10 @@ def save_dataset(dataframe, fname):
log.info("Dataset saved into {}".format(fname))


class TestLossChangeTooSmallException(StopIteration):
pass


class GeneralACEFit:
"""
Main fitting wrapper class
@@ -150,6 +154,7 @@ def __init__(self,
seed=None,
callbacks=None
):
self.early_stopping_occured = None
self.seed = seed
if self.seed is not None:
log.info("Set numpy random seed to {}".format(self.seed))
@@ -293,11 +298,11 @@ def __init__(self,

self.data_config = data_config
self.weighting_policy_spec = self.fit_config.get(FIT_WEIGHTING_KW)
display_step = backend_config.get('display_step', 20)
self.display_step = backend_config.get('display_step', 20)
if self.ladder_scheme:
self.metrics_aggregator = MetricsAggregator(extended_display_step=display_step)
self.metrics_aggregator = MetricsAggregator(extended_display_step=self.display_step)
else:
self.metrics_aggregator = MetricsAggregator(extended_display_step=display_step,
self.metrics_aggregator = MetricsAggregator(extended_display_step=self.display_step,
ladder_metrics_filename=None)
self.fit_backend = FitBackendAdapter(backend_config,
fit_metrics_callback=self.fit_metric_callback,
@@ -354,6 +359,24 @@ def __init__(self,

self.loss_spec = LossFunctionSpecification(**loss_spec_dict)

# attributes for early stopping
self.train_loss_list = []
self.test_loss_list = []
self.early_stopping_occured = False
self.early_stopping_patience = fit_config.get("early_stopping_patience", 200)
self.min_relative_train_loss_per_iter = fit_config.get('min_relative_train_loss_per_iter')
self.min_relative_test_loss_per_iter = fit_config.get('min_relative_test_loss_per_iter')
if self.min_relative_train_loss_per_iter:
self.min_relative_train_loss_per_iter=-abs(self.min_relative_train_loss_per_iter)
log.info(
f"Slowest relative change of TRAIN loss is set to {self.min_relative_train_loss_per_iter :+1.2e}/iter, " +
f"patience = {self.early_stopping_patience} iters")
if self.min_relative_test_loss_per_iter:
self.min_relative_test_loss_per_iter = -abs(self.min_relative_test_loss_per_iter)
log.info(
f"Slowest relative change of TEST loss is set to {self.min_relative_test_loss_per_iter :+1.2e}/iter, " +
f"patience = {self.early_stopping_patience} iters")

def set_core_rep(self, basis_conf):
# automatic repulsion selection
if "repulsion" in self.fit_config and self.fit_config["repulsion"] == "auto":
@@ -372,11 +395,71 @@ def fit_metric_callback(self, metrics_dict, extended_display_step=None):
metrics_dict["cycle_step"] = self.current_fit_cycle
metrics_dict["ladder_step"] = self.current_ladder_step
self.metrics_aggregator.fit_metric_callback(metrics_dict, extended_display_step=extended_display_step)
self.train_loss_list.append(metrics_dict['loss'])
self.log_d_rel_loss(metrics_dict["iter_num"], mode='train')
if self.min_relative_train_loss_per_iter is not None:
self.detect_early_stopping(mode='train')

def test_metric_callback(self, metrics_dict, extended_display_step=None):
metrics_dict["cycle_step"] = self.current_fit_cycle
metrics_dict["ladder_step"] = self.current_ladder_step
self.metrics_aggregator.test_metric_callback(metrics_dict, extended_display_step=extended_display_step)
self.test_loss_list.append(metrics_dict['loss'])
self.log_d_rel_loss(metrics_dict["iter_num"], mode='test')
if self.min_relative_test_loss_per_iter is not None:
self.detect_early_stopping(mode='test')

def compute_d_rel_loss_d_step(self, loss_list, mode):
iter_step = self.display_step if mode == 'test' else 1
min_loss_depth = int(np.ceil(self.early_stopping_patience / iter_step))
# take last min_loss_depth
loss_list = np.array(loss_list[-min_loss_depth:])
d_rel_loss_d_step = (loss_list[1:] - loss_list[:-1]) / loss_list[:-1] / iter_step # normally - big negative
return d_rel_loss_d_step

def log_d_rel_loss(self, iter_num, mode):
if iter_num > 0 and iter_num % self.display_step == 0 and not self.early_stopping_occured:
loss_list = self.get_loss_list(mode)
d_rel_loss_d_step = self.compute_d_rel_loss_d_step(loss_list, mode)
if len(d_rel_loss_d_step) > 0:
last_d_rel_loss_d_step = d_rel_loss_d_step[-1]
log.info(f"Last relative {mode.upper()} loss change {last_d_rel_loss_d_step :+1.2e}/iter")

def get_loss_list(self, mode):
assert mode in ['train', 'test'], f"Unsupported {mode=}"

if mode == 'train':
return self.train_loss_list
elif mode == 'test':
return self.test_loss_list

def detect_early_stopping(self, mode):
loss_list = self.get_loss_list(mode)
if self.early_stopping_occured:
# early stopping already occured
return

iter_step = self.display_step if mode == 'test' else 1
min_loss_depth = int(np.ceil(self.early_stopping_patience / iter_step))

if len(loss_list) - 1 < min_loss_depth: # -1 because test loss is written at it=0
# trajectory is not long enough
return

d_rel_loss_d_step = self.compute_d_rel_loss_d_step(loss_list, mode)

min_relative_loss_per_iter = self.min_relative_test_loss_per_iter if mode == 'test' else self.min_relative_train_loss_per_iter
if min(d_rel_loss_d_step) > min_relative_loss_per_iter:
# early stopping
min_d_rel_loss_d_step = min(d_rel_loss_d_step)
last_d_rel_loss_d_step = d_rel_loss_d_step[-1]
msg = f"EARLY STOPPING: Too small or even positive {mode.upper()} loss change (best={min_d_rel_loss_d_step:+1.2e} / iter, " + \
f"last={last_d_rel_loss_d_step:+1.2e}/iter, " + \
f"threshold = {min_relative_loss_per_iter :+1.2e}/iter) " + \
f"within last {self.early_stopping_patience} iterations. Stopping"
log.info(msg)
self.early_stopping_occured = True
raise TestLossChangeTooSmallException(msg)

def fit(self) -> BBasisConfiguration:
gc.collect()
@@ -474,6 +557,7 @@ def cycle_fitting(self, bbasisconfig: BBasisConfiguration) -> BBasisConfiguration:
num_of_parameters))
log.info("Running fit backend")
self.current_fit_iteration = 0
self.reset_early_stopping()
current_bbasisconfig = self.fit_backend.fit(
current_bbasisconfig,
dataframe=self.fitting_data, loss_spec=self.loss_spec, fit_config=self.fit_config,
@@ -535,6 +619,11 @@ def cycle_fitting(self, bbasisconfig: BBasisConfiguration) -> BBasisConfiguration:
save_interim_potential(current_best_bbasisconfig, potential_filename="interim_potential_best_cycle.yaml")
return current_best_bbasisconfig

def reset_early_stopping(self):
self.early_stopping_occured = False
self.test_loss_list = []
self.train_loss_list = []

@staticmethod
def apply_gaussian_noise(current_bbasisconfig, trainable_parameters_dict, noise_abs_sigma, noise_rel_sigma):
cur_bbasis = ACEBBasisSet(current_bbasisconfig)
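Note that TestLossChangeTooSmallException subclasses StopIteration and is raised from the metric callbacks; the diff does not show where it is caught. The following is only a minimal, library-agnostic sketch of how such an exception can end an iterative fit early — the loop and callback here are toy stand-ins, not pyace API.

class TestLossChangeTooSmallException(StopIteration):
    pass

def run_fit(step, callback, max_iter=1000):
    """Toy optimization loop: `step(it)` returns the loss at iteration `it`,
    `callback` may raise a StopIteration subclass to end the loop early."""
    loss = None
    for it in range(max_iter):
        loss = step(it)
        try:
            callback({"iter_num": it, "loss": loss})
        except TestLossChangeTooSmallException as e:
            print(f"EARLY STOPPING at iteration {it}: {e}")
            break
    return loss

# toy usage: a loss that never improves is stopped after a 10-iteration "patience"
history = []

def plateau_step(it):
    return 1.0  # constant loss, no improvement

def metric_callback(metrics):
    history.append(metrics["loss"])
    if len(history) > 10 and (history[-1] - history[-11]) / history[-11] > -1e-5:
        raise TestLossChangeTooSmallException("loss change too small")

run_fit(plateau_step, metric_callback)
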
2 changes: 1 addition & 1 deletion src/pyace/metrics_aggregator.py
@@ -447,7 +447,7 @@ def print_detailed_metrics(fit_metrics_dict, title='Iteration:'):
log.info('{:<12}'.format(title) +
"#{iter_num:<5}".format(iter_num=iter_num) +
'{:<14}'.format('({numeval} evals):'.format(numeval=fit_metrics_dict["eval_count"])) +
'{:>10}'.format('Loss: ') + "{loss: >3.6f}".format(loss=total_loss) +
'{:>10}'.format('Loss: ') + "{loss: >1.4e}".format(loss=total_loss) +
'{str1:>21}{rmse_epa:>.2f} ({low_rmse_e:>.2f}) meV/at' \
.format(str1=" | RMSE Energy(low): ",
rmse_epa=1e3 * fit_metrics_dict["rmse_epa"],
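The one-line change above switches the printed total loss from fixed-point to scientific notation, so small losses stay readable once the fit converges. A quick illustration of the two format specs (plain Python, not pyace code):

loss = 7.34e-05
print("{loss: >3.6f}".format(loss=loss))  # old format -> '0.000073' (small losses lose precision)
print("{loss: >1.4e}".format(loss=loss))  # new format -> '7.3400e-05' (magnitude always visible)
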
15 changes: 12 additions & 3 deletions src/pyace/preparedata.py
@@ -669,9 +669,18 @@ def prepare_datasets(self):
self.fitting_data, self.test_data = train_test_split(self.fitting_data, test_size=test_size)
self.test_data = self.process_dataset(self.test_data)

# apply weights (TODO: for joint train+test?)
self.fitting_data = apply_weights(self.fitting_data, self.weighting_policy_spec, self.ignore_weights)
self.test_data = apply_weights(self.test_data, self.weighting_policy_spec, self.ignore_weights)
# apply weights
if self.test_data is not None:
# for joint train+test
self.fitting_data["train"] = True
self.test_data["train"] = False
joint_df = pd.concat([self.fitting_data, self.test_data], axis=0)
joint_df = apply_weights(joint_df, self.weighting_policy_spec, self.ignore_weights)
self.fitting_data = joint_df.query("train").reset_index(drop=True)
self.test_data = joint_df.query("~train").reset_index(drop=True)
# self.test_data = apply_weights(self.test_data, self.weighting_policy_spec, self.ignore_weights)
else:
self.fitting_data = apply_weights(self.fitting_data, self.weighting_policy_spec, self.ignore_weights)

# decrease augmented weights
aug_factor = self.data_config.get("aug_factor", 1e-4)
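The reworked weighting block above concatenates the train and test data, applies the weighting policy once on the joint frame, and splits it back using a boolean train column, so both subsets are weighted on the same footing. A toy sketch of that pattern follows; toy_apply_weights is only a stand-in for pyace's apply_weights, and the data is invented.

import pandas as pd

def toy_apply_weights(df):
    # stand-in for apply_weights: weight rows by inverse energy rank over the WHOLE frame
    df = df.copy()
    df["w"] = 1.0 / df["energy"].rank()
    return df

fitting_data = pd.DataFrame({"energy": [1.0, 3.0]})
test_data = pd.DataFrame({"energy": [2.0]})

fitting_data["train"] = True
test_data["train"] = False
joint_df = pd.concat([fitting_data, test_data], axis=0)
joint_df = toy_apply_weights(joint_df)                    # weights see train and test together
fitting_data = joint_df.query("train").reset_index(drop=True)
test_data = joint_df.query("~train").reset_index(drop=True)
print(fitting_data)
print(test_data)
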
8 changes: 6 additions & 2 deletions tests/test-CLI/Cu-I/input.yaml
@@ -72,15 +72,19 @@ fit:

## scipy.minimze algorithm: BFGS / L-BFGS-B / Nelder-Mead / etc...
optimizer: BFGS
repulsion: auto

## maximum number of scipy.minimize iterations
maxiter: 20

# early stopping
min_relative_train_loss_per_iter: 5e-5
min_relative_test_loss_per_iter: 1e-5
early_stopping_patience: 10

#################################################################
## Backend specification section
#################################################################
backend:
evaluator: tensorpot
batch_size: 100
display_step: 50
display_step: 10
