diff --git a/mpisppy/phbase.py b/mpisppy/phbase.py index 9410653bf..27751e62b 100644 --- a/mpisppy/phbase.py +++ b/mpisppy/phbase.py @@ -8,6 +8,7 @@ ############################################################################### import time import logging +import math import numpy as np import mpisppy.MPI as MPI @@ -689,6 +690,10 @@ def attach_PH_to_objective(self, add_duals, add_prox, add_smooth=0): self.prox_approx_tol = self.options['proximal_linearization_tolerance'] else: self.prox_approx_tol = 1.e-1 + # The proximal approximation code now checks the tolerance based on the x-coordinates + # as opposed to the y-coordinates. Therefore, we will use the square root of the + # y-coordinate tolerance. + self.prox_approx_tol = math.sqrt(self.prox_approx_tol) else: self._prox_approx = False @@ -734,7 +739,7 @@ def attach_PH_to_objective(self, add_duals, add_prox, add_smooth=0): elif self._prox_approx: xvarsqrd = scenario._mpisppy_model.xsqvar[ndn_i] scenario._mpisppy_data.xsqvar_prox_approx[ndn_i] = \ - ProxApproxManager(xvar, xvarsqrd, xbars[ndn_i], scenario._mpisppy_model.xsqvar_cuts, ndn_i) + ProxApproxManager(scenario._mpisppy_model, xvar, ndn_i) else: xvarsqrd = xvar**2 prox_expr += (scenario._mpisppy_model.rho[ndn_i] / 2.0) * \ diff --git a/mpisppy/utils/prox_approx.py b/mpisppy/utils/prox_approx.py index c11edb5bd..03c213643 100644 --- a/mpisppy/utils/prox_approx.py +++ b/mpisppy/utils/prox_approx.py @@ -8,28 +8,32 @@ ############################################################################### from math import isclose +from array import array +from bisect import bisect from pyomo.core.expr.numeric_expr import LinearExpression # helpers for distance from y = x**2 -def _f(val, x_pnt, y_pnt): - return (( val - x_pnt )**2 + ( val**2 - y_pnt )**2)/2. -def _df(val, x_pnt, y_pnt): - #return 2*(val - x_pnt) + 4*(val**2 - y_pnt)*val - return val*(1 - 2*y_pnt + 2*val*val) - x_pnt -def _d2f(val, x_pnt, y_pnt): - return 1 + 6*val*val - 2*y_pnt +# def _f(val, x_pnt, y_pnt): +# return (( val - x_pnt )**2 + ( val**2 - y_pnt )**2)/2. +# def _df(val, x_pnt, y_pnt): +# #return 2*(val - x_pnt) + 4*(val**2 - y_pnt)*val +# return val*(1 - 2*y_pnt + 2*val*val) - x_pnt +# def _d2f(val, x_pnt, y_pnt): +# return 1 + 6*val*val - 2*y_pnt +# def _newton_step(val, x_pnt, y_pnt): +# return val - _df(val, x_pnt, y_pnt) / _d2f(val, x_pnt, y_pnt) def _newton_step(val, x_pnt, y_pnt): - return val - _df(val, x_pnt, y_pnt) / _d2f(val, x_pnt, y_pnt) + return val - (val * (1 - 2*y_pnt + 2*val*val) - x_pnt) / (1 + 6*val*val - 2*y_pnt) class ProxApproxManager: __slots__ = () - def __new__(cls, xvar, xvarsqrd, xbar, xsqvar_cuts, ndn_i): + def __new__(cls, mpisppy_model, xvar, ndn_i): if xvar.is_integer(): - return ProxApproxManagerDiscrete(xvar, xvarsqrd, xbar, xsqvar_cuts, ndn_i) + return ProxApproxManagerDiscrete(mpisppy_model, xvar, ndn_i) else: - return ProxApproxManagerContinuous(xvar, xvarsqrd, xbar, xsqvar_cuts, ndn_i) + return ProxApproxManagerContinuous(mpisppy_model, xvar, ndn_i) class _ProxApproxManager: ''' @@ -37,13 +41,17 @@ class _ProxApproxManager: ''' __slots__ = () - def __init__(self, xvar, xvarsqrd, xbar, xsqvar_cuts, ndn_i): + def __init__(self, mpisppy_model, xvar, ndn_i): self.xvar = xvar - self.xvarsqrd = xvarsqrd - self.xbar = xbar + self.xvarsqrd = mpisppy_model.xsqvar[ndn_i] + self.cuts = mpisppy_model.xsqvar_cuts + self.xbar = mpisppy_model.xbars[ndn_i] + self.rho = mpisppy_model.rho[ndn_i] + self.W = mpisppy_model.W[ndn_i] self.var_index = ndn_i - self.cuts = xsqvar_cuts self.cut_index = 0 + self.cut_values = array("d") + self.cut_values.append(0.0) self._store_bounds() def _store_bounds(self): @@ -56,61 +64,122 @@ def _store_bounds(self): else: self.ub = self.xvar.ub - def add_cut(self, val, persistent_solver=None): + def add_cut(self, val, tolerance, persistent_solver): ''' create a cut at val ''' pass + def check_and_add_value(self, val, tolerance): + idx = bisect(self.cut_values, val) + # self.cut_values is empty, has one element + # or we're appending to the end + if idx == len(self.cut_values): + if val - self.cut_values[idx-1] < tolerance: + return False + else: + self.cut_values.insert(idx, val) + return True + # here we're at the beginning + if idx == 0: + if self.cut_values[idx] - val < tolerance: + return False + else: + self.cut_values.insert(idx, val) + return True + # in the middle + if self.cut_values[idx] - val < tolerance: + return False + if val - self.cut_values[idx-1] < tolerance: + return False + self.cut_values.insert(idx, val) + return True + + def add_cuts(self, x_val, tolerance, persistent_solver): + x_bar = self.xbar.value + rho = self.rho.value + W = self.W.value + + num_cuts = self.add_cut(x_val, tolerance, persistent_solver) + # rotate x_val around x_bar, the minimizer of (\rho / 2)(x - x_bar)^2 + # to create a vertex at this point + rotated_x_val_x_bar = 2*x_bar - x_val + if not isclose(x_val, rotated_x_val_x_bar, abs_tol=tolerance): + num_cuts += self.add_cut(rotated_x_val_x_bar, tolerance, persistent_solver) + # aug_lagrange_point, is the minimizer of w\cdot x + (\rho / 2)(x - x_bar)^2 + # to create a vertex at this point + aug_lagrange_point = -W / rho + x_bar + if not isclose(x_val, aug_lagrange_point, abs_tol=tolerance): + num_cuts += self.add_cut(2*aug_lagrange_point - x_val, tolerance, persistent_solver) + # finally, create another vertex at the aug_lagrange_point by rotating + # rotated_x_val_x_bar around the aug_lagrange_point + if not isclose(rotated_x_val_x_bar, aug_lagrange_point, abs_tol=tolerance): + num_cuts += self.add_cut(2*aug_lagrange_point - rotated_x_val_x_bar, tolerance, persistent_solver) + # If we only added 0 or 1 cuts initially, add up to two more + # to capture something of the proximal term. This can happen + # when x_bar == x_val and W == 0. + if self.cut_index <= 1: + lb, ub = self.xvar.bounds + upval = 1 + dnval = 1 + if ub is not None: + # after a lot of calculus, you can show that + # this point minimizes the error in the approximation + upval = 2.0 * (ub - x_val) / 3.0 + if lb is not None: + dnval = 2.0 * (x_val - lb) / 3.0 + num_cuts += self.add_cut(x_val + max(upval, tolerance+1e-06), tolerance, persistent_solver) + num_cuts += self.add_cut(x_val - max(dnval, tolerance+1e-06), tolerance, persistent_solver) + # print(f"{x_val=}, {x_bar=}, {W=}") + # print(f"{self.cut_values=}") + # print(f"{self.cut_index=}") + return num_cuts + def check_tol_add_cut(self, tolerance, persistent_solver=None): ''' add a cut if the tolerance is not satified ''' + if self.xvar.fixed: + # don't do anything for fixed variables + return 0 x_pnt = self.xvar.value - x_bar = self.xbar.value y_pnt = self.xvarsqrd.value - f_val = x_pnt**2 + # f_val = x_pnt**2 - #print(f"y-distance: {actual_val - measured_val})") + # print(f"{x_pnt=}, {y_pnt=}, {f_val=}") + # print(f"y-distance: {actual_val - measured_val})") if y_pnt is None: - self.add_cut(x_pnt, persistent_solver) - if not isclose(x_pnt, x_bar, abs_tol=1e-6): - self.add_cut(2*x_bar - x_pnt, persistent_solver) - return True - - if (f_val - y_pnt) > tolerance: - ''' - In this case, we project the point x_pnt, y_pnt onto - the curve y = x**2 by finding the minimum distance - between y = x**2 and x_pnt, y_pnt. - - This involves solving a cubic equation, so instead - we start at x_pnt, y_pnt and run newtons algorithm - to get an approximate good-enough solution. - ''' - this_val = x_pnt - #print(f"initial distance: {_f(this_val, x_pnt, y_pnt)**(0.5)}") - #print(f"this_val: {this_val}") + y_pnt = 0.0 + # We project the point x_pnt, y_pnt onto + # the curve y = x**2 by finding the minimum distance + # between y = x**2 and x_pnt, y_pnt. + # This involves solving a cubic equation, so instead + # we start at x_pnt, y_pnt and run newtons algorithm + # to get an approximate good-enough solution. + this_val = x_pnt + # print(f"initial distance: {_f(this_val, x_pnt, y_pnt)**(0.5)}") + # print(f"this_val: {this_val}") + next_val = _newton_step(this_val, x_pnt, y_pnt) + while not isclose(this_val, next_val, rel_tol=1e-6, abs_tol=1e-6): + # print(f"newton step distance: {_f(next_val, x_pnt, y_pnt)**(0.5)}") + # print(f"next_val: {next_val}") + this_val = next_val next_val = _newton_step(this_val, x_pnt, y_pnt) - while not isclose(this_val, next_val, rel_tol=1e-6, abs_tol=1e-6): - #print(f"newton step distance: {_f(next_val, x_pnt, y_pnt)**(0.5)}") - #print(f"next_val: {next_val}") - this_val = next_val - next_val = _newton_step(this_val, x_pnt, y_pnt) - self.add_cut(next_val, persistent_solver) - if not isclose(next_val, x_bar, abs_tol=1e-6): - self.add_cut(2*x_bar - next_val, persistent_solver) - return True - return False + x_pnt = next_val + return self.add_cuts(x_pnt, tolerance, persistent_solver) class ProxApproxManagerContinuous(_ProxApproxManager): - def add_cut(self, val, persistent_solver=None): + def add_cut(self, val, tolerance, persistent_solver): ''' create a cut at val using a taylor approximation ''' - # handled by bound - if val == 0: + lb, ub = self.xvar.bounds + if lb is not None and val < lb: + val = lb + if ub is not None and val > ub: + val = ub + if not self.check_and_add_value(val, tolerance): return 0 # f'(a) = 2*val # f(a) - f'(a)a = val*val - 2*val*val @@ -145,11 +214,16 @@ def _compute_mb(val): class ProxApproxManagerDiscrete(_ProxApproxManager): - def add_cut(self, val, persistent_solver=None): + def add_cut(self, val, tolerance, persistent_solver): ''' create up to two cuts at val, exploiting integrality ''' val = int(round(val)) + if tolerance > 1: + # TODO: We should consider how to handle this, maybe. + # Tolerances less than or equal 1 won't affect + # discrete cuts + pass ## cuts are indexed by the x-value to the right ## e.g., the cut for (2,3) is indexed by 3 diff --git a/mpisppy/utils/sputils.py b/mpisppy/utils/sputils.py index df9f4b26c..5118bf659 100644 --- a/mpisppy/utils/sputils.py +++ b/mpisppy/utils/sputils.py @@ -983,8 +983,9 @@ def nonant_cost_coeffs(s): for var in repn.nonlinear_vars: if id(var) in s._mpisppy_data.varid_to_nonant_index: raise RuntimeError( - "Found nonlinear variables in the objective function. " - f"Variable {var} has nonlinear interactions in the objective funtion" + "A call to nonant_cost_coefficient found nonlinear variables in the objective function. " + f"Variable {var} has nonlinear interactions in the objective funtion. " + "Consider using gradient-based rho." ) return cost_coefs