diff --git a/README.md b/README.md
index 639b0d0..511896f 100644
--- a/README.md
+++ b/README.md
@@ -96,6 +96,8 @@ These inputs must conform to the [SCS convention](https://github.com/bodono/scs-
 The values in `cone_dict` denote the sizes of each cone; the values of `diffcp.SOC`, `diffcp.PSD`, and `diffcp.EXP` should be lists. The order of the rows of `A` must match the ordering of the cones given above. For more details, consult the [SCS documentation](https://github.com/cvxgrp/scs/blob/master/README.md).
 
+To enable [Lagrangian Proximal Gradient Descent (LPGD)](https://arxiv.org/abs/2407.05920) differentiation of the conic program based on efficient finite differences, provide the `mode="lpgd"` option along with the argument `derivative_kwargs=dict(tau=0.1, rho=0.1)` to specify the perturbation and regularization strengths. Alternatively, the derivative kwargs can also be passed directly to the returned `derivative` and `adjoint_derivative` functions.
+
 #### Return value
 The function `solve_and_derivative` returns a tuple
diff --git a/diffcp/cone_program.py b/diffcp/cone_program.py
index 9919108..d20cee0 100644
--- a/diffcp/cone_program.py
+++ b/diffcp/cone_program.py
@@ -12,6 +12,7 @@ import _diffcp
 import diffcp.cones as cone_lib
+from diffcp.utils import compute_perturbed_solution, compute_adjoint_perturbed_solution
 
 
 def pi(z, cones):
@@ -24,14 +25,14 @@ def pi(z, cones):
         [u, cone_lib.pi(v, cones, dual=True), np.maximum(w, 0)])
 
 
-def solve_and_derivative_wrapper(A, b, c, cone_dict, warm_start, mode, kwargs):
+def solve_and_derivative_wrapper(A, b, c, P, cone_dict, warm_start, mode, kwargs):
     """A wrapper around solve_and_derivative for the batch function."""
     return solve_and_derivative(
-        A, b, c, cone_dict, warm_start=warm_start, mode=mode, **kwargs)
+        A, b, c, cone_dict, warm_start=warm_start, mode=mode, P=P, **kwargs)
 
 
 def solve_and_derivative_batch(As, bs, cs, cone_dicts, n_jobs_forward=-1, n_jobs_backward=-1,
-                               mode="lsqr", warm_starts=None, **kwargs):
+                               mode="lsqr", warm_starts=None, Ps=None, **kwargs):
     """
     Solves a batch of cone programs and returns a function that
     performs a batch of derivatives. Uses a ThreadPool to perform
@@ -49,8 +50,9 @@ def solve_and_derivative_batch(As, bs, cs, cone_dicts, n_jobs_forward=-1, n_jobs
             means serial and n_jobs_forward = -1 defaults to the number of CPUs (default=-1).
         n_jobs_backward - Number of jobs to use in the backward pass. n_jobs_backward = 1
             means serial and n_jobs_backward = -1 defaults to the number of CPUs (default=-1).
-        mode - Differentiation mode in ["lsqr", "lsmr", "dense"].
+        mode - Differentiation mode in ["lsqr", "lsmr", "dense", "lpgd", "lpgd_right", "lpgd_left"].
         warm_starts - A list of warm starts.
+        Ps - (optional) A list of P matrices.
         kwargs - kwargs sent to scs.
 
     Returns:
@@ -58,10 +60,11 @@ def solve_and_derivative_batch(As, bs, cs, cone_dicts, n_jobs_forward=-1, n_jobs
         ys: A list of y arrays.
         ss: A list of s arrays.
         D_batch: A callable with signature
-                D_batch(dAs, dbs, dcs) -> dxs, dys, dss
+                D_batch(dAs, dbs, dcs, dPs=None) -> dxs, dys, dss
             This callable maps lists of problem data derivatives to lists
             of solution derivatives.
         DT_batch: A callable with signature
-                DT_batch(dxs, dys, dss) -> dAs, dbs, dcs
+                DT_batch(dxs, dys, dss, return_dP=False) -> dAs, dbs, dcs
+                DT_batch(dxs, dys, dss, return_dP=True) -> dAs, dbs, dcs, dPs
             This callable maps lists of solution derivatives to lists of
             problem data derivatives.
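+
+    Example (illustrative sketch, not part of the PR itself; it uses the
+    `diffcp.utils.least_squares_eq_scs_data` helper that the batch examples
+    in this PR rely on, and the exact problem data is immaterial):
+
+        As, bs, cs, Ks = [], [], [], []
+        for _ in range(4):
+            A, b, c, K = diffcp.utils.least_squares_eq_scs_data(20, 10)
+            As, bs, cs, Ks = As + [A], bs + [b], cs + [c], Ks + [K]
+        xs, ys, ss, D_batch, DT_batch = solve_and_derivative_batch(
+            As, bs, cs, Ks, mode="lpgd", derivative_kwargs=dict(tau=0.1, rho=0.1))
+        dAs, dbs, dcs = DT_batch(xs, ys, ss)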
""" batch_size = len(As) @@ -73,13 +76,16 @@ def solve_and_derivative_batch(As, bs, cs, cone_dicts, n_jobs_forward=-1, n_jobs n_jobs_backward = mp.cpu_count() n_jobs_forward = min(batch_size, n_jobs_forward) n_jobs_backward = min(batch_size, n_jobs_backward) + + if Ps is None: + Ps = [None] * batch_size if n_jobs_forward == 1: # serial xs, ys, ss, Ds, DTs = [], [], [], [], [] for i in range(batch_size): x, y, s, D, DT = solve_and_derivative(As[i], bs[i], cs[i], - cone_dicts[i], warm_starts[i], mode=mode, **kwargs) + cone_dicts[i], warm_starts[i], mode=mode, P=Ps[i], **kwargs) xs += [x] ys += [y] ss += [s] @@ -88,8 +94,8 @@ def solve_and_derivative_batch(As, bs, cs, cone_dicts, n_jobs_forward=-1, n_jobs else: # thread pool pool = ThreadPool(processes=n_jobs_forward) - args = [(A, b, c, cone_dict, warm_start, mode, kwargs) for A, b, c, cone_dict, warm_start in - zip(As, bs, cs, cone_dicts, warm_starts)] + args = [(A, b, c, P, cone_dict, warm_start, mode, kwargs) for A, b, c, P, cone_dict, warm_start in + zip(As, bs, cs, Ps, cone_dicts, warm_starts)] with threadpool_limits(limits=1): results = pool.starmap(solve_and_derivative_wrapper, args) pool.close() @@ -109,21 +115,26 @@ def D_batch(dAs, dbs, dcs, **kwargs): dss += [ds] return dxs, dys, dss - def DT_batch(dxs, dys, dss, **kwargs): - dAs, dbs, dcs = [], [], [] + def DT_batch(dxs, dys, dss, return_dP=False, **kwargs): + dAs, dbs, dcs, dPs = [], [], [], [] for i in range(batch_size): - dA, db, dc = DTs[i](dxs[i], dys[i], dss[i], **kwargs) - dAs += [dA] - dbs += [db] - dcs += [dc] - return dAs, dbs, dcs + ret = DTs[i](dxs[i], dys[i], dss[i], return_dP=return_dP, **kwargs) + dAs += [ret[0]] + dbs += [ret[1]] + dcs += [ret[2]] + if return_dP: + dPs += [ret[3]] + if return_dP: + return dAs, dbs, dcs, dPs + else: + return dAs, dbs, dcs else: - def D_batch(dAs, dbs, dcs, **kwargs): + def D_batch(dAs, dbs, dcs, dPs=None, **kwargs): pool = ThreadPool(processes=n_jobs_backward) def Di(i): - return Ds[i](dAs[i], dbs[i], dcs[i], **kwargs) + return Ds[i](dAs[i], dbs[i], dcs[i], dPs[i], **kwargs) results = pool.map(Di, range(batch_size)) pool.close() dxs = [r[0] for r in results] @@ -131,29 +142,33 @@ def Di(i): dss = [r[2] for r in results] return dxs, dys, dss - def DT_batch(dxs, dys, dss, **kwargs): + def DT_batch(dxs, dys, dss, return_dP=False, **kwargs): pool = ThreadPool(processes=n_jobs_backward) def DTi(i): - return DTs[i](dxs[i], dys[i], dss[i], **kwargs) + return DTs[i](dxs[i], dys[i], dss[i], return_dP=return_dP, **kwargs) results = pool.map(DTi, range(batch_size)) pool.close() dAs = [r[0] for r in results] dbs = [r[1] for r in results] dcs = [r[2] for r in results] - return dAs, dbs, dcs + if return_dP: + dPs = [r[3] for r in results] + return dAs, dbs, dcs, dPs + else: + return dAs, dbs, dcs return xs, ys, ss, D_batch, DT_batch -def solve_only_wrapper(A, b, c, cone_dict, warm_start, kwargs): +def solve_only_wrapper(A, b, c, P, cone_dict, warm_start, kwargs): """A wrapper around solve_only for the batch function""" return solve_only( - A, b, c, cone_dict, warm_start=warm_start, **kwargs) + A, b, c, cone_dict, warm_start=warm_start, P=P, **kwargs) def solve_only_batch(As, bs, cs, cone_dicts, n_jobs_forward=-1, - warm_starts=None, **kwargs): + warm_starts=None, Ps=None, **kwargs): """ Solves a batch of cone programs. 
Uses a ThreadPool to perform operations across @@ -183,15 +198,15 @@ def solve_only_batch(As, bs, cs, cone_dicts, n_jobs_forward=-1, xs, ys, ss = [], [], [] for i in range(batch_size): x, y, s = solve_only(As[i], bs[i], cs[i], cone_dicts[i], - warm_starts[i], **kwargs) + warm_starts[i], Ps[i], **kwargs) xs += [x] ys += [y] ss += [s] else: # thread pool pool = ThreadPool(processes=n_jobs_forward) - args = [(A, b, c, cone_dict, warm_start, kwargs) for A, b, c, cone_dict, warm_start in - zip(As, bs, cs, cone_dicts, warm_starts)] + args = [(A, b, c, P, cone_dict, warm_start, kwargs) for A, b, c, P, cone_dict, warm_start in + zip(As, bs, cs, Ps, cone_dicts, warm_starts)] with threadpool_limits(limits=1): results = pool.starmap(solve_only_wrapper, args) pool.close() @@ -207,21 +222,21 @@ class SolverError(Exception): def solve_and_derivative(A, b, c, cone_dict, warm_start=None, mode='lsqr', - solve_method='SCS', **kwargs): + solve_method='SCS', P=None, **kwargs): """Solves a cone program, returns its derivative as an abstract linear map. This function solves a convex cone program, with primal-dual problems - min. c^T x min. b^Ty - subject to Ax + s = b subject to A^Ty + c = 0 + min. x^T P x + c^T x min. x^T P x + b^T y + subject to Ax + s = b subject to Px + A^Ty + c = 0 s \in K y \in K^* - The problem data A, b, and c correspond to the arguments `A`, `b`, and `c`, + The problem data A, b, c, and P correspond to the arguments `A`, `b`, `c`, and `P`, and the convex cone `K` corresponds to `cone_dict`; x and s are the primal variables, and y is the dual variable. This function returns a solution (x, y, s) to the program. It also returns two functions that respectively represent application of the derivative - (at A, b, and c) and its adjoint. + (at A, b, c, and P) and its adjoint. The problem data must be formatted according to the SCS convention, see https://github.com/cvxgrp/scs. @@ -248,8 +263,9 @@ def solve_and_derivative(A, b, c, cone_dict, warm_start=None, mode='lsqr', exponential cones. See SCS documentation for more details. warm_start: (optional) A tuple (x, y, s) at which to warm-start SCS. mode: (optional) Which mode to compute derivative with, options are - ["dense", "lsqr", "lsmr"]. + ["dense", "lsqr", "lsmr", "lpgd", "lpgd_right", "lpgd_left"]. solve_method: (optional) Name of solver to use; SCS, ECOS, or Clarabel. + P: (optional) A sparse SciPy matrix in CSC format. kwargs: (optional) Keyword arguments to send to the solver. Returns: @@ -257,22 +273,25 @@ def solve_and_derivative(A, b, c, cone_dict, warm_start=None, mode='lsqr', y: Optimal value of the dual variable y. s: Optimal value of the slack variable s. derivative: A callable with signature - derivative(dA, db, dc) -> dx, dy, ds - that applies the derivative of the cone program at (A, b, and c) - to the perturbations `dA`, `db`, `dc`. `dA` must be a SciPy sparse + derivative(dA, db, dc, dP) -> dx, dy, ds + that applies the derivative of the cone program at (A, b, c, P) + to the perturbations `dA`, `db`, `dc`, `dP`. `dA` must be a SciPy sparse matrix in CSC format with the same sparsity pattern as `A`; + `dP` must be a SciPy sparse matrix in CSC format with the same sparsity pattern as `P`; `db` and `dc` are NumPy arrays. 
adjoint_derivative: A callable with signature - adjoint_derivative(dx, dy, ds) -> dA, db, dc + adjoint_derivative(dx, dy, ds, return_dP=False) -> dA, db, dc + adjoint_derivative(dx, dy, ds, return_dP=True) -> dA, db, dc, dP that applies the adjoint of the derivative of the cone program at - (A, b, and c) to the perturbations `dx`, `dy`, `ds`, which must be + (A, b, c, P) to the perturbations `dx`, `dy`, `ds`. `dx`, `dy`, `ds` must be NumPy arrays. The output `dA` matches the sparsity pattern of `A`. + If `return_dP` is True, the output `dP` matches the sparsity pattern of `P`. Raises: SolverError: if the cone program is infeasible or unbounded. """ result = solve_and_derivative_internal( A, b, c, cone_dict, warm_start=warm_start, mode=mode, - solve_method=solve_method, **kwargs) + solve_method=solve_method, P=P, **kwargs) x = result["x"] y = result["y"] s = result["s"] @@ -282,7 +301,7 @@ def solve_and_derivative(A, b, c, cone_dict, warm_start=None, mode='lsqr', def solve_only(A, b, c, cone_dict, warm_start=None, - solve_method='SCS', **kwargs): + solve_method='SCS', P=None, **kwargs): """ Solves a cone program and returns its solution. @@ -298,7 +317,7 @@ def solve_only(A, b, c, cone_dict, warm_start=None, result = solve_internal( A, b, c, cone_dict, warm_start=warm_start, - solve_method=solve_method, **kwargs) + solve_method=solve_method, P=P, **kwargs) x = result["x"] y = result["y"] s = result["s"] @@ -306,7 +325,7 @@ def solve_only(A, b, c, cone_dict, warm_start=None, def solve_internal(A, b, c, cone_dict, solve_method=None, - warm_start=None, raise_on_error=True, **kwargs): + warm_start=None, raise_on_error=True, P=None, **kwargs): if solve_method is None: psd_cone = ('s' in cone_dict) and (cone_dict['s'] != []) @@ -335,6 +354,7 @@ def solve_internal(A, b, c, cone_dict, solve_method=None, del kwargs["eps"] data = { + "P": P, "A": A, "b": b, "c": c @@ -349,7 +369,7 @@ def solve_internal(A, b, c, cone_dict, solve_method=None, result = scs.solve(data, cone_dict, **kwargs) status = result["info"]["status"] - inaccurate_status = {"Solved/Inaccurate", "solved (inaccurate - reached max_iters)"} + inaccurate_status = {"Solved/Inaccurate", "solved (inaccurate - reached max_iters)", "solved (inaccurate - reached time_limit_secs)"} if status in inaccurate_status and "acceleration_lookback" not in kwargs: # anderson acceleration is sometimes unstable result = scs.solve( @@ -367,14 +387,14 @@ def solve_internal(A, b, c, cone_dict, solve_method=None, return result elif solve_method == "ECOS": + if P is not None: + raise ValueError("ECOS does not support quadratic objectives.") if warm_start is not None: raise ValueError('ECOS does not support warmstart.') if ('s' in cone_dict) and (cone_dict['s'] != []): raise ValueError("PSD cone not supported by ECOS.") if ('ed' in cone_dict) and (cone_dict['ed'] != 0): raise NotImplementedError("Dual exponential cones not supported yet.") - if warm_start is not None: - raise ValueError("ECOS does not support warm starting.") len_eq = cone_dict[cone_lib.EQ_DIM] C_ecos = c G_ecos = A[len_eq:] @@ -448,8 +468,11 @@ def solve_internal(A, b, c, cone_dict, solve_method=None, 'iter': solution['info']['iter'], 'pobj': solution['info']['pcost']} elif solve_method == "Clarabel": - # for now set P to 0 - P = sparse.csc_matrix((c.size, c.size)) + if warm_start is not None: + raise ValueError("Clarabel currently does not support warmstarting.") + + if P is None: + P = sparse.csc_matrix((c.size, c.size)) cones = [] if "z" in cone_dict: @@ -510,10 +533,13 @@ def 
solve_internal(A, b, c, cone_dict, solve_method=None, return result def solve_and_derivative_internal(A, b, c, cone_dict, solve_method=None, - warm_start=None, mode='lsqr', raise_on_error=True, **kwargs): - if mode not in ["dense", "lsqr", "lsmr"]: + warm_start=None, mode='lsqr', raise_on_error=True, P=None, **kwargs): + if mode not in ["dense", "lsqr", "lsmr", "lpgd", "lpgd_right", "lpgd_left"]: raise ValueError("Unsupported mode {}; the supported modes are " - "'dense', 'lsqr' and 'lsmr'".format(mode)) + "'dense', 'lsqr', 'lsmr', 'lpgd', 'lpgd_right' and 'lpgd_left'".format(mode)) + if mode in ["dense", "lsqr", "lsmr"] and P is not None: + raise ValueError("Dense, lsqr, and lsmr modes currently do not support quadratic objectives. "\ + "Consider switching to 'lpgd' mode.") if np.isnan(A.data).any(): raise RuntimeError("Found a NaN in A.") @@ -529,8 +555,13 @@ def solve_and_derivative_internal(A, b, c, cone_dict, solve_method=None, # eliminate explicit zeros in A, we no longer need them A.eliminate_zeros() + if "derivative_kwargs" in kwargs: + derivative_kwargs = kwargs.pop("derivative_kwargs") + else: + derivative_kwargs = {} + solver_kwargs = kwargs result = solve_internal(A, b, c, cone_dict, solve_method=solve_method, - warm_start=warm_start, raise_on_error=raise_on_error, **kwargs) + warm_start=warm_start, raise_on_error=raise_on_error, P=P, **kwargs) x = result["x"] y = result["y"] s = result["s"] @@ -538,92 +569,233 @@ def solve_and_derivative_internal(A, b, c, cone_dict, solve_method=None, # pre-compute quantities for the derivative m, n = A.shape N = m + n + 1 - cones = cone_lib.parse_cone_dict(cone_dict) - cones_parsed = cone_lib.parse_cone_dict_cpp(cones) - z = (x, y - s, np.array([1])) - u, v, w = z - Q = sparse.bmat([ - [None, A.T, np.expand_dims(c, - 1)], - [-A, None, np.expand_dims(b, -1)], - [-np.expand_dims(c, -1).T, -np.expand_dims(b, -1).T, None] - ]) - - D_proj_dual_cone = _diffcp.dprojection(v, cones_parsed, True) - if mode == "dense": - Q_dense = Q.todense() - M = _diffcp.M_dense(Q_dense, cones_parsed, u, v, w) - MT = M.T - elif mode in ("lsqr", "lsmr"): - M = _diffcp.M_operator(Q, cones_parsed, u, v, w) - MT = M.transpose() - - pi_z = pi(z, cones) - - def derivative(dA, db, dc, **kwargs): - """Applies derivative at (A, b, c) to perturbations dA, db, dc + if "lpgd" not in mode: # pre-compute quantities for the derivative + cones = cone_lib.parse_cone_dict(cone_dict) + cones_parsed = cone_lib.parse_cone_dict_cpp(cones) + z = (x, y - s, np.array([1])) + u, v, w = z + + Q = sparse.bmat([ + [None, A.T, np.expand_dims(c, - 1)], + [-A, None, np.expand_dims(b, -1)], + [-np.expand_dims(c, -1).T, -np.expand_dims(b, -1).T, None] + ]) + + D_proj_dual_cone = _diffcp.dprojection(v, cones_parsed, True) + if mode == "dense": + Q_dense = Q.todense() + M = _diffcp.M_dense(Q_dense, cones_parsed, u, v, w) + MT = M.T + elif mode in ("lsqr", "lsmr"): + M = _diffcp.M_operator(Q, cones_parsed, u, v, w) + MT = M.transpose() + + pi_z = pi(z, cones) + + def derivative(dA, db, dc, dP=None, **kwargs): + """Applies derivative at (A, b, c) to perturbations dA, db, dc, dP Args: dA: SciPy sparse matrix in CSC format; must have same sparsity pattern as the matrix `A` from the cone program db: NumPy array representing perturbation in `b` dc: NumPy array representing perturbation in `c` + dP: (optional) SciPy sparse matrix in CSC format; must have + same sparsity pattern as `P` from the cone program Returns: NumPy arrays dx, dy, ds, the result of applying the derivative to the perturbations. 
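+
+           Example (hedged sketch): in the LPGD modes, `tau` and `rho` may
+           also be supplied per call if they were not already fixed via
+           `derivative_kwargs` at solve time, e.g.
+
+               dx, dy, ds = derivative(dA, db, dc, tau=0.1, rho=0.1)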
""" - dQ = sparse.bmat([ - [None, dA.T, np.expand_dims(dc, - 1)], - [-dA, None, np.expand_dims(db, -1)], - [-np.expand_dims(dc, -1).T, -np.expand_dims(db, -1).T, None] - ]) - rhs = dQ @ pi_z - if np.allclose(rhs, 0): - dz = np.zeros(rhs.size) - elif mode == "dense": - dz = _diffcp._solve_derivative_dense(M, MT, rhs) - elif mode == "lsqr": - dz = _diffcp.lsqr(M, rhs).solution - elif mode == "lsmr": - M_sp = sparse.linalg.LinearOperator(dQ.shape, matvec=M.matvec, rmatvec=M.rmatvec) - dz, istop, itn, normr, normar, norma, conda, normx = sparse.linalg.lsmr(M_sp, rhs, maxiter=10*M_sp.shape[0], atol=1e-12, btol=1e-12) - - du, dv, dw = np.split(dz, [n, n + m]) - dx = du - x * dw - dy = D_proj_dual_cone.matvec(dv) - y * dw - ds = D_proj_dual_cone.matvec(dv) - dv - s * dw - return -dx, -dy, -ds - - def adjoint_derivative(dx, dy, ds, **kwargs): + if P is None and dP is not None: + raise ValueError("P must be provided if dP is not None.") + + if "lpgd" in mode: + dx, dy, ds = derivative_lpgd(dA, db, dc, dP=dP, **derivative_kwargs, **kwargs) + return dx, dy, ds + else: + dQ = sparse.bmat([ + [None, dA.T, np.expand_dims(dc, - 1)], + [-dA, None, np.expand_dims(db, -1)], + [-np.expand_dims(dc, -1).T, -np.expand_dims(db, -1).T, None] + ]) + rhs = dQ @ pi_z + if np.allclose(rhs, 0): + dz = np.zeros(rhs.size) + elif mode == "dense": + dz = _diffcp._solve_derivative_dense(M, MT, rhs) + elif mode == "lsqr": + dz = _diffcp.lsqr(M, rhs).solution + elif mode == "lsmr": + M_sp = sparse.linalg.LinearOperator(dQ.shape, matvec=M.matvec, rmatvec=M.rmatvec) + dz, istop, itn, normr, normar, norma, conda, normx = sparse.linalg.lsmr(M_sp, rhs, maxiter=10*M_sp.shape[0], atol=1e-12, btol=1e-12) + + du, dv, dw = np.split(dz, [n, n + m]) + dx = du - x * dw + dy = D_proj_dual_cone.matvec(dv) - y * dw + ds = D_proj_dual_cone.matvec(dv) - dv - s * dw + return -dx, -dy, -ds + + def adjoint_derivative(dx, dy, ds, return_dP=False, **kwargs): """Applies adjoint of derivative at (A, b, c) to perturbations dx, dy, ds Args: dx: NumPy array representing perturbation in `x` dy: NumPy array representing perturbation in `y` ds: NumPy array representing perturbation in `s` + return_dP: flag to return dP Returns: (`dA`, `db`, `dc`), the result of applying the adjoint to the perturbations; the sparsity pattern of `dA` matches that of `A`. """ - dw = -(x @ dx + y @ dy + s @ ds) - dz = np.concatenate( - [dx, D_proj_dual_cone.rmatvec(dy + ds) - ds, np.array([dw])]) - - if np.allclose(dz, 0): - r = np.zeros(dz.shape) - elif mode == "dense": - r = _diffcp._solve_adjoint_derivative_dense(M, MT, dz) - elif mode == "lsqr": - r = _diffcp.lsqr(MT, dz).solution - elif mode == "lsmr": - MT_sp = sparse.linalg.LinearOperator(dz.shape*2, matvec=MT.matvec, rmatvec=MT.rmatvec) - r, istop, itn, normr, normar, norma, conda, normx = sparse.linalg.lsmr(MT_sp, dz, maxiter=10*MT_sp.shape[0], atol=1e-10, btol=1e-10) - - values = pi_z[cols] * r[rows + n] - pi_z[n + rows] * r[cols] - dA = sparse.csc_matrix((values, (rows, cols)), shape=A.shape) - db = pi_z[n:n + m] * r[-1] - pi_z[-1] * r[n:n + m] - dc = pi_z[:n] * r[-1] - pi_z[-1] * r[:n] - - return dA, db, dc + if "lpgd" in mode: + return adjoint_derivative_lpgd(dx, dy, ds, return_dP=return_dP, **derivative_kwargs, **kwargs) + else: + if return_dP: + raise ValueError("Modes 'dense', 'lsqr', 'lsmr' currently do not support "\ + "differentiating with respect to P. 
Consider switching to 'lpgd' mode.") + dw = -(x @ dx + y @ dy + s @ ds) + dz = np.concatenate( + [dx, D_proj_dual_cone.rmatvec(dy + ds) - ds, np.array([dw])]) + + if np.allclose(dz, 0): + r = np.zeros(dz.shape) + elif mode == "dense": + r = _diffcp._solve_adjoint_derivative_dense(M, MT, dz) + elif mode == "lsqr": + r = _diffcp.lsqr(MT, dz).solution + elif mode == "lsmr": + MT_sp = sparse.linalg.LinearOperator(dz.shape*2, matvec=MT.matvec, rmatvec=MT.rmatvec) + r, istop, itn, normr, normar, norma, conda, normx = sparse.linalg.lsmr(MT_sp, dz, maxiter=10*MT_sp.shape[0], atol=1e-10, btol=1e-10) + + values = pi_z[cols] * r[rows + n] - pi_z[n + rows] * r[cols] + dA = sparse.csc_matrix((values, (rows, cols)), shape=A.shape) + db = pi_z[n:n + m] * r[-1] - pi_z[-1] * r[n:n + m] + dc = pi_z[:n] * r[-1] - pi_z[-1] * r[:n] + return dA, db, dc + + def derivative_lpgd(dA, db, dc, tau, rho, dP=None): + """Computes standard finite difference derivative at (A, b, c) to perturbations dA, db, dc, dP + with optional regularization rho. + Args: + dA: SciPy sparse matrix in CSC format; must have same sparsity + pattern as the matrix `A` from the cone program + db: NumPy array representing perturbation in `b` + dc: NumPy array representing perturbation in `c` + tau: Perturbation strength parameter + rho: Regularization strength parameter + dP: (optional) SciPy sparse matrix in CSC format; must have + same sparsity pattern as `P` from the cone program + Returns: + NumPy arrays dx, dy, ds, the result of applying the derivative + to the perturbations. + """ + + if tau is None or tau <= 0: + raise ValueError(f"LPGD mode requires tau > 0, got tau={tau}.") + if rho < 0: + raise ValueError(f"LPGD mode requires rho >= 0, got rho={rho}.") + + if mode in ["lpgd", "lpgd_right"]: # Perturb the problem to the right + try: + x_right, y_right, s_right = compute_perturbed_solution( + dA, db, dc, dP, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal) + except SolverError as e: + raise SolverError(f"Computation of right-perturbed problem failed: {e}. "\ + "Consider decreasing tau or swiching to 'lpgd_left' mode.") + + if mode in ["lpgd", "lpgd_left"]: # Perturb the problem to the left + try: + x_left, y_left, s_left = compute_perturbed_solution( + dA, db, dc, dP, -tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal) + except SolverError as e: + raise SolverError(f"Computation of left-perturbed problem failed: {e}. "\ + "Consider decreasing tau or swiching to 'lpgd_right' mode.") + + if mode == "lpgd": + dx = (x_right - x_left) / (2 * tau) + dy = (y_right - y_left) / (2 * tau) + ds = (s_right - s_left) / (2 * tau) + elif mode == "lpgd_right": + dx = (x_right - x) / tau + dy = (y_right - y) / tau + ds = (s_right - s) / tau + elif mode == "lpgd_left": + dx = (x - x_left) / tau + dy = (y - y_left) / tau + ds = (s - s_left) / tau + else: + raise ValueError(f"Only modes 'lpgd', 'lpgd_left', 'lpgd_right' can be used, got {mode}.") + return dx, dy, ds + + def adjoint_derivative_lpgd(dx, dy, ds, tau, rho, return_dP=False): + """Lagrangian Proximal Gradient Descent (LPGD) [arxiv.org/abs/2407.05920] + Computes informative replacements for adjoint derivatives given incoming gradients dx, dy, ds, + can be regarded as an efficient adjoint finite difference method. + Tau controls the perturbation strength. In the limit tau -> 0, LPGD computes the exact adjoint derivative. 
+ For finite tau, LPGD computes an informative replacement of the adjoint derivative, avoiding degeneracies. + Rho controls the regularization strength. If rho > 0, the perturbed problem is regularized. + + Args: + dx: NumPy array representing perturbation in `x` + dy: NumPy array representing perturbation in `y` + ds: NumPy array representing perturbation in `s` + tau: Perturbation strength parameter + rho: Regularization strength parameter + return_dP: flag to return dP + Returns: + (`dA`, `db`, `dc`) if return_dP is False, + (`dA`, `db`, `dc`, `dP`) if return_dP is True, + the result of applying the LPGD to the perturbations; + the sparsity pattern of `dA`, `dP` matches that of `A`, `P`. + """ + + if tau is None or tau <= 0: + raise ValueError(f"LPGD mode requires tau > 0, got tau={tau}.") + if rho < 0: + raise ValueError(f"LPGD mode requires rho >= 0, got rho={rho}.") + + if mode in ["lpgd", "lpgd_right"]: # Perturb the problem to the right + try: + x_right, y_right, _ = compute_adjoint_perturbed_solution( + dx, dy, ds, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal) + except SolverError as e: + raise SolverError(f"Computation of right-perturbed problem failed: {e}. "\ + "Consider decreasing tau or swiching to 'lpgd_left' mode.") + + if mode in ["lpgd", "lpgd_left"]: # Perturb the problem to the left + try: + x_left, y_left, _ = compute_adjoint_perturbed_solution( + dx, dy, ds, -tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal) + except SolverError as e: + raise SolverError(f"Computation of left-perturbed problem failed: {e}. "\ + "Consider decreasing tau or swiching to 'lpgd_right' mode.") + + if mode == "lpgd": + dc = (x_right - x_left) / (2 * tau) + db = - (y_right - y_left) / (2 * tau) + dA_data = (y_right[rows] * x_right[cols] - y_left[rows] * x_left[cols]) / (2 * tau) + dP_data = (x_right[cols] * x_right[cols] - x_left[cols] * x_left[cols]) / (2 * tau) + elif mode == "lpgd_right": + dc = (x_right - x) / tau + db = - (y_right - y) / tau + dA_data = (y_right[rows] * x_right[cols] - y[rows] * x[cols]) / tau + dP_data = (x_right[cols] * x_right[cols] - x[cols] * x[cols]) / tau + elif mode == "lpgd_left": + dc = (x - x_left) / tau + db = - (y - y_left) / tau + dA_data = (y[rows] * x[cols] - y_left[rows] * x_left[cols]) / tau + dP_data = (x[cols] * x[cols] - x_left[cols] * x_left[cols]) / tau + else: + raise ValueError(f"Only modes 'lpgd', 'lpgd_left', 'lpgd_right' can be used, got {mode}.") + + dA = sparse.csc_matrix((dA_data, (rows, cols)), shape=A.shape) + + if return_dP: + dP = sparse.csc_matrix((dP_data, (cols, cols)), shape=P.shape) + return dA, db, dc, dP + else: + return dA, db, dc + result["D"] = derivative result["DT"] = adjoint_derivative diff --git a/diffcp/utils.py b/diffcp/utils.py index 22dc61a..c927747 100644 --- a/diffcp/utils.py +++ b/diffcp/utils.py @@ -1,6 +1,7 @@ import diffcp.cones as cone_lib import numpy as np from scipy import sparse +from copy import deepcopy def scs_data_from_cvxpy_problem(problem): @@ -36,3 +37,175 @@ def get_random_like(A, randomness): rows, cols = A.nonzero() values = randomness(A.nnz) return sparse.csc_matrix((values, (rows, cols)), shape=A.shape) + + +def regularize_P(P, rho, size): + """Regularizes the matrix P by adding rho * I to it.""" + if rho > 0: + reg = rho * sparse.eye(size, format='csc') + if P is None: + P_reg = reg + else: + P_reg = P + reg + else: + P_reg = P + return P_reg + + +def embed_problem(A, b, c, P, cone_dict): + """Embeds the problem into a 
larger problem that allows costs on the slack variables + A_emb = [A, I; 0, -I] + b_emb = [b; 0] + c_emb = [c; 0] + P_emb = [P, 0; 0, 0] + cone_dict_emb = cone_dict with z shifted by m + """ + m = b.shape[0] + A_emb = sparse.bmat([ + [A, sparse.eye(m, format=A.format)], + [None, -sparse.eye(m, format=A.format)] + ]).tocsc() + b_emb = np.hstack([b, np.zeros(m)]) + c_emb = np.hstack([c, np.zeros(m)]) + if P is not None: + P_emb = sparse.bmat([ + [P, None], + [None, np.zeros((m, m))] + ]).tocsc() + else: + P_emb = None + cone_dict_emb = deepcopy(cone_dict) + if 'z' in cone_dict_emb: + cone_dict_emb['z'] += m + else: + cone_dict_emb['z'] = m + return A_emb, b_emb, c_emb, P_emb, cone_dict_emb + + +def compute_perturbed_solution(dA, db, dc, dP, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal): + """ + Computes the perturbed solution x_right, y_right, s_right at (A, b, c) with + perturbations dA, db, dc and optional regularization rho. + + Args: + dA: SciPy sparse matrix in CSC format; must have same sparsity + pattern as the matrix `A` from the cone program + db: NumPy array representing perturbation in `b` + dc: NumPy array representing perturbation in `c` + dP: (optional) SciPy sparse matrix in CSC format; must have same sparsity + pattern as the matrix `P` from the cone program + tau: Perturbation strength parameter + rho: Regularization strength parameter + Returns: + x_pert: Perturbed solution of the primal variable x + y_pert: Perturbed solution of the dual variable y + s_pert: Perturbed solution of the slack variable s + """ + m, n = A.shape + + # Perturb the problem + A_pert = A + tau * dA + b_pert = b + tau * db + c_pert = c + tau * dc + if dP is not None: + P_pert = P + tau * dP + else: + P_pert = P + + # Regularize: Effectively adds a rho/2 |x-x^*|^2 term to the objective + P_pert_reg = regularize_P(P_pert, rho=rho, size=n) + c_pert_reg = c_pert - rho * x + + # Set warm start + warm_start = (x, y, s) if solve_method not in ["ECOS", "Clarabel"] else None + + # Solve the perturbed problem + result_pert = solve_internal(A=A_pert, b=b_pert, c=c_pert_reg, P=P_pert_reg, cone_dict=cone_dict, + solve_method=solve_method, warm_start=warm_start, **solver_kwargs) + # Extract the solutions + x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"] + return x_pert, y_pert, s_pert + + +def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal): + """ + Computes the adjoint perturbed solution x_right, y_right, s_right (Lagrangian Proximal Map) + by solving the following perturbed problem with perturbations dx, dy, ds + and perturbation/regularization parameters tau/rho: + argmin. + + tau*( + ) + rho/2 |x-x^*|^2 + subject to Ax + s = b-tau*dy + s \in K + + For ds=0 we solve + argmin. + + subject to Ax + s = b-tau*dy + s \in K + This is just a perturbed instance of the forward optimization problem. + + For ds!=0 we rewrite the problem as an embedded problem. + We add a constraint x'=s, replace all appearances of s with x' and solve + argmin. <[x, x'], [[P+rho*I, 0]; [x, x']> + <[c+tau*dx-rho*x^*, tau*ds-rho*s^*], [x, x']> + [ 0, rho*I]] + subject to [[A, I]; [x, x'] + [s'; s] = [b-tau*dy; 0] + [0, -I]] + (s', s) \in (0 x K) + Note that we also add a regularizer on s in this case (rho/2 |s-s^*|^2). 
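+    (For reference: the embedded primal variable [x, x'] has size n+m and A_emb
+    has shape (2m, n+m); see `embed_problem` above.)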
+
+    Args:
+        dx: NumPy array representing perturbation in `x`
+        dy: NumPy array representing perturbation in `y`
+        ds: NumPy array representing perturbation in `s`
+        tau: Perturbation strength parameter
+        rho: Regularization strength parameter
+    Returns:
+        x_pert: Perturbed solution of the primal variable x
+        y_pert: Perturbed solution of the dual variable y
+        s_pert: Perturbed solution of the slack variable s
+    """
+    m, n = A.shape
+
+    # The cases ds = 0 and ds != 0 are handled separately, see docstring
+    if np.isclose(np.sum(np.abs(ds)), 0):
+        # Perturb problem (Note: perturb primal and dual linear terms in different directions)
+        c_pert = c + tau * dx
+        b_pert = b - tau * dy
+
+        # Regularize: Effectively adds a rho/2 |x-x^*|^2 term to the objective
+        P_reg = regularize_P(P, rho=rho, size=n)
+        c_pert_reg = c_pert - rho * x
+
+        # Set warm start
+        warm_start = (x, y, s) if solve_method not in ["ECOS", "Clarabel"] else None
+
+        # Solve the perturbed problem
+        # Note: in the special case solve_method=='SCS' and rho==0, this could be sped up significantly by using solver.update
+        result_pert = solve_internal(A=A, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict,
+                                     solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
+        # Extract the solutions
+        x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"]
+    else:
+        # Embed problem (see docstring)
+        A_emb, b_emb, c_emb, P_emb, cone_dict_emb = embed_problem(A, b, c, P, cone_dict)
+
+        # Perturb problem (Note: perturb primal and dual linear terms in different directions)
+        b_emb_pert = b_emb - tau * np.hstack([dy, np.zeros(m)])
+        c_emb_pert = c_emb + tau * np.hstack([dx, ds])
+
+        # Regularize: Effectively adds a rho/2 (|x-x^*|^2 + |s-s^*|^2) term to the objective
+        P_emb_reg = regularize_P(P_emb, rho=rho, size=n+m)
+        c_emb_pert_reg = c_emb_pert - rho * np.hstack([x, s])
+
+        # Set warm start
+        if solve_method in ["ECOS", "Clarabel"]:
+            warm_start = None
+        else:
+            warm_start = (np.hstack([x, s]), np.hstack([y, y]), np.hstack([s, s]))
+
+        # Solve the embedded problem
+        result_pert = solve_internal(A=A_emb, b=b_emb_pert, c=c_emb_pert_reg, P=P_emb_reg, cone_dict=cone_dict_emb,
+                                     solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
+        # Extract the solutions
+        x_pert = result_pert['x'][:n]
+        y_pert = result_pert['y'][:m]
+        s_pert = result_pert['x'][n:n+m]
+    return x_pert, y_pert, s_pert
diff --git a/examples/batch_clarabel_lpgd_quadratic.py b/examples/batch_clarabel_lpgd_quadratic.py
new file mode 100644
index 0000000..11654fb
--- /dev/null
+++ b/examples/batch_clarabel_lpgd_quadratic.py
@@ -0,0 +1,47 @@
+import diffcp
+import time
+import numpy as np
+import scipy.sparse as sparse
+
+m = 100
+n = 50
+
+batch_size = 16
+n_jobs = 2
+
+As, bs, cs, Ks, Ps = [], [], [], [], []
+for _ in range(batch_size):
+    A, b, c, K = diffcp.utils.least_squares_eq_scs_data(m, n)
+    P = sparse.csc_matrix((c.size, c.size))
+    P = sparse.triu(P).tocsc()
+    As += [A]
+    bs += [b]
+    cs += [c]
+    Ks += [K]
+    Ps += [P]
+
+def time_function(f, N=1):
+    result = []
+    for i in range(N):
+        tic = time.time()
+        f()
+        toc = time.time()
+        result += [toc - tic]
+    return np.mean(result), np.std(result)
+
+for n_jobs in range(1, 8):
+    def f_forward():
+        return diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
+            n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, solve_method="Clarabel", verbose=False, mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.1),
+            Ps=Ps)
+    xs, ys, ss, D_batch, DT_batch = diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
+        n_jobs_forward=1, n_jobs_backward=n_jobs, solve_method="Clarabel", verbose=False,
+        mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.1), Ps=Ps)
+
+    def f_backward():
+        DT_batch(xs, ys, ss, return_dP=True)
+
+    mean_forward, std_forward = time_function(f_forward)
+    mean_backward, std_backward = time_function(f_backward)
+    print("%03d | %4.4f +/- %2.2f | %4.4f +/- %2.2f" %
+          (n_jobs, mean_forward, std_forward, mean_backward, std_backward))
diff --git a/examples/batch_ecos.py b/examples/batch_ecos.py
index 9eca81b..0684305 100644
--- a/examples/batch_ecos.py
+++ b/examples/batch_ecos.py
@@ -31,9 +31,9 @@ def time_function(f, N=1):
 for n_jobs in range(1, 8):
     def f_forward():
         return diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
-                                                 n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, solver="ECOS", verbose=False)
+                                                 n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False)
     xs, ys, ss, D_batch, DT_batch = diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
-                                                                      n_jobs_forward=1, n_jobs_backward=n_jobs, solver="ECOS", verbose=False)
+                                                                      n_jobs_forward=1, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False)
 
     def f_backward():
         DT_batch(xs, ys, ss, mode="lsqr")
diff --git a/examples/batch_ecos_lpgd.py b/examples/batch_ecos_lpgd.py
new file mode 100644
index 0000000..9f5b0a7
--- /dev/null
+++ b/examples/batch_ecos_lpgd.py
@@ -0,0 +1,44 @@
+import diffcp
+import time
+import numpy as np
+
+m = 100
+n = 50
+
+batch_size = 16
+n_jobs = 1
+
+As, bs, cs, Ks = [], [], [], []
+for _ in range(batch_size):
+    A, b, c, K = diffcp.utils.least_squares_eq_scs_data(m, n)
+    As += [A]
+    bs += [b]
+    cs += [c]
+    Ks += [K]
+
+
+def time_function(f, N=1):
+    result = []
+    for i in range(N):
+        tic = time.time()
+        f()
+        toc = time.time()
+        result += [toc - tic]
+    return np.mean(result), np.std(result)
+
+for n_jobs in range(1, 8):
+    def f_forward():
+        return diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
+            n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False,
+            mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.0))
+    xs, ys, ss, D_batch, DT_batch = diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
+        n_jobs_forward=1, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False,
+        mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.0))
+
+    def f_backward():
+        DT_batch(xs, ys, ss)
+
+    mean_forward, std_forward = time_function(f_forward)
+    mean_backward, std_backward = time_function(f_backward)
+    print("%03d | %4.4f +/- %2.2f | %4.4f +/- %2.2f" %
+          (n_jobs, mean_forward, std_forward, mean_backward, std_backward))
diff --git a/examples/batch_lpgd.py b/examples/batch_lpgd.py
new file mode 100644
index 0000000..6992433
--- /dev/null
+++ b/examples/batch_lpgd.py
@@ -0,0 +1,40 @@
+import diffcp
+import time
+import numpy as np
+
+m = 100
+n = 50
+
+batch_size = 16
+n_jobs = 1
+
+As, bs, cs, Ks = [], [], [], []
+for _ in range(batch_size):
+    A, b, c, K = diffcp.utils.least_squares_eq_scs_data(m, n)
+    As += [A]
+    bs += [b]
+    cs += [c]
+    Ks += [K]
+
+def time_function(f, N=1):
+    result = []
+    for i in range(N):
+        tic = time.time()
+        f()
+        toc = time.time()
+        result += [toc-tic]
+    return np.mean(result), np.std(result)
+
+for n_jobs in range(1, 5):
+    def f_forward():
+        return diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
+            n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, mode='lpgd_left')
+    xs, ys, ss, D_batch, DT_batch = diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
+        n_jobs_forward=1, n_jobs_backward=n_jobs, mode='lpgd_left')
+    def f_backward():
+        DT_batch(xs, ys, ss, tau=0.1, rho=0.1)
+
+    mean_forward, std_forward = time_function(f_forward)
+    mean_backward, std_backward = time_function(f_backward)
+    print("%03d | %4.4f +/- %2.2f | %4.4f +/- %2.2f" %
+          (n_jobs, mean_forward, std_forward, mean_backward, std_backward))
\ No newline at end of file
diff --git a/examples/clarabel_example_lpgd_quadratic.py b/examples/clarabel_example_lpgd_quadratic.py
new file mode 100644
index 0000000..613e2f7
--- /dev/null
+++ b/examples/clarabel_example_lpgd_quadratic.py
@@ -0,0 +1,40 @@
+import diffcp
+
+import numpy as np
+import utils
+from scipy import sparse
+
+np.set_printoptions(precision=5, suppress=True)
+
+
+# We generate a random cone program with a cone
+# defined as a product of a 3-d zero cone, 3-d positive orthant cone,
+# and a 5-d second order cone.
+K = {
+    'z': 3,
+    'l': 3,
+    'q': [5]
+}
+
+m = 3 + 3 + 5
+n = 5
+
+np.random.seed(0)
+
+A, b, c = utils.random_cone_prog(m, n, K)
+P = sparse.csc_matrix((c.size, c.size))
+P = sparse.triu(P).tocsc()
+
+# We solve the cone program and get the derivative and its adjoint
+x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
+    A, b, c, K, P=P, solve_method="Clarabel", verbose=False, mode="lpgd", derivative_kwargs=dict(tau=0.1, rho=0.0))
+
+print("x =", x)
+print("y =", y)
+print("s =", s)
+
+# Adjoint derivative
+dA, db, dc, dP = adjoint_derivative(dx=c, dy=np.zeros(m), ds=np.zeros(m), return_dP=True)
+
+# Derivative (dummy inputs)
+dx, dy, ds = derivative(dA=A, db=b, dc=c, dP=P)
diff --git a/examples/dual_example.py b/examples/dual_example.py
index ad0c816..3c0220d 100644
--- a/examples/dual_example.py
+++ b/examples/dual_example.py
@@ -6,10 +6,10 @@
 
 
 # We generate a random cone program with a cone
-# defined as a product of a 3-d fixed cone, 3-d positive orthant cone,
+# defined as a product of a 3-d zero cone, 3-d positive orthant cone,
 # and a 5-d second order cone.
 K = {
-    'f': 3,
+    'z': 3,
     'l': 3,
     'q': [5]
 }
diff --git a/examples/dual_example_lpgd.py b/examples/dual_example_lpgd.py
new file mode 100644
index 0000000..e0a802f
--- /dev/null
+++ b/examples/dual_example_lpgd.py
@@ -0,0 +1,39 @@
+import diffcp
+
+import numpy as np
+import utils
+np.set_printoptions(precision=5, suppress=True)
+
+
+# We generate a random cone program with a cone
+# defined as a product of a 3-d zero cone, 3-d positive orthant cone,
+# and a 5-d second order cone.
+K = {
+    'z': 3,
+    'l': 3,
+    'q': [5]
+}
+
+m = 3 + 3 + 5
+n = 5
+
+np.random.seed(0)
+
+A, b, c = utils.random_cone_prog(m, n, K)
+
+# We solve the cone program and get the derivative and its adjoint
+x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
+    A, b, c, K, eps=1e-10, mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.1))
+
+print("x =", x)
+print("y =", y)
+print("s =", s)
+
+# We evaluate the gradient of the objective with respect to A, b and c.
+dA, db, dc = adjoint_derivative(c, np.zeros(m), np.zeros(m))
+
+# The gradient of the objective with respect to b should be
+# equal to minus the dual variable y (see, e.g., page 268 of Convex Optimization by
+# Boyd & Vandenberghe).
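+# (With LPGD and a finite tau, here 1e-3, the match is approximate; it
+# tightens as tau -> 0, cf. the LPGD docstrings in diffcp/cone_program.py.)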
+print("db =", db) +print("-y =", -y) diff --git a/examples/ecos_example.py b/examples/ecos_example.py index 9a64182..9d1534b 100644 --- a/examples/ecos_example.py +++ b/examples/ecos_example.py @@ -6,10 +6,10 @@ # We generate a random cone program with a cone -# defined as a product of a 3-d fixed cone, 3-d positive orthant cone, +# defined as a product of a 3-d zero cone, 3-d positive orthant cone, # and a 5-d second order cone. K = { - 'f': 3, + 'z': 3, 'l': 3, 'q': [5] } @@ -23,7 +23,7 @@ # We solve the cone program and get the derivative and its adjoint x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative( - A, b, c, K, solver="ECOS", verbose=False) + A, b, c, K, solve_method="ECOS", verbose=False) print("x =", x) print("y =", y) diff --git a/examples/ecos_example_lpgd.py b/examples/ecos_example_lpgd.py new file mode 100644 index 0000000..bc8777a --- /dev/null +++ b/examples/ecos_example_lpgd.py @@ -0,0 +1,39 @@ +import diffcp + +import numpy as np +import utils +np.set_printoptions(precision=5, suppress=True) + + +# We generate a random cone program with a cone +# defined as a product of a 3-d zero cone, 3-d positive orthant cone, +# and a 5-d second order cone. +K = { + 'z': 3, + 'l': 3, + 'q': [5] +} + +m = 3 + 3 + 5 +n = 5 + +np.random.seed(0) + +A, b, c = utils.random_cone_prog(m, n, K) + +# We solve the cone program and get the derivative and its adjoint +x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative( + A, b, c, K, solve_method="ECOS", verbose=False, mode="lpgd", derivative_kwargs=dict(tau=0.1, rho=0.0)) + +print("x =", x) +print("y =", y) +print("s =", s) + +# We evaluate the gradient of the objective with respect to A, b and c. +dA, db, dc = adjoint_derivative(c, np.zeros(m), np.zeros(m)) + +# The gradient of the objective with respect to b should be +# equal to minus the dual variable y (see, e.g., page 268 of Convex Optimization by +# Boyd & Vandenberghe). +print("db =", db) +print("-y =", -y) diff --git a/examples/sdp_lpgd.py b/examples/sdp_lpgd.py new file mode 100644 index 0000000..e5cec85 --- /dev/null +++ b/examples/sdp_lpgd.py @@ -0,0 +1,67 @@ +import cvxpy as cp +import diffcp +import numpy as np + +import time + + +def scs_data_from_cvxpy_problem(problem): + data = problem.get_problem_data(cp.SCS)[0] + cone_dims = cp.reductions.solvers.conic_solvers.scs_conif.dims_to_solver_dict(data[ + "dims"]) + return data["A"], data["b"], data["c"], cone_dims + + +def randn_symm(n): + A = np.random.randn(n, n) + return (A + A.T) / 2 + + +def randn_psd(n): + A = 1. 
/ 10 * np.random.randn(n, n) + return A@A.T + + +def main(n=3, p=3): + # Generate problem data + C = randn_psd(n) + As = [randn_symm(n) for _ in range(p)] + Bs = np.random.randn(p) + + # Extract problem data using cvxpy + X = cp.Variable((n, n), PSD=True) + objective = cp.trace(C@X) + constraints = [cp.trace(As[i]@X) == Bs[i] for i in range(p)] + prob = cp.Problem(cp.Minimize(objective), constraints) + A, b, c, cone_dims = scs_data_from_cvxpy_problem(prob) + + # Print problem size + mn_plus_m_plus_n = A.size + b.size + c.size + n_plus_2n = c.size + 2 * b.size + entries_in_derivative = mn_plus_m_plus_n * n_plus_2n + print(f"""n={n}, p={p}, A.shape={A.shape}, nnz in A={A.nnz}, derivative={mn_plus_m_plus_n}x{n_plus_2n} ({entries_in_derivative} entries)""") + + # Compute solution and derivative maps + start = time.perf_counter() + x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative( + A, b, c, cone_dims, eps=1e-5, mode="lpgd") + end = time.perf_counter() + print("Compute solution and set up derivative: %.2f s." % (end - start)) + + # Derivative + lpgd_args = dict(tau=0.1, rho=0.1) + start = time.perf_counter() + dA, db, dc = adjoint_derivative(diffcp.cones.vec_symm( + C), np.zeros(y.size), np.zeros(s.size), **lpgd_args) + end = time.perf_counter() + print("Evaluate derivative: %.2f s." % (end - start)) + + # Adjoint of derivative + start = time.perf_counter() + dx, dy, ds = derivative(A, b, c, **lpgd_args) + end = time.perf_counter() + print("Evaluate adjoint of derivative: %.2f s." % (end - start)) + +if __name__ == '__main__': + np.random.seed(0) + main(50, 25)
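+
+# Note (hedged alternative): tau/rho could instead be fixed once at solve time,
+#   x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
+#       A, b, c, cone_dims, eps=1e-5, mode="lpgd", derivative_kwargs=lpgd_args)
+# in which case they should not be passed to the derivative calls again.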