From 20fd10c425c47c38a775b2223c5d7161264f144f Mon Sep 17 00:00:00 2001 From: Anselm Paulus <40758879+a-paulus@users.noreply.github.com> Date: Mon, 29 Jul 2024 11:19:37 +0200 Subject: [PATCH 01/17] Update README.md --- README.md | 182 +----------------------------------------------------- 1 file changed, 1 insertion(+), 181 deletions(-) diff --git a/README.md b/README.md index 639b0d0..3f85fb9 100644 --- a/README.md +++ b/README.md @@ -1,181 +1 @@ -[![Build Status](http://github.com/cvxgrp/diffcp/workflows/build/badge.svg?event=push)](https://github.com/cvxgrp/diffcp/actions/workflows/build.yml) - -# diffcp - -`diffcp` is a Python package for computing the derivative of a convex cone program, with respect to its problem data. The derivative is implemented as an abstract linear map, with methods for its forward application and its adjoint. - -The implementation is based on the calculations in our paper [Differentiating through a cone program](http://web.stanford.edu/~boyd/papers/diff_cone_prog.html). - -### Installation -`diffcp` is available on PyPI, as a source distribution. Install it with - -```bash -pip install diffcp -``` - -You will need a C++11-capable compiler to build `diffcp`. - -`diffcp` requires: -* [NumPy](https://github.com/numpy/numpy) >= 1.15 -* [SciPy](https://github.com/scipy/scipy) >= 1.10 -* [SCS](https://github.com/bodono/scs-python) >= 2.0.2 -* [pybind11](https://github.com/pybind/pybind11/tree/stable) >= 2.4 -* [threadpoolctl](https://github.com/joblib/threadpoolctl) >= 1.1 -* [ECOS](https://github.com/embotech/ecos-python) >= 2.0.10 -* [Clarabel](https://github.com/oxfordcontrol/Clarabel.rs) >= 0.5.1 -* Python >= 3.7 - -`diffcp` uses Eigen; Eigen operations can be automatically vectorized by compilers. To enable vectorization, install with - -```bash -MARCH_NATIVE=1 pip install diffcp -``` - -OpenMP can be enabled by passing extra arguments to your compiler. For example, on linux, you can tell gcc to activate the OpenMP extension by specifying the flag "-fopenmp": - -```bash -OPENMP_FLAG="-fopenmp" pip install diffcp -``` - -To enable both vectorization and OpenMP (on linux), use - -```bash -MARCH_NATIVE=1 OPENMP_FLAG="-fopenmp" pip install diffcp -``` - -### Cone programs -`diffcp` differentiates through a primal-dual cone program pair. The primal problem must be expressed as - -``` -minimize c'x -subject to Ax + s = b - s in K -``` -where `x` and `s` are variables, `A`, `b` and `c` are the user-supplied problem data, and `K` is a user-defined convex cone. The corresponding dual problem is - -``` -minimize b'y -subject to A'y + c == 0 - y in K^* -``` - -with dual variable `y`. - -### Usage - -`diffcp` exposes the function - -```python -solve_and_derivative(A, b, c, cone_dict, warm_start=None, solver=None, **kwargs). -``` - -This function returns a primal-dual solution `x`, `y`, and `s`, along with -functions for evaluating the derivative and its adjoint (transpose). -These functions respectively compute right and left multiplication of the derivative -of the solution map at `A`, `b`, and `c` by a vector. -The `solver` argument determines which solver to use; the available solvers -are `solver="SCS"`, `solver="ECOS"`, and `solver="Clarabel"`. -If no solver is specified, `diffcp` will choose the solver itself. -In the case that the problem is not solved, i.e. the solver fails for some reason, we will raise -a `SolverError` Exception. - -#### Arguments -The arguments `A`, `b`, and `c` correspond to the problem data of a cone program. -* `A` must be a [SciPy sparse CSC matrix](https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csc_matrix.html). -* `b` and `c` must be NumPy arrays. -* `cone_dict` is a dictionary that defines the convex cone `K`. -* `warm_start` is an optional tuple `(x, y, s)` at which to warm-start. (Note: this is only available for the SCS solver). -* `**kwargs` are keyword arguments to forward to the solver (e.g., `verbose=False`). - -These inputs must conform to the [SCS convention](https://github.com/bodono/scs-python) for problem data. The keys in `cone_dict` correspond to the cones, with -* `diffcp.ZERO` for the zero cone, -* `diffcp.POS` for the positive orthant, -* `diffcp.SOC` for a product of SOC cones, -* `diffcp.PSD` for a product of PSD cones, and -* `diffcp.EXP` for a product of exponential cones. - -The values in `cone_dict` denote the sizes of each cone; the values of `diffcp.SOC`, `diffcp.PSD`, and `diffcp.EXP` should be lists. The order of the rows of `A` must match the ordering of the cones given above. For more details, consult the [SCS documentation](https://github.com/cvxgrp/scs/blob/master/README.md). - -#### Return value -The function `solve_and_derivative` returns a tuple - -```python -(x, y, s, derivative, adjoint_derivative) -``` - -* `x`, `y`, and `s` are a primal-dual solution. - -* `derivative` is a function that applies the derivative at `(A, b, c)` to perturbations `dA`, `db`, `dc`. It has the signature -```derivative(dA, db, dc) -> dx, dy, ds```, where `dA` is a SciPy sparse CSC matrix with the same sparsity pattern as `A`, and `db` and `dc` are NumPy arrays. `dx`, `dy`, and `ds` are NumPy arrays, approximating the change in the primal-dual solution due to the perturbation. - -* `adjoint_derivative` is a function that applies the adjoint of the derivative to perturbations `dx`, `dy`, `ds`. It has the signature -```adjoint_derivative(dx, dy, ds) -> dA, db, dc```, where `dx`, `dy`, and `ds` are NumPy arrays. - -#### Example -```python -import numpy as np -from scipy import sparse - -import diffcp - -cone_dict = { - diffcp.ZERO: 3, - diffcp.POS: 3, - diffcp.SOC: [5] -} - -m = 3 + 3 + 5 -n = 5 - -A, b, c = diffcp.utils.random_cone_prog(m, n, cone_dict) -x, y, s, D, DT = diffcp.solve_and_derivative(A, b, c, cone_dict) - -# evaluate the derivative -nonzeros = A.nonzero() -data = 1e-4 * np.random.randn(A.size) -dA = sparse.csc_matrix((data, nonzeros), shape=A.shape) -db = 1e-4 * np.random.randn(m) -dc = 1e-4 * np.random.randn(n) -dx, dy, ds = D(dA, db, dc) - -# evaluate the adjoint of the derivative -dx = c -dy = np.zeros(m) -ds = np.zeros(m) -dA, db, dc = DT(dx, dy, ds) -``` - -For more examples, including the SDP example described in the paper, see the [`examples`](examples/) directory. - -### Citing -If you wish to cite `diffcp`, please use the following BibTex: - -``` -@article{diffcp2019, - author = {Agrawal, A. and Barratt, S. and Boyd, S. and Busseti, E. and Moursi, W.}, - title = {Differentiating through a Cone Program}, - journal = {Journal of Applied and Numerical Optimization}, - year = {2019}, - volume = {1}, - number = {2}, - pages = {107--115}, -} - -@misc{diffcp, - author = {Agrawal, A. and Barratt, S. and Boyd, S. and Busseti, E. and Moursi, W.}, - title = {{diffcp}: differentiating through a cone program, version 1.0}, - howpublished = {\url{https://github.com/cvxgrp/diffcp}}, - year = 2019 -} -``` - -The following thesis concurrently derived the mathematics behind differentiating cone programs. -``` -@phdthesis{amos2019differentiable, - author = {Brandon Amos}, - title = {{Differentiable Optimization-Based Modeling for Machine Learning}}, - school = {Carnegie Mellon University}, - year = 2019, - month = May, -} -``` +This branch is currently only a fork of the original diffcp implementation. The LPGD implementation will be added soon. From 26727e1f27ff2bd4870d3e2deab2cea72fc431ed Mon Sep 17 00:00:00 2001 From: Anselm Paulus Date: Fri, 9 Aug 2024 12:38:45 +0200 Subject: [PATCH 02/17] Add LPGD --- diffcp/cone_program.py | 383 ++++++++++++++++++++++++++++++++++------- diffcp/utils.py | 13 ++ examples/batch_LPGD.py | 42 +++++ 3 files changed, 373 insertions(+), 65 deletions(-) create mode 100644 examples/batch_LPGD.py diff --git a/diffcp/cone_program.py b/diffcp/cone_program.py index 9919108..ea3fb86 100644 --- a/diffcp/cone_program.py +++ b/diffcp/cone_program.py @@ -12,6 +12,8 @@ import _diffcp import diffcp.cones as cone_lib +import copy +from diffcp.utils import regularize_P def pi(z, cones): @@ -49,7 +51,7 @@ def solve_and_derivative_batch(As, bs, cs, cone_dicts, n_jobs_forward=-1, n_jobs means serial and n_jobs_forward = -1 defaults to the number of CPUs (default=-1). n_jobs_backward - Number of jobs to use in the backward pass. n_jobs_backward = 1 means serial and n_jobs_backward = -1 defaults to the number of CPUs (default=-1). - mode - Differentiation mode in ["lsqr", "lsmr", "dense"]. + mode - Differentiation mode in ["lsqr", "lsmr", "dense", "lpgd", "lpgd_right", "lpgd_left"]. warm_starts - A list of warm starts. kwargs - kwargs sent to scs. @@ -248,7 +250,7 @@ def solve_and_derivative(A, b, c, cone_dict, warm_start=None, mode='lsqr', exponential cones. See SCS documentation for more details. warm_start: (optional) A tuple (x, y, s) at which to warm-start SCS. mode: (optional) Which mode to compute derivative with, options are - ["dense", "lsqr", "lsmr"]. + ["dense", "lsqr", "lsmr", "lpgd", "lpgd_right", "lpgd_left"]. solve_method: (optional) Name of solver to use; SCS, ECOS, or Clarabel. kwargs: (optional) Keyword arguments to send to the solver. @@ -306,7 +308,7 @@ def solve_only(A, b, c, cone_dict, warm_start=None, def solve_internal(A, b, c, cone_dict, solve_method=None, - warm_start=None, raise_on_error=True, **kwargs): + warm_start=None, raise_on_error=True, P=None, **kwargs): if solve_method is None: psd_cone = ('s' in cone_dict) and (cone_dict['s'] != []) @@ -335,6 +337,7 @@ def solve_internal(A, b, c, cone_dict, solve_method=None, del kwargs["eps"] data = { + "P": P, "A": A, "b": b, "c": c @@ -349,7 +352,7 @@ def solve_internal(A, b, c, cone_dict, solve_method=None, result = scs.solve(data, cone_dict, **kwargs) status = result["info"]["status"] - inaccurate_status = {"Solved/Inaccurate", "solved (inaccurate - reached max_iters)"} + inaccurate_status = {"Solved/Inaccurate", "solved (inaccurate - reached max_iters)", "solved (inaccurate - reached time_limit_secs)"} if status in inaccurate_status and "acceleration_lookback" not in kwargs: # anderson acceleration is sometimes unstable result = scs.solve( @@ -367,6 +370,8 @@ def solve_internal(A, b, c, cone_dict, solve_method=None, return result elif solve_method == "ECOS": + if P is not None: + raise ValueError("ECOS does not support quadratic objectives.") if warm_start is not None: raise ValueError('ECOS does not support warmstart.') if ('s' in cone_dict) and (cone_dict['s'] != []): @@ -449,7 +454,8 @@ def solve_internal(A, b, c, cone_dict, solve_method=None, 'pobj': solution['info']['pcost']} elif solve_method == "Clarabel": # for now set P to 0 - P = sparse.csc_matrix((c.size, c.size)) + if P is None: + P = sparse.csc_matrix((c.size, c.size)) cones = [] if "z" in cone_dict: @@ -510,10 +516,10 @@ def solve_internal(A, b, c, cone_dict, solve_method=None, return result def solve_and_derivative_internal(A, b, c, cone_dict, solve_method=None, - warm_start=None, mode='lsqr', raise_on_error=True, **kwargs): - if mode not in ["dense", "lsqr", "lsmr"]: + warm_start=None, mode='lsqr', raise_on_error=True, P=None, **kwargs): + if mode not in ["dense", "lsqr", "lsmr", "lpgd", "lpgd_right", "lpgd_left"]: raise ValueError("Unsupported mode {}; the supported modes are " - "'dense', 'lsqr' and 'lsmr'".format(mode)) + "'dense', 'lsqr', 'lsmr', 'lpgd', 'lpgd_right' and 'lpgd_left'".format(mode)) if np.isnan(A.data).any(): raise RuntimeError("Found a NaN in A.") @@ -529,6 +535,11 @@ def solve_and_derivative_internal(A, b, c, cone_dict, solve_method=None, # eliminate explicit zeros in A, we no longer need them A.eliminate_zeros() + if "derivative_kwargs" in kwargs: + derivative_kwargs = kwargs.pop("derivative_kwargs") + else: + derivative_kwargs = {} + solver_kwargs = kwargs result = solve_internal(A, b, c, cone_dict, solve_method=solve_method, warm_start=warm_start, raise_on_error=raise_on_error, **kwargs) x = result["x"] @@ -538,27 +549,29 @@ def solve_and_derivative_internal(A, b, c, cone_dict, solve_method=None, # pre-compute quantities for the derivative m, n = A.shape N = m + n + 1 - cones = cone_lib.parse_cone_dict(cone_dict) - cones_parsed = cone_lib.parse_cone_dict_cpp(cones) - z = (x, y - s, np.array([1])) - u, v, w = z - Q = sparse.bmat([ - [None, A.T, np.expand_dims(c, - 1)], - [-A, None, np.expand_dims(b, -1)], - [-np.expand_dims(c, -1).T, -np.expand_dims(b, -1).T, None] - ]) + if "lpgd" not in mode: # pre-compute quantities for the derivative + cones = cone_lib.parse_cone_dict(cone_dict) + cones_parsed = cone_lib.parse_cone_dict_cpp(cones) + z = (x, y - s, np.array([1])) + u, v, w = z + + Q = sparse.bmat([ + [None, A.T, np.expand_dims(c, - 1)], + [-A, None, np.expand_dims(b, -1)], + [-np.expand_dims(c, -1).T, -np.expand_dims(b, -1).T, None] + ]) - D_proj_dual_cone = _diffcp.dprojection(v, cones_parsed, True) - if mode == "dense": - Q_dense = Q.todense() - M = _diffcp.M_dense(Q_dense, cones_parsed, u, v, w) - MT = M.T - elif mode in ("lsqr", "lsmr"): - M = _diffcp.M_operator(Q, cones_parsed, u, v, w) - MT = M.transpose() + D_proj_dual_cone = _diffcp.dprojection(v, cones_parsed, True) + if mode == "dense": + Q_dense = Q.todense() + M = _diffcp.M_dense(Q_dense, cones_parsed, u, v, w) + MT = M.T + elif mode in ("lsqr", "lsmr"): + M = _diffcp.M_operator(Q, cones_parsed, u, v, w) + MT = M.transpose() - pi_z = pi(z, cones) + pi_z = pi(z, cones) def derivative(dA, db, dc, **kwargs): """Applies derivative at (A, b, c) to perturbations dA, db, dc @@ -571,27 +584,32 @@ def derivative(dA, db, dc, **kwargs): NumPy arrays dx, dy, ds, the result of applying the derivative to the perturbations. """ - dQ = sparse.bmat([ - [None, dA.T, np.expand_dims(dc, - 1)], - [-dA, None, np.expand_dims(db, -1)], - [-np.expand_dims(dc, -1).T, -np.expand_dims(db, -1).T, None] - ]) - rhs = dQ @ pi_z - if np.allclose(rhs, 0): - dz = np.zeros(rhs.size) - elif mode == "dense": - dz = _diffcp._solve_derivative_dense(M, MT, rhs) - elif mode == "lsqr": - dz = _diffcp.lsqr(M, rhs).solution - elif mode == "lsmr": - M_sp = sparse.linalg.LinearOperator(dQ.shape, matvec=M.matvec, rmatvec=M.rmatvec) - dz, istop, itn, normr, normar, norma, conda, normx = sparse.linalg.lsmr(M_sp, rhs, maxiter=10*M_sp.shape[0], atol=1e-12, btol=1e-12) - - du, dv, dw = np.split(dz, [n, n + m]) - dx = du - x * dw - dy = D_proj_dual_cone.matvec(dv) - y * dw - ds = D_proj_dual_cone.matvec(dv) - dv - s * dw - return -dx, -dy, -ds + + if "lpgd" in mode: + dx, dy, ds = derivative_lpgd(dA, db, dc, mode=mode, **derivative_kwargs, **kwargs) + return dx, dy, ds + else: + dQ = sparse.bmat([ + [None, dA.T, np.expand_dims(dc, - 1)], + [-dA, None, np.expand_dims(db, -1)], + [-np.expand_dims(dc, -1).T, -np.expand_dims(db, -1).T, None] + ]) + rhs = dQ @ pi_z + if np.allclose(rhs, 0): + dz = np.zeros(rhs.size) + elif mode == "dense": + dz = _diffcp._solve_derivative_dense(M, MT, rhs) + elif mode == "lsqr": + dz = _diffcp.lsqr(M, rhs).solution + elif mode == "lsmr": + M_sp = sparse.linalg.LinearOperator(dQ.shape, matvec=M.matvec, rmatvec=M.rmatvec) + dz, istop, itn, normr, normar, norma, conda, normx = sparse.linalg.lsmr(M_sp, rhs, maxiter=10*M_sp.shape[0], atol=1e-12, btol=1e-12) + + du, dv, dw = np.split(dz, [n, n + m]) + dx = du - x * dw + dy = D_proj_dual_cone.matvec(dv) - y * dw + ds = D_proj_dual_cone.matvec(dv) - dv - s * dw + return -dx, -dy, -ds def adjoint_derivative(dx, dy, ds, **kwargs): """Applies adjoint of derivative at (A, b, c) to perturbations dx, dy, ds @@ -604,26 +622,261 @@ def adjoint_derivative(dx, dy, ds, **kwargs): perturbations; the sparsity pattern of `dA` matches that of `A`. """ - dw = -(x @ dx + y @ dy + s @ ds) - dz = np.concatenate( - [dx, D_proj_dual_cone.rmatvec(dy + ds) - ds, np.array([dw])]) - - if np.allclose(dz, 0): - r = np.zeros(dz.shape) - elif mode == "dense": - r = _diffcp._solve_adjoint_derivative_dense(M, MT, dz) - elif mode == "lsqr": - r = _diffcp.lsqr(MT, dz).solution - elif mode == "lsmr": - MT_sp = sparse.linalg.LinearOperator(dz.shape*2, matvec=MT.matvec, rmatvec=MT.rmatvec) - r, istop, itn, normr, normar, norma, conda, normx = sparse.linalg.lsmr(MT_sp, dz, maxiter=10*MT_sp.shape[0], atol=1e-10, btol=1e-10) - - values = pi_z[cols] * r[rows + n] - pi_z[n + rows] * r[cols] - dA = sparse.csc_matrix((values, (rows, cols)), shape=A.shape) - db = pi_z[n:n + m] * r[-1] - pi_z[-1] * r[n:n + m] - dc = pi_z[:n] * r[-1] - pi_z[-1] * r[:n] + if "lpgd" in mode: + dA, db, dc = adjoint_derivative_lpgd(dx, dy, ds, mode=mode, **derivative_kwargs, **kwargs) + else: + dw = -(x @ dx + y @ dy + s @ ds) + dz = np.concatenate( + [dx, D_proj_dual_cone.rmatvec(dy + ds) - ds, np.array([dw])]) + + if np.allclose(dz, 0): + r = np.zeros(dz.shape) + elif mode == "dense": + r = _diffcp._solve_adjoint_derivative_dense(M, MT, dz) + elif mode == "lsqr": + r = _diffcp.lsqr(MT, dz).solution + elif mode == "lsmr": + MT_sp = sparse.linalg.LinearOperator(dz.shape*2, matvec=MT.matvec, rmatvec=MT.rmatvec) + r, istop, itn, normr, normar, norma, conda, normx = sparse.linalg.lsmr(MT_sp, dz, maxiter=10*MT_sp.shape[0], atol=1e-10, btol=1e-10) + + values = pi_z[cols] * r[rows + n] - pi_z[n + rows] * r[cols] + dA = sparse.csc_matrix((values, (rows, cols)), shape=A.shape) + db = pi_z[n:n + m] * r[-1] - pi_z[-1] * r[n:n + m] + dc = pi_z[:n] * r[-1] - pi_z[-1] * r[:n] return dA, db, dc + + def derivative_lpgd(dA, db, dc, mode, tau, rho): + """Computes standard finite difference derivative at (A, b, c) to perturbations dA, db, dc + with optional regularization rho. + Args: + dA: SciPy sparse matrix in CSC format; must have same sparsity + pattern as the matrix `A` from the cone program + db: NumPy array representing perturbation in `b` + dc: NumPy array representing perturbation in `c` + mode: Mode of the derivative computation; options are + ["lpgd", "lpgd_left", "lpgd_right"] corresponding to central, left, and right finite differences. + tau: Perturbation strength parameter + rho: Regularization parameter + Returns: + NumPy arrays dx, dy, ds, the result of applying the derivative + to the perturbations. + """ + + if tau is None or tau <= 0: + raise ValueError(f"LPGD mode requires tau > 0, got tau={tau}.") + if rho < 0: + raise ValueError(f"Finite difference mode requires rho >= 0, got rho={rho}.") + + if mode in ["lpgd", "lpgd_right"]: # Perturb the problem to the right + x_right, y_right, s_right = compute_perturbed_solution(dA, db, dc, tau, rho, solve_method, solver_kwargs) + + if mode in ["lpgd", "lpgd_left"]: # Perturb the problem to the left + x_left, y_left, s_left = compute_perturbed_solution(dA, db, dc, -tau, rho, solve_method, solver_kwargs) + + if mode == "lpgd": + dx = (x_right - x_left) / (2 * tau) + dy = (y_right - y_left) / (2 * tau) + ds = (s_right - s_left) / (2 * tau) + elif mode == "lpgd_right": + dx = (x_right - x) / tau + dy = (y_right - y) / tau + ds = (s_right - s) / tau + elif mode == "lpgd_left": + dx = (x - x_left) / tau + dy = (y - y_left) / tau + ds = (s - s_left) / tau + else: + raise ValueError(f"Only modes 'lpgd', 'lpgd_left', 'lpgd_right' can be used, got {mode}.") + return dx, dy, ds + + def compute_perturbed_solution(dA, db, dc, tau, rho, solve_method, solver_kwargs): + """ + Computes the perturbed solution x_right, y_right, s_right at (A, b, c) with + perturbations dA, db, dc and optional regularization rho. + + Args: + dA: SciPy sparse matrix in CSC format; must have same sparsity + pattern as the matrix `A` from the cone program + db: NumPy array representing perturbation in `b` + dc: NumPy array representing perturbation in `c` + tau: Perturbation strength parameter + rho: Regularization parameter + solve_method: Name of solver to use; SCS, ECOS, or Clarabel + solver_kwargs: Additional keyword arguments to send to the solver + Returns: + x_pert: Perturbed solution of the primal variable x + y_pert: Perturbed solution of the dual variable y + s_pert: Perturbed solution of the slack variable s + """ + + # Perturb the problem + A_pert = A + tau * dA + b_pert = b + tau * db + c_pert = c + tau * dc + + # Regularize + P_reg = regularize_P(P, rho=rho, size=n) + c_pert_reg = c_pert - rho * x + + # Set warm start + warm_start = (x, y, s) + + # Solve the perturbed problem + result_pert = solve_internal(A=A_pert, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, + solve_method=solve_method, warm_start=warm_start, **solver_kwargs) + # Extract the solutions + x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"] + return x_pert, y_pert, s_pert + + def adjoint_derivative_lpgd(dx, dy, ds, mode, tau, rho): + """Lagrangian Proximal Gradient Descent (LPGD) [arxiv.org/abs/2407.05920] + Computes informative replacements for adjoint derivatives given incoming gradients dx, dy, ds, + can be regarded as an efficient adjoint finite difference method. + Tau controls the perturbation strength. In the limit tau -> 0, LPGD computes the exact adjoint derivative. + For finite tau, LPGD computes an informative replacement of the adjoint derivative, avoiding degeneracies. + Rho controls the regularization strength. If rho > 0, the perturbed problem is regularized. + + Args: + dx: NumPy array representing perturbation in `x` + dy: NumPy array representing perturbation in `y` + ds: NumPy array representing perturbation in `s` + mode: Mode of the adjoint derivative computation; options arem ["lpgd", "lpgd_left", "lpgd_right"] + tau: Perturbation strength parameter + rho: Regularization parameter + Returns: + (`dA`, `db`, `dc`), the result of applying the LPGD to the + perturbations; the sparsity pattern of `dA` matches that of `A`. + """ + + if tau is None or tau <= 0: + raise ValueError(f"Finite difference mode requires tau > 0, got tau={tau}.") + if rho < 0: + raise ValueError(f"Finite difference mode requires rho >= 0, got rho={rho}.") + + if mode in ["lpgd", "lpgd_right"]: # Perturb the problem to the right + x_right, y_right, _ = compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, solve_method, solver_kwargs) + + if mode in ["lpgd", "lpgd_left"]: # Perturb the problem to the left + x_left, y_left, _ = compute_adjoint_perturbed_solution(dx, dy, ds, -tau, rho, solve_method, solver_kwargs) + + if mode == "lpgd": + dc = (x_right - x_left) / (2 * tau) + db = - (y_right - y_left) / (2 * tau) + dA_data = (y_right[rows] * x_right[cols] - y_left[rows] * x_left[cols]) / (2 * tau) + elif mode == "lpgd_right": + dc = (x_right - x) / tau + db = - (y_right - y) / tau + dA_data = (y_right[rows] * x_right[cols] - y[rows] * x[cols]) / tau + elif mode == "lpgd_left": + dc = (x - x_left) / tau + db = - (y - y_left) / tau + dA_data = (y[rows] * x[cols] - y_left[rows] * x_left[cols]) / tau + else: + raise ValueError(f"Only modes 'lpgd', 'lpgd_left', 'lpgd_right' can be used, got {mode}.") + + dA = sparse.csc_matrix((dA_data, (rows, cols)), shape=A.shape) + return dA, db, dc + + def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, solve_method, solver_kwargs): + """ + Computes the adjoint perturbed solution x_right, y_right, s_right (Lagrangian Proximal Map) + by solving the following perturbed problem with perturbations dx, dy, ds and perturbation parameters tau, rho: + argmin. + + tau ( + ) + rho/2 |x-x^*|^2 + subject to Ax + s = b-tau*dy + s \in K + + For ds=0 we solve + argmin. + + subject to Ax + s = b-tau*dy + s \in K + This is just a perturbed instance of the forward optimization problem. + + For ds!=0 we rewrite the problem as an embedded problem. + We add a constraint x'=s, replace all appearances of s with x' and solve + argmin. <[x, x'], [[P+rho*I, 0]; [x, x']> + <[c+tau*dx-rho*x^*, tau*ds-rho*s^*], [x, x']> + [ 0, rho*I]] + subject to [[A, I]; [x, x'] + [0; s] = [0; b-tau*dy] + [0, -I]] + s \in K + Note that we also add a regularizer on s (rho/2 |s-s^*|^2) in this case. + + Args: + dx: NumPy array representing perturbation in `x` + dy: NumPy array representing perturbation in `y` + ds: NumPy array representing perturbation in `s` + tau: Perturbation strength parameter + rho: Regularization parameter + solve_method: Name of solver to use; SCS, ECOS, or Clarabel + solver_kwargs: Additional keyword arguments to send to the solver + Returns: + x_pert: Perturbed solution of the primal variable x + y_pert: Perturbed solution of the dual variable y + s_pert: Perturbed solution of the slack variable s + """ + + # The cases ds = 0 and ds != 0 are handled separately, see docstring + if np.isclose(np.sum(np.abs(ds)), 0): + # Perturb problem + c_pert = c + tau * dx + b_pert = b - tau * dy + + # Regularize + P_reg = regularize_P(P, rho=rho, size=n) + c_pert_reg = c_pert - rho * x + + # Set warm start + warm_start = (x, y, s) if solve_method != "ECOS" else None + + # Solve the perturbed problem + # TODO: If solve_method=='SCS' and rho==0, this can be sped up strongly by using solver.update + result_pert = solve_internal(A=A, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, + solve_method=solve_method, warm_start=warm_start, **solver_kwargs) + # Extract the solutions + x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"] + else: + # Embed problem + A_emb = sparse.bmat([ + [A, sparse.eye(m, format=A.format)], + [None, -sparse.eye(m, format=A.format)] + ]).tocsc() + b_emb = np.hstack([b, np.zeros(m)]) + c_emb = np.hstack([c, np.zeros(m)]) + if P is not None: + P_emb = sparse.bmat([ + [P, None], + [None, np.zeros((m, m))] + ]).tocsc() + else: + P_emb = None + cone_dict_emb = copy.deepcopy(cone_dict) + if 'z' in cone_dict_emb: + cone_dict_emb['z'] += m + else: + cone_dict_emb['z'] = m + + # Perturb problem + b_emb_pert = b_emb - tau * np.hstack([dy, np.zeros(m)]) + c_emb_pert = c_emb + tau * np.hstack([dx, ds]) + + # Regularize + P_emb_reg = regularize_P(P_emb, rho=rho, size=n+m) + c_emb_pert_reg = c_emb_pert - rho * np.hstack([x, s]) + + # Set warm start + x_ws = np.hstack([x, s]) + y_ws = np.hstack([y, y]) + s_ws = np.hstack([s, s]) + warm_start = (x_ws, y_ws, s_ws) if solve_method != "ECOS" else None + + # Solve the embedded problem + result_pert = solve_internal(A=A_emb, b=b_emb_pert, c=c_emb_pert_reg, P=P_emb_reg, cone_dict=cone_dict_emb, + solve_method=solve_method, warm_start=warm_start, **solver_kwargs) + # Extract the solutions + x_pert = result_pert['x'][:n] + y_pert = result_pert['y'][:m] + s_pert = result_pert['x'][n:n+m] + return x_pert, y_pert, s_pert result["D"] = derivative result["DT"] = adjoint_derivative diff --git a/diffcp/utils.py b/diffcp/utils.py index 22dc61a..40ec85e 100644 --- a/diffcp/utils.py +++ b/diffcp/utils.py @@ -36,3 +36,16 @@ def get_random_like(A, randomness): rows, cols = A.nonzero() values = randomness(A.nnz) return sparse.csc_matrix((values, (rows, cols)), shape=A.shape) + + +def regularize_P(P, rho, size): + """Regularizes the matrix P by adding rho * I to it.""" + if rho > 0: + reg = rho * sparse.eye(size, format='csc') + if P is None: + P_reg = reg + else: + P_reg = P + reg + else: + P_reg = P + return P_reg diff --git a/examples/batch_LPGD.py b/examples/batch_LPGD.py new file mode 100644 index 0000000..81fb4ab --- /dev/null +++ b/examples/batch_LPGD.py @@ -0,0 +1,42 @@ +import diffcp +import utils +import IPython as ipy +import time +import numpy as np + +m = 100 +n = 50 + +batch_size = 16 +n_jobs = 1 + +As, bs, cs, Ks = [], [], [], [] +for _ in range(batch_size): + A, b, c, K = diffcp.utils.least_squares_eq_scs_data(m, n) + As += [A] + bs += [b] + cs += [c] + Ks += [K] + +def time_function(f, N=1): + result = [] + for i in range(N): + tic = time.time() + f() + toc = time.time() + result += [toc-tic] + return np.mean(result), np.std(result) + +for n_jobs in range(1, 5): + def f_forward(): + return diffcp.solve_and_derivative_batch(As, bs, cs, Ks, + n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, mode='lpgd', solve_method="Clarabel") + xs, ys, ss, D_batch, DT_batch = diffcp.solve_and_derivative_batch(As, bs, cs, Ks, + n_jobs_forward=1, n_jobs_backward=n_jobs, mode='lpgd', solve_method="Clarabel") + def f_backward(): + DT_batch(xs, ys, ss, tau=0.1, rho=0.1) + + mean_forward, std_forward = time_function(f_forward) + mean_backward, std_backward = time_function(f_backward) + print ("%03d | %4.4f +/- %2.2f | %4.4f +/- %2.2f" % + (n_jobs, mean_forward, std_forward, mean_backward, std_backward)) \ No newline at end of file From 8b0f8ac2f54f3c43edc1d142687835d421fbc95e Mon Sep 17 00:00:00 2001 From: Anselm Paulus Date: Fri, 9 Aug 2024 13:35:48 +0200 Subject: [PATCH 03/17] Update Readme --- README.md | 210 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 209 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 3f85fb9..d0db92d 100644 --- a/README.md +++ b/README.md @@ -1 +1,209 @@ -This branch is currently only a fork of the original diffcp implementation. The LPGD implementation will be added soon. +# Lagrangian Proximal Gradient Descent - diffcp implementation + +This fork implements [Lagrangian proximal gradient descent (LPGD)](https://arxiv.org/abs/2407.05920) as an option for the conic program in diffcp. +It can be understood as an efficient implementation of adjoint finite differences, enabling informative derivative replacements in the case of degenerate derivatives. +The perturbation strength is controlled by the parameter tau, the parameter rho controls th eregularization strength. +Note: For the forward derivatives, LPGD computes standard finite differences with an optional regularization. + +Example usage: +```python +x, y, s, D, DT = solve_and_derivative(A, b, c, cone_dict, mode='LPGD') +... +dx, dy, ds = D(dA, db, dc, tau=0.1, rho=0.1) +... +dA, db, dc = DT(dx, dy, ds, tau=0.1, rho=0.1) +``` + +Alternatively the derivative keyword arguments can be passed directly on the forward pass: +```python +x, y, s, D, DT = solve_and_derivative(A, b, c, cone_dict, mode='LPGD', derivative_kwargs=dict(tau=0.1, rho=0.1)) +... +dx, dy, ds = D(dA, db, dc) +... +dA, db, dc = DT(dx, dy, ds) +``` + + +Below follows the original Readme: + +[![Build Status](http://github.com/cvxgrp/diffcp/workflows/build/badge.svg?event=push)](https://github.com/cvxgrp/diffcp/actions/workflows/build.yml) + +# diffcp + +`diffcp` is a Python package for computing the derivative of a convex cone program, with respect to its problem data. The derivative is implemented as an abstract linear map, with methods for its forward application and its adjoint. + +The implementation is based on the calculations in our paper [Differentiating through a cone program](http://web.stanford.edu/~boyd/papers/diff_cone_prog.html). + +### Installation +`diffcp` is available on PyPI, as a source distribution. Install it with + +```bash +pip install diffcp +``` + +You will need a C++11-capable compiler to build `diffcp`. + +`diffcp` requires: +* [NumPy](https://github.com/numpy/numpy) >= 1.15 +* [SciPy](https://github.com/scipy/scipy) >= 1.10 +* [SCS](https://github.com/bodono/scs-python) >= 2.0.2 +* [pybind11](https://github.com/pybind/pybind11/tree/stable) >= 2.4 +* [threadpoolctl](https://github.com/joblib/threadpoolctl) >= 1.1 +* [ECOS](https://github.com/embotech/ecos-python) >= 2.0.10 +* [Clarabel](https://github.com/oxfordcontrol/Clarabel.rs) >= 0.5.1 +* Python >= 3.7 + +`diffcp` uses Eigen; Eigen operations can be automatically vectorized by compilers. To enable vectorization, install with + +```bash +MARCH_NATIVE=1 pip install diffcp +``` + +OpenMP can be enabled by passing extra arguments to your compiler. For example, on linux, you can tell gcc to activate the OpenMP extension by specifying the flag "-fopenmp": + +```bash +OPENMP_FLAG="-fopenmp" pip install diffcp +``` + +To enable both vectorization and OpenMP (on linux), use + +```bash +MARCH_NATIVE=1 OPENMP_FLAG="-fopenmp" pip install diffcp +``` + +### Cone programs +`diffcp` differentiates through a primal-dual cone program pair. The primal problem must be expressed as + +``` +minimize c'x +subject to Ax + s = b + s in K +``` +where `x` and `s` are variables, `A`, `b` and `c` are the user-supplied problem data, and `K` is a user-defined convex cone. The corresponding dual problem is + +``` +minimize b'y +subject to A'y + c == 0 + y in K^* +``` + +with dual variable `y`. + +### Usage + +`diffcp` exposes the function + +```python +solve_and_derivative(A, b, c, cone_dict, warm_start=None, solver=None, **kwargs). +``` + +This function returns a primal-dual solution `x`, `y`, and `s`, along with +functions for evaluating the derivative and its adjoint (transpose). +These functions respectively compute right and left multiplication of the derivative +of the solution map at `A`, `b`, and `c` by a vector. +The `solver` argument determines which solver to use; the available solvers +are `solver="SCS"`, `solver="ECOS"`, and `solver="Clarabel"`. +If no solver is specified, `diffcp` will choose the solver itself. +In the case that the problem is not solved, i.e. the solver fails for some reason, we will raise +a `SolverError` Exception. + +#### Arguments +The arguments `A`, `b`, and `c` correspond to the problem data of a cone program. +* `A` must be a [SciPy sparse CSC matrix](https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csc_matrix.html). +* `b` and `c` must be NumPy arrays. +* `cone_dict` is a dictionary that defines the convex cone `K`. +* `warm_start` is an optional tuple `(x, y, s)` at which to warm-start. (Note: this is only available for the SCS solver). +* `**kwargs` are keyword arguments to forward to the solver (e.g., `verbose=False`). + +These inputs must conform to the [SCS convention](https://github.com/bodono/scs-python) for problem data. The keys in `cone_dict` correspond to the cones, with +* `diffcp.ZERO` for the zero cone, +* `diffcp.POS` for the positive orthant, +* `diffcp.SOC` for a product of SOC cones, +* `diffcp.PSD` for a product of PSD cones, and +* `diffcp.EXP` for a product of exponential cones. + +The values in `cone_dict` denote the sizes of each cone; the values of `diffcp.SOC`, `diffcp.PSD`, and `diffcp.EXP` should be lists. The order of the rows of `A` must match the ordering of the cones given above. For more details, consult the [SCS documentation](https://github.com/cvxgrp/scs/blob/master/README.md). + +#### Return value +The function `solve_and_derivative` returns a tuple + +```python +(x, y, s, derivative, adjoint_derivative) +``` + +* `x`, `y`, and `s` are a primal-dual solution. + +* `derivative` is a function that applies the derivative at `(A, b, c)` to perturbations `dA`, `db`, `dc`. It has the signature +```derivative(dA, db, dc) -> dx, dy, ds```, where `dA` is a SciPy sparse CSC matrix with the same sparsity pattern as `A`, and `db` and `dc` are NumPy arrays. `dx`, `dy`, and `ds` are NumPy arrays, approximating the change in the primal-dual solution due to the perturbation. + +* `adjoint_derivative` is a function that applies the adjoint of the derivative to perturbations `dx`, `dy`, `ds`. It has the signature +```adjoint_derivative(dx, dy, ds) -> dA, db, dc```, where `dx`, `dy`, and `ds` are NumPy arrays. + +#### Example +```python +import numpy as np +from scipy import sparse + +import diffcp + +cone_dict = { + diffcp.ZERO: 3, + diffcp.POS: 3, + diffcp.SOC: [5] +} + +m = 3 + 3 + 5 +n = 5 + +A, b, c = diffcp.utils.random_cone_prog(m, n, cone_dict) +x, y, s, D, DT = diffcp.solve_and_derivative(A, b, c, cone_dict) + +# evaluate the derivative +nonzeros = A.nonzero() +data = 1e-4 * np.random.randn(A.size) +dA = sparse.csc_matrix((data, nonzeros), shape=A.shape) +db = 1e-4 * np.random.randn(m) +dc = 1e-4 * np.random.randn(n) +dx, dy, ds = D(dA, db, dc) + +# evaluate the adjoint of the derivative +dx = c +dy = np.zeros(m) +ds = np.zeros(m) +dA, db, dc = DT(dx, dy, ds) +``` + +For more examples, including the SDP example described in the paper, see the [`examples`](examples/) directory. + +### Citing +If you wish to cite `diffcp`, please use the following BibTex: + +``` +@article{diffcp2019, + author = {Agrawal, A. and Barratt, S. and Boyd, S. and Busseti, E. and Moursi, W.}, + title = {Differentiating through a Cone Program}, + journal = {Journal of Applied and Numerical Optimization}, + year = {2019}, + volume = {1}, + number = {2}, + pages = {107--115}, +} + +@misc{diffcp, + author = {Agrawal, A. and Barratt, S. and Boyd, S. and Busseti, E. and Moursi, W.}, + title = {{diffcp}: differentiating through a cone program, version 1.0}, + howpublished = {\url{https://github.com/cvxgrp/diffcp}}, + year = 2019 +} +``` + +The following thesis concurrently derived the mathematics behind differentiating cone programs. +``` +@phdthesis{amos2019differentiable, + author = {Brandon Amos}, + title = {{Differentiable Optimization-Based Modeling for Machine Learning}}, + school = {Carnegie Mellon University}, + year = 2019, + month = May, +} +``` From 6e761ce6487e11bfa7b0c117e65259cf7d39f200 Mon Sep 17 00:00:00 2001 From: Anselm Paulus Date: Fri, 9 Aug 2024 14:33:14 +0200 Subject: [PATCH 04/17] simplify argument passing --- README.md | 6 ++-- diffcp/cone_program.py | 63 +++++++++++++----------------------------- diffcp/utils.py | 23 +++++++++++++++ 3 files changed, 45 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index d0db92d..934136b 100644 --- a/README.md +++ b/README.md @@ -2,12 +2,12 @@ This fork implements [Lagrangian proximal gradient descent (LPGD)](https://arxiv.org/abs/2407.05920) as an option for the conic program in diffcp. It can be understood as an efficient implementation of adjoint finite differences, enabling informative derivative replacements in the case of degenerate derivatives. -The perturbation strength is controlled by the parameter tau, the parameter rho controls th eregularization strength. +The perturbation strength is controlled by the parameter tau, the parameter rho controls the regularization strength. Note: For the forward derivatives, LPGD computes standard finite differences with an optional regularization. Example usage: ```python -x, y, s, D, DT = solve_and_derivative(A, b, c, cone_dict, mode='LPGD') +x, y, s, D, DT = solve_and_derivative(A, b, c, cone_dict, mode='lpgd') ... dx, dy, ds = D(dA, db, dc, tau=0.1, rho=0.1) ... @@ -16,7 +16,7 @@ dA, db, dc = DT(dx, dy, ds, tau=0.1, rho=0.1) Alternatively the derivative keyword arguments can be passed directly on the forward pass: ```python -x, y, s, D, DT = solve_and_derivative(A, b, c, cone_dict, mode='LPGD', derivative_kwargs=dict(tau=0.1, rho=0.1)) +x, y, s, D, DT = solve_and_derivative(A, b, c, cone_dict, mode='lpgd', derivative_kwargs=dict(tau=0.1, rho=0.1)) ... dx, dy, ds = D(dA, db, dc) ... diff --git a/diffcp/cone_program.py b/diffcp/cone_program.py index ea3fb86..5443192 100644 --- a/diffcp/cone_program.py +++ b/diffcp/cone_program.py @@ -12,8 +12,7 @@ import _diffcp import diffcp.cones as cone_lib -import copy -from diffcp.utils import regularize_P +from diffcp.utils import regularize_P, embed_problem def pi(z, cones): @@ -586,7 +585,7 @@ def derivative(dA, db, dc, **kwargs): """ if "lpgd" in mode: - dx, dy, ds = derivative_lpgd(dA, db, dc, mode=mode, **derivative_kwargs, **kwargs) + dx, dy, ds = derivative_lpgd(dA, db, dc, **derivative_kwargs, **kwargs) return dx, dy, ds else: dQ = sparse.bmat([ @@ -623,7 +622,7 @@ def adjoint_derivative(dx, dy, ds, **kwargs): """ if "lpgd" in mode: - dA, db, dc = adjoint_derivative_lpgd(dx, dy, ds, mode=mode, **derivative_kwargs, **kwargs) + dA, db, dc = adjoint_derivative_lpgd(dx, dy, ds, **derivative_kwargs, **kwargs) else: dw = -(x @ dx + y @ dy + s @ ds) dz = np.concatenate( @@ -646,7 +645,7 @@ def adjoint_derivative(dx, dy, ds, **kwargs): return dA, db, dc - def derivative_lpgd(dA, db, dc, mode, tau, rho): + def derivative_lpgd(dA, db, dc, tau, rho): """Computes standard finite difference derivative at (A, b, c) to perturbations dA, db, dc with optional regularization rho. Args: @@ -654,8 +653,6 @@ def derivative_lpgd(dA, db, dc, mode, tau, rho): pattern as the matrix `A` from the cone program db: NumPy array representing perturbation in `b` dc: NumPy array representing perturbation in `c` - mode: Mode of the derivative computation; options are - ["lpgd", "lpgd_left", "lpgd_right"] corresponding to central, left, and right finite differences. tau: Perturbation strength parameter rho: Regularization parameter Returns: @@ -669,10 +666,10 @@ def derivative_lpgd(dA, db, dc, mode, tau, rho): raise ValueError(f"Finite difference mode requires rho >= 0, got rho={rho}.") if mode in ["lpgd", "lpgd_right"]: # Perturb the problem to the right - x_right, y_right, s_right = compute_perturbed_solution(dA, db, dc, tau, rho, solve_method, solver_kwargs) + x_right, y_right, s_right = compute_perturbed_solution(dA, db, dc, tau, rho) if mode in ["lpgd", "lpgd_left"]: # Perturb the problem to the left - x_left, y_left, s_left = compute_perturbed_solution(dA, db, dc, -tau, rho, solve_method, solver_kwargs) + x_left, y_left, s_left = compute_perturbed_solution(dA, db, dc, -tau, rho) if mode == "lpgd": dx = (x_right - x_left) / (2 * tau) @@ -690,7 +687,7 @@ def derivative_lpgd(dA, db, dc, mode, tau, rho): raise ValueError(f"Only modes 'lpgd', 'lpgd_left', 'lpgd_right' can be used, got {mode}.") return dx, dy, ds - def compute_perturbed_solution(dA, db, dc, tau, rho, solve_method, solver_kwargs): + def compute_perturbed_solution(dA, db, dc, tau, rho): """ Computes the perturbed solution x_right, y_right, s_right at (A, b, c) with perturbations dA, db, dc and optional regularization rho. @@ -702,8 +699,6 @@ def compute_perturbed_solution(dA, db, dc, tau, rho, solve_method, solver_kwargs dc: NumPy array representing perturbation in `c` tau: Perturbation strength parameter rho: Regularization parameter - solve_method: Name of solver to use; SCS, ECOS, or Clarabel - solver_kwargs: Additional keyword arguments to send to the solver Returns: x_pert: Perturbed solution of the primal variable x y_pert: Perturbed solution of the dual variable y @@ -729,7 +724,7 @@ def compute_perturbed_solution(dA, db, dc, tau, rho, solve_method, solver_kwargs x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"] return x_pert, y_pert, s_pert - def adjoint_derivative_lpgd(dx, dy, ds, mode, tau, rho): + def adjoint_derivative_lpgd(dx, dy, ds, tau, rho): """Lagrangian Proximal Gradient Descent (LPGD) [arxiv.org/abs/2407.05920] Computes informative replacements for adjoint derivatives given incoming gradients dx, dy, ds, can be regarded as an efficient adjoint finite difference method. @@ -741,7 +736,6 @@ def adjoint_derivative_lpgd(dx, dy, ds, mode, tau, rho): dx: NumPy array representing perturbation in `x` dy: NumPy array representing perturbation in `y` ds: NumPy array representing perturbation in `s` - mode: Mode of the adjoint derivative computation; options arem ["lpgd", "lpgd_left", "lpgd_right"] tau: Perturbation strength parameter rho: Regularization parameter Returns: @@ -755,10 +749,10 @@ def adjoint_derivative_lpgd(dx, dy, ds, mode, tau, rho): raise ValueError(f"Finite difference mode requires rho >= 0, got rho={rho}.") if mode in ["lpgd", "lpgd_right"]: # Perturb the problem to the right - x_right, y_right, _ = compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, solve_method, solver_kwargs) + x_right, y_right, _ = compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho) if mode in ["lpgd", "lpgd_left"]: # Perturb the problem to the left - x_left, y_left, _ = compute_adjoint_perturbed_solution(dx, dy, ds, -tau, rho, solve_method, solver_kwargs) + x_left, y_left, _ = compute_adjoint_perturbed_solution(dx, dy, ds, -tau, rho) if mode == "lpgd": dc = (x_right - x_left) / (2 * tau) @@ -778,11 +772,11 @@ def adjoint_derivative_lpgd(dx, dy, ds, mode, tau, rho): dA = sparse.csc_matrix((dA_data, (rows, cols)), shape=A.shape) return dA, db, dc - def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, solve_method, solver_kwargs): + def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho): """ Computes the adjoint perturbed solution x_right, y_right, s_right (Lagrangian Proximal Map) by solving the following perturbed problem with perturbations dx, dy, ds and perturbation parameters tau, rho: - argmin. + + tau ( + ) + rho/2 |x-x^*|^2 + argmin. + + tau*( + ) + rho/2 |x-x^*|^2 subject to Ax + s = b-tau*dy s \in K @@ -796,9 +790,9 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, solve_method, solve We add a constraint x'=s, replace all appearances of s with x' and solve argmin. <[x, x'], [[P+rho*I, 0]; [x, x']> + <[c+tau*dx-rho*x^*, tau*ds-rho*s^*], [x, x']> [ 0, rho*I]] - subject to [[A, I]; [x, x'] + [0; s] = [0; b-tau*dy] + subject to [[A, I]; [x, x'] + [s'; s] = [0; b-tau*dy] [0, -I]] - s \in K + (s', s) \in (0 \times K) Note that we also add a regularizer on s (rho/2 |s-s^*|^2) in this case. Args: @@ -807,8 +801,6 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, solve_method, solve ds: NumPy array representing perturbation in `s` tau: Perturbation strength parameter rho: Regularization parameter - solve_method: Name of solver to use; SCS, ECOS, or Clarabel - solver_kwargs: Additional keyword arguments to send to the solver Returns: x_pert: Perturbed solution of the primal variable x y_pert: Perturbed solution of the dual variable y @@ -836,24 +828,7 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, solve_method, solve x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"] else: # Embed problem - A_emb = sparse.bmat([ - [A, sparse.eye(m, format=A.format)], - [None, -sparse.eye(m, format=A.format)] - ]).tocsc() - b_emb = np.hstack([b, np.zeros(m)]) - c_emb = np.hstack([c, np.zeros(m)]) - if P is not None: - P_emb = sparse.bmat([ - [P, None], - [None, np.zeros((m, m))] - ]).tocsc() - else: - P_emb = None - cone_dict_emb = copy.deepcopy(cone_dict) - if 'z' in cone_dict_emb: - cone_dict_emb['z'] += m - else: - cone_dict_emb['z'] = m + A_emb, b_emb, c_emb, P_emb, cone_dict_emb = embed_problem(A, b, c, P, cone_dict) # Perturb problem b_emb_pert = b_emb - tau * np.hstack([dy, np.zeros(m)]) @@ -864,10 +839,10 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, solve_method, solve c_emb_pert_reg = c_emb_pert - rho * np.hstack([x, s]) # Set warm start - x_ws = np.hstack([x, s]) - y_ws = np.hstack([y, y]) - s_ws = np.hstack([s, s]) - warm_start = (x_ws, y_ws, s_ws) if solve_method != "ECOS" else None + if solve_method == "ECOS": + warm_start = None + else: + warm_start = (np.hstack([x, s]), np.hstack([y, y]), np.hstack([s, s])) # Solve the embedded problem result_pert = solve_internal(A=A_emb, b=b_emb_pert, c=c_emb_pert_reg, P=P_emb_reg, cone_dict=cone_dict_emb, diff --git a/diffcp/utils.py b/diffcp/utils.py index 40ec85e..4309f98 100644 --- a/diffcp/utils.py +++ b/diffcp/utils.py @@ -1,6 +1,7 @@ import diffcp.cones as cone_lib import numpy as np from scipy import sparse +from copy import deepcopy def scs_data_from_cvxpy_problem(problem): @@ -49,3 +50,25 @@ def regularize_P(P, rho, size): else: P_reg = P return P_reg + +def embed_problem(A, b, c, P, cone_dict): + m = b.shape[0] + A_emb = sparse.bmat([ + [A, sparse.eye(m, format=A.format)], + [None, -sparse.eye(m, format=A.format)] + ]).tocsc() + b_emb = np.hstack([b, np.zeros(m)]) + c_emb = np.hstack([c, np.zeros(m)]) + if P is not None: + P_emb = sparse.bmat([ + [P, None], + [None, np.zeros((m, m))] + ]).tocsc() + else: + P_emb = None + cone_dict_emb = deepcopy(cone_dict) + if 'z' in cone_dict_emb: + cone_dict_emb['z'] += m + else: + cone_dict_emb['z'] = m + return A_emb, b_emb, c_emb, P_emb, cone_dict_emb From e4f8e8277e76487066b6e3a040e6492f3854e8e0 Mon Sep 17 00:00:00 2001 From: Anselm Paulus Date: Fri, 9 Aug 2024 14:33:27 +0200 Subject: [PATCH 05/17] add more lpgd examples --- examples/batch_ecos.py | 4 +- examples/batch_ecos_lpgd.py | 46 ++++++++++++++++ examples/{batch_LPGD.py => batch_lpgd.py} | 0 examples/dual_example.py | 2 +- examples/dual_example_lpgd.py | 39 +++++++++++++ examples/ecos_example.py | 4 +- examples/ecos_example_lpgd.py | 39 +++++++++++++ examples/sdp_lpgd.py | 67 +++++++++++++++++++++++ 8 files changed, 196 insertions(+), 5 deletions(-) create mode 100644 examples/batch_ecos_lpgd.py rename examples/{batch_LPGD.py => batch_lpgd.py} (100%) create mode 100644 examples/dual_example_lpgd.py create mode 100644 examples/ecos_example_lpgd.py create mode 100644 examples/sdp_lpgd.py diff --git a/examples/batch_ecos.py b/examples/batch_ecos.py index 9eca81b..0684305 100644 --- a/examples/batch_ecos.py +++ b/examples/batch_ecos.py @@ -31,9 +31,9 @@ def time_function(f, N=1): for n_jobs in range(1, 8): def f_forward(): return diffcp.solve_and_derivative_batch(As, bs, cs, Ks, - n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, solver="ECOS", verbose=False) + n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False) xs, ys, ss, D_batch, DT_batch = diffcp.solve_and_derivative_batch(As, bs, cs, Ks, - n_jobs_forward=1, n_jobs_backward=n_jobs, solver="ECOS", verbose=False) + n_jobs_forward=1, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False) def f_backward(): DT_batch(xs, ys, ss, mode="lsqr") diff --git a/examples/batch_ecos_lpgd.py b/examples/batch_ecos_lpgd.py new file mode 100644 index 0000000..9f5b0a7 --- /dev/null +++ b/examples/batch_ecos_lpgd.py @@ -0,0 +1,46 @@ +import diffcp +import utils +import IPython as ipy +import time +import numpy as np + +m = 100 +n = 50 + +batch_size = 16 +n_jobs = 1 + +As, bs, cs, Ks = [], [], [], [] +for _ in range(batch_size): + A, b, c, K = diffcp.utils.least_squares_eq_scs_data(m, n) + As += [A] + bs += [b] + cs += [c] + Ks += [K] + + +def time_function(f, N=1): + result = [] + for i in range(N): + tic = time.time() + f() + toc = time.time() + result += [toc - tic] + return np.mean(result), np.std(result) + +for n_jobs in range(1, 8): + def f_forward(): + return diffcp.solve_and_derivative_batch(As, bs, cs, Ks, + n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False, + mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.0)) + xs, ys, ss, D_batch, DT_batch = diffcp.solve_and_derivative_batch(As, bs, cs, Ks, + n_jobs_forward=1, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False, + mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.0)) + + def f_backward(): + DT_batch(xs, ys, ss) + + mean_forward, std_forward = time_function(f_forward) + mean_backward, std_backward = time_function(f_backward) + print("%03d | %4.4f +/- %2.2f | %4.4f +/- %2.2f" % + (n_jobs, mean_forward, std_forward, mean_backward, std_backward)) diff --git a/examples/batch_LPGD.py b/examples/batch_lpgd.py similarity index 100% rename from examples/batch_LPGD.py rename to examples/batch_lpgd.py diff --git a/examples/dual_example.py b/examples/dual_example.py index ad0c816..07e6c1d 100644 --- a/examples/dual_example.py +++ b/examples/dual_example.py @@ -9,7 +9,7 @@ # defined as a product of a 3-d fixed cone, 3-d positive orthant cone, # and a 5-d second order cone. K = { - 'f': 3, + 'z': 3, 'l': 3, 'q': [5] } diff --git a/examples/dual_example_lpgd.py b/examples/dual_example_lpgd.py new file mode 100644 index 0000000..93ced61 --- /dev/null +++ b/examples/dual_example_lpgd.py @@ -0,0 +1,39 @@ +import diffcp + +import numpy as np +import utils +np.set_printoptions(precision=5, suppress=True) + + +# We generate a random cone program with a cone +# defined as a product of a 3-d fixed cone, 3-d positive orthant cone, +# and a 5-d second order cone. +K = { + 'z': 3, + 'l': 3, + 'q': [5] +} + +m = 3 + 3 + 5 +n = 5 + +np.random.seed(0) + +A, b, c = utils.random_cone_prog(m, n, K) + +# We solve the cone program and get the derivative and its adjoint +x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative( + A, b, c, K, eps=1e-10, mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.1)) + +print("x =", x) +print("y =", y) +print("s =", s) + +# We evaluate the gradient of the objective with respect to A, b and c. +dA, db, dc = adjoint_derivative(c, np.zeros(m), np.zeros(m)) + +# The gradient of the objective with respect to b should be +# equal to minus the dual variable y (see, e.g., page 268 of Convex Optimization by +# Boyd & Vandenberghe). +print("db =", db) +print("-y =", -y) diff --git a/examples/ecos_example.py b/examples/ecos_example.py index 9a64182..dcec447 100644 --- a/examples/ecos_example.py +++ b/examples/ecos_example.py @@ -9,7 +9,7 @@ # defined as a product of a 3-d fixed cone, 3-d positive orthant cone, # and a 5-d second order cone. K = { - 'f': 3, + 'z': 3, 'l': 3, 'q': [5] } @@ -23,7 +23,7 @@ # We solve the cone program and get the derivative and its adjoint x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative( - A, b, c, K, solver="ECOS", verbose=False) + A, b, c, K, solve_method="ECOS", verbose=False) print("x =", x) print("y =", y) diff --git a/examples/ecos_example_lpgd.py b/examples/ecos_example_lpgd.py new file mode 100644 index 0000000..c142253 --- /dev/null +++ b/examples/ecos_example_lpgd.py @@ -0,0 +1,39 @@ +import diffcp + +import numpy as np +import utils +np.set_printoptions(precision=5, suppress=True) + + +# We generate a random cone program with a cone +# defined as a product of a 3-d fixed cone, 3-d positive orthant cone, +# and a 5-d second order cone. +K = { + 'z': 3, + 'l': 3, + 'q': [5] +} + +m = 3 + 3 + 5 +n = 5 + +np.random.seed(0) + +A, b, c = utils.random_cone_prog(m, n, K) + +# We solve the cone program and get the derivative and its adjoint +x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative( + A, b, c, K, solve_method="ECOS", verbose=False, mode="lpgd", derivative_kwargs=dict(tau=0.1, rho=0.0)) + +print("x =", x) +print("y =", y) +print("s =", s) + +# We evaluate the gradient of the objective with respect to A, b and c. +dA, db, dc = adjoint_derivative(c, np.zeros(m), np.zeros(m)) + +# The gradient of the objective with respect to b should be +# equal to minus the dual variable y (see, e.g., page 268 of Convex Optimization by +# Boyd & Vandenberghe). +print("db =", db) +print("-y =", -y) diff --git a/examples/sdp_lpgd.py b/examples/sdp_lpgd.py new file mode 100644 index 0000000..e5cec85 --- /dev/null +++ b/examples/sdp_lpgd.py @@ -0,0 +1,67 @@ +import cvxpy as cp +import diffcp +import numpy as np + +import time + + +def scs_data_from_cvxpy_problem(problem): + data = problem.get_problem_data(cp.SCS)[0] + cone_dims = cp.reductions.solvers.conic_solvers.scs_conif.dims_to_solver_dict(data[ + "dims"]) + return data["A"], data["b"], data["c"], cone_dims + + +def randn_symm(n): + A = np.random.randn(n, n) + return (A + A.T) / 2 + + +def randn_psd(n): + A = 1. / 10 * np.random.randn(n, n) + return A@A.T + + +def main(n=3, p=3): + # Generate problem data + C = randn_psd(n) + As = [randn_symm(n) for _ in range(p)] + Bs = np.random.randn(p) + + # Extract problem data using cvxpy + X = cp.Variable((n, n), PSD=True) + objective = cp.trace(C@X) + constraints = [cp.trace(As[i]@X) == Bs[i] for i in range(p)] + prob = cp.Problem(cp.Minimize(objective), constraints) + A, b, c, cone_dims = scs_data_from_cvxpy_problem(prob) + + # Print problem size + mn_plus_m_plus_n = A.size + b.size + c.size + n_plus_2n = c.size + 2 * b.size + entries_in_derivative = mn_plus_m_plus_n * n_plus_2n + print(f"""n={n}, p={p}, A.shape={A.shape}, nnz in A={A.nnz}, derivative={mn_plus_m_plus_n}x{n_plus_2n} ({entries_in_derivative} entries)""") + + # Compute solution and derivative maps + start = time.perf_counter() + x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative( + A, b, c, cone_dims, eps=1e-5, mode="lpgd") + end = time.perf_counter() + print("Compute solution and set up derivative: %.2f s." % (end - start)) + + # Derivative + lpgd_args = dict(tau=0.1, rho=0.1) + start = time.perf_counter() + dA, db, dc = adjoint_derivative(diffcp.cones.vec_symm( + C), np.zeros(y.size), np.zeros(s.size), **lpgd_args) + end = time.perf_counter() + print("Evaluate derivative: %.2f s." % (end - start)) + + # Adjoint of derivative + start = time.perf_counter() + dx, dy, ds = derivative(A, b, c, **lpgd_args) + end = time.perf_counter() + print("Evaluate adjoint of derivative: %.2f s." % (end - start)) + +if __name__ == '__main__': + np.random.seed(0) + main(50, 25) From 1fa3d05077486f018f041db8482a7f863b300a18 Mon Sep 17 00:00:00 2001 From: Anselm Paulus Date: Fri, 9 Aug 2024 14:48:18 +0200 Subject: [PATCH 06/17] Catch error if perturbed problem infeasible --- diffcp/cone_program.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/diffcp/cone_program.py b/diffcp/cone_program.py index 5443192..a2e511d 100644 --- a/diffcp/cone_program.py +++ b/diffcp/cone_program.py @@ -718,8 +718,12 @@ def compute_perturbed_solution(dA, db, dc, tau, rho): warm_start = (x, y, s) # Solve the perturbed problem - result_pert = solve_internal(A=A_pert, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, - solve_method=solve_method, warm_start=warm_start, **solver_kwargs) + try: + result_pert = solve_internal(A=A_pert, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, + solve_method=solve_method, warm_start=warm_start, **solver_kwargs) + except SolverError as e: + raise ValueError(f"Failed to solve perturbed problem: {e}. If it is infeasible consider decreasing "\ + "tau or switching to 'lpgd_left' or 'lpgd_right' mode.") # Extract the solutions x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"] return x_pert, y_pert, s_pert @@ -822,8 +826,12 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho): # Solve the perturbed problem # TODO: If solve_method=='SCS' and rho==0, this can be sped up strongly by using solver.update - result_pert = solve_internal(A=A, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, - solve_method=solve_method, warm_start=warm_start, **solver_kwargs) + try: + result_pert = solve_internal(A=A, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, + solve_method=solve_method, warm_start=warm_start, **solver_kwargs) + except SolverError as e: + raise ValueError(f"Failed to solve adjoint perturbed problem: {e}. If it is infeasible consider decreasing "\ + "tau or switching to 'lpgd_left' or 'lpgd_right' mode.") # Extract the solutions x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"] else: @@ -845,8 +853,12 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho): warm_start = (np.hstack([x, s]), np.hstack([y, y]), np.hstack([s, s])) # Solve the embedded problem - result_pert = solve_internal(A=A_emb, b=b_emb_pert, c=c_emb_pert_reg, P=P_emb_reg, cone_dict=cone_dict_emb, - solve_method=solve_method, warm_start=warm_start, **solver_kwargs) + try: + result_pert = solve_internal(A=A_emb, b=b_emb_pert, c=c_emb_pert_reg, P=P_emb_reg, cone_dict=cone_dict_emb, + solve_method=solve_method, warm_start=warm_start, **solver_kwargs) + except SolverError as e: + raise ValueError(f"Failed to solve adjoint perturbed problem: {e}. If it is infeasible consider decreasing "\ + "tau or switching to 'lpgd_left' or 'lpgd_right' mode.") # Extract the solutions x_pert = result_pert['x'][:n] y_pert = result_pert['y'][:m] From 133d30d8da6f734bdfa5f71a664170d15a3808f6 Mon Sep 17 00:00:00 2001 From: Anselm Paulus Date: Fri, 9 Aug 2024 14:59:43 +0200 Subject: [PATCH 07/17] Give better error message on infeasibility --- diffcp/cone_program.py | 48 +++++++++++++++++++++++------------------- examples/batch_lpgd.py | 4 ++-- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/diffcp/cone_program.py b/diffcp/cone_program.py index a2e511d..0725725 100644 --- a/diffcp/cone_program.py +++ b/diffcp/cone_program.py @@ -666,10 +666,18 @@ def derivative_lpgd(dA, db, dc, tau, rho): raise ValueError(f"Finite difference mode requires rho >= 0, got rho={rho}.") if mode in ["lpgd", "lpgd_right"]: # Perturb the problem to the right - x_right, y_right, s_right = compute_perturbed_solution(dA, db, dc, tau, rho) + try: + x_right, y_right, s_right = compute_perturbed_solution(dA, db, dc, tau, rho) + except SolverError as e: + raise ValueError(f"Computation of right-perturbed problem failed: {e}. "\ + "Consider decresing tau or swiching to 'lpgd_left' mode.") if mode in ["lpgd", "lpgd_left"]: # Perturb the problem to the left - x_left, y_left, s_left = compute_perturbed_solution(dA, db, dc, -tau, rho) + try: + x_left, y_left, s_left = compute_perturbed_solution(dA, db, dc, -tau, rho) + except SolverError as e: + raise ValueError(f"Computation of left-perturbed problem failed: {e}. "\ + "Consider decresing tau or swiching to 'lpgd_right' mode.") if mode == "lpgd": dx = (x_right - x_left) / (2 * tau) @@ -718,12 +726,8 @@ def compute_perturbed_solution(dA, db, dc, tau, rho): warm_start = (x, y, s) # Solve the perturbed problem - try: - result_pert = solve_internal(A=A_pert, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, - solve_method=solve_method, warm_start=warm_start, **solver_kwargs) - except SolverError as e: - raise ValueError(f"Failed to solve perturbed problem: {e}. If it is infeasible consider decreasing "\ - "tau or switching to 'lpgd_left' or 'lpgd_right' mode.") + result_pert = solve_internal(A=A_pert, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, + solve_method=solve_method, warm_start=warm_start, **solver_kwargs) # Extract the solutions x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"] return x_pert, y_pert, s_pert @@ -753,10 +757,18 @@ def adjoint_derivative_lpgd(dx, dy, ds, tau, rho): raise ValueError(f"Finite difference mode requires rho >= 0, got rho={rho}.") if mode in ["lpgd", "lpgd_right"]: # Perturb the problem to the right - x_right, y_right, _ = compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho) + try: + x_right, y_right, _ = compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho) + except SolverError as e: + raise ValueError(f"Computation of right-perturbed problem failed: {e}. "\ + "Consider decresing tau or swiching to 'lpgd_left' mode.") if mode in ["lpgd", "lpgd_left"]: # Perturb the problem to the left - x_left, y_left, _ = compute_adjoint_perturbed_solution(dx, dy, ds, -tau, rho) + try: + x_left, y_left, _ = compute_adjoint_perturbed_solution(dx, dy, ds, -tau, rho) + except SolverError as e: + raise ValueError(f"Computation of left-perturbed problem failed: {e}. "\ + "Consider decresing tau or swiching to 'lpgd_right' mode.") if mode == "lpgd": dc = (x_right - x_left) / (2 * tau) @@ -826,12 +838,8 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho): # Solve the perturbed problem # TODO: If solve_method=='SCS' and rho==0, this can be sped up strongly by using solver.update - try: - result_pert = solve_internal(A=A, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, - solve_method=solve_method, warm_start=warm_start, **solver_kwargs) - except SolverError as e: - raise ValueError(f"Failed to solve adjoint perturbed problem: {e}. If it is infeasible consider decreasing "\ - "tau or switching to 'lpgd_left' or 'lpgd_right' mode.") + result_pert = solve_internal(A=A, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, + solve_method=solve_method, warm_start=warm_start, **solver_kwargs) # Extract the solutions x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"] else: @@ -853,12 +861,8 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho): warm_start = (np.hstack([x, s]), np.hstack([y, y]), np.hstack([s, s])) # Solve the embedded problem - try: - result_pert = solve_internal(A=A_emb, b=b_emb_pert, c=c_emb_pert_reg, P=P_emb_reg, cone_dict=cone_dict_emb, - solve_method=solve_method, warm_start=warm_start, **solver_kwargs) - except SolverError as e: - raise ValueError(f"Failed to solve adjoint perturbed problem: {e}. If it is infeasible consider decreasing "\ - "tau or switching to 'lpgd_left' or 'lpgd_right' mode.") + result_pert = solve_internal(A=A_emb, b=b_emb_pert, c=c_emb_pert_reg, P=P_emb_reg, cone_dict=cone_dict_emb, + solve_method=solve_method, warm_start=warm_start, **solver_kwargs) # Extract the solutions x_pert = result_pert['x'][:n] y_pert = result_pert['y'][:m] diff --git a/examples/batch_lpgd.py b/examples/batch_lpgd.py index 81fb4ab..6992433 100644 --- a/examples/batch_lpgd.py +++ b/examples/batch_lpgd.py @@ -30,9 +30,9 @@ def time_function(f, N=1): for n_jobs in range(1, 5): def f_forward(): return diffcp.solve_and_derivative_batch(As, bs, cs, Ks, - n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, mode='lpgd', solve_method="Clarabel") + n_jobs_forward=n_jobs, n_jobs_backward=n_jobs) xs, ys, ss, D_batch, DT_batch = diffcp.solve_and_derivative_batch(As, bs, cs, Ks, - n_jobs_forward=1, n_jobs_backward=n_jobs, mode='lpgd', solve_method="Clarabel") + n_jobs_forward=1, n_jobs_backward=n_jobs, mode='lpgd_left') def f_backward(): DT_batch(xs, ys, ss, tau=0.1, rho=0.1) From 761b8ba4998c2175d08f741b92d64b41e6ad505b Mon Sep 17 00:00:00 2001 From: Anselm Paulus Date: Fri, 9 Aug 2024 15:15:16 +0200 Subject: [PATCH 08/17] fix docstrings --- diffcp/cone_program.py | 2 +- diffcp/utils.py | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/diffcp/cone_program.py b/diffcp/cone_program.py index 0725725..bf71922 100644 --- a/diffcp/cone_program.py +++ b/diffcp/cone_program.py @@ -806,7 +806,7 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho): We add a constraint x'=s, replace all appearances of s with x' and solve argmin. <[x, x'], [[P+rho*I, 0]; [x, x']> + <[c+tau*dx-rho*x^*, tau*ds-rho*s^*], [x, x']> [ 0, rho*I]] - subject to [[A, I]; [x, x'] + [s'; s] = [0; b-tau*dy] + subject to [[A, I]; [x, x'] + [s'; s] = [b-tau*dy; 0] [0, -I]] (s', s) \in (0 \times K) Note that we also add a regularizer on s (rho/2 |s-s^*|^2) in this case. diff --git a/diffcp/utils.py b/diffcp/utils.py index 4309f98..4ecf1df 100644 --- a/diffcp/utils.py +++ b/diffcp/utils.py @@ -51,7 +51,14 @@ def regularize_P(P, rho, size): P_reg = P return P_reg -def embed_problem(A, b, c, P, cone_dict): +def embed_problem(A, b, c, P, cone_dict): + """Embeds the problem into a larger problem that allows costs on the slack variables + A_emb = [A, I; 0, -I] + b_emb = [b; 0] + c_emb = [c; 0] + P_emb = [P, 0; 0, 0] + cone_dict_emb = cone_dict with z shifted by m + """ m = b.shape[0] A_emb = sparse.bmat([ [A, sparse.eye(m, format=A.format)], From ca0eb6454ebcafe245a15a332c2180c93e93b8a9 Mon Sep 17 00:00:00 2001 From: Anselm Paulus Date: Fri, 9 Aug 2024 15:28:20 +0200 Subject: [PATCH 09/17] minor changes --- diffcp/cone_program.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/diffcp/cone_program.py b/diffcp/cone_program.py index bf71922..3296b26 100644 --- a/diffcp/cone_program.py +++ b/diffcp/cone_program.py @@ -669,15 +669,15 @@ def derivative_lpgd(dA, db, dc, tau, rho): try: x_right, y_right, s_right = compute_perturbed_solution(dA, db, dc, tau, rho) except SolverError as e: - raise ValueError(f"Computation of right-perturbed problem failed: {e}. "\ - "Consider decresing tau or swiching to 'lpgd_left' mode.") + raise SolverError(f"Computation of right-perturbed problem failed: {e}. "\ + "Consider decresing tau or swiching to 'lpgd_left' mode.") if mode in ["lpgd", "lpgd_left"]: # Perturb the problem to the left try: x_left, y_left, s_left = compute_perturbed_solution(dA, db, dc, -tau, rho) except SolverError as e: - raise ValueError(f"Computation of left-perturbed problem failed: {e}. "\ - "Consider decresing tau or swiching to 'lpgd_right' mode.") + raise SolverError(f"Computation of left-perturbed problem failed: {e}. "\ + "Consider decresing tau or swiching to 'lpgd_right' mode.") if mode == "lpgd": dx = (x_right - x_left) / (2 * tau) @@ -760,15 +760,15 @@ def adjoint_derivative_lpgd(dx, dy, ds, tau, rho): try: x_right, y_right, _ = compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho) except SolverError as e: - raise ValueError(f"Computation of right-perturbed problem failed: {e}. "\ - "Consider decresing tau or swiching to 'lpgd_left' mode.") + raise SolverError(f"Computation of right-perturbed problem failed: {e}. "\ + "Consider decresing tau or swiching to 'lpgd_left' mode.") if mode in ["lpgd", "lpgd_left"]: # Perturb the problem to the left try: x_left, y_left, _ = compute_adjoint_perturbed_solution(dx, dy, ds, -tau, rho) except SolverError as e: - raise ValueError(f"Computation of left-perturbed problem failed: {e}. "\ - "Consider decresing tau or swiching to 'lpgd_right' mode.") + raise SolverError(f"Computation of left-perturbed problem failed: {e}. "\ + "Consider decresing tau or swiching to 'lpgd_right' mode.") if mode == "lpgd": dc = (x_right - x_left) / (2 * tau) @@ -809,7 +809,7 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho): subject to [[A, I]; [x, x'] + [s'; s] = [b-tau*dy; 0] [0, -I]] (s', s) \in (0 \times K) - Note that we also add a regularizer on s (rho/2 |s-s^*|^2) in this case. + Note that we also add a regularizer on s in this case (rho/2 |s-s^*|^2). Args: dx: NumPy array representing perturbation in `x` From c73b116d2c3f7240f6a71a67d0ce487262a6426c Mon Sep 17 00:00:00 2001 From: Anselm Paulus Date: Mon, 26 Aug 2024 10:42:46 +0200 Subject: [PATCH 10/17] cosmetic changes --- diffcp/cone_program.py | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/diffcp/cone_program.py b/diffcp/cone_program.py index 3296b26..ff3415d 100644 --- a/diffcp/cone_program.py +++ b/diffcp/cone_program.py @@ -654,7 +654,7 @@ def derivative_lpgd(dA, db, dc, tau, rho): db: NumPy array representing perturbation in `b` dc: NumPy array representing perturbation in `c` tau: Perturbation strength parameter - rho: Regularization parameter + rho: Regularization strength parameter Returns: NumPy arrays dx, dy, ds, the result of applying the derivative to the perturbations. @@ -670,14 +670,14 @@ def derivative_lpgd(dA, db, dc, tau, rho): x_right, y_right, s_right = compute_perturbed_solution(dA, db, dc, tau, rho) except SolverError as e: raise SolverError(f"Computation of right-perturbed problem failed: {e}. "\ - "Consider decresing tau or swiching to 'lpgd_left' mode.") + "Consider decreasing tau or swiching to 'lpgd_left' mode.") if mode in ["lpgd", "lpgd_left"]: # Perturb the problem to the left try: x_left, y_left, s_left = compute_perturbed_solution(dA, db, dc, -tau, rho) except SolverError as e: raise SolverError(f"Computation of left-perturbed problem failed: {e}. "\ - "Consider decresing tau or swiching to 'lpgd_right' mode.") + "Consider decreasing tau or swiching to 'lpgd_right' mode.") if mode == "lpgd": dx = (x_right - x_left) / (2 * tau) @@ -706,7 +706,7 @@ def compute_perturbed_solution(dA, db, dc, tau, rho): db: NumPy array representing perturbation in `b` dc: NumPy array representing perturbation in `c` tau: Perturbation strength parameter - rho: Regularization parameter + rho: Regularization strength parameter Returns: x_pert: Perturbed solution of the primal variable x y_pert: Perturbed solution of the dual variable y @@ -718,7 +718,7 @@ def compute_perturbed_solution(dA, db, dc, tau, rho): b_pert = b + tau * db c_pert = c + tau * dc - # Regularize + # Regularize: Effectively adds a rho/2 |x-x^*|^2 term to the objective P_reg = regularize_P(P, rho=rho, size=n) c_pert_reg = c_pert - rho * x @@ -745,7 +745,7 @@ def adjoint_derivative_lpgd(dx, dy, ds, tau, rho): dy: NumPy array representing perturbation in `y` ds: NumPy array representing perturbation in `s` tau: Perturbation strength parameter - rho: Regularization parameter + rho: Regularization strength parameter Returns: (`dA`, `db`, `dc`), the result of applying the LPGD to the perturbations; the sparsity pattern of `dA` matches that of `A`. @@ -761,14 +761,14 @@ def adjoint_derivative_lpgd(dx, dy, ds, tau, rho): x_right, y_right, _ = compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho) except SolverError as e: raise SolverError(f"Computation of right-perturbed problem failed: {e}. "\ - "Consider decresing tau or swiching to 'lpgd_left' mode.") + "Consider decreasing tau or swiching to 'lpgd_left' mode.") if mode in ["lpgd", "lpgd_left"]: # Perturb the problem to the left try: x_left, y_left, _ = compute_adjoint_perturbed_solution(dx, dy, ds, -tau, rho) except SolverError as e: raise SolverError(f"Computation of left-perturbed problem failed: {e}. "\ - "Consider decresing tau or swiching to 'lpgd_right' mode.") + "Consider decreasing tau or swiching to 'lpgd_right' mode.") if mode == "lpgd": dc = (x_right - x_left) / (2 * tau) @@ -791,7 +791,8 @@ def adjoint_derivative_lpgd(dx, dy, ds, tau, rho): def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho): """ Computes the adjoint perturbed solution x_right, y_right, s_right (Lagrangian Proximal Map) - by solving the following perturbed problem with perturbations dx, dy, ds and perturbation parameters tau, rho: + by solving the following perturbed problem with perturbations dx, dy, ds + and perturbation/regularization parameters tau/rho: argmin. + + tau*( + ) + rho/2 |x-x^*|^2 subject to Ax + s = b-tau*dy s \in K @@ -804,11 +805,11 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho): For ds!=0 we rewrite the problem as an embedded problem. We add a constraint x'=s, replace all appearances of s with x' and solve - argmin. <[x, x'], [[P+rho*I, 0]; [x, x']> + <[c+tau*dx-rho*x^*, tau*ds-rho*s^*], [x, x']> + argmin. <[x, x'], [[P+rho*I, 0]; [x, x']> + <[c+tau*dx-rho*x^*, tau*ds-rho*s^*], [x, x']> [ 0, rho*I]] - subject to [[A, I]; [x, x'] + [s'; s] = [b-tau*dy; 0] + subject to [[A, I]; [x, x'] + [s'; s] = [b-tau*dy; 0] [0, -I]] - (s', s) \in (0 \times K) + (s', s) \in (0 x K) Note that we also add a regularizer on s in this case (rho/2 |s-s^*|^2). Args: @@ -816,7 +817,7 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho): dy: NumPy array representing perturbation in `y` ds: NumPy array representing perturbation in `s` tau: Perturbation strength parameter - rho: Regularization parameter + rho: Regularization strength parameter Returns: x_pert: Perturbed solution of the primal variable x y_pert: Perturbed solution of the dual variable y @@ -825,11 +826,11 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho): # The cases ds = 0 and ds != 0 are handled separately, see docstring if np.isclose(np.sum(np.abs(ds)), 0): - # Perturb problem + # Perturb problem (Note: perturb primal and dual linear terms in different directions) c_pert = c + tau * dx b_pert = b - tau * dy - # Regularize + # Regularize: Effectively adds a rho/2 |x-x^*|^2 term to the objective P_reg = regularize_P(P, rho=rho, size=n) c_pert_reg = c_pert - rho * x @@ -837,20 +838,20 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho): warm_start = (x, y, s) if solve_method != "ECOS" else None # Solve the perturbed problem - # TODO: If solve_method=='SCS' and rho==0, this can be sped up strongly by using solver.update + # Note: In special case solve_method=='SCS' and rho==0, this could be sped up strongly by using solver.update result_pert = solve_internal(A=A, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, solve_method=solve_method, warm_start=warm_start, **solver_kwargs) # Extract the solutions x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"] else: - # Embed problem + # Embed problem (see docstring) A_emb, b_emb, c_emb, P_emb, cone_dict_emb = embed_problem(A, b, c, P, cone_dict) - # Perturb problem + # Perturb problem (Note: perturb primal and dual linear terms in different directions) b_emb_pert = b_emb - tau * np.hstack([dy, np.zeros(m)]) c_emb_pert = c_emb + tau * np.hstack([dx, ds]) - # Regularize + # Regularize: Effectively adds a rho/2 (|x-x^*|^2 + |s-s^*|^2) term to the objective P_emb_reg = regularize_P(P_emb, rho=rho, size=n+m) c_emb_pert_reg = c_emb_pert - rho * np.hstack([x, s]) From ca78344d31f8a1c799735e428b641017484440d6 Mon Sep 17 00:00:00 2001 From: Anselm Paulus Date: Mon, 26 Aug 2024 11:45:16 +0200 Subject: [PATCH 11/17] update readme --- README.md | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 934136b..46f00c0 100644 --- a/README.md +++ b/README.md @@ -1,31 +1,3 @@ -# Lagrangian Proximal Gradient Descent - diffcp implementation - -This fork implements [Lagrangian proximal gradient descent (LPGD)](https://arxiv.org/abs/2407.05920) as an option for the conic program in diffcp. -It can be understood as an efficient implementation of adjoint finite differences, enabling informative derivative replacements in the case of degenerate derivatives. -The perturbation strength is controlled by the parameter tau, the parameter rho controls the regularization strength. -Note: For the forward derivatives, LPGD computes standard finite differences with an optional regularization. - -Example usage: -```python -x, y, s, D, DT = solve_and_derivative(A, b, c, cone_dict, mode='lpgd') -... -dx, dy, ds = D(dA, db, dc, tau=0.1, rho=0.1) -... -dA, db, dc = DT(dx, dy, ds, tau=0.1, rho=0.1) -``` - -Alternatively the derivative keyword arguments can be passed directly on the forward pass: -```python -x, y, s, D, DT = solve_and_derivative(A, b, c, cone_dict, mode='lpgd', derivative_kwargs=dict(tau=0.1, rho=0.1)) -... -dx, dy, ds = D(dA, db, dc) -... -dA, db, dc = DT(dx, dy, ds) -``` - - -Below follows the original Readme: - [![Build Status](http://github.com/cvxgrp/diffcp/workflows/build/badge.svg?event=push)](https://github.com/cvxgrp/diffcp/actions/workflows/build.yml) # diffcp @@ -124,6 +96,8 @@ These inputs must conform to the [SCS convention](https://github.com/bodono/scs- The values in `cone_dict` denote the sizes of each cone; the values of `diffcp.SOC`, `diffcp.PSD`, and `diffcp.EXP` should be lists. The order of the rows of `A` must match the ordering of the cones given above. For more details, consult the [SCS documentation](https://github.com/cvxgrp/scs/blob/master/README.md). +To enable [Lagrangian Proximal Gradient Descent (LPGD)](https://arxiv.org/abs/2407.05920) differentiation of the conic program based on efficient finite-differences, provide the `mode=LPGD` option along with the argument `derivative_kwargs=dict(tau=0.1, rho=0.1)` to specify the perturbation and regularization strength. Alternatively, the derivative kwargs can also be passed directly to the returned `derivative` and `adjoint_derivative` function. + #### Return value The function `solve_and_derivative` returns a tuple From 3d1235c4ae070135738ff6dc28e6fcc97b7c8943 Mon Sep 17 00:00:00 2001 From: Anselm Paulus Date: Mon, 26 Aug 2024 12:38:01 +0200 Subject: [PATCH 12/17] move perturbed solution computation to utils --- diffcp/cone_program.py | 132 +++-------------------------------------- diffcp/utils.py | 124 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+), 123 deletions(-) diff --git a/diffcp/cone_program.py b/diffcp/cone_program.py index ff3415d..99d2edc 100644 --- a/diffcp/cone_program.py +++ b/diffcp/cone_program.py @@ -12,7 +12,7 @@ import _diffcp import diffcp.cones as cone_lib -from diffcp.utils import regularize_P, embed_problem +from diffcp.utils import compute_perturbed_solution, compute_adjoint_perturbed_solution def pi(z, cones): @@ -667,14 +667,16 @@ def derivative_lpgd(dA, db, dc, tau, rho): if mode in ["lpgd", "lpgd_right"]: # Perturb the problem to the right try: - x_right, y_right, s_right = compute_perturbed_solution(dA, db, dc, tau, rho) + x_right, y_right, s_right = compute_perturbed_solution( + dA, db, dc, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal) except SolverError as e: raise SolverError(f"Computation of right-perturbed problem failed: {e}. "\ "Consider decreasing tau or swiching to 'lpgd_left' mode.") if mode in ["lpgd", "lpgd_left"]: # Perturb the problem to the left try: - x_left, y_left, s_left = compute_perturbed_solution(dA, db, dc, -tau, rho) + x_left, y_left, s_left = compute_perturbed_solution( + dA, db, dc, -tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal) except SolverError as e: raise SolverError(f"Computation of left-perturbed problem failed: {e}. "\ "Consider decreasing tau or swiching to 'lpgd_right' mode.") @@ -694,43 +696,6 @@ def derivative_lpgd(dA, db, dc, tau, rho): else: raise ValueError(f"Only modes 'lpgd', 'lpgd_left', 'lpgd_right' can be used, got {mode}.") return dx, dy, ds - - def compute_perturbed_solution(dA, db, dc, tau, rho): - """ - Computes the perturbed solution x_right, y_right, s_right at (A, b, c) with - perturbations dA, db, dc and optional regularization rho. - - Args: - dA: SciPy sparse matrix in CSC format; must have same sparsity - pattern as the matrix `A` from the cone program - db: NumPy array representing perturbation in `b` - dc: NumPy array representing perturbation in `c` - tau: Perturbation strength parameter - rho: Regularization strength parameter - Returns: - x_pert: Perturbed solution of the primal variable x - y_pert: Perturbed solution of the dual variable y - s_pert: Perturbed solution of the slack variable s - """ - - # Perturb the problem - A_pert = A + tau * dA - b_pert = b + tau * db - c_pert = c + tau * dc - - # Regularize: Effectively adds a rho/2 |x-x^*|^2 term to the objective - P_reg = regularize_P(P, rho=rho, size=n) - c_pert_reg = c_pert - rho * x - - # Set warm start - warm_start = (x, y, s) - - # Solve the perturbed problem - result_pert = solve_internal(A=A_pert, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, - solve_method=solve_method, warm_start=warm_start, **solver_kwargs) - # Extract the solutions - x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"] - return x_pert, y_pert, s_pert def adjoint_derivative_lpgd(dx, dy, ds, tau, rho): """Lagrangian Proximal Gradient Descent (LPGD) [arxiv.org/abs/2407.05920] @@ -758,14 +723,16 @@ def adjoint_derivative_lpgd(dx, dy, ds, tau, rho): if mode in ["lpgd", "lpgd_right"]: # Perturb the problem to the right try: - x_right, y_right, _ = compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho) + x_right, y_right, _ = compute_adjoint_perturbed_solution( + dx, dy, ds, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal) except SolverError as e: raise SolverError(f"Computation of right-perturbed problem failed: {e}. "\ "Consider decreasing tau or swiching to 'lpgd_left' mode.") if mode in ["lpgd", "lpgd_left"]: # Perturb the problem to the left try: - x_left, y_left, _ = compute_adjoint_perturbed_solution(dx, dy, ds, -tau, rho) + x_left, y_left, _ = compute_adjoint_perturbed_solution( + dx, dy, ds, -tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal) except SolverError as e: raise SolverError(f"Computation of left-perturbed problem failed: {e}. "\ "Consider decreasing tau or swiching to 'lpgd_right' mode.") @@ -788,87 +755,6 @@ def adjoint_derivative_lpgd(dx, dy, ds, tau, rho): dA = sparse.csc_matrix((dA_data, (rows, cols)), shape=A.shape) return dA, db, dc - def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho): - """ - Computes the adjoint perturbed solution x_right, y_right, s_right (Lagrangian Proximal Map) - by solving the following perturbed problem with perturbations dx, dy, ds - and perturbation/regularization parameters tau/rho: - argmin. + + tau*( + ) + rho/2 |x-x^*|^2 - subject to Ax + s = b-tau*dy - s \in K - - For ds=0 we solve - argmin. + - subject to Ax + s = b-tau*dy - s \in K - This is just a perturbed instance of the forward optimization problem. - - For ds!=0 we rewrite the problem as an embedded problem. - We add a constraint x'=s, replace all appearances of s with x' and solve - argmin. <[x, x'], [[P+rho*I, 0]; [x, x']> + <[c+tau*dx-rho*x^*, tau*ds-rho*s^*], [x, x']> - [ 0, rho*I]] - subject to [[A, I]; [x, x'] + [s'; s] = [b-tau*dy; 0] - [0, -I]] - (s', s) \in (0 x K) - Note that we also add a regularizer on s in this case (rho/2 |s-s^*|^2). - - Args: - dx: NumPy array representing perturbation in `x` - dy: NumPy array representing perturbation in `y` - ds: NumPy array representing perturbation in `s` - tau: Perturbation strength parameter - rho: Regularization strength parameter - Returns: - x_pert: Perturbed solution of the primal variable x - y_pert: Perturbed solution of the dual variable y - s_pert: Perturbed solution of the slack variable s - """ - - # The cases ds = 0 and ds != 0 are handled separately, see docstring - if np.isclose(np.sum(np.abs(ds)), 0): - # Perturb problem (Note: perturb primal and dual linear terms in different directions) - c_pert = c + tau * dx - b_pert = b - tau * dy - - # Regularize: Effectively adds a rho/2 |x-x^*|^2 term to the objective - P_reg = regularize_P(P, rho=rho, size=n) - c_pert_reg = c_pert - rho * x - - # Set warm start - warm_start = (x, y, s) if solve_method != "ECOS" else None - - # Solve the perturbed problem - # Note: In special case solve_method=='SCS' and rho==0, this could be sped up strongly by using solver.update - result_pert = solve_internal(A=A, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, - solve_method=solve_method, warm_start=warm_start, **solver_kwargs) - # Extract the solutions - x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"] - else: - # Embed problem (see docstring) - A_emb, b_emb, c_emb, P_emb, cone_dict_emb = embed_problem(A, b, c, P, cone_dict) - - # Perturb problem (Note: perturb primal and dual linear terms in different directions) - b_emb_pert = b_emb - tau * np.hstack([dy, np.zeros(m)]) - c_emb_pert = c_emb + tau * np.hstack([dx, ds]) - - # Regularize: Effectively adds a rho/2 (|x-x^*|^2 + |s-s^*|^2) term to the objective - P_emb_reg = regularize_P(P_emb, rho=rho, size=n+m) - c_emb_pert_reg = c_emb_pert - rho * np.hstack([x, s]) - - # Set warm start - if solve_method == "ECOS": - warm_start = None - else: - warm_start = (np.hstack([x, s]), np.hstack([y, y]), np.hstack([s, s])) - - # Solve the embedded problem - result_pert = solve_internal(A=A_emb, b=b_emb_pert, c=c_emb_pert_reg, P=P_emb_reg, cone_dict=cone_dict_emb, - solve_method=solve_method, warm_start=warm_start, **solver_kwargs) - # Extract the solutions - x_pert = result_pert['x'][:n] - y_pert = result_pert['y'][:m] - s_pert = result_pert['x'][n:n+m] - return x_pert, y_pert, s_pert result["D"] = derivative result["DT"] = adjoint_derivative diff --git a/diffcp/utils.py b/diffcp/utils.py index 4ecf1df..588d9ac 100644 --- a/diffcp/utils.py +++ b/diffcp/utils.py @@ -51,6 +51,7 @@ def regularize_P(P, rho, size): P_reg = P return P_reg + def embed_problem(A, b, c, P, cone_dict): """Embeds the problem into a larger problem that allows costs on the slack variables A_emb = [A, I; 0, -I] @@ -79,3 +80,126 @@ def embed_problem(A, b, c, P, cone_dict): else: cone_dict_emb['z'] = m return A_emb, b_emb, c_emb, P_emb, cone_dict_emb + + +def compute_perturbed_solution(dA, db, dc, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal): + """ + Computes the perturbed solution x_right, y_right, s_right at (A, b, c) with + perturbations dA, db, dc and optional regularization rho. + + Args: + dA: SciPy sparse matrix in CSC format; must have same sparsity + pattern as the matrix `A` from the cone program + db: NumPy array representing perturbation in `b` + dc: NumPy array representing perturbation in `c` + tau: Perturbation strength parameter + rho: Regularization strength parameter + Returns: + x_pert: Perturbed solution of the primal variable x + y_pert: Perturbed solution of the dual variable y + s_pert: Perturbed solution of the slack variable s + """ + m, n = A.shape + + # Perturb the problem + A_pert = A + tau * dA + b_pert = b + tau * db + c_pert = c + tau * dc + + # Regularize: Effectively adds a rho/2 |x-x^*|^2 term to the objective + P_reg = regularize_P(P, rho=rho, size=n) + c_pert_reg = c_pert - rho * x + + # Set warm start + warm_start = (x, y, s) + + # Solve the perturbed problem + result_pert = solve_internal(A=A_pert, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, + solve_method=solve_method, warm_start=warm_start, **solver_kwargs) + # Extract the solutions + x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"] + return x_pert, y_pert, s_pert + + +def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal): + """ + Computes the adjoint perturbed solution x_right, y_right, s_right (Lagrangian Proximal Map) + by solving the following perturbed problem with perturbations dx, dy, ds + and perturbation/regularization parameters tau/rho: + argmin. + + tau*( + ) + rho/2 |x-x^*|^2 + subject to Ax + s = b-tau*dy + s \in K + + For ds=0 we solve + argmin. + + subject to Ax + s = b-tau*dy + s \in K + This is just a perturbed instance of the forward optimization problem. + + For ds!=0 we rewrite the problem as an embedded problem. + We add a constraint x'=s, replace all appearances of s with x' and solve + argmin. <[x, x'], [[P+rho*I, 0]; [x, x']> + <[c+tau*dx-rho*x^*, tau*ds-rho*s^*], [x, x']> + [ 0, rho*I]] + subject to [[A, I]; [x, x'] + [s'; s] = [b-tau*dy; 0] + [0, -I]] + (s', s) \in (0 x K) + Note that we also add a regularizer on s in this case (rho/2 |s-s^*|^2). + + Args: + dx: NumPy array representing perturbation in `x` + dy: NumPy array representing perturbation in `y` + ds: NumPy array representing perturbation in `s` + tau: Perturbation strength parameter + rho: Regularization strength parameter + Returns: + x_pert: Perturbed solution of the primal variable x + y_pert: Perturbed solution of the dual variable y + s_pert: Perturbed solution of the slack variable s + """ + m, n = A.shape + + # The cases ds = 0 and ds != 0 are handled separately, see docstring + if np.isclose(np.sum(np.abs(ds)), 0): + # Perturb problem (Note: perturb primal and dual linear terms in different directions) + c_pert = c + tau * dx + b_pert = b - tau * dy + + # Regularize: Effectively adds a rho/2 |x-x^*|^2 term to the objective + P_reg = regularize_P(P, rho=rho, size=n) + c_pert_reg = c_pert - rho * x + + # Set warm start + warm_start = (x, y, s) if solve_method != "ECOS" else None + + # Solve the perturbed problem + # Note: In special case solve_method=='SCS' and rho==0, this could be sped up strongly by using solver.update + result_pert = solve_internal(A=A, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, + solve_method=solve_method, warm_start=warm_start, **solver_kwargs) + # Extract the solutions + x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"] + else: + # Embed problem (see docstring) + A_emb, b_emb, c_emb, P_emb, cone_dict_emb = embed_problem(A, b, c, P, cone_dict) + + # Perturb problem (Note: perturb primal and dual linear terms in different directions) + b_emb_pert = b_emb - tau * np.hstack([dy, np.zeros(m)]) + c_emb_pert = c_emb + tau * np.hstack([dx, ds]) + + # Regularize: Effectively adds a rho/2 (|x-x^*|^2 + |s-s^*|^2) term to the objective + P_emb_reg = regularize_P(P_emb, rho=rho, size=n+m) + c_emb_pert_reg = c_emb_pert - rho * np.hstack([x, s]) + + # Set warm start + if solve_method == "ECOS": + warm_start = None + else: + warm_start = (np.hstack([x, s]), np.hstack([y, y]), np.hstack([s, s])) + + # Solve the embedded problem + result_pert = solve_internal(A=A_emb, b=b_emb_pert, c=c_emb_pert_reg, P=P_emb_reg, cone_dict=cone_dict_emb, + solve_method=solve_method, warm_start=warm_start, **solver_kwargs) + # Extract the solutions + x_pert = result_pert['x'][:n] + y_pert = result_pert['y'][:m] + s_pert = result_pert['x'][n:n+m] + return x_pert, y_pert, s_pert From ca1be3f09845422b0840088446c84f8b228dc18b Mon Sep 17 00:00:00 2001 From: Anselm Paulus Date: Mon, 26 Aug 2024 12:40:56 +0200 Subject: [PATCH 13/17] cosmetics --- diffcp/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/diffcp/utils.py b/diffcp/utils.py index 588d9ac..affe8b3 100644 --- a/diffcp/utils.py +++ b/diffcp/utils.py @@ -139,9 +139,9 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, A, b, c, P, cone_di For ds!=0 we rewrite the problem as an embedded problem. We add a constraint x'=s, replace all appearances of s with x' and solve argmin. <[x, x'], [[P+rho*I, 0]; [x, x']> + <[c+tau*dx-rho*x^*, tau*ds-rho*s^*], [x, x']> - [ 0, rho*I]] + [ 0, rho*I]] subject to [[A, I]; [x, x'] + [s'; s] = [b-tau*dy; 0] - [0, -I]] + [0, -I]] (s', s) \in (0 x K) Note that we also add a regularizer on s in this case (rho/2 |s-s^*|^2). From 1f4e4bf3c2fee332a8099d0555c427b9721dff09 Mon Sep 17 00:00:00 2001 From: Anselm Paulus Date: Fri, 13 Dec 2024 22:37:49 +0100 Subject: [PATCH 14/17] resolve domments on pull request + minor edits --- README.md | 2 +- diffcp/cone_program.py | 15 +++++++++------ diffcp/utils.py | 6 +++--- examples/dual_example.py | 2 +- examples/dual_example_lpgd.py | 2 +- examples/ecos_example.py | 2 +- examples/ecos_example_lpgd.py | 2 +- 7 files changed, 17 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 46f00c0..511896f 100644 --- a/README.md +++ b/README.md @@ -96,7 +96,7 @@ These inputs must conform to the [SCS convention](https://github.com/bodono/scs- The values in `cone_dict` denote the sizes of each cone; the values of `diffcp.SOC`, `diffcp.PSD`, and `diffcp.EXP` should be lists. The order of the rows of `A` must match the ordering of the cones given above. For more details, consult the [SCS documentation](https://github.com/cvxgrp/scs/blob/master/README.md). -To enable [Lagrangian Proximal Gradient Descent (LPGD)](https://arxiv.org/abs/2407.05920) differentiation of the conic program based on efficient finite-differences, provide the `mode=LPGD` option along with the argument `derivative_kwargs=dict(tau=0.1, rho=0.1)` to specify the perturbation and regularization strength. Alternatively, the derivative kwargs can also be passed directly to the returned `derivative` and `adjoint_derivative` function. +To enable [Lagrangian Proximal Gradient Descent (LPGD)](https://arxiv.org/abs/2407.05920) differentiation of the conic program based on efficient finite-differences, provide the `mode=lpgd` option along with the argument `derivative_kwargs=dict(tau=0.1, rho=0.1)` to specify the perturbation and regularization strength. Alternatively, the derivative kwargs can also be passed directly to the returned `derivative` and `adjoint_derivative` function. #### Return value The function `solve_and_derivative` returns a tuple diff --git a/diffcp/cone_program.py b/diffcp/cone_program.py index 99d2edc..ec932e6 100644 --- a/diffcp/cone_program.py +++ b/diffcp/cone_program.py @@ -377,8 +377,6 @@ def solve_internal(A, b, c, cone_dict, solve_method=None, raise ValueError("PSD cone not supported by ECOS.") if ('ed' in cone_dict) and (cone_dict['ed'] != 0): raise NotImplementedError("Dual exponential cones not supported yet.") - if warm_start is not None: - raise ValueError("ECOS does not support warm starting.") len_eq = cone_dict[cone_lib.EQ_DIM] C_ecos = c G_ecos = A[len_eq:] @@ -452,7 +450,9 @@ def solve_internal(A, b, c, cone_dict, solve_method=None, 'iter': solution['info']['iter'], 'pobj': solution['info']['pcost']} elif solve_method == "Clarabel": - # for now set P to 0 + if warm_start is not None: + raise ValueError("Clarabel currently does not support warmstarting.") + if P is None: P = sparse.csc_matrix((c.size, c.size)) @@ -519,6 +519,9 @@ def solve_and_derivative_internal(A, b, c, cone_dict, solve_method=None, if mode not in ["dense", "lsqr", "lsmr", "lpgd", "lpgd_right", "lpgd_left"]: raise ValueError("Unsupported mode {}; the supported modes are " "'dense', 'lsqr', 'lsmr', 'lpgd', 'lpgd_right' and 'lpgd_left'".format(mode)) + if mode in ["dense", "lsqr", "lsmr"] and P is not None: + raise ValueError("Dense, lsqr, and lsmr modes currently do not support quadratic objectives. "\ + "Consider switching to 'lpgd' mode.") if np.isnan(A.data).any(): raise RuntimeError("Found a NaN in A.") @@ -663,7 +666,7 @@ def derivative_lpgd(dA, db, dc, tau, rho): if tau is None or tau <= 0: raise ValueError(f"LPGD mode requires tau > 0, got tau={tau}.") if rho < 0: - raise ValueError(f"Finite difference mode requires rho >= 0, got rho={rho}.") + raise ValueError(f"LPGD mode requires rho >= 0, got rho={rho}.") if mode in ["lpgd", "lpgd_right"]: # Perturb the problem to the right try: @@ -717,9 +720,9 @@ def adjoint_derivative_lpgd(dx, dy, ds, tau, rho): """ if tau is None or tau <= 0: - raise ValueError(f"Finite difference mode requires tau > 0, got tau={tau}.") + raise ValueError(f"LPGD mode requires tau > 0, got tau={tau}.") if rho < 0: - raise ValueError(f"Finite difference mode requires rho >= 0, got rho={rho}.") + raise ValueError(f"LPGD mode requires rho >= 0, got rho={rho}.") if mode in ["lpgd", "lpgd_right"]: # Perturb the problem to the right try: diff --git a/diffcp/utils.py b/diffcp/utils.py index affe8b3..c6eae00 100644 --- a/diffcp/utils.py +++ b/diffcp/utils.py @@ -111,7 +111,7 @@ def compute_perturbed_solution(dA, db, dc, tau, rho, A, b, c, P, cone_dict, x, y c_pert_reg = c_pert - rho * x # Set warm start - warm_start = (x, y, s) + warm_start = (x, y, s) if solve_method not in ["ECOS", "Clarabel"] else None # Solve the perturbed problem result_pert = solve_internal(A=A_pert, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, @@ -169,7 +169,7 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, A, b, c, P, cone_di c_pert_reg = c_pert - rho * x # Set warm start - warm_start = (x, y, s) if solve_method != "ECOS" else None + warm_start = (x, y, s) if solve_method not in ["ECOS", "Clarabel"] else None # Solve the perturbed problem # Note: In special case solve_method=='SCS' and rho==0, this could be sped up strongly by using solver.update @@ -190,7 +190,7 @@ def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, A, b, c, P, cone_di c_emb_pert_reg = c_emb_pert - rho * np.hstack([x, s]) # Set warm start - if solve_method == "ECOS": + if solve_method in ["ECOS", "Clarabel"]: warm_start = None else: warm_start = (np.hstack([x, s]), np.hstack([y, y]), np.hstack([s, s])) diff --git a/examples/dual_example.py b/examples/dual_example.py index 07e6c1d..3c0220d 100644 --- a/examples/dual_example.py +++ b/examples/dual_example.py @@ -6,7 +6,7 @@ # We generate a random cone program with a cone -# defined as a product of a 3-d fixed cone, 3-d positive orthant cone, +# defined as a product of a 3-d zero cone, 3-d positive orthant cone, # and a 5-d second order cone. K = { 'z': 3, diff --git a/examples/dual_example_lpgd.py b/examples/dual_example_lpgd.py index 93ced61..e0a802f 100644 --- a/examples/dual_example_lpgd.py +++ b/examples/dual_example_lpgd.py @@ -6,7 +6,7 @@ # We generate a random cone program with a cone -# defined as a product of a 3-d fixed cone, 3-d positive orthant cone, +# defined as a product of a 3-d zero cone, 3-d positive orthant cone, # and a 5-d second order cone. K = { 'z': 3, diff --git a/examples/ecos_example.py b/examples/ecos_example.py index dcec447..9d1534b 100644 --- a/examples/ecos_example.py +++ b/examples/ecos_example.py @@ -6,7 +6,7 @@ # We generate a random cone program with a cone -# defined as a product of a 3-d fixed cone, 3-d positive orthant cone, +# defined as a product of a 3-d zero cone, 3-d positive orthant cone, # and a 5-d second order cone. K = { 'z': 3, diff --git a/examples/ecos_example_lpgd.py b/examples/ecos_example_lpgd.py index c142253..bc8777a 100644 --- a/examples/ecos_example_lpgd.py +++ b/examples/ecos_example_lpgd.py @@ -6,7 +6,7 @@ # We generate a random cone program with a cone -# defined as a product of a 3-d fixed cone, 3-d positive orthant cone, +# defined as a product of a 3-d zero cone, 3-d positive orthant cone, # and a 5-d second order cone. K = { 'z': 3, From d2aada173f0d7b31865c4613563b97c56edd40b3 Mon Sep 17 00:00:00 2001 From: Anselm Paulus Date: Fri, 13 Dec 2024 22:47:26 +0100 Subject: [PATCH 15/17] add differentiation w.r.t. P --- diffcp/cone_program.py | 148 ++++++++++++++++++++++++++--------------- diffcp/utils.py | 12 +++- 2 files changed, 102 insertions(+), 58 deletions(-) diff --git a/diffcp/cone_program.py b/diffcp/cone_program.py index ec932e6..d20cee0 100644 --- a/diffcp/cone_program.py +++ b/diffcp/cone_program.py @@ -25,14 +25,14 @@ def pi(z, cones): [u, cone_lib.pi(v, cones, dual=True), np.maximum(w, 0)]) -def solve_and_derivative_wrapper(A, b, c, cone_dict, warm_start, mode, kwargs): +def solve_and_derivative_wrapper(A, b, c, P, cone_dict, warm_start, mode, kwargs): """A wrapper around solve_and_derivative for the batch function.""" return solve_and_derivative( - A, b, c, cone_dict, warm_start=warm_start, mode=mode, **kwargs) + A, b, c, cone_dict, warm_start=warm_start, mode=mode, P=P, **kwargs) def solve_and_derivative_batch(As, bs, cs, cone_dicts, n_jobs_forward=-1, n_jobs_backward=-1, - mode="lsqr", warm_starts=None, **kwargs): + mode="lsqr", warm_starts=None, Ps=None, **kwargs): """ Solves a batch of cone programs and returns a function that performs a batch of derivatives. Uses a ThreadPool to perform @@ -52,6 +52,7 @@ def solve_and_derivative_batch(As, bs, cs, cone_dicts, n_jobs_forward=-1, n_jobs means serial and n_jobs_backward = -1 defaults to the number of CPUs (default=-1). mode - Differentiation mode in ["lsqr", "lsmr", "dense", "lpgd", "lpgd_right", "lpgd_left"]. warm_starts - A list of warm starts. + Ps - (optional) A list of P matrices. kwargs - kwargs sent to scs. Returns: @@ -59,10 +60,11 @@ def solve_and_derivative_batch(As, bs, cs, cone_dicts, n_jobs_forward=-1, n_jobs ys: A list of y arrays. ss: A list of s arrays. D_batch: A callable with signature - D_batch(dAs, dbs, dcs) -> dxs, dys, dss + D_batch(dAs, dbs, dcs, dPs) -> dxs, dys, dss This callable maps lists of problem data derivatives to lists of solution derivatives. DT_batch: A callable with signature - DT_batch(dxs, dys, dss) -> dAs, dbs, dcs + DT_batch(dxs, dys, dss, return_dP=False) -> dAs, dbs, dcs + DT_batch(dxs, dys, dss, return_dP=True) -> dAs, dbs, dcs, dPs This callable maps lists of solution derivatives to lists of problem data derivatives. """ batch_size = len(As) @@ -74,13 +76,16 @@ def solve_and_derivative_batch(As, bs, cs, cone_dicts, n_jobs_forward=-1, n_jobs n_jobs_backward = mp.cpu_count() n_jobs_forward = min(batch_size, n_jobs_forward) n_jobs_backward = min(batch_size, n_jobs_backward) + + if Ps is None: + Ps = [None] * batch_size if n_jobs_forward == 1: # serial xs, ys, ss, Ds, DTs = [], [], [], [], [] for i in range(batch_size): x, y, s, D, DT = solve_and_derivative(As[i], bs[i], cs[i], - cone_dicts[i], warm_starts[i], mode=mode, **kwargs) + cone_dicts[i], warm_starts[i], mode=mode, P=Ps[i], **kwargs) xs += [x] ys += [y] ss += [s] @@ -89,8 +94,8 @@ def solve_and_derivative_batch(As, bs, cs, cone_dicts, n_jobs_forward=-1, n_jobs else: # thread pool pool = ThreadPool(processes=n_jobs_forward) - args = [(A, b, c, cone_dict, warm_start, mode, kwargs) for A, b, c, cone_dict, warm_start in - zip(As, bs, cs, cone_dicts, warm_starts)] + args = [(A, b, c, P, cone_dict, warm_start, mode, kwargs) for A, b, c, P, cone_dict, warm_start in + zip(As, bs, cs, Ps, cone_dicts, warm_starts)] with threadpool_limits(limits=1): results = pool.starmap(solve_and_derivative_wrapper, args) pool.close() @@ -110,21 +115,26 @@ def D_batch(dAs, dbs, dcs, **kwargs): dss += [ds] return dxs, dys, dss - def DT_batch(dxs, dys, dss, **kwargs): - dAs, dbs, dcs = [], [], [] + def DT_batch(dxs, dys, dss, return_dP=False, **kwargs): + dAs, dbs, dcs, dPs = [], [], [], [] for i in range(batch_size): - dA, db, dc = DTs[i](dxs[i], dys[i], dss[i], **kwargs) - dAs += [dA] - dbs += [db] - dcs += [dc] - return dAs, dbs, dcs + ret = DTs[i](dxs[i], dys[i], dss[i], return_dP=return_dP, **kwargs) + dAs += [ret[0]] + dbs += [ret[1]] + dcs += [ret[2]] + if return_dP: + dPs += [ret[3]] + if return_dP: + return dAs, dbs, dcs, dPs + else: + return dAs, dbs, dcs else: - def D_batch(dAs, dbs, dcs, **kwargs): + def D_batch(dAs, dbs, dcs, dPs=None, **kwargs): pool = ThreadPool(processes=n_jobs_backward) def Di(i): - return Ds[i](dAs[i], dbs[i], dcs[i], **kwargs) + return Ds[i](dAs[i], dbs[i], dcs[i], dPs[i], **kwargs) results = pool.map(Di, range(batch_size)) pool.close() dxs = [r[0] for r in results] @@ -132,29 +142,33 @@ def Di(i): dss = [r[2] for r in results] return dxs, dys, dss - def DT_batch(dxs, dys, dss, **kwargs): + def DT_batch(dxs, dys, dss, return_dP=False, **kwargs): pool = ThreadPool(processes=n_jobs_backward) def DTi(i): - return DTs[i](dxs[i], dys[i], dss[i], **kwargs) + return DTs[i](dxs[i], dys[i], dss[i], return_dP=return_dP, **kwargs) results = pool.map(DTi, range(batch_size)) pool.close() dAs = [r[0] for r in results] dbs = [r[1] for r in results] dcs = [r[2] for r in results] - return dAs, dbs, dcs + if return_dP: + dPs = [r[3] for r in results] + return dAs, dbs, dcs, dPs + else: + return dAs, dbs, dcs return xs, ys, ss, D_batch, DT_batch -def solve_only_wrapper(A, b, c, cone_dict, warm_start, kwargs): +def solve_only_wrapper(A, b, c, P, cone_dict, warm_start, kwargs): """A wrapper around solve_only for the batch function""" return solve_only( - A, b, c, cone_dict, warm_start=warm_start, **kwargs) + A, b, c, cone_dict, warm_start=warm_start, P=P, **kwargs) def solve_only_batch(As, bs, cs, cone_dicts, n_jobs_forward=-1, - warm_starts=None, **kwargs): + warm_starts=None, Ps=None, **kwargs): """ Solves a batch of cone programs. Uses a ThreadPool to perform operations across @@ -184,15 +198,15 @@ def solve_only_batch(As, bs, cs, cone_dicts, n_jobs_forward=-1, xs, ys, ss = [], [], [] for i in range(batch_size): x, y, s = solve_only(As[i], bs[i], cs[i], cone_dicts[i], - warm_starts[i], **kwargs) + warm_starts[i], Ps[i], **kwargs) xs += [x] ys += [y] ss += [s] else: # thread pool pool = ThreadPool(processes=n_jobs_forward) - args = [(A, b, c, cone_dict, warm_start, kwargs) for A, b, c, cone_dict, warm_start in - zip(As, bs, cs, cone_dicts, warm_starts)] + args = [(A, b, c, P, cone_dict, warm_start, kwargs) for A, b, c, P, cone_dict, warm_start in + zip(As, bs, cs, Ps, cone_dicts, warm_starts)] with threadpool_limits(limits=1): results = pool.starmap(solve_only_wrapper, args) pool.close() @@ -208,21 +222,21 @@ class SolverError(Exception): def solve_and_derivative(A, b, c, cone_dict, warm_start=None, mode='lsqr', - solve_method='SCS', **kwargs): + solve_method='SCS', P=None, **kwargs): """Solves a cone program, returns its derivative as an abstract linear map. This function solves a convex cone program, with primal-dual problems - min. c^T x min. b^Ty - subject to Ax + s = b subject to A^Ty + c = 0 + min. x^T P x + c^T x min. x^T P x + b^T y + subject to Ax + s = b subject to Px + A^Ty + c = 0 s \in K y \in K^* - The problem data A, b, and c correspond to the arguments `A`, `b`, and `c`, + The problem data A, b, c, and P correspond to the arguments `A`, `b`, `c`, and `P`, and the convex cone `K` corresponds to `cone_dict`; x and s are the primal variables, and y is the dual variable. This function returns a solution (x, y, s) to the program. It also returns two functions that respectively represent application of the derivative - (at A, b, and c) and its adjoint. + (at A, b, c, and P) and its adjoint. The problem data must be formatted according to the SCS convention, see https://github.com/cvxgrp/scs. @@ -251,6 +265,7 @@ def solve_and_derivative(A, b, c, cone_dict, warm_start=None, mode='lsqr', mode: (optional) Which mode to compute derivative with, options are ["dense", "lsqr", "lsmr", "lpgd", "lpgd_right", "lpgd_left"]. solve_method: (optional) Name of solver to use; SCS, ECOS, or Clarabel. + P: (optional) A sparse SciPy matrix in CSC format. kwargs: (optional) Keyword arguments to send to the solver. Returns: @@ -258,22 +273,25 @@ def solve_and_derivative(A, b, c, cone_dict, warm_start=None, mode='lsqr', y: Optimal value of the dual variable y. s: Optimal value of the slack variable s. derivative: A callable with signature - derivative(dA, db, dc) -> dx, dy, ds - that applies the derivative of the cone program at (A, b, and c) - to the perturbations `dA`, `db`, `dc`. `dA` must be a SciPy sparse + derivative(dA, db, dc, dP) -> dx, dy, ds + that applies the derivative of the cone program at (A, b, c, P) + to the perturbations `dA`, `db`, `dc`, `dP`. `dA` must be a SciPy sparse matrix in CSC format with the same sparsity pattern as `A`; + `dP` must be a SciPy sparse matrix in CSC format with the same sparsity pattern as `P`; `db` and `dc` are NumPy arrays. adjoint_derivative: A callable with signature - adjoint_derivative(dx, dy, ds) -> dA, db, dc + adjoint_derivative(dx, dy, ds, return_dP=False) -> dA, db, dc + adjoint_derivative(dx, dy, ds, return_dP=True) -> dA, db, dc, dP that applies the adjoint of the derivative of the cone program at - (A, b, and c) to the perturbations `dx`, `dy`, `ds`, which must be + (A, b, c, P) to the perturbations `dx`, `dy`, `ds`. `dx`, `dy`, `ds` must be NumPy arrays. The output `dA` matches the sparsity pattern of `A`. + If `return_dP` is True, the output `dP` matches the sparsity pattern of `P`. Raises: SolverError: if the cone program is infeasible or unbounded. """ result = solve_and_derivative_internal( A, b, c, cone_dict, warm_start=warm_start, mode=mode, - solve_method=solve_method, **kwargs) + solve_method=solve_method, P=P, **kwargs) x = result["x"] y = result["y"] s = result["s"] @@ -283,7 +301,7 @@ def solve_and_derivative(A, b, c, cone_dict, warm_start=None, mode='lsqr', def solve_only(A, b, c, cone_dict, warm_start=None, - solve_method='SCS', **kwargs): + solve_method='SCS', P=None, **kwargs): """ Solves a cone program and returns its solution. @@ -299,7 +317,7 @@ def solve_only(A, b, c, cone_dict, warm_start=None, result = solve_internal( A, b, c, cone_dict, warm_start=warm_start, - solve_method=solve_method, **kwargs) + solve_method=solve_method, P=P, **kwargs) x = result["x"] y = result["y"] s = result["s"] @@ -543,7 +561,7 @@ def solve_and_derivative_internal(A, b, c, cone_dict, solve_method=None, derivative_kwargs = {} solver_kwargs = kwargs result = solve_internal(A, b, c, cone_dict, solve_method=solve_method, - warm_start=warm_start, raise_on_error=raise_on_error, **kwargs) + warm_start=warm_start, raise_on_error=raise_on_error, P=P, **kwargs) x = result["x"] y = result["y"] s = result["s"] @@ -575,20 +593,24 @@ def solve_and_derivative_internal(A, b, c, cone_dict, solve_method=None, pi_z = pi(z, cones) - def derivative(dA, db, dc, **kwargs): - """Applies derivative at (A, b, c) to perturbations dA, db, dc + def derivative(dA, db, dc, dP=None, **kwargs): + """Applies derivative at (A, b, c) to perturbations dA, db, dc, dP Args: dA: SciPy sparse matrix in CSC format; must have same sparsity pattern as the matrix `A` from the cone program db: NumPy array representing perturbation in `b` dc: NumPy array representing perturbation in `c` + dP: (optional) SciPy sparse matrix in CSC format; must have + same sparsity pattern as `P` from the cone program Returns: NumPy arrays dx, dy, ds, the result of applying the derivative to the perturbations. """ + if P is None and dP is not None: + raise ValueError("P must be provided if dP is not None.") if "lpgd" in mode: - dx, dy, ds = derivative_lpgd(dA, db, dc, **derivative_kwargs, **kwargs) + dx, dy, ds = derivative_lpgd(dA, db, dc, dP=dP, **derivative_kwargs, **kwargs) return dx, dy, ds else: dQ = sparse.bmat([ @@ -613,20 +635,24 @@ def derivative(dA, db, dc, **kwargs): ds = D_proj_dual_cone.matvec(dv) - dv - s * dw return -dx, -dy, -ds - def adjoint_derivative(dx, dy, ds, **kwargs): + def adjoint_derivative(dx, dy, ds, return_dP=False, **kwargs): """Applies adjoint of derivative at (A, b, c) to perturbations dx, dy, ds Args: dx: NumPy array representing perturbation in `x` dy: NumPy array representing perturbation in `y` ds: NumPy array representing perturbation in `s` + return_dP: flag to return dP Returns: (`dA`, `db`, `dc`), the result of applying the adjoint to the perturbations; the sparsity pattern of `dA` matches that of `A`. """ if "lpgd" in mode: - dA, db, dc = adjoint_derivative_lpgd(dx, dy, ds, **derivative_kwargs, **kwargs) + return adjoint_derivative_lpgd(dx, dy, ds, return_dP=return_dP, **derivative_kwargs, **kwargs) else: + if return_dP: + raise ValueError("Modes 'dense', 'lsqr', 'lsmr' currently do not support "\ + "differentiating with respect to P. Consider switching to 'lpgd' mode.") dw = -(x @ dx + y @ dy + s @ ds) dz = np.concatenate( [dx, D_proj_dual_cone.rmatvec(dy + ds) - ds, np.array([dw])]) @@ -645,11 +671,10 @@ def adjoint_derivative(dx, dy, ds, **kwargs): dA = sparse.csc_matrix((values, (rows, cols)), shape=A.shape) db = pi_z[n:n + m] * r[-1] - pi_z[-1] * r[n:n + m] dc = pi_z[:n] * r[-1] - pi_z[-1] * r[:n] - - return dA, db, dc + return dA, db, dc - def derivative_lpgd(dA, db, dc, tau, rho): - """Computes standard finite difference derivative at (A, b, c) to perturbations dA, db, dc + def derivative_lpgd(dA, db, dc, tau, rho, dP=None): + """Computes standard finite difference derivative at (A, b, c) to perturbations dA, db, dc, dP with optional regularization rho. Args: dA: SciPy sparse matrix in CSC format; must have same sparsity @@ -658,6 +683,8 @@ def derivative_lpgd(dA, db, dc, tau, rho): dc: NumPy array representing perturbation in `c` tau: Perturbation strength parameter rho: Regularization strength parameter + dP: (optional) SciPy sparse matrix in CSC format; must have + same sparsity pattern as `P` from the cone program Returns: NumPy arrays dx, dy, ds, the result of applying the derivative to the perturbations. @@ -671,7 +698,7 @@ def derivative_lpgd(dA, db, dc, tau, rho): if mode in ["lpgd", "lpgd_right"]: # Perturb the problem to the right try: x_right, y_right, s_right = compute_perturbed_solution( - dA, db, dc, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal) + dA, db, dc, dP, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal) except SolverError as e: raise SolverError(f"Computation of right-perturbed problem failed: {e}. "\ "Consider decreasing tau or swiching to 'lpgd_left' mode.") @@ -679,7 +706,7 @@ def derivative_lpgd(dA, db, dc, tau, rho): if mode in ["lpgd", "lpgd_left"]: # Perturb the problem to the left try: x_left, y_left, s_left = compute_perturbed_solution( - dA, db, dc, -tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal) + dA, db, dc, dP, -tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal) except SolverError as e: raise SolverError(f"Computation of left-perturbed problem failed: {e}. "\ "Consider decreasing tau or swiching to 'lpgd_right' mode.") @@ -700,7 +727,7 @@ def derivative_lpgd(dA, db, dc, tau, rho): raise ValueError(f"Only modes 'lpgd', 'lpgd_left', 'lpgd_right' can be used, got {mode}.") return dx, dy, ds - def adjoint_derivative_lpgd(dx, dy, ds, tau, rho): + def adjoint_derivative_lpgd(dx, dy, ds, tau, rho, return_dP=False): """Lagrangian Proximal Gradient Descent (LPGD) [arxiv.org/abs/2407.05920] Computes informative replacements for adjoint derivatives given incoming gradients dx, dy, ds, can be regarded as an efficient adjoint finite difference method. @@ -714,9 +741,12 @@ def adjoint_derivative_lpgd(dx, dy, ds, tau, rho): ds: NumPy array representing perturbation in `s` tau: Perturbation strength parameter rho: Regularization strength parameter + return_dP: flag to return dP Returns: - (`dA`, `db`, `dc`), the result of applying the LPGD to the - perturbations; the sparsity pattern of `dA` matches that of `A`. + (`dA`, `db`, `dc`) if return_dP is False, + (`dA`, `db`, `dc`, `dP`) if return_dP is True, + the result of applying the LPGD to the perturbations; + the sparsity pattern of `dA`, `dP` matches that of `A`, `P`. """ if tau is None or tau <= 0: @@ -744,19 +774,27 @@ def adjoint_derivative_lpgd(dx, dy, ds, tau, rho): dc = (x_right - x_left) / (2 * tau) db = - (y_right - y_left) / (2 * tau) dA_data = (y_right[rows] * x_right[cols] - y_left[rows] * x_left[cols]) / (2 * tau) + dP_data = (x_right[cols] * x_right[cols] - x_left[cols] * x_left[cols]) / (2 * tau) elif mode == "lpgd_right": dc = (x_right - x) / tau db = - (y_right - y) / tau dA_data = (y_right[rows] * x_right[cols] - y[rows] * x[cols]) / tau + dP_data = (x_right[cols] * x_right[cols] - x[cols] * x[cols]) / tau elif mode == "lpgd_left": dc = (x - x_left) / tau db = - (y - y_left) / tau dA_data = (y[rows] * x[cols] - y_left[rows] * x_left[cols]) / tau + dP_data = (x[cols] * x[cols] - x_left[cols] * x_left[cols]) / tau else: raise ValueError(f"Only modes 'lpgd', 'lpgd_left', 'lpgd_right' can be used, got {mode}.") dA = sparse.csc_matrix((dA_data, (rows, cols)), shape=A.shape) - return dA, db, dc + + if return_dP: + dP = sparse.csc_matrix((dP_data, (cols, cols)), shape=P.shape) + return dA, db, dc, dP + else: + return dA, db, dc result["D"] = derivative diff --git a/diffcp/utils.py b/diffcp/utils.py index c6eae00..c0dc87b 100644 --- a/diffcp/utils.py +++ b/diffcp/utils.py @@ -82,7 +82,7 @@ def embed_problem(A, b, c, P, cone_dict): return A_emb, b_emb, c_emb, P_emb, cone_dict_emb -def compute_perturbed_solution(dA, db, dc, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal): +def compute_perturbed_solution(dA, db, dc, dP, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal): """ Computes the perturbed solution x_right, y_right, s_right at (A, b, c) with perturbations dA, db, dc and optional regularization rho. @@ -92,6 +92,8 @@ def compute_perturbed_solution(dA, db, dc, tau, rho, A, b, c, P, cone_dict, x, y pattern as the matrix `A` from the cone program db: NumPy array representing perturbation in `b` dc: NumPy array representing perturbation in `c` + dP: Optional: SciPy sparse matrix in CSC format; must have same sparsity + pattern as the matrix PA` from the cone program tau: Perturbation strength parameter rho: Regularization strength parameter Returns: @@ -105,16 +107,20 @@ def compute_perturbed_solution(dA, db, dc, tau, rho, A, b, c, P, cone_dict, x, y A_pert = A + tau * dA b_pert = b + tau * db c_pert = c + tau * dc + if dP is not None: + P_pert = P + tau * dP + else: + P_pert = P # Regularize: Effectively adds a rho/2 |x-x^*|^2 term to the objective - P_reg = regularize_P(P, rho=rho, size=n) + P_pert_reg = regularize_P(P_pert, rho=rho, size=n) c_pert_reg = c_pert - rho * x # Set warm start warm_start = (x, y, s) if solve_method not in ["ECOS", "Clarabel"] else None # Solve the perturbed problem - result_pert = solve_internal(A=A_pert, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict, + result_pert = solve_internal(A=A_pert, b=b_pert, c=c_pert_reg, P=P_pert_reg, cone_dict=cone_dict, solve_method=solve_method, warm_start=warm_start, **solver_kwargs) # Extract the solutions x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"] From dcfa6bafdded6cdb4406a8b78f654c27ef77b3b0 Mon Sep 17 00:00:00 2001 From: Anselm Paulus Date: Fri, 13 Dec 2024 22:47:40 +0100 Subject: [PATCH 16/17] add examples for differentiating w.r.t. P --- examples/batch_clarabel_lpgd_quadratic.py | 47 +++++++++++++++++++++ examples/clarabel_example_lpgd_quadratic.py | 40 ++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 examples/batch_clarabel_lpgd_quadratic.py create mode 100644 examples/clarabel_example_lpgd_quadratic.py diff --git a/examples/batch_clarabel_lpgd_quadratic.py b/examples/batch_clarabel_lpgd_quadratic.py new file mode 100644 index 0000000..11654fb --- /dev/null +++ b/examples/batch_clarabel_lpgd_quadratic.py @@ -0,0 +1,47 @@ +import diffcp +import time +import numpy as np +import scipy.sparse as sparse + +m = 100 +n = 50 + +batch_size = 16 +n_jobs = 2 + +As, bs, cs, Ks, Ps = [], [], [], [], [] +for _ in range(batch_size): + A, b, c, K = diffcp.utils.least_squares_eq_scs_data(m, n) + P = sparse.csc_matrix((c.size, c.size)) + P = sparse.triu(P).tocsc() + As += [A] + bs += [b] + cs += [c] + Ks += [K] + Ps += [P] + +def time_function(f, N=1): + result = [] + for i in range(N): + tic = time.time() + f() + toc = time.time() + result += [toc - tic] + return np.mean(result), np.std(result) + +for n_jobs in range(1, 8): + def f_forward(): + return diffcp.solve_and_derivative_batch(As, bs, cs, Ks, + n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, solve_method="Clarabel", verbose=False, mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.1), + Ps=Ps) + xs, ys, ss, D_batch, DT_batch = diffcp.solve_and_derivative_batch(As, bs, cs, Ks, + n_jobs_forward=1, n_jobs_backward=n_jobs, solve_method="Clarabel", verbose=False, + mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.1), Ps=Ps) + + def f_backward(): + DT_batch(xs, ys, ss, return_dP=True) + + mean_forward, std_forward = time_function(f_forward) + mean_backward, std_backward = time_function(f_backward) + print("%03d | %4.4f +/- %2.2f | %4.4f +/- %2.2f" % + (n_jobs, mean_forward, std_forward, mean_backward, std_backward)) diff --git a/examples/clarabel_example_lpgd_quadratic.py b/examples/clarabel_example_lpgd_quadratic.py new file mode 100644 index 0000000..613e2f7 --- /dev/null +++ b/examples/clarabel_example_lpgd_quadratic.py @@ -0,0 +1,40 @@ +import diffcp + +import numpy as np +import utils +from scipy import sparse + +np.set_printoptions(precision=5, suppress=True) + + +# We generate a random cone program with a cone +# defined as a product of a 3-d zero cone, 3-d positive orthant cone, +# and a 5-d second order cone. +K = { + 'z': 3, + 'l': 3, + 'q': [5] +} + +m = 3 + 3 + 5 +n = 5 + +np.random.seed(0) + +A, b, c = utils.random_cone_prog(m, n, K) +P = sparse.csc_matrix((c.size, c.size)) +P = sparse.triu(P).tocsc() + +# We solve the cone program and get the derivative and its adjoint +x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative( + A, b, c, K, P=P, solve_method="Clarabel", verbose=False, mode="lpgd", derivative_kwargs=dict(tau=0.1, rho=0.0)) + +print("x =", x) +print("y =", y) +print("s =", s) + +# Adjoint derivative +dA, db, dc, dP = adjoint_derivative(dx=c, dy=np.zeros(m), ds=np.zeros(m), return_dP=True) + +# Derivative (dummy inputs) +dx, dy, ds = derivative(dA=A, db=b, dc=c, dP=P) From 8a9069c307aef737243822cebfbca88954edc86b Mon Sep 17 00:00:00 2001 From: Anselm Paulus Date: Sat, 14 Dec 2024 11:10:39 +0100 Subject: [PATCH 17/17] minor cosmetic change --- diffcp/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/diffcp/utils.py b/diffcp/utils.py index c0dc87b..c927747 100644 --- a/diffcp/utils.py +++ b/diffcp/utils.py @@ -92,8 +92,8 @@ def compute_perturbed_solution(dA, db, dc, dP, tau, rho, A, b, c, P, cone_dict, pattern as the matrix `A` from the cone program db: NumPy array representing perturbation in `b` dc: NumPy array representing perturbation in `c` - dP: Optional: SciPy sparse matrix in CSC format; must have same sparsity - pattern as the matrix PA` from the cone program + dP: (optional) SciPy sparse matrix in CSC format; must have same sparsity + pattern as the matrix `P` from the cone program tau: Perturbation strength parameter rho: Regularization strength parameter Returns: