
Merge LPGD into diffcp #67

Open · wants to merge 18 commits into `master`

Changes from 14 commits
2 changes: 2 additions & 0 deletions README.md
@@ -96,6 +96,8 @@ These inputs must conform to the [SCS convention](https://github.com/bodono/scs-

The values in `cone_dict` denote the sizes of each cone; the values of `diffcp.SOC`, `diffcp.PSD`, and `diffcp.EXP` should be lists. The order of the rows of `A` must match the ordering of the cones given above. For more details, consult the [SCS documentation](https://github.com/cvxgrp/scs/blob/master/README.md).

To enable [Lagrangian Proximal Gradient Descent (LPGD)](https://arxiv.org/abs/2407.05920) differentiation of the conic program based on efficient finite differences, provide the `mode="lpgd"` option along with the argument `derivative_kwargs=dict(tau=0.1, rho=0.1)` to specify the perturbation and regularization strengths. Alternatively, the derivative kwargs can be passed directly to the returned `derivative` and `adjoint_derivative` functions.
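For example (a minimal sketch; `A`, `b`, `c`, `cone_dict`, and an incoming gradient `dx` with `m = A.shape[0]` are assumed to be set up as above):

```python
import numpy as np
import diffcp

# Derivative kwargs fixed up front:
x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
    A, b, c, cone_dict, mode="lpgd", derivative_kwargs=dict(tau=0.1, rho=0.1))
dA, db, dc = adjoint_derivative(dx, np.zeros(m), np.zeros(m))

# Or passed at call time instead:
x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
    A, b, c, cone_dict, mode="lpgd")
dA, db, dc = adjoint_derivative(dx, np.zeros(m), np.zeros(m), tau=0.1, rho=0.1)
```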

#### Return value
The function `solve_and_derivative` returns a tuple

261 changes: 196 additions & 65 deletions diffcp/cone_program.py

Large diffs are not rendered by default.

167 changes: 167 additions & 0 deletions diffcp/utils.py
@@ -1,6 +1,7 @@
import diffcp.cones as cone_lib
import numpy as np
from scipy import sparse
from copy import deepcopy


def scs_data_from_cvxpy_problem(problem):
@@ -36,3 +37,169 @@ def get_random_like(A, randomness):
rows, cols = A.nonzero()
values = randomness(A.nnz)
return sparse.csc_matrix((values, (rows, cols)), shape=A.shape)


def regularize_P(P, rho, size):
"""Regularizes the matrix P by adding rho * I to it."""
if rho > 0:
reg = rho * sparse.eye(size, format='csc')
if P is None:
P_reg = reg
else:
P_reg = P + reg
else:
P_reg = P
return P_reg
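For instance (an illustrative check, not part of the diff):

```python
from scipy import sparse
from diffcp.utils import regularize_P

# rho > 0 with no P: returns the regularizer alone, rho * I in CSC format
reg = regularize_P(None, rho=0.1, size=3)
assert (reg != 0.1 * sparse.eye(3, format="csc")).nnz == 0

# rho == 0: P is passed through unchanged (here None)
assert regularize_P(None, rho=0.0, size=3) is None
```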


def embed_problem(A, b, c, P, cone_dict):
"""Embeds the problem into a larger problem that allows costs on the slack variables
A_emb = [A, I; 0, -I]
b_emb = [b; 0]
c_emb = [c; 0]
P_emb = [P, 0; 0, 0]
cone_dict_emb = cone_dict with z shifted by m
"""
m = b.shape[0]
A_emb = sparse.bmat([
[A, sparse.eye(m, format=A.format)],
[None, -sparse.eye(m, format=A.format)]
]).tocsc()
b_emb = np.hstack([b, np.zeros(m)])
c_emb = np.hstack([c, np.zeros(m)])
if P is not None:
P_emb = sparse.bmat([
[P, None],
[None, np.zeros((m, m))]
]).tocsc()
else:
P_emb = None
cone_dict_emb = deepcopy(cone_dict)
if 'z' in cone_dict_emb:
cone_dict_emb['z'] += m
else:
cone_dict_emb['z'] = m
return A_emb, b_emb, c_emb, P_emb, cone_dict_emb
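A quick shape check of the embedding (an illustrative note, not part of the diff; assumes a pure nonnegative-orthant cone program):

```python
import numpy as np
from scipy import sparse
from diffcp.utils import embed_problem

m, n = 3, 2
A = sparse.random(m, n, density=0.5, format="csc")
b, c = np.ones(m), np.ones(n)

A_emb, b_emb, c_emb, P_emb, cone_emb = embed_problem(A, b, c, None, {"l": m})

assert A_emb.shape == (2 * m, n + m)   # [A, I; 0, -I]
assert b_emb.shape == (2 * m,)         # [b; 0]
assert c_emb.shape == (n + m,)         # [c; 0]
assert P_emb is None                   # no quadratic term to embed
assert cone_emb == {"l": m, "z": m}    # zero cone 'z' grows by m rows
```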


def compute_perturbed_solution(dA, db, dc, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal):
"""
Computes the perturbed solution x_pert, y_pert, s_pert of the problem at (A, b, c)
under perturbations (dA, db, dc) scaled by tau, with optional regularization strength rho.

Args:
dA: SciPy sparse matrix in CSC format; must have same sparsity
pattern as the matrix `A` from the cone program
db: NumPy array representing perturbation in `b`
dc: NumPy array representing perturbation in `c`
tau: Perturbation strength parameter
rho: Regularization strength parameter
Returns:
x_pert: Perturbed solution of the primal variable x
y_pert: Perturbed solution of the dual variable y
s_pert: Perturbed solution of the slack variable s
"""
m, n = A.shape

# Perturb the problem
A_pert = A + tau * dA
b_pert = b + tau * db
c_pert = c + tau * dc

# Regularize: Effectively adds a rho/2 |x-x^*|^2 term to the objective
P_reg = regularize_P(P, rho=rho, size=n)
c_pert_reg = c_pert - rho * x

# Set warm start
warm_start = (x, y, s)

# Solve the perturbed problem
result_pert = solve_internal(A=A_pert, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict,
solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
# Extract the solutions
x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"]
return x_pert, y_pert, s_pert
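Downstream, the LPGD forward derivative differences this perturbed solution against the unperturbed one. A schematic sketch (assuming `A`, `b`, `c`, `P`, `cone_dict`, `x`, `y`, `s`, `dA`, `db`, `dc` are in scope and `solve_internal` stands in for diffcp's internal solve function; the exact assembly lives in `cone_program.py`):

```python
tau, rho = 0.1, 0.1
x_pert, y_pert, s_pert = compute_perturbed_solution(
    dA, db, dc, tau, rho, A, b, c, P, cone_dict, x, y, s,
    solver_kwargs={}, solve_method="SCS", solve_internal=solve_internal)

# One-sided finite-difference quotients approximate the directional
# derivative of the solution map in direction (dA, db, dc):
dx = (x_pert - x) / tau
dy = (y_pert - y) / tau
ds = (s_pert - s) / tau
```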


def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal):
"""
Computes the adjoint perturbed solution x_pert, y_pert, s_pert (the Lagrangian
Proximal Map) by solving the following perturbed problem with perturbations
dx, dy, ds and perturbation/regularization parameters tau/rho:

    argmin.  (1/2)<x, Px> + <c, x> + tau*(<dx, x> + <ds, s>) + rho/2 |x - x^*|^2
    subject to  Ax + s = b - tau*dy
                s \in K

For ds = 0 we solve

    argmin.  (1/2)<x, (P + rho*I)x> + <c + tau*dx - rho*x^*, x>
    subject to  Ax + s = b - tau*dy
                s \in K

This is just a perturbed instance of the forward optimization problem.

For ds != 0 we rewrite the problem as an embedded problem: we add a variable
x' with the constraint x' = s, replace all appearances of s with x', and solve

    argmin.  (1/2)<[x; x'], [[P + rho*I, 0]; [0, rho*I]] [x; x']>
                 + <[c + tau*dx - rho*x^*; tau*ds - rho*s^*], [x; x']>
    subject to  [[A, I]; [0, -I]] [x; x'] + [s'; s] = [b - tau*dy; 0]
                (s', s) \in ({0} x K)

Note that we also add a regularizer on s in this case (rho/2 |s - s^*|^2).

Args:
dx: NumPy array representing perturbation in `x`
dy: NumPy array representing perturbation in `y`
ds: NumPy array representing perturbation in `s`
tau: Perturbation strength parameter
rho: Regularization strength parameter
Returns:
x_pert: Perturbed solution of the primal variable x
y_pert: Perturbed solution of the dual variable y
s_pert: Perturbed solution of the slack variable s
"""
m, n = A.shape

# The cases ds = 0 and ds != 0 are handled separately, see docstring
if np.isclose(np.sum(np.abs(ds)), 0):
# Perturb problem (Note: perturb primal and dual linear terms in different directions)
c_pert = c + tau * dx
b_pert = b - tau * dy

# Regularize: Effectively adds a rho/2 |x-x^*|^2 term to the objective
P_reg = regularize_P(P, rho=rho, size=n)
c_pert_reg = c_pert - rho * x

# Set warm start
warm_start = (x, y, s) if solve_method != "ECOS" else None

# Solve the perturbed problem
# Note: In the special case solve_method=='SCS' and rho==0, this could be sped up significantly by using solver.update
result_pert = solve_internal(A=A, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict,
solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
# Extract the solutions
x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"]
else:
# Embed problem (see docstring)
A_emb, b_emb, c_emb, P_emb, cone_dict_emb = embed_problem(A, b, c, P, cone_dict)

# Perturb problem (Note: perturb primal and dual linear terms in different directions)
b_emb_pert = b_emb - tau * np.hstack([dy, np.zeros(m)])
c_emb_pert = c_emb + tau * np.hstack([dx, ds])

# Regularize: Effectively adds a rho/2 (|x-x^*|^2 + |s-s^*|^2) term to the objective
P_emb_reg = regularize_P(P_emb, rho=rho, size=n+m)
c_emb_pert_reg = c_emb_pert - rho * np.hstack([x, s])

# Set warm start
if solve_method == "ECOS":
warm_start = None
else:
warm_start = (np.hstack([x, s]), np.hstack([y, y]), np.hstack([s, s]))
Member: Does Clarabel use a warmstart?

Author: It does not seem like they support it. Currently in diffcp the warm_start is also not passed to the Clarabel solver, so it definitely is not used at the moment. Probably better to make this explicit; I will add a check to throw an error when trying to use warm-starting with Clarabel.

# Solve the embedded problem
result_pert = solve_internal(A=A_emb, b=b_emb_pert, c=c_emb_pert_reg, P=P_emb_reg, cone_dict=cone_dict_emb,
solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
# Extract the solutions
x_pert = result_pert['x'][:n]
y_pert = result_pert['y'][:m]
s_pert = result_pert['x'][n:n+m]
return x_pert, y_pert, s_pert
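The adjoint estimate is likewise built from difference quotients of this perturbed solution. A schematic sketch (the exact mapping to `dA`, `db`, `dc`, including sign conventions, follows the LPGD paper and is implemented in `cone_program.py`; the quotients below are assumptions for illustration):

```python
tau, rho = 0.1, 0.1
x_pert, y_pert, s_pert = compute_adjoint_perturbed_solution(
    dx, dy, ds, tau, rho, A, b, c, P, cone_dict, x, y, s,
    solver_kwargs={}, solve_method="SCS", solve_internal=solve_internal)

# Illustrative difference quotients (sign conventions assumed for this sketch):
dc_estimate = (x_pert - x) / tau
db_estimate = (y_pert - y) / tau
```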
4 changes: 2 additions & 2 deletions examples/batch_ecos.py
@@ -31,9 +31,9 @@ def time_function(f, N=1):
for n_jobs in range(1, 8):
def f_forward():
return diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
-            n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, solver="ECOS", verbose=False)
+            n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False)
xs, ys, ss, D_batch, DT_batch = diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
-        n_jobs_forward=1, n_jobs_backward=n_jobs, solver="ECOS", verbose=False)
+        n_jobs_forward=1, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False)
Member: Thanks!

Author: ;)

def f_backward():
DT_batch(xs, ys, ss, mode="lsqr")
46 changes: 46 additions & 0 deletions examples/batch_ecos_lpgd.py
@@ -0,0 +1,46 @@
import diffcp
import utils
import IPython as ipy
import time
import numpy as np

m = 100
n = 50

batch_size = 16
n_jobs = 1

As, bs, cs, Ks = [], [], [], []
for _ in range(batch_size):
A, b, c, K = diffcp.utils.least_squares_eq_scs_data(m, n)
As += [A]
bs += [b]
cs += [c]
Ks += [K]


def time_function(f, N=1):
result = []
for i in range(N):
tic = time.time()
f()
toc = time.time()
result += [toc - tic]
return np.mean(result), np.std(result)

for n_jobs in range(1, 8):
def f_forward():
return diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False,
mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.0))
xs, ys, ss, D_batch, DT_batch = diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
n_jobs_forward=1, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False,
mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.0))

def f_backward():
DT_batch(xs, ys, ss)

mean_forward, std_forward = time_function(f_forward)
mean_backward, std_backward = time_function(f_backward)
print("%03d | %4.4f +/- %2.2f | %4.4f +/- %2.2f" %
(n_jobs, mean_forward, std_forward, mean_backward, std_backward))
42 changes: 42 additions & 0 deletions examples/batch_lpgd.py
@@ -0,0 +1,42 @@
import diffcp
import utils
import IPython as ipy
import time
import numpy as np

m = 100
n = 50

batch_size = 16
n_jobs = 1

As, bs, cs, Ks = [], [], [], []
for _ in range(batch_size):
A, b, c, K = diffcp.utils.least_squares_eq_scs_data(m, n)
As += [A]
bs += [b]
cs += [c]
Ks += [K]

def time_function(f, N=1):
result = []
for i in range(N):
tic = time.time()
f()
toc = time.time()
result += [toc-tic]
return np.mean(result), np.std(result)

for n_jobs in range(1, 5):
def f_forward():
return diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
n_jobs_forward=n_jobs, n_jobs_backward=n_jobs)
xs, ys, ss, D_batch, DT_batch = diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
n_jobs_forward=1, n_jobs_backward=n_jobs, mode='lpgd_left')
def f_backward():
DT_batch(xs, ys, ss, tau=0.1, rho=0.1)

mean_forward, std_forward = time_function(f_forward)
mean_backward, std_backward = time_function(f_backward)
print("%03d | %4.4f +/- %2.2f | %4.4f +/- %2.2f" %
    (n_jobs, mean_forward, std_forward, mean_backward, std_backward))
2 changes: 1 addition & 1 deletion examples/dual_example.py
@@ -9,7 +9,7 @@
# defined as a product of a 3-d fixed cone, 3-d positive orthant cone,
Member suggested change:

-# defined as a product of a 3-d fixed cone, 3-d positive orthant cone,
+# defined as a product of a 3-d zero cone, 3-d positive orthant cone,
# and a 5-d second order cone.
K = {
-    'f': 3,
+    'z': 3,
    'l': 3,
    'q': [5]
}
39 changes: 39 additions & 0 deletions examples/dual_example_lpgd.py
@@ -0,0 +1,39 @@
import diffcp

import numpy as np
import utils
np.set_printoptions(precision=5, suppress=True)


# We generate a random cone program with a cone
# defined as a product of a 3-d zero cone, 3-d positive orthant cone,
# and a 5-d second order cone.
K = {
'z': 3,
'l': 3,
'q': [5]
}

m = 3 + 3 + 5
n = 5

np.random.seed(0)

A, b, c = utils.random_cone_prog(m, n, K)

# We solve the cone program and get the derivative and its adjoint
x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
A, b, c, K, eps=1e-10, mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.1))

print("x =", x)
print("y =", y)
print("s =", s)

# We evaluate the gradient of the objective with respect to A, b and c.
dA, db, dc = adjoint_derivative(c, np.zeros(m), np.zeros(m))

# The gradient of the objective with respect to b should be
# equal to minus the dual variable y (see, e.g., page 268 of Convex Optimization by
# Boyd & Vandenberghe).
print("db =", db)
print("-y =", -y)
4 changes: 2 additions & 2 deletions examples/ecos_example.py
@@ -9,7 +9,7 @@
# defined as a product of a 3-d fixed cone, 3-d positive orthant cone,
Member suggested change:

-# defined as a product of a 3-d fixed cone, 3-d positive orthant cone,
+# defined as a product of a 3-d zero cone, 3-d positive orthant cone,

# and a 5-d second order cone.
K = {
-    'f': 3,
+    'z': 3,
    'l': 3,
    'q': [5]
}
@@ -23,7 +23,7 @@

# We solve the cone program and get the derivative and its adjoint
x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
-    A, b, c, K, solver="ECOS", verbose=False)
+    A, b, c, K, solve_method="ECOS", verbose=False)

print("x =", x)
print("y =", y)
39 changes: 39 additions & 0 deletions examples/ecos_example_lpgd.py
@@ -0,0 +1,39 @@
import diffcp

import numpy as np
import utils
np.set_printoptions(precision=5, suppress=True)


# We generate a random cone program with a cone
# defined as a product of a 3-d fixed cone, 3-d positive orthant cone,
Member suggested change:

-# defined as a product of a 3-d fixed cone, 3-d positive orthant cone,
+# defined as a product of a 3-d zero cone, 3-d positive orthant cone,

# and a 5-d second order cone.
K = {
'z': 3,
'l': 3,
'q': [5]
}

m = 3 + 3 + 5
n = 5

np.random.seed(0)

A, b, c = utils.random_cone_prog(m, n, K)

# We solve the cone program and get the derivative and its adjoint
x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
A, b, c, K, solve_method="ECOS", verbose=False, mode="lpgd", derivative_kwargs=dict(tau=0.1, rho=0.0))

print("x =", x)
print("y =", y)
print("s =", s)

# We evaluate the gradient of the objective with respect to A, b and c.
dA, db, dc = adjoint_derivative(c, np.zeros(m), np.zeros(m))

# The gradient of the objective with respect to b should be
# equal to minus the dual variable y (see, e.g., page 268 of Convex Optimization by
# Boyd & Vandenberghe).
print("db =", db)
print("-y =", -y)