
Merge LPGD into diffcp #67

Open · wants to merge 18 commits into `master`

Changes from 14 commits
2 changes: 2 additions & 0 deletions README.md
@@ -96,6 +96,8 @@ These inputs must conform to the [SCS convention](https://github.com/bodono/scs-

The values in `cone_dict` denote the sizes of each cone; the values of `diffcp.SOC`, `diffcp.PSD`, and `diffcp.EXP` should be lists. The order of the rows of `A` must match the ordering of the cones given above. For more details, consult the [SCS documentation](https://github.com/cvxgrp/scs/blob/master/README.md).

To enable [Lagrangian Proximal Gradient Descent (LPGD)](https://arxiv.org/abs/2407.05920) differentiation of the conic program based on efficient finite differences, provide the `mode="lpgd"` option along with the argument `derivative_kwargs=dict(tau=0.1, rho=0.1)` to specify the perturbation and regularization strengths. Alternatively, the derivative kwargs can be passed directly to the returned `derivative` and `adjoint_derivative` functions.
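For example (a minimal sketch; `A`, `b`, `c`, `cone_dict`, and an incoming gradient `dx` with `m = A.shape[0]` are assumed to be set up as above):

```python
import numpy as np
import diffcp

# Derivative kwargs fixed up front:
x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
    A, b, c, cone_dict, mode="lpgd", derivative_kwargs=dict(tau=0.1, rho=0.1))
dA, db, dc = adjoint_derivative(dx, np.zeros(m), np.zeros(m))

# Or passed at call time instead:
x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
    A, b, c, cone_dict, mode="lpgd")
dA, db, dc = adjoint_derivative(dx, np.zeros(m), np.zeros(m), tau=0.1, rho=0.1)
```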

#### Return value
The function `solve_and_derivative` returns a tuple

261 changes: 196 additions & 65 deletions diffcp/cone_program.py

Large diffs are not rendered by default.

167 changes: 167 additions & 0 deletions diffcp/utils.py
@@ -1,6 +1,7 @@
import diffcp.cones as cone_lib
import numpy as np
from scipy import sparse
from copy import deepcopy


def scs_data_from_cvxpy_problem(problem):
@@ -36,3 +37,169 @@ def get_random_like(A, randomness):
rows, cols = A.nonzero()
values = randomness(A.nnz)
return sparse.csc_matrix((values, (rows, cols)), shape=A.shape)


def regularize_P(P, rho, size):
"""Regularizes the matrix P by adding rho * I to it."""
if rho > 0:
reg = rho * sparse.eye(size, format='csc')
if P is None:
P_reg = reg
else:
P_reg = P + reg
else:
P_reg = P
return P_reg
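For instance (an illustrative check, not part of the diff):

```python
from scipy import sparse
from diffcp.utils import regularize_P

# rho > 0 with no P: returns the regularizer alone, rho * I in CSC format
reg = regularize_P(None, rho=0.1, size=3)
assert (reg != 0.1 * sparse.eye(3, format="csc")).nnz == 0

# rho == 0: P is passed through unchanged (here None)
assert regularize_P(None, rho=0.0, size=3) is None
```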


def embed_problem(A, b, c, P, cone_dict):
"""Embeds the problem into a larger problem that allows costs on the slack variables
A_emb = [A, I; 0, -I]
b_emb = [b; 0]
c_emb = [c; 0]
P_emb = [P, 0; 0, 0]
cone_dict_emb = cone_dict with z shifted by m
"""
m = b.shape[0]
A_emb = sparse.bmat([
[A, sparse.eye(m, format=A.format)],
[None, -sparse.eye(m, format=A.format)]
]).tocsc()
b_emb = np.hstack([b, np.zeros(m)])
c_emb = np.hstack([c, np.zeros(m)])
if P is not None:
P_emb = sparse.bmat([
[P, None],
[None, np.zeros((m, m))]
]).tocsc()
else:
P_emb = None
cone_dict_emb = deepcopy(cone_dict)
if 'z' in cone_dict_emb:
cone_dict_emb['z'] += m
else:
cone_dict_emb['z'] = m
return A_emb, b_emb, c_emb, P_emb, cone_dict_emb
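A quick shape check of the embedding (an illustrative note, not part of the diff; assumes a pure nonnegative-orthant cone program):

```python
import numpy as np
from scipy import sparse
from diffcp.utils import embed_problem

m, n = 3, 2
A = sparse.random(m, n, density=0.5, format="csc")
b, c = np.ones(m), np.ones(n)

A_emb, b_emb, c_emb, P_emb, cone_emb = embed_problem(A, b, c, None, {"l": m})

assert A_emb.shape == (2 * m, n + m)   # [A, I; 0, -I]
assert b_emb.shape == (2 * m,)         # [b; 0]
assert c_emb.shape == (n + m,)         # [c; 0]
assert P_emb is None                   # no quadratic term to embed
assert cone_emb == {"l": m, "z": m}    # zero cone 'z' grows by m rows
```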


def compute_perturbed_solution(dA, db, dc, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal):
"""
Computes the perturbed solution x_pert, y_pert, s_pert of the problem at (A, b, c)
under perturbations (dA, db, dc) scaled by tau, with optional regularization strength rho.

Args:
dA: SciPy sparse matrix in CSC format; must have same sparsity
pattern as the matrix `A` from the cone program
db: NumPy array representing perturbation in `b`
dc: NumPy array representing perturbation in `c`
tau: Perturbation strength parameter
rho: Regularization strength parameter
Returns:
x_pert: Perturbed solution of the primal variable x
y_pert: Perturbed solution of the dual variable y
s_pert: Perturbed solution of the slack variable s
"""
m, n = A.shape

# Perturb the problem
A_pert = A + tau * dA
b_pert = b + tau * db
c_pert = c + tau * dc

# Regularize: Effectively adds a rho/2 |x-x^*|^2 term to the objective
P_reg = regularize_P(P, rho=rho, size=n)
c_pert_reg = c_pert - rho * x

# Set warm start
warm_start = (x, y, s)

# Solve the perturbed problem
result_pert = solve_internal(A=A_pert, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict,
solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
# Extract the solutions
x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"]
return x_pert, y_pert, s_pert
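Downstream, the LPGD forward derivative differences this perturbed solution against the unperturbed one. A schematic sketch (assuming `A`, `b`, `c`, `P`, `cone_dict`, `x`, `y`, `s`, `dA`, `db`, `dc` are in scope and `solve_internal` stands in for diffcp's internal solve function; the exact assembly lives in `cone_program.py`):

```python
tau, rho = 0.1, 0.1
x_pert, y_pert, s_pert = compute_perturbed_solution(
    dA, db, dc, tau, rho, A, b, c, P, cone_dict, x, y, s,
    solver_kwargs={}, solve_method="SCS", solve_internal=solve_internal)

# One-sided finite-difference quotients approximate the directional
# derivative of the solution map in direction (dA, db, dc):
dx = (x_pert - x) / tau
dy = (y_pert - y) / tau
ds = (s_pert - s) / tau
```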


def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal):
"""
Computes the adjoint perturbed solution x_pert, y_pert, s_pert (the Lagrangian
Proximal Map) by solving the following perturbed problem with perturbations
dx, dy, ds and perturbation/regularization parameters tau/rho:

    argmin.  (1/2)<x, Px> + <c, x> + tau*(<dx, x> + <ds, s>) + rho/2 |x - x^*|^2
    subject to  Ax + s = b - tau*dy
                s \in K

For ds = 0 we solve

    argmin.  (1/2)<x, (P + rho*I)x> + <c + tau*dx - rho*x^*, x>
    subject to  Ax + s = b - tau*dy
                s \in K

This is just a perturbed instance of the forward optimization problem.

For ds != 0 we rewrite the problem as an embedded problem: we add a variable
x' with the constraint x' = s, replace all appearances of s with x', and solve

    argmin.  (1/2)<[x; x'], [[P + rho*I, 0]; [0, rho*I]] [x; x']>
                 + <[c + tau*dx - rho*x^*; tau*ds - rho*s^*], [x; x']>
    subject to  [[A, I]; [0, -I]] [x; x'] + [s'; s] = [b - tau*dy; 0]
                (s', s) \in ({0} x K)

Note that we also add a regularizer on s in this case (rho/2 |s - s^*|^2).

Args:
dx: NumPy array representing perturbation in `x`
dy: NumPy array representing perturbation in `y`
ds: NumPy array representing perturbation in `s`
tau: Perturbation strength parameter
rho: Regularization strength parameter
Returns:
x_pert: Perturbed solution of the primal variable x
y_pert: Perturbed solution of the dual variable y
s_pert: Perturbed solution of the slack variable s
"""
m, n = A.shape

# The cases ds = 0 and ds != 0 are handled separately, see docstring
if np.isclose(np.sum(np.abs(ds)), 0):
# Perturb problem (Note: perturb primal and dual linear terms in different directions)
c_pert = c + tau * dx
b_pert = b - tau * dy

# Regularize: Effectively adds a rho/2 |x-x^*|^2 term to the objective
P_reg = regularize_P(P, rho=rho, size=n)
c_pert_reg = c_pert - rho * x

# Set warm start
warm_start = (x, y, s) if solve_method != "ECOS" else None

# Solve the perturbed problem
# Note: In the special case solve_method=='SCS' and rho==0, this could be sped up significantly by using solver.update
result_pert = solve_internal(A=A, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict,
solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
# Extract the solutions
x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"]
else:
# Embed problem (see docstring)
A_emb, b_emb, c_emb, P_emb, cone_dict_emb = embed_problem(A, b, c, P, cone_dict)

# Perturb problem (Note: perturb primal and dual linear terms in different directions)
b_emb_pert = b_emb - tau * np.hstack([dy, np.zeros(m)])
c_emb_pert = c_emb + tau * np.hstack([dx, ds])

# Regularize: Effectively adds a rho/2 (|x-x^*|^2 + |s-s^*|^2) term to the objective
P_emb_reg = regularize_P(P_emb, rho=rho, size=n+m)
c_emb_pert_reg = c_emb_pert - rho * np.hstack([x, s])

# Set warm start
if solve_method == "ECOS":
warm_start = None
else:
warm_start = (np.hstack([x, s]), np.hstack([y, y]), np.hstack([s, s]))
Member: Does Clarabel use a warmstart?

Author: It does not seem like they support it. Currently in diffcp the warm_start is also not passed to the Clarabel solver, so it definitely is not used at the moment. Probably better to make this explicit; I will add a check to throw an error when trying to use warm-starting with Clarabel.

# Solve the embedded problem
result_pert = solve_internal(A=A_emb, b=b_emb_pert, c=c_emb_pert_reg, P=P_emb_reg, cone_dict=cone_dict_emb,
solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
# Extract the solutions
x_pert = result_pert['x'][:n]
y_pert = result_pert['y'][:m]
s_pert = result_pert['x'][n:n+m]
return x_pert, y_pert, s_pert
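The adjoint estimate is likewise built from difference quotients of this perturbed solution. A schematic sketch (the exact mapping to `dA`, `db`, `dc`, including sign conventions, follows the LPGD paper and is implemented in `cone_program.py`; the quotients below are assumptions for illustration):

```python
tau, rho = 0.1, 0.1
x_pert, y_pert, s_pert = compute_adjoint_perturbed_solution(
    dx, dy, ds, tau, rho, A, b, c, P, cone_dict, x, y, s,
    solver_kwargs={}, solve_method="SCS", solve_internal=solve_internal)

# Illustrative difference quotients (sign conventions assumed for this sketch):
dc_estimate = (x_pert - x) / tau
db_estimate = (y_pert - y) / tau
```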
4 changes: 2 additions & 2 deletions examples/batch_ecos.py
@@ -31,9 +31,9 @@ def time_function(f, N=1):
for n_jobs in range(1, 8):
def f_forward():
return diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
-            n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, solver="ECOS", verbose=False)
+            n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False)
xs, ys, ss, D_batch, DT_batch = diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
-        n_jobs_forward=1, n_jobs_backward=n_jobs, solver="ECOS", verbose=False)
+        n_jobs_forward=1, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False)
Member: Thanks!

Author: ;)

def f_backward():
DT_batch(xs, ys, ss, mode="lsqr")
46 changes: 46 additions & 0 deletions examples/batch_ecos_lpgd.py
@@ -0,0 +1,46 @@
import diffcp
import utils
import IPython as ipy
import time
import numpy as np

m = 100
n = 50

batch_size = 16
n_jobs = 1

As, bs, cs, Ks = [], [], [], []
for _ in range(batch_size):
A, b, c, K = diffcp.utils.least_squares_eq_scs_data(m, n)
As += [A]
bs += [b]
cs += [c]
Ks += [K]


def time_function(f, N=1):
result = []
for i in range(N):
tic = time.time()
f()
toc = time.time()
result += [toc - tic]
return np.mean(result), np.std(result)

for n_jobs in range(1, 8):
def f_forward():
return diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False,
mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.0))
xs, ys, ss, D_batch, DT_batch = diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
n_jobs_forward=1, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False,
mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.0))

def f_backward():
DT_batch(xs, ys, ss)

mean_forward, std_forward = time_function(f_forward)
mean_backward, std_backward = time_function(f_backward)
print("%03d | %4.4f +/- %2.2f | %4.4f +/- %2.2f" %
(n_jobs, mean_forward, std_forward, mean_backward, std_backward))
42 changes: 42 additions & 0 deletions examples/batch_lpgd.py
@@ -0,0 +1,42 @@
import diffcp
import utils
import IPython as ipy
import time
import numpy as np

m = 100
n = 50

batch_size = 16
n_jobs = 1

As, bs, cs, Ks = [], [], [], []
for _ in range(batch_size):
A, b, c, K = diffcp.utils.least_squares_eq_scs_data(m, n)
As += [A]
bs += [b]
cs += [c]
Ks += [K]

def time_function(f, N=1):
result = []
for i in range(N):
tic = time.time()
f()
toc = time.time()
result += [toc-tic]
return np.mean(result), np.std(result)

for n_jobs in range(1, 5):
def f_forward():
return diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
n_jobs_forward=n_jobs, n_jobs_backward=n_jobs)
xs, ys, ss, D_batch, DT_batch = diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
n_jobs_forward=1, n_jobs_backward=n_jobs, mode='lpgd_left')
def f_backward():
DT_batch(xs, ys, ss, tau=0.1, rho=0.1)

mean_forward, std_forward = time_function(f_forward)
mean_backward, std_backward = time_function(f_backward)
print("%03d | %4.4f +/- %2.2f | %4.4f +/- %2.2f" %
    (n_jobs, mean_forward, std_forward, mean_backward, std_backward))
2 changes: 1 addition & 1 deletion examples/dual_example.py
@@ -9,7 +9,7 @@
# defined as a product of a 3-d fixed cone, 3-d positive orthant cone,
Member suggested change:

-# defined as a product of a 3-d fixed cone, 3-d positive orthant cone,
+# defined as a product of a 3-d zero cone, 3-d positive orthant cone,
# and a 5-d second order cone.
K = {
-    'f': 3,
+    'z': 3,
    'l': 3,
    'q': [5]
}
39 changes: 39 additions & 0 deletions examples/dual_example_lpgd.py
@@ -0,0 +1,39 @@
import diffcp

import numpy as np
import utils
np.set_printoptions(precision=5, suppress=True)


# We generate a random cone program with a cone
# defined as a product of a 3-d zero cone, 3-d positive orthant cone,
# and a 5-d second order cone.
K = {
'z': 3,
'l': 3,
'q': [5]
}

m = 3 + 3 + 5
n = 5

np.random.seed(0)

A, b, c = utils.random_cone_prog(m, n, K)

# We solve the cone program and get the derivative and its adjoint
x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
A, b, c, K, eps=1e-10, mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.1))

print("x =", x)
print("y =", y)
print("s =", s)

# We evaluate the gradient of the objective with respect to A, b and c.
dA, db, dc = adjoint_derivative(c, np.zeros(m), np.zeros(m))

# The gradient of the objective with respect to b should be
# equal to minus the dual variable y (see, e.g., page 268 of Convex Optimization by
# Boyd & Vandenberghe).
print("db =", db)
print("-y =", -y)
4 changes: 2 additions & 2 deletions examples/ecos_example.py
@@ -9,7 +9,7 @@
# defined as a product of a 3-d fixed cone, 3-d positive orthant cone,
Member suggested change:

-# defined as a product of a 3-d fixed cone, 3-d positive orthant cone,
+# defined as a product of a 3-d zero cone, 3-d positive orthant cone,

# and a 5-d second order cone.
K = {
-    'f': 3,
+    'z': 3,
    'l': 3,
    'q': [5]
}
@@ -23,7 +23,7 @@

# We solve the cone program and get the derivative and its adjoint
x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
-    A, b, c, K, solver="ECOS", verbose=False)
+    A, b, c, K, solve_method="ECOS", verbose=False)

print("x =", x)
print("y =", y)
39 changes: 39 additions & 0 deletions examples/ecos_example_lpgd.py
@@ -0,0 +1,39 @@
import diffcp

import numpy as np
import utils
np.set_printoptions(precision=5, suppress=True)


# We generate a random cone program with a cone
# defined as a product of a 3-d fixed cone, 3-d positive orthant cone,
Member suggested change:

-# defined as a product of a 3-d fixed cone, 3-d positive orthant cone,
+# defined as a product of a 3-d zero cone, 3-d positive orthant cone,

# and a 5-d second order cone.
K = {
'z': 3,
'l': 3,
'q': [5]
}

m = 3 + 3 + 5
n = 5

np.random.seed(0)

A, b, c = utils.random_cone_prog(m, n, K)

# We solve the cone program and get the derivative and its adjoint
x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
A, b, c, K, solve_method="ECOS", verbose=False, mode="lpgd", derivative_kwargs=dict(tau=0.1, rho=0.0))

print("x =", x)
print("y =", y)
print("s =", s)

# We evaluate the gradient of the objective with respect to A, b and c.
dA, db, dc = adjoint_derivative(c, np.zeros(m), np.zeros(m))

# The gradient of the objective with respect to b should be
# equal to minus the dual variable y (see, e.g., page 268 of Convex Optimization by
# Boyd & Vandenberghe).
print("db =", db)
print("-y =", -y)