diff --git a/README.md b/README.md
index 639b0d0..511896f 100644
--- a/README.md
+++ b/README.md
@@ -96,6 +96,8 @@ These inputs must conform to the [SCS convention](https://github.com/bodono/scs-
 The values in `cone_dict` denote the sizes of each cone; the values of `diffcp.SOC`, `diffcp.PSD`, and `diffcp.EXP` should be lists. The order of the rows of `A` must match the ordering of the cones given above. For more details, consult the [SCS documentation](https://github.com/cvxgrp/scs/blob/master/README.md).
 
+To enable [Lagrangian Proximal Gradient Descent (LPGD)](https://arxiv.org/abs/2407.05920) differentiation of the conic program based on efficient finite differences, provide the `mode="lpgd"` option along with the argument `derivative_kwargs=dict(tau=0.1, rho=0.1)` to specify the perturbation and regularization strengths. Alternatively, the derivative kwargs can also be passed directly to the returned `derivative` and `adjoint_derivative` functions.
+
 #### Return value
 The function `solve_and_derivative` returns a tuple
diff --git a/diffcp/cone_program.py b/diffcp/cone_program.py
index 9919108..d20cee0 100644
--- a/diffcp/cone_program.py
+++ b/diffcp/cone_program.py
@@ -12,6 +12,7 @@ import _diffcp
 import diffcp.cones as cone_lib
+from diffcp.utils import compute_perturbed_solution, compute_adjoint_perturbed_solution
 
 
 def pi(z, cones):
@@ -24,14 +25,14 @@ def pi(z, cones):
         [u, cone_lib.pi(v, cones, dual=True), np.maximum(w, 0)])
 
 
-def solve_and_derivative_wrapper(A, b, c, cone_dict, warm_start, mode, kwargs):
+def solve_and_derivative_wrapper(A, b, c, P, cone_dict, warm_start, mode, kwargs):
     """A wrapper around solve_and_derivative for the batch function."""
     return solve_and_derivative(
-        A, b, c, cone_dict, warm_start=warm_start, mode=mode, **kwargs)
+        A, b, c, cone_dict, warm_start=warm_start, mode=mode, P=P, **kwargs)
 
 
 def solve_and_derivative_batch(As, bs, cs, cone_dicts, n_jobs_forward=-1, n_jobs_backward=-1,
-                               mode="lsqr", warm_starts=None, **kwargs):
+                               mode="lsqr", warm_starts=None, Ps=None, **kwargs):
     """
     Solves a batch of cone programs and returns a function that
     performs a batch of derivatives. Uses a ThreadPool to perform
@@ -49,8 +50,9 @@ def solve_and_derivative_batch(As, bs, cs, cone_dicts, n_jobs_forward=-1, n_jobs
             means serial and n_jobs_forward = -1 defaults to the number of CPUs (default=-1).
         n_jobs_backward - Number of jobs to use in the backward pass. n_jobs_backward = 1
             means serial and n_jobs_backward = -1 defaults to the number of CPUs (default=-1).
-        mode - Differentiation mode in ["lsqr", "lsmr", "dense"].
+        mode - Differentiation mode in ["lsqr", "lsmr", "dense", "lpgd", "lpgd_right", "lpgd_left"].
         warm_starts - A list of warm starts.
+        Ps - (optional) A list of P matrices.
         kwargs - kwargs sent to scs.
 
     Returns:
@@ -58,10 +60,11 @@ def solve_and_derivative_batch(As, bs, cs, cone_dicts, n_jobs_forward=-1, n_jobs
         ys: A list of y arrays.
         ss: A list of s arrays.
         D_batch: A callable with signature
-                D_batch(dAs, dbs, dcs) -> dxs, dys, dss
+                D_batch(dAs, dbs, dcs, dPs=None) -> dxs, dys, dss
             This callable maps lists of problem data derivatives to lists
             of solution derivatives.
         DT_batch: A callable with signature
-                DT_batch(dxs, dys, dss) -> dAs, dbs, dcs
+                DT_batch(dxs, dys, dss, return_dP=False) -> dAs, dbs, dcs
+                DT_batch(dxs, dys, dss, return_dP=True) -> dAs, dbs, dcs, dPs
             This callable maps lists of solution derivatives to lists of
             problem data derivatives.
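+
+    Example (illustrative sketch, not part of the PR itself; it uses the
+    `diffcp.utils.least_squares_eq_scs_data` helper that the batch examples
+    in this PR rely on, and the exact problem data is immaterial):
+
+        As, bs, cs, Ks = [], [], [], []
+        for _ in range(4):
+            A, b, c, K = diffcp.utils.least_squares_eq_scs_data(20, 10)
+            As, bs, cs, Ks = As + [A], bs + [b], cs + [c], Ks + [K]
+        xs, ys, ss, D_batch, DT_batch = solve_and_derivative_batch(
+            As, bs, cs, Ks, mode="lpgd", derivative_kwargs=dict(tau=0.1, rho=0.1))
+        dAs, dbs, dcs = DT_batch(xs, ys, ss)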
""" batch_size = len(As) @@ -73,13 +76,16 @@ def solve_and_derivative_batch(As, bs, cs, cone_dicts, n_jobs_forward=-1, n_jobs n_jobs_backward = mp.cpu_count() n_jobs_forward = min(batch_size, n_jobs_forward) n_jobs_backward = min(batch_size, n_jobs_backward) + + if Ps is None: + Ps = [None] * batch_size if n_jobs_forward == 1: # serial xs, ys, ss, Ds, DTs = [], [], [], [], [] for i in range(batch_size): x, y, s, D, DT = solve_and_derivative(As[i], bs[i], cs[i], - cone_dicts[i], warm_starts[i], mode=mode, **kwargs) + cone_dicts[i], warm_starts[i], mode=mode, P=Ps[i], **kwargs) xs += [x] ys += [y] ss += [s] @@ -88,8 +94,8 @@ def solve_and_derivative_batch(As, bs, cs, cone_dicts, n_jobs_forward=-1, n_jobs else: # thread pool pool = ThreadPool(processes=n_jobs_forward) - args = [(A, b, c, cone_dict, warm_start, mode, kwargs) for A, b, c, cone_dict, warm_start in - zip(As, bs, cs, cone_dicts, warm_starts)] + args = [(A, b, c, P, cone_dict, warm_start, mode, kwargs) for A, b, c, P, cone_dict, warm_start in + zip(As, bs, cs, Ps, cone_dicts, warm_starts)] with threadpool_limits(limits=1): results = pool.starmap(solve_and_derivative_wrapper, args) pool.close() @@ -109,21 +115,26 @@ def D_batch(dAs, dbs, dcs, **kwargs): dss += [ds] return dxs, dys, dss - def DT_batch(dxs, dys, dss, **kwargs): - dAs, dbs, dcs = [], [], [] + def DT_batch(dxs, dys, dss, return_dP=False, **kwargs): + dAs, dbs, dcs, dPs = [], [], [], [] for i in range(batch_size): - dA, db, dc = DTs[i](dxs[i], dys[i], dss[i], **kwargs) - dAs += [dA] - dbs += [db] - dcs += [dc] - return dAs, dbs, dcs + ret = DTs[i](dxs[i], dys[i], dss[i], return_dP=return_dP, **kwargs) + dAs += [ret[0]] + dbs += [ret[1]] + dcs += [ret[2]] + if return_dP: + dPs += [ret[3]] + if return_dP: + return dAs, dbs, dcs, dPs + else: + return dAs, dbs, dcs else: - def D_batch(dAs, dbs, dcs, **kwargs): + def D_batch(dAs, dbs, dcs, dPs=None, **kwargs): pool = ThreadPool(processes=n_jobs_backward) def Di(i): - return Ds[i](dAs[i], dbs[i], dcs[i], **kwargs) + return Ds[i](dAs[i], dbs[i], dcs[i], dPs[i], **kwargs) results = pool.map(Di, range(batch_size)) pool.close() dxs = [r[0] for r in results] @@ -131,29 +142,33 @@ def Di(i): dss = [r[2] for r in results] return dxs, dys, dss - def DT_batch(dxs, dys, dss, **kwargs): + def DT_batch(dxs, dys, dss, return_dP=False, **kwargs): pool = ThreadPool(processes=n_jobs_backward) def DTi(i): - return DTs[i](dxs[i], dys[i], dss[i], **kwargs) + return DTs[i](dxs[i], dys[i], dss[i], return_dP=return_dP, **kwargs) results = pool.map(DTi, range(batch_size)) pool.close() dAs = [r[0] for r in results] dbs = [r[1] for r in results] dcs = [r[2] for r in results] - return dAs, dbs, dcs + if return_dP: + dPs = [r[3] for r in results] + return dAs, dbs, dcs, dPs + else: + return dAs, dbs, dcs return xs, ys, ss, D_batch, DT_batch -def solve_only_wrapper(A, b, c, cone_dict, warm_start, kwargs): +def solve_only_wrapper(A, b, c, P, cone_dict, warm_start, kwargs): """A wrapper around solve_only for the batch function""" return solve_only( - A, b, c, cone_dict, warm_start=warm_start, **kwargs) + A, b, c, cone_dict, warm_start=warm_start, P=P, **kwargs) def solve_only_batch(As, bs, cs, cone_dicts, n_jobs_forward=-1, - warm_starts=None, **kwargs): + warm_starts=None, Ps=None, **kwargs): """ Solves a batch of cone programs. 
Uses a ThreadPool to perform operations across @@ -183,15 +198,15 @@ def solve_only_batch(As, bs, cs, cone_dicts, n_jobs_forward=-1, xs, ys, ss = [], [], [] for i in range(batch_size): x, y, s = solve_only(As[i], bs[i], cs[i], cone_dicts[i], - warm_starts[i], **kwargs) + warm_starts[i], Ps[i], **kwargs) xs += [x] ys += [y] ss += [s] else: # thread pool pool = ThreadPool(processes=n_jobs_forward) - args = [(A, b, c, cone_dict, warm_start, kwargs) for A, b, c, cone_dict, warm_start in - zip(As, bs, cs, cone_dicts, warm_starts)] + args = [(A, b, c, P, cone_dict, warm_start, kwargs) for A, b, c, P, cone_dict, warm_start in + zip(As, bs, cs, Ps, cone_dicts, warm_starts)] with threadpool_limits(limits=1): results = pool.starmap(solve_only_wrapper, args) pool.close() @@ -207,21 +222,21 @@ class SolverError(Exception): def solve_and_derivative(A, b, c, cone_dict, warm_start=None, mode='lsqr', - solve_method='SCS', **kwargs): + solve_method='SCS', P=None, **kwargs): """Solves a cone program, returns its derivative as an abstract linear map. This function solves a convex cone program, with primal-dual problems - min. c^T x min. b^Ty - subject to Ax + s = b subject to A^Ty + c = 0 + min. x^T P x + c^T x min. x^T P x + b^T y + subject to Ax + s = b subject to Px + A^Ty + c = 0 s \in K y \in K^* - The problem data A, b, and c correspond to the arguments `A`, `b`, and `c`, + The problem data A, b, c, and P correspond to the arguments `A`, `b`, `c`, and `P`, and the convex cone `K` corresponds to `cone_dict`; x and s are the primal variables, and y is the dual variable. This function returns a solution (x, y, s) to the program. It also returns two functions that respectively represent application of the derivative - (at A, b, and c) and its adjoint. + (at A, b, c, and P) and its adjoint. The problem data must be formatted according to the SCS convention, see https://github.com/cvxgrp/scs. @@ -248,8 +263,9 @@ def solve_and_derivative(A, b, c, cone_dict, warm_start=None, mode='lsqr', exponential cones. See SCS documentation for more details. warm_start: (optional) A tuple (x, y, s) at which to warm-start SCS. mode: (optional) Which mode to compute derivative with, options are - ["dense", "lsqr", "lsmr"]. + ["dense", "lsqr", "lsmr", "lpgd", "lpgd_right", "lpgd_left"]. solve_method: (optional) Name of solver to use; SCS, ECOS, or Clarabel. + P: (optional) A sparse SciPy matrix in CSC format. kwargs: (optional) Keyword arguments to send to the solver. Returns: @@ -257,22 +273,25 @@ def solve_and_derivative(A, b, c, cone_dict, warm_start=None, mode='lsqr', y: Optimal value of the dual variable y. s: Optimal value of the slack variable s. derivative: A callable with signature - derivative(dA, db, dc) -> dx, dy, ds - that applies the derivative of the cone program at (A, b, and c) - to the perturbations `dA`, `db`, `dc`. `dA` must be a SciPy sparse + derivative(dA, db, dc, dP) -> dx, dy, ds + that applies the derivative of the cone program at (A, b, c, P) + to the perturbations `dA`, `db`, `dc`, `dP`. `dA` must be a SciPy sparse matrix in CSC format with the same sparsity pattern as `A`; + `dP` must be a SciPy sparse matrix in CSC format with the same sparsity pattern as `P`; `db` and `dc` are NumPy arrays. 
adjoint_derivative: A callable with signature - adjoint_derivative(dx, dy, ds) -> dA, db, dc + adjoint_derivative(dx, dy, ds, return_dP=False) -> dA, db, dc + adjoint_derivative(dx, dy, ds, return_dP=True) -> dA, db, dc, dP that applies the adjoint of the derivative of the cone program at - (A, b, and c) to the perturbations `dx`, `dy`, `ds`, which must be + (A, b, c, P) to the perturbations `dx`, `dy`, `ds`. `dx`, `dy`, `ds` must be NumPy arrays. The output `dA` matches the sparsity pattern of `A`. + If `return_dP` is True, the output `dP` matches the sparsity pattern of `P`. Raises: SolverError: if the cone program is infeasible or unbounded. """ result = solve_and_derivative_internal( A, b, c, cone_dict, warm_start=warm_start, mode=mode, - solve_method=solve_method, **kwargs) + solve_method=solve_method, P=P, **kwargs) x = result["x"] y = result["y"] s = result["s"] @@ -282,7 +301,7 @@ def solve_and_derivative(A, b, c, cone_dict, warm_start=None, mode='lsqr', def solve_only(A, b, c, cone_dict, warm_start=None, - solve_method='SCS', **kwargs): + solve_method='SCS', P=None, **kwargs): """ Solves a cone program and returns its solution. @@ -298,7 +317,7 @@ def solve_only(A, b, c, cone_dict, warm_start=None, result = solve_internal( A, b, c, cone_dict, warm_start=warm_start, - solve_method=solve_method, **kwargs) + solve_method=solve_method, P=P, **kwargs) x = result["x"] y = result["y"] s = result["s"] @@ -306,7 +325,7 @@ def solve_only(A, b, c, cone_dict, warm_start=None, def solve_internal(A, b, c, cone_dict, solve_method=None, - warm_start=None, raise_on_error=True, **kwargs): + warm_start=None, raise_on_error=True, P=None, **kwargs): if solve_method is None: psd_cone = ('s' in cone_dict) and (cone_dict['s'] != []) @@ -335,6 +354,7 @@ def solve_internal(A, b, c, cone_dict, solve_method=None, del kwargs["eps"] data = { + "P": P, "A": A, "b": b, "c": c @@ -349,7 +369,7 @@ def solve_internal(A, b, c, cone_dict, solve_method=None, result = scs.solve(data, cone_dict, **kwargs) status = result["info"]["status"] - inaccurate_status = {"Solved/Inaccurate", "solved (inaccurate - reached max_iters)"} + inaccurate_status = {"Solved/Inaccurate", "solved (inaccurate - reached max_iters)", "solved (inaccurate - reached time_limit_secs)"} if status in inaccurate_status and "acceleration_lookback" not in kwargs: # anderson acceleration is sometimes unstable result = scs.solve( @@ -367,14 +387,14 @@ def solve_internal(A, b, c, cone_dict, solve_method=None, return result elif solve_method == "ECOS": + if P is not None: + raise ValueError("ECOS does not support quadratic objectives.") if warm_start is not None: raise ValueError('ECOS does not support warmstart.') if ('s' in cone_dict) and (cone_dict['s'] != []): raise ValueError("PSD cone not supported by ECOS.") if ('ed' in cone_dict) and (cone_dict['ed'] != 0): raise NotImplementedError("Dual exponential cones not supported yet.") - if warm_start is not None: - raise ValueError("ECOS does not support warm starting.") len_eq = cone_dict[cone_lib.EQ_DIM] C_ecos = c G_ecos = A[len_eq:] @@ -448,8 +468,11 @@ def solve_internal(A, b, c, cone_dict, solve_method=None, 'iter': solution['info']['iter'], 'pobj': solution['info']['pcost']} elif solve_method == "Clarabel": - # for now set P to 0 - P = sparse.csc_matrix((c.size, c.size)) + if warm_start is not None: + raise ValueError("Clarabel currently does not support warmstarting.") + + if P is None: + P = sparse.csc_matrix((c.size, c.size)) cones = [] if "z" in cone_dict: @@ -510,10 +533,13 @@ def 
solve_internal(A, b, c, cone_dict, solve_method=None, return result def solve_and_derivative_internal(A, b, c, cone_dict, solve_method=None, - warm_start=None, mode='lsqr', raise_on_error=True, **kwargs): - if mode not in ["dense", "lsqr", "lsmr"]: + warm_start=None, mode='lsqr', raise_on_error=True, P=None, **kwargs): + if mode not in ["dense", "lsqr", "lsmr", "lpgd", "lpgd_right", "lpgd_left"]: raise ValueError("Unsupported mode {}; the supported modes are " - "'dense', 'lsqr' and 'lsmr'".format(mode)) + "'dense', 'lsqr', 'lsmr', 'lpgd', 'lpgd_right' and 'lpgd_left'".format(mode)) + if mode in ["dense", "lsqr", "lsmr"] and P is not None: + raise ValueError("Dense, lsqr, and lsmr modes currently do not support quadratic objectives. "\ + "Consider switching to 'lpgd' mode.") if np.isnan(A.data).any(): raise RuntimeError("Found a NaN in A.") @@ -529,8 +555,13 @@ def solve_and_derivative_internal(A, b, c, cone_dict, solve_method=None, # eliminate explicit zeros in A, we no longer need them A.eliminate_zeros() + if "derivative_kwargs" in kwargs: + derivative_kwargs = kwargs.pop("derivative_kwargs") + else: + derivative_kwargs = {} + solver_kwargs = kwargs result = solve_internal(A, b, c, cone_dict, solve_method=solve_method, - warm_start=warm_start, raise_on_error=raise_on_error, **kwargs) + warm_start=warm_start, raise_on_error=raise_on_error, P=P, **kwargs) x = result["x"] y = result["y"] s = result["s"] @@ -538,92 +569,233 @@ def solve_and_derivative_internal(A, b, c, cone_dict, solve_method=None, # pre-compute quantities for the derivative m, n = A.shape N = m + n + 1 - cones = cone_lib.parse_cone_dict(cone_dict) - cones_parsed = cone_lib.parse_cone_dict_cpp(cones) - z = (x, y - s, np.array([1])) - u, v, w = z - Q = sparse.bmat([ - [None, A.T, np.expand_dims(c, - 1)], - [-A, None, np.expand_dims(b, -1)], - [-np.expand_dims(c, -1).T, -np.expand_dims(b, -1).T, None] - ]) - - D_proj_dual_cone = _diffcp.dprojection(v, cones_parsed, True) - if mode == "dense": - Q_dense = Q.todense() - M = _diffcp.M_dense(Q_dense, cones_parsed, u, v, w) - MT = M.T - elif mode in ("lsqr", "lsmr"): - M = _diffcp.M_operator(Q, cones_parsed, u, v, w) - MT = M.transpose() - - pi_z = pi(z, cones) - - def derivative(dA, db, dc, **kwargs): - """Applies derivative at (A, b, c) to perturbations dA, db, dc + if "lpgd" not in mode: # pre-compute quantities for the derivative + cones = cone_lib.parse_cone_dict(cone_dict) + cones_parsed = cone_lib.parse_cone_dict_cpp(cones) + z = (x, y - s, np.array([1])) + u, v, w = z + + Q = sparse.bmat([ + [None, A.T, np.expand_dims(c, - 1)], + [-A, None, np.expand_dims(b, -1)], + [-np.expand_dims(c, -1).T, -np.expand_dims(b, -1).T, None] + ]) + + D_proj_dual_cone = _diffcp.dprojection(v, cones_parsed, True) + if mode == "dense": + Q_dense = Q.todense() + M = _diffcp.M_dense(Q_dense, cones_parsed, u, v, w) + MT = M.T + elif mode in ("lsqr", "lsmr"): + M = _diffcp.M_operator(Q, cones_parsed, u, v, w) + MT = M.transpose() + + pi_z = pi(z, cones) + + def derivative(dA, db, dc, dP=None, **kwargs): + """Applies derivative at (A, b, c) to perturbations dA, db, dc, dP Args: dA: SciPy sparse matrix in CSC format; must have same sparsity pattern as the matrix `A` from the cone program db: NumPy array representing perturbation in `b` dc: NumPy array representing perturbation in `c` + dP: (optional) SciPy sparse matrix in CSC format; must have + same sparsity pattern as `P` from the cone program Returns: NumPy arrays dx, dy, ds, the result of applying the derivative to the perturbations. 
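+
+           Example (hedged sketch): in the LPGD modes, `tau` and `rho` may
+           also be supplied per call if they were not already fixed via
+           `derivative_kwargs` at solve time, e.g.
+
+               dx, dy, ds = derivative(dA, db, dc, tau=0.1, rho=0.1)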
""" - dQ = sparse.bmat([ - [None, dA.T, np.expand_dims(dc, - 1)], - [-dA, None, np.expand_dims(db, -1)], - [-np.expand_dims(dc, -1).T, -np.expand_dims(db, -1).T, None] - ]) - rhs = dQ @ pi_z - if np.allclose(rhs, 0): - dz = np.zeros(rhs.size) - elif mode == "dense": - dz = _diffcp._solve_derivative_dense(M, MT, rhs) - elif mode == "lsqr": - dz = _diffcp.lsqr(M, rhs).solution - elif mode == "lsmr": - M_sp = sparse.linalg.LinearOperator(dQ.shape, matvec=M.matvec, rmatvec=M.rmatvec) - dz, istop, itn, normr, normar, norma, conda, normx = sparse.linalg.lsmr(M_sp, rhs, maxiter=10*M_sp.shape[0], atol=1e-12, btol=1e-12) - - du, dv, dw = np.split(dz, [n, n + m]) - dx = du - x * dw - dy = D_proj_dual_cone.matvec(dv) - y * dw - ds = D_proj_dual_cone.matvec(dv) - dv - s * dw - return -dx, -dy, -ds - - def adjoint_derivative(dx, dy, ds, **kwargs): + if P is None and dP is not None: + raise ValueError("P must be provided if dP is not None.") + + if "lpgd" in mode: + dx, dy, ds = derivative_lpgd(dA, db, dc, dP=dP, **derivative_kwargs, **kwargs) + return dx, dy, ds + else: + dQ = sparse.bmat([ + [None, dA.T, np.expand_dims(dc, - 1)], + [-dA, None, np.expand_dims(db, -1)], + [-np.expand_dims(dc, -1).T, -np.expand_dims(db, -1).T, None] + ]) + rhs = dQ @ pi_z + if np.allclose(rhs, 0): + dz = np.zeros(rhs.size) + elif mode == "dense": + dz = _diffcp._solve_derivative_dense(M, MT, rhs) + elif mode == "lsqr": + dz = _diffcp.lsqr(M, rhs).solution + elif mode == "lsmr": + M_sp = sparse.linalg.LinearOperator(dQ.shape, matvec=M.matvec, rmatvec=M.rmatvec) + dz, istop, itn, normr, normar, norma, conda, normx = sparse.linalg.lsmr(M_sp, rhs, maxiter=10*M_sp.shape[0], atol=1e-12, btol=1e-12) + + du, dv, dw = np.split(dz, [n, n + m]) + dx = du - x * dw + dy = D_proj_dual_cone.matvec(dv) - y * dw + ds = D_proj_dual_cone.matvec(dv) - dv - s * dw + return -dx, -dy, -ds + + def adjoint_derivative(dx, dy, ds, return_dP=False, **kwargs): """Applies adjoint of derivative at (A, b, c) to perturbations dx, dy, ds Args: dx: NumPy array representing perturbation in `x` dy: NumPy array representing perturbation in `y` ds: NumPy array representing perturbation in `s` + return_dP: flag to return dP Returns: (`dA`, `db`, `dc`), the result of applying the adjoint to the perturbations; the sparsity pattern of `dA` matches that of `A`. """ - dw = -(x @ dx + y @ dy + s @ ds) - dz = np.concatenate( - [dx, D_proj_dual_cone.rmatvec(dy + ds) - ds, np.array([dw])]) - - if np.allclose(dz, 0): - r = np.zeros(dz.shape) - elif mode == "dense": - r = _diffcp._solve_adjoint_derivative_dense(M, MT, dz) - elif mode == "lsqr": - r = _diffcp.lsqr(MT, dz).solution - elif mode == "lsmr": - MT_sp = sparse.linalg.LinearOperator(dz.shape*2, matvec=MT.matvec, rmatvec=MT.rmatvec) - r, istop, itn, normr, normar, norma, conda, normx = sparse.linalg.lsmr(MT_sp, dz, maxiter=10*MT_sp.shape[0], atol=1e-10, btol=1e-10) - - values = pi_z[cols] * r[rows + n] - pi_z[n + rows] * r[cols] - dA = sparse.csc_matrix((values, (rows, cols)), shape=A.shape) - db = pi_z[n:n + m] * r[-1] - pi_z[-1] * r[n:n + m] - dc = pi_z[:n] * r[-1] - pi_z[-1] * r[:n] - - return dA, db, dc + if "lpgd" in mode: + return adjoint_derivative_lpgd(dx, dy, ds, return_dP=return_dP, **derivative_kwargs, **kwargs) + else: + if return_dP: + raise ValueError("Modes 'dense', 'lsqr', 'lsmr' currently do not support "\ + "differentiating with respect to P. 
Consider switching to 'lpgd' mode.") + dw = -(x @ dx + y @ dy + s @ ds) + dz = np.concatenate( + [dx, D_proj_dual_cone.rmatvec(dy + ds) - ds, np.array([dw])]) + + if np.allclose(dz, 0): + r = np.zeros(dz.shape) + elif mode == "dense": + r = _diffcp._solve_adjoint_derivative_dense(M, MT, dz) + elif mode == "lsqr": + r = _diffcp.lsqr(MT, dz).solution + elif mode == "lsmr": + MT_sp = sparse.linalg.LinearOperator(dz.shape*2, matvec=MT.matvec, rmatvec=MT.rmatvec) + r, istop, itn, normr, normar, norma, conda, normx = sparse.linalg.lsmr(MT_sp, dz, maxiter=10*MT_sp.shape[0], atol=1e-10, btol=1e-10) + + values = pi_z[cols] * r[rows + n] - pi_z[n + rows] * r[cols] + dA = sparse.csc_matrix((values, (rows, cols)), shape=A.shape) + db = pi_z[n:n + m] * r[-1] - pi_z[-1] * r[n:n + m] + dc = pi_z[:n] * r[-1] - pi_z[-1] * r[:n] + return dA, db, dc + + def derivative_lpgd(dA, db, dc, tau, rho, dP=None): + """Computes standard finite difference derivative at (A, b, c) to perturbations dA, db, dc, dP + with optional regularization rho. + Args: + dA: SciPy sparse matrix in CSC format; must have same sparsity + pattern as the matrix `A` from the cone program + db: NumPy array representing perturbation in `b` + dc: NumPy array representing perturbation in `c` + tau: Perturbation strength parameter + rho: Regularization strength parameter + dP: (optional) SciPy sparse matrix in CSC format; must have + same sparsity pattern as `P` from the cone program + Returns: + NumPy arrays dx, dy, ds, the result of applying the derivative + to the perturbations. + """ + + if tau is None or tau <= 0: + raise ValueError(f"LPGD mode requires tau > 0, got tau={tau}.") + if rho < 0: + raise ValueError(f"LPGD mode requires rho >= 0, got rho={rho}.") + + if mode in ["lpgd", "lpgd_right"]: # Perturb the problem to the right + try: + x_right, y_right, s_right = compute_perturbed_solution( + dA, db, dc, dP, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal) + except SolverError as e: + raise SolverError(f"Computation of right-perturbed problem failed: {e}. "\ + "Consider decreasing tau or swiching to 'lpgd_left' mode.") + + if mode in ["lpgd", "lpgd_left"]: # Perturb the problem to the left + try: + x_left, y_left, s_left = compute_perturbed_solution( + dA, db, dc, dP, -tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal) + except SolverError as e: + raise SolverError(f"Computation of left-perturbed problem failed: {e}. "\ + "Consider decreasing tau or swiching to 'lpgd_right' mode.") + + if mode == "lpgd": + dx = (x_right - x_left) / (2 * tau) + dy = (y_right - y_left) / (2 * tau) + ds = (s_right - s_left) / (2 * tau) + elif mode == "lpgd_right": + dx = (x_right - x) / tau + dy = (y_right - y) / tau + ds = (s_right - s) / tau + elif mode == "lpgd_left": + dx = (x - x_left) / tau + dy = (y - y_left) / tau + ds = (s - s_left) / tau + else: + raise ValueError(f"Only modes 'lpgd', 'lpgd_left', 'lpgd_right' can be used, got {mode}.") + return dx, dy, ds + + def adjoint_derivative_lpgd(dx, dy, ds, tau, rho, return_dP=False): + """Lagrangian Proximal Gradient Descent (LPGD) [arxiv.org/abs/2407.05920] + Computes informative replacements for adjoint derivatives given incoming gradients dx, dy, ds, + can be regarded as an efficient adjoint finite difference method. + Tau controls the perturbation strength. In the limit tau -> 0, LPGD computes the exact adjoint derivative. 
+ For finite tau, LPGD computes an informative replacement of the adjoint derivative, avoiding degeneracies. + Rho controls the regularization strength. If rho > 0, the perturbed problem is regularized. + + Args: + dx: NumPy array representing perturbation in `x` + dy: NumPy array representing perturbation in `y` + ds: NumPy array representing perturbation in `s` + tau: Perturbation strength parameter + rho: Regularization strength parameter + return_dP: flag to return dP + Returns: + (`dA`, `db`, `dc`) if return_dP is False, + (`dA`, `db`, `dc`, `dP`) if return_dP is True, + the result of applying the LPGD to the perturbations; + the sparsity pattern of `dA`, `dP` matches that of `A`, `P`. + """ + + if tau is None or tau <= 0: + raise ValueError(f"LPGD mode requires tau > 0, got tau={tau}.") + if rho < 0: + raise ValueError(f"LPGD mode requires rho >= 0, got rho={rho}.") + + if mode in ["lpgd", "lpgd_right"]: # Perturb the problem to the right + try: + x_right, y_right, _ = compute_adjoint_perturbed_solution( + dx, dy, ds, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal) + except SolverError as e: + raise SolverError(f"Computation of right-perturbed problem failed: {e}. "\ + "Consider decreasing tau or swiching to 'lpgd_left' mode.") + + if mode in ["lpgd", "lpgd_left"]: # Perturb the problem to the left + try: + x_left, y_left, _ = compute_adjoint_perturbed_solution( + dx, dy, ds, -tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal) + except SolverError as e: + raise SolverError(f"Computation of left-perturbed problem failed: {e}. "\ + "Consider decreasing tau or swiching to 'lpgd_right' mode.") + + if mode == "lpgd": + dc = (x_right - x_left) / (2 * tau) + db = - (y_right - y_left) / (2 * tau) + dA_data = (y_right[rows] * x_right[cols] - y_left[rows] * x_left[cols]) / (2 * tau) + dP_data = (x_right[cols] * x_right[cols] - x_left[cols] * x_left[cols]) / (2 * tau) + elif mode == "lpgd_right": + dc = (x_right - x) / tau + db = - (y_right - y) / tau + dA_data = (y_right[rows] * x_right[cols] - y[rows] * x[cols]) / tau + dP_data = (x_right[cols] * x_right[cols] - x[cols] * x[cols]) / tau + elif mode == "lpgd_left": + dc = (x - x_left) / tau + db = - (y - y_left) / tau + dA_data = (y[rows] * x[cols] - y_left[rows] * x_left[cols]) / tau + dP_data = (x[cols] * x[cols] - x_left[cols] * x_left[cols]) / tau + else: + raise ValueError(f"Only modes 'lpgd', 'lpgd_left', 'lpgd_right' can be used, got {mode}.") + + dA = sparse.csc_matrix((dA_data, (rows, cols)), shape=A.shape) + + if return_dP: + dP = sparse.csc_matrix((dP_data, (cols, cols)), shape=P.shape) + return dA, db, dc, dP + else: + return dA, db, dc + result["D"] = derivative result["DT"] = adjoint_derivative diff --git a/diffcp/utils.py b/diffcp/utils.py index 22dc61a..c927747 100644 --- a/diffcp/utils.py +++ b/diffcp/utils.py @@ -1,6 +1,7 @@ import diffcp.cones as cone_lib import numpy as np from scipy import sparse +from copy import deepcopy def scs_data_from_cvxpy_problem(problem): @@ -36,3 +37,175 @@ def get_random_like(A, randomness): rows, cols = A.nonzero() values = randomness(A.nnz) return sparse.csc_matrix((values, (rows, cols)), shape=A.shape) + + +def regularize_P(P, rho, size): + """Regularizes the matrix P by adding rho * I to it.""" + if rho > 0: + reg = rho * sparse.eye(size, format='csc') + if P is None: + P_reg = reg + else: + P_reg = P + reg + else: + P_reg = P + return P_reg + + +def embed_problem(A, b, c, P, cone_dict): + """Embeds the problem into a 
larger problem that allows costs on the slack variables + A_emb = [A, I; 0, -I] + b_emb = [b; 0] + c_emb = [c; 0] + P_emb = [P, 0; 0, 0] + cone_dict_emb = cone_dict with z shifted by m + """ + m = b.shape[0] + A_emb = sparse.bmat([ + [A, sparse.eye(m, format=A.format)], + [None, -sparse.eye(m, format=A.format)] + ]).tocsc() + b_emb = np.hstack([b, np.zeros(m)]) + c_emb = np.hstack([c, np.zeros(m)]) + if P is not None: + P_emb = sparse.bmat([ + [P, None], + [None, np.zeros((m, m))] + ]).tocsc() + else: + P_emb = None + cone_dict_emb = deepcopy(cone_dict) + if 'z' in cone_dict_emb: + cone_dict_emb['z'] += m + else: + cone_dict_emb['z'] = m + return A_emb, b_emb, c_emb, P_emb, cone_dict_emb + + +def compute_perturbed_solution(dA, db, dc, dP, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal): + """ + Computes the perturbed solution x_right, y_right, s_right at (A, b, c) with + perturbations dA, db, dc and optional regularization rho. + + Args: + dA: SciPy sparse matrix in CSC format; must have same sparsity + pattern as the matrix `A` from the cone program + db: NumPy array representing perturbation in `b` + dc: NumPy array representing perturbation in `c` + dP: (optional) SciPy sparse matrix in CSC format; must have same sparsity + pattern as the matrix `P` from the cone program + tau: Perturbation strength parameter + rho: Regularization strength parameter + Returns: + x_pert: Perturbed solution of the primal variable x + y_pert: Perturbed solution of the dual variable y + s_pert: Perturbed solution of the slack variable s + """ + m, n = A.shape + + # Perturb the problem + A_pert = A + tau * dA + b_pert = b + tau * db + c_pert = c + tau * dc + if dP is not None: + P_pert = P + tau * dP + else: + P_pert = P + + # Regularize: Effectively adds a rho/2 |x-x^*|^2 term to the objective + P_pert_reg = regularize_P(P_pert, rho=rho, size=n) + c_pert_reg = c_pert - rho * x + + # Set warm start + warm_start = (x, y, s) if solve_method not in ["ECOS", "Clarabel"] else None + + # Solve the perturbed problem + result_pert = solve_internal(A=A_pert, b=b_pert, c=c_pert_reg, P=P_pert_reg, cone_dict=cone_dict, + solve_method=solve_method, warm_start=warm_start, **solver_kwargs) + # Extract the solutions + x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"] + return x_pert, y_pert, s_pert + + +def compute_adjoint_perturbed_solution(dx, dy, ds, tau, rho, A, b, c, P, cone_dict, x, y, s, solver_kwargs, solve_method, solve_internal): + """ + Computes the adjoint perturbed solution x_right, y_right, s_right (Lagrangian Proximal Map) + by solving the following perturbed problem with perturbations dx, dy, ds + and perturbation/regularization parameters tau/rho: + argmin. + + tau*( + ) + rho/2 |x-x^*|^2 + subject to Ax + s = b-tau*dy + s \in K + + For ds=0 we solve + argmin. + + subject to Ax + s = b-tau*dy + s \in K + This is just a perturbed instance of the forward optimization problem. + + For ds!=0 we rewrite the problem as an embedded problem. + We add a constraint x'=s, replace all appearances of s with x' and solve + argmin. <[x, x'], [[P+rho*I, 0]; [x, x']> + <[c+tau*dx-rho*x^*, tau*ds-rho*s^*], [x, x']> + [ 0, rho*I]] + subject to [[A, I]; [x, x'] + [s'; s] = [b-tau*dy; 0] + [0, -I]] + (s', s) \in (0 x K) + Note that we also add a regularizer on s in this case (rho/2 |s-s^*|^2). 
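+    (For reference: the embedded primal variable [x, x'] has size n+m and A_emb
+    has shape (2m, n+m); see `embed_problem` above.)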
+
+    Args:
+        dx: NumPy array representing perturbation in `x`
+        dy: NumPy array representing perturbation in `y`
+        ds: NumPy array representing perturbation in `s`
+        tau: Perturbation strength parameter
+        rho: Regularization strength parameter
+    Returns:
+        x_pert: Perturbed solution of the primal variable x
+        y_pert: Perturbed solution of the dual variable y
+        s_pert: Perturbed solution of the slack variable s
+    """
+    m, n = A.shape
+
+    # The cases ds = 0 and ds != 0 are handled separately, see docstring
+    if np.isclose(np.sum(np.abs(ds)), 0):
+        # Perturb problem (Note: perturb primal and dual linear terms in different directions)
+        c_pert = c + tau * dx
+        b_pert = b - tau * dy
+
+        # Regularize: Effectively adds a rho/2 |x-x^*|^2 term to the objective
+        P_reg = regularize_P(P, rho=rho, size=n)
+        c_pert_reg = c_pert - rho * x
+
+        # Set warm start
+        warm_start = (x, y, s) if solve_method not in ["ECOS", "Clarabel"] else None
+
+        # Solve the perturbed problem
+        # Note: in the special case solve_method=='SCS' and rho==0, this could be sped up significantly by using solver.update
+        result_pert = solve_internal(A=A, b=b_pert, c=c_pert_reg, P=P_reg, cone_dict=cone_dict,
+                                     solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
+        # Extract the solutions
+        x_pert, y_pert, s_pert = result_pert["x"], result_pert["y"], result_pert["s"]
+    else:
+        # Embed problem (see docstring)
+        A_emb, b_emb, c_emb, P_emb, cone_dict_emb = embed_problem(A, b, c, P, cone_dict)
+
+        # Perturb problem (Note: perturb primal and dual linear terms in different directions)
+        b_emb_pert = b_emb - tau * np.hstack([dy, np.zeros(m)])
+        c_emb_pert = c_emb + tau * np.hstack([dx, ds])
+
+        # Regularize: Effectively adds a rho/2 (|x-x^*|^2 + |s-s^*|^2) term to the objective
+        P_emb_reg = regularize_P(P_emb, rho=rho, size=n+m)
+        c_emb_pert_reg = c_emb_pert - rho * np.hstack([x, s])
+
+        # Set warm start
+        if solve_method in ["ECOS", "Clarabel"]:
+            warm_start = None
+        else:
+            warm_start = (np.hstack([x, s]), np.hstack([y, y]), np.hstack([s, s]))
+
+        # Solve the embedded problem
+        result_pert = solve_internal(A=A_emb, b=b_emb_pert, c=c_emb_pert_reg, P=P_emb_reg, cone_dict=cone_dict_emb,
+                                     solve_method=solve_method, warm_start=warm_start, **solver_kwargs)
+        # Extract the solutions
+        x_pert = result_pert['x'][:n]
+        y_pert = result_pert['y'][:m]
+        s_pert = result_pert['x'][n:n+m]
+    return x_pert, y_pert, s_pert
diff --git a/examples/batch_clarabel_lpgd_quadratic.py b/examples/batch_clarabel_lpgd_quadratic.py
new file mode 100644
index 0000000..11654fb
--- /dev/null
+++ b/examples/batch_clarabel_lpgd_quadratic.py
@@ -0,0 +1,47 @@
+import diffcp
+import time
+import numpy as np
+import scipy.sparse as sparse
+
+m = 100
+n = 50
+
+batch_size = 16
+n_jobs = 2
+
+As, bs, cs, Ks, Ps = [], [], [], [], []
+for _ in range(batch_size):
+    A, b, c, K = diffcp.utils.least_squares_eq_scs_data(m, n)
+    P = sparse.csc_matrix((c.size, c.size))
+    P = sparse.triu(P).tocsc()
+    As += [A]
+    bs += [b]
+    cs += [c]
+    Ks += [K]
+    Ps += [P]
+
+def time_function(f, N=1):
+    result = []
+    for i in range(N):
+        tic = time.time()
+        f()
+        toc = time.time()
+        result += [toc - tic]
+    return np.mean(result), np.std(result)
+
+for n_jobs in range(1, 8):
+    def f_forward():
+        return diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
+            n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, solve_method="Clarabel", verbose=False, mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.1),
+            Ps=Ps)
+    xs, ys, ss, D_batch, DT_batch = diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
+        n_jobs_forward=1, n_jobs_backward=n_jobs, solve_method="Clarabel", verbose=False,
+        mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.1), Ps=Ps)
+
+    def f_backward():
+        DT_batch(xs, ys, ss, return_dP=True)
+
+    mean_forward, std_forward = time_function(f_forward)
+    mean_backward, std_backward = time_function(f_backward)
+    print("%03d | %4.4f +/- %2.2f | %4.4f +/- %2.2f" %
+          (n_jobs, mean_forward, std_forward, mean_backward, std_backward))
diff --git a/examples/batch_ecos.py b/examples/batch_ecos.py
index 9eca81b..0684305 100644
--- a/examples/batch_ecos.py
+++ b/examples/batch_ecos.py
@@ -31,9 +31,9 @@ def time_function(f, N=1):
 for n_jobs in range(1, 8):
     def f_forward():
         return diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
-                                                 n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, solver="ECOS", verbose=False)
+                                                 n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False)
     xs, ys, ss, D_batch, DT_batch = diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
-                                                                      n_jobs_forward=1, n_jobs_backward=n_jobs, solver="ECOS", verbose=False)
+                                                                      n_jobs_forward=1, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False)
 
     def f_backward():
         DT_batch(xs, ys, ss, mode="lsqr")
diff --git a/examples/batch_ecos_lpgd.py b/examples/batch_ecos_lpgd.py
new file mode 100644
index 0000000..9f5b0a7
--- /dev/null
+++ b/examples/batch_ecos_lpgd.py
@@ -0,0 +1,44 @@
+import diffcp
+import time
+import numpy as np
+
+m = 100
+n = 50
+
+batch_size = 16
+n_jobs = 1
+
+As, bs, cs, Ks = [], [], [], []
+for _ in range(batch_size):
+    A, b, c, K = diffcp.utils.least_squares_eq_scs_data(m, n)
+    As += [A]
+    bs += [b]
+    cs += [c]
+    Ks += [K]
+
+
+def time_function(f, N=1):
+    result = []
+    for i in range(N):
+        tic = time.time()
+        f()
+        toc = time.time()
+        result += [toc - tic]
+    return np.mean(result), np.std(result)
+
+for n_jobs in range(1, 8):
+    def f_forward():
+        return diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
+            n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False,
+            mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.0))
+    xs, ys, ss, D_batch, DT_batch = diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
+        n_jobs_forward=1, n_jobs_backward=n_jobs, solve_method="ECOS", verbose=False,
+        mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.0))
+
+    def f_backward():
+        DT_batch(xs, ys, ss)
+
+    mean_forward, std_forward = time_function(f_forward)
+    mean_backward, std_backward = time_function(f_backward)
+    print("%03d | %4.4f +/- %2.2f | %4.4f +/- %2.2f" %
+          (n_jobs, mean_forward, std_forward, mean_backward, std_backward))
diff --git a/examples/batch_lpgd.py b/examples/batch_lpgd.py
new file mode 100644
index 0000000..6992433
--- /dev/null
+++ b/examples/batch_lpgd.py
@@ -0,0 +1,40 @@
+import diffcp
+import time
+import numpy as np
+
+m = 100
+n = 50
+
+batch_size = 16
+n_jobs = 1
+
+As, bs, cs, Ks = [], [], [], []
+for _ in range(batch_size):
+    A, b, c, K = diffcp.utils.least_squares_eq_scs_data(m, n)
+    As += [A]
+    bs += [b]
+    cs += [c]
+    Ks += [K]
+
+def time_function(f, N=1):
+    result = []
+    for i in range(N):
+        tic = time.time()
+        f()
+        toc = time.time()
+        result += [toc-tic]
+    return np.mean(result), np.std(result)
+
+for n_jobs in range(1, 5):
+    def f_forward():
+        return diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
+            n_jobs_forward=n_jobs, n_jobs_backward=n_jobs, mode='lpgd_left')
+    xs, ys, ss, D_batch, DT_batch = diffcp.solve_and_derivative_batch(As, bs, cs, Ks,
+        n_jobs_forward=1, n_jobs_backward=n_jobs, mode='lpgd_left')
+    def f_backward():
+        DT_batch(xs, ys, ss, tau=0.1, rho=0.1)
+
+    mean_forward, std_forward = time_function(f_forward)
+    mean_backward, std_backward = time_function(f_backward)
+    print("%03d | %4.4f +/- %2.2f | %4.4f +/- %2.2f" %
+          (n_jobs, mean_forward, std_forward, mean_backward, std_backward))
\ No newline at end of file
diff --git a/examples/clarabel_example_lpgd_quadratic.py b/examples/clarabel_example_lpgd_quadratic.py
new file mode 100644
index 0000000..613e2f7
--- /dev/null
+++ b/examples/clarabel_example_lpgd_quadratic.py
@@ -0,0 +1,40 @@
+import diffcp
+
+import numpy as np
+import utils
+from scipy import sparse
+
+np.set_printoptions(precision=5, suppress=True)
+
+
+# We generate a random cone program with a cone
+# defined as a product of a 3-d zero cone, 3-d positive orthant cone,
+# and a 5-d second order cone.
+K = {
+    'z': 3,
+    'l': 3,
+    'q': [5]
+}
+
+m = 3 + 3 + 5
+n = 5
+
+np.random.seed(0)
+
+A, b, c = utils.random_cone_prog(m, n, K)
+P = sparse.csc_matrix((c.size, c.size))
+P = sparse.triu(P).tocsc()
+
+# We solve the cone program and get the derivative and its adjoint
+x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
+    A, b, c, K, P=P, solve_method="Clarabel", verbose=False, mode="lpgd", derivative_kwargs=dict(tau=0.1, rho=0.0))
+
+print("x =", x)
+print("y =", y)
+print("s =", s)
+
+# Adjoint derivative
+dA, db, dc, dP = adjoint_derivative(dx=c, dy=np.zeros(m), ds=np.zeros(m), return_dP=True)
+
+# Derivative (dummy inputs)
+dx, dy, ds = derivative(dA=A, db=b, dc=c, dP=P)
diff --git a/examples/dual_example.py b/examples/dual_example.py
index ad0c816..3c0220d 100644
--- a/examples/dual_example.py
+++ b/examples/dual_example.py
@@ -6,10 +6,10 @@
 
 
 # We generate a random cone program with a cone
-# defined as a product of a 3-d fixed cone, 3-d positive orthant cone,
+# defined as a product of a 3-d zero cone, 3-d positive orthant cone,
 # and a 5-d second order cone.
 K = {
-    'f': 3,
+    'z': 3,
     'l': 3,
     'q': [5]
 }
diff --git a/examples/dual_example_lpgd.py b/examples/dual_example_lpgd.py
new file mode 100644
index 0000000..e0a802f
--- /dev/null
+++ b/examples/dual_example_lpgd.py
@@ -0,0 +1,39 @@
+import diffcp
+
+import numpy as np
+import utils
+np.set_printoptions(precision=5, suppress=True)
+
+
+# We generate a random cone program with a cone
+# defined as a product of a 3-d zero cone, 3-d positive orthant cone,
+# and a 5-d second order cone.
+K = {
+    'z': 3,
+    'l': 3,
+    'q': [5]
+}
+
+m = 3 + 3 + 5
+n = 5
+
+np.random.seed(0)
+
+A, b, c = utils.random_cone_prog(m, n, K)
+
+# We solve the cone program and get the derivative and its adjoint
+x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
+    A, b, c, K, eps=1e-10, mode="lpgd", derivative_kwargs=dict(tau=1e-3, rho=0.1))
+
+print("x =", x)
+print("y =", y)
+print("s =", s)
+
+# We evaluate the gradient of the objective with respect to A, b and c.
+dA, db, dc = adjoint_derivative(c, np.zeros(m), np.zeros(m))
+
+# The gradient of the objective with respect to b should be
+# equal to minus the dual variable y (see, e.g., page 268 of Convex Optimization by
+# Boyd & Vandenberghe).
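+# (With LPGD and a finite tau, here 1e-3, the match is approximate; it
+# tightens as tau -> 0, cf. the LPGD docstrings in diffcp/cone_program.py.)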
+print("db =", db) +print("-y =", -y) diff --git a/examples/ecos_example.py b/examples/ecos_example.py index 9a64182..9d1534b 100644 --- a/examples/ecos_example.py +++ b/examples/ecos_example.py @@ -6,10 +6,10 @@ # We generate a random cone program with a cone -# defined as a product of a 3-d fixed cone, 3-d positive orthant cone, +# defined as a product of a 3-d zero cone, 3-d positive orthant cone, # and a 5-d second order cone. K = { - 'f': 3, + 'z': 3, 'l': 3, 'q': [5] } @@ -23,7 +23,7 @@ # We solve the cone program and get the derivative and its adjoint x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative( - A, b, c, K, solver="ECOS", verbose=False) + A, b, c, K, solve_method="ECOS", verbose=False) print("x =", x) print("y =", y) diff --git a/examples/ecos_example_lpgd.py b/examples/ecos_example_lpgd.py new file mode 100644 index 0000000..bc8777a --- /dev/null +++ b/examples/ecos_example_lpgd.py @@ -0,0 +1,39 @@ +import diffcp + +import numpy as np +import utils +np.set_printoptions(precision=5, suppress=True) + + +# We generate a random cone program with a cone +# defined as a product of a 3-d zero cone, 3-d positive orthant cone, +# and a 5-d second order cone. +K = { + 'z': 3, + 'l': 3, + 'q': [5] +} + +m = 3 + 3 + 5 +n = 5 + +np.random.seed(0) + +A, b, c = utils.random_cone_prog(m, n, K) + +# We solve the cone program and get the derivative and its adjoint +x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative( + A, b, c, K, solve_method="ECOS", verbose=False, mode="lpgd", derivative_kwargs=dict(tau=0.1, rho=0.0)) + +print("x =", x) +print("y =", y) +print("s =", s) + +# We evaluate the gradient of the objective with respect to A, b and c. +dA, db, dc = adjoint_derivative(c, np.zeros(m), np.zeros(m)) + +# The gradient of the objective with respect to b should be +# equal to minus the dual variable y (see, e.g., page 268 of Convex Optimization by +# Boyd & Vandenberghe). +print("db =", db) +print("-y =", -y) diff --git a/examples/sdp_lpgd.py b/examples/sdp_lpgd.py new file mode 100644 index 0000000..e5cec85 --- /dev/null +++ b/examples/sdp_lpgd.py @@ -0,0 +1,67 @@ +import cvxpy as cp +import diffcp +import numpy as np + +import time + + +def scs_data_from_cvxpy_problem(problem): + data = problem.get_problem_data(cp.SCS)[0] + cone_dims = cp.reductions.solvers.conic_solvers.scs_conif.dims_to_solver_dict(data[ + "dims"]) + return data["A"], data["b"], data["c"], cone_dims + + +def randn_symm(n): + A = np.random.randn(n, n) + return (A + A.T) / 2 + + +def randn_psd(n): + A = 1. 
/ 10 * np.random.randn(n, n) + return A@A.T + + +def main(n=3, p=3): + # Generate problem data + C = randn_psd(n) + As = [randn_symm(n) for _ in range(p)] + Bs = np.random.randn(p) + + # Extract problem data using cvxpy + X = cp.Variable((n, n), PSD=True) + objective = cp.trace(C@X) + constraints = [cp.trace(As[i]@X) == Bs[i] for i in range(p)] + prob = cp.Problem(cp.Minimize(objective), constraints) + A, b, c, cone_dims = scs_data_from_cvxpy_problem(prob) + + # Print problem size + mn_plus_m_plus_n = A.size + b.size + c.size + n_plus_2n = c.size + 2 * b.size + entries_in_derivative = mn_plus_m_plus_n * n_plus_2n + print(f"""n={n}, p={p}, A.shape={A.shape}, nnz in A={A.nnz}, derivative={mn_plus_m_plus_n}x{n_plus_2n} ({entries_in_derivative} entries)""") + + # Compute solution and derivative maps + start = time.perf_counter() + x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative( + A, b, c, cone_dims, eps=1e-5, mode="lpgd") + end = time.perf_counter() + print("Compute solution and set up derivative: %.2f s." % (end - start)) + + # Derivative + lpgd_args = dict(tau=0.1, rho=0.1) + start = time.perf_counter() + dA, db, dc = adjoint_derivative(diffcp.cones.vec_symm( + C), np.zeros(y.size), np.zeros(s.size), **lpgd_args) + end = time.perf_counter() + print("Evaluate derivative: %.2f s." % (end - start)) + + # Adjoint of derivative + start = time.perf_counter() + dx, dy, ds = derivative(A, b, c, **lpgd_args) + end = time.perf_counter() + print("Evaluate adjoint of derivative: %.2f s." % (end - start)) + +if __name__ == '__main__': + np.random.seed(0) + main(50, 25)
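+
+# Note (hedged alternative): tau/rho could instead be fixed once at solve time,
+#   x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
+#       A, b, c, cone_dims, eps=1e-5, mode="lpgd", derivative_kwargs=lpgd_args)
+# in which case they should not be passed to the derivative calls again.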