From 4031dd0927c0d04f9c43bbc571b6e55a0209bc67 Mon Sep 17 00:00:00 2001
From: Brandon Amos <brandon.amos.cs@gmail.com>
Date: Sat, 2 Nov 2019 11:29:43 -0700
Subject: [PATCH 01/19] WIP proposal for solve_and_derivative interface

---
 module/__init__.py  |  2 +-
 module/interface.py | 94 +++++++++++++++++++++++++++++++++++++++------
 2 files changed, 84 insertions(+), 12 deletions(-)

diff --git a/module/__init__.py b/module/__init__.py
index 497b3ab9..935f8b77 100644
--- a/module/__init__.py
+++ b/module/__init__.py
@@ -1,3 +1,3 @@
 from osqp.interface import OSQP
-from osqp.interface import solve
+from osqp.interface import solve, solve_and_derivative
 from osqp._osqp import constant
diff --git a/module/interface.py b/module/interface.py
index d551c928..544202ac 100644
--- a/module/interface.py
+++ b/module/interface.py
@@ -5,12 +5,13 @@
 from builtins import object
 import osqp._osqp as _osqp  # Internal low level module
 import numpy as np
+import scipy.sparse as spa
+import scipy.sparse.linalg as sla
 from platform import system
 import osqp.codegen as cg
 import osqp.utils as utils
 import sys
 
-
 class OSQP(object):
     def __init__(self):
         self._model = _osqp.OSQP()
@@ -276,16 +277,87 @@ def codegen(self, folder, project_type='', parameters='vectors',
 
 
 def solve(P=None, q=None, A=None, l=None, u=None, **settings):
-        """
-        Solve problem of the form 
+    """
+    Solve problem of the form
+
+    minimize     1/2 x' * P * x + q' * x
+    subject to   l <= A * x <= u
+
+    solver settings can be specified as additional keyword arguments.
+    This function disables the GIL because it internally performs
+    setup solve and cleanup.
+    """
+
+    unpacked_data, settings = utils.prepare_data(P, q, A, l, u, **settings)
+    return _osqp.solve(*unpacked_data, **settings)
+
+
+def solve_and_derivative(P=None, q=None, A=None, l=None, u=None, **settings):
+    unpacked_data, settings = utils.prepare_data(P, q, A, l, u, **settings)
+    result = _osqp.solve(*unpacked_data, **settings)
+
+    m, n = A.shape
+    x = result.x
+    y = result.y
+    z = A.dot(x)
+
+    def adjoint_derivative(dx=None, dy=None, dz=None,
+                           P_idx=None, A_idx=None, diff_mode='full'):
+        if A_idx is None:
+            A_idx = A.nonzero()
+
+        if P_idx is None:
+            P_idx = P.nonzero()
+
+        if dy is not None or dz is not None:
+            raise NotImplementedError
+
+        if diff_mode == 'active':
+            # Taken from https://github.com/oxfordcontrol/osqp-python/blob/0363d028b2321017049360d2eb3c0cf206028c43/modulepurepy/_osqp.py#L1717
+            # Guess which linear constraints are lower-active, upper-active, free
+            ind_low = np.where(z - l < - y)[0]
+            ind_upp = np.where(u - z < y)[0]
+            n_low = len(ind_low)
+            n_upp = len(ind_upp)
+
+            # Form A_red from the assumed active constraints
+            A_red = spa.vstack([A[ind_low], A[ind_upp]])
+
+            # Form KKT linear system
+            KKT = spa.vstack([spa.hstack([P, A_red.T]),
+                            spa.hstack([A_red, spa.csc_matrix((n_low + n_upp, n_low + n_upp))])])
+            rhs = np.hstack([dx, np.zeros(n_low + n_upp)])
+
+            # Get solution
+            r_sol = sla.spsolve(KKT, rhs)
+            # r_sol = sla.lsqr(KKT, rhs)[0]
+
+            r_x =  r_sol[:n]
+            r_yl = r_sol[n:n + n_low]
+            r_yu = r_sol[n + n_low:]
+            r_y = np.zeros(m)
+            r_y[ind_low] = r_yl
+            r_y[ind_upp] = r_yu
+
+            dl = np.hstack([r_yl[np.where(ind_low == j)[0]] if j in ind_low else 0
+                for j in range(m)])
+            du = np.hstack([r_yu[np.where(ind_upp == j)[0]] if j in ind_upp else 0
+                for j in range(m)])
+        elif diff_mode == 'full':
+            # TODO: Add something like https://github.com/oxfordcontrol/osqpth/pull/5
+            # but use slack variables too
+            raise NotImplementedError
+        else:
+            raise RuntimeError(f"Unrecognized differentiation mode: {diff_mode}")
 
-        minimize     1/2 x' * P * x + q' * x
-        subject to   l <= A * x <= u
+        # Extract derivatives
+        rows, cols = P_idx
+        dP = -.5 * (r_x[rows] * x[cols] + r_x[cols] * x[rows])
 
-        solver settings can be specified as additional keyword arguments. 
-        This function disables the GIL because it internally performs 
-        setup solve and cleanup.
-        """
+        rows, cols = A_idx
+        dA = -(y[rows] * r_x[cols] + r_y[rows] * x[cols])
+        dq = -r_x
+        return (dP, dq, dA, dl, du)
+
+    return result, adjoint_derivative
 
-        unpacked_data, settings = utils.prepare_data(P, q, A, l, u, **settings)
-        return _osqp.solve(*unpacked_data, **settings)

From 6ad2632aefa6e5bc1932b5e59410dfda78492bb5 Mon Sep 17 00:00:00 2001
From: Brandon Amos <brandon.amos.cs@gmail.com>
Date: Sat, 2 Nov 2019 12:23:02 -0700
Subject: [PATCH 02/19] Replace f-string

---
 module/interface.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/module/interface.py b/module/interface.py
index 544202ac..4b837886 100644
--- a/module/interface.py
+++ b/module/interface.py
@@ -348,7 +348,7 @@ def adjoint_derivative(dx=None, dy=None, dz=None,
             # but use slack variables too
             raise NotImplementedError
         else:
-            raise RuntimeError(f"Unrecognized differentiation mode: {diff_mode}")
+            raise RuntimeError("Unrecognized differentiation mode: {}".format(diff_mode))
 
         # Extract derivatives
         rows, cols = P_idx

From 8d426639d449b2984b71cbd03467387f6e3e2e37 Mon Sep 17 00:00:00 2001
From: Bartolomeo Stellato <bartolomeo.stellato@gmail.com>
Date: Fri, 8 Nov 2019 10:46:26 -0500
Subject: [PATCH 03/19] Added object-oriented derivatives interface

- Release GIL on OSQP object functions
- Removed OSQP one-shot solve function (useless now and repeats code)
- Added test for 'qr_active' differentiation mode
- Dropped Python 2 support (next release will not support it)
---
 .travis.yml                                   |  14 -
 appveyor.yml                                  |   1 -
 extension/include/osqpmodulemethods.h         | 298 +-----------------
 extension/include/osqpobjectpy.h              |  11 +-
 extension/src/osqpmodule.c                    |  32 +-
 module/__init__.py                            |   1 -
 .../codegen/files_to_generate/emosqpmodule.c  |  49 +--
 module/interface.py                           |  58 ++--
 module/tests/basic_test.py                    |  17 +-
 module/tests/derivative_test.py               |  65 ++++
 module/tests/multithread_test.py              |  66 ++++
 osqp_sources                                  |   2 +-
 requirements.txt                              |   1 -
 setup.py                                      |  10 +-
 14 files changed, 200 insertions(+), 425 deletions(-)
 create mode 100644 module/tests/derivative_test.py
 create mode 100644 module/tests/multithread_test.py

diff --git a/.travis.yml b/.travis.yml
index be040dbd..309f2a51 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -21,9 +21,6 @@ env:
 
 matrix:
     include:
-      - os: linux
-        env:
-          - PYTHON_VERSION=2.7
       - os: linux
         env:
           - PYTHON_VERSION=3.5
@@ -33,9 +30,6 @@ matrix:
       - os: linux
         env:
           - PYTHON_VERSION=3.7
-      - os: osx
-        env:
-          - PYTHON_VERSION=2.7
       - os: osx
         env:
           - PYTHON_VERSION=3.5
@@ -45,14 +39,6 @@ matrix:
       - os: osx
         env:
           - PYTHON_VERSION=3.7
-    allow_failures:
-      - os: linux
-        env:
-          - PYTHON_VERSION=2.7
-      - os: osx
-        env:
-          - PYTHON_VERSION=2.7
-
 
 install:
   - if [[ $TRAVIS_OS_NAME == 'osx' ]]; then
diff --git a/appveyor.yml b/appveyor.yml
index a84c6a12..39e7f8fc 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -10,7 +10,6 @@ environment:
   PYPI_PASSWORD:
     secure: 3gQtEWf4jXJLJKP8oM22oCShTJ3VI+BtRYczfzO3RSQ=
   matrix:
-    - PYTHON_VERSION: 2.7
     - PYTHON_VERSION: 3.5
     - PYTHON_VERSION: 3.6
     - PYTHON_VERSION: 3.7
diff --git a/extension/include/osqpmodulemethods.h b/extension/include/osqpmodulemethods.h
index 219fb43b..8c4ed150 100644
--- a/extension/include/osqpmodulemethods.h
+++ b/extension/include/osqpmodulemethods.h
@@ -5,297 +5,6 @@
  * OSQP methods independently from any object                          *
  ***********************************************************************/
 
-static PyObject * OSQP_module_solve(OSQP *self, PyObject *args, PyObject *kwargs) {
-  c_int n, m;  // Problem dimensions
-  c_int exitflag_setup, exitflag_solve;
-
-  // Variables for setup
-  PyOSQPData *pydata;
-  OSQPData * data;
-  OSQPSettings * settings;
-  OSQPWorkspace * workspace;  // Pointer to C workspace structure
-  PyArrayObject *Px, *Pi, *Pp, *q, *Ax, *Ai, *Ap, *l, *u;
-
-
-  // Variables for solve
-  // Create status object
-  PyObject * status;
-  // Create obj_val object
-  PyObject * obj_val;
-  // Create solution objects
-  PyObject * x, *y, *prim_inf_cert, *dual_inf_cert;
-  // Define info related variables
-  static char *argparse_string_info;
-  PyObject *info_list;
-  PyObject *info;
-
-  // Results
-  PyObject *results_list;
-  PyObject *results;
-
-  npy_intp nd[1];
-  npy_intp md[1];
-
-  static char *kwlist[] = {"dims",                     // nvars and ncons
-			   "Px", "Pi", "Pp", "q",      // Cost function
-			   "Ax", "Ai", "Ap", "l", "u", // Constraints
-			   "scaling",
-			   "adaptive_rho", "adaptive_rho_interval",
-			   "adaptive_rho_tolerance", "adaptive_rho_fraction",
-			   "rho", "sigma", "max_iter", "eps_abs", "eps_rel",
-			   "eps_prim_inf", "eps_dual_inf", "alpha", "delta",
-			   "linsys_solver", "polish",
-			   "polish_refine_iter", "verbose",
-			   "scaled_termination",
-			   "check_termination", "warm_start",
-			   "time_limit", NULL};        // Settings
-
-#ifdef DLONG
-
-  // NB: linsys_solver is enum type which is stored as int (regardless on how c_int is defined).
-#ifdef DFLOAT
-  static char * argparse_string_setup = "(LL)O!O!O!O!O!O!O!O!O!|LLLffffLffffffiLLLLLLf";
-#else
-  static char * argparse_string_setup = "(LL)O!O!O!O!O!O!O!O!O!|LLLddddLddddddiLLLLLLd";
-#endif
-
-#else
-
-#ifdef DFLOAT
-  static char * argparse_string_setup = "(ii)O!O!O!O!O!O!O!O!O!|iiiffffiffffffiiiiiiif";
-#else
-  static char * argparse_string_setup = "(ii)O!O!O!O!O!O!O!O!O!|iiiddddiddddddiiiiiiid";
-#endif
-
-#endif
-
-  // Initialize settings
-  settings = (OSQPSettings *)c_malloc(sizeof(OSQPSettings));
-  osqp_set_default_settings(settings);
-
-  if( !PyArg_ParseTupleAndKeywords(args, kwargs, argparse_string_setup, kwlist,
-				   &n, &m,
-				   &PyArray_Type, &Px,
-				   &PyArray_Type, &Pi,
-				   &PyArray_Type, &Pp,
-				   &PyArray_Type, &q,
-				   &PyArray_Type, &Ax,
-				   &PyArray_Type, &Ai,
-				   &PyArray_Type, &Ap,
-				   &PyArray_Type, &l,
-				   &PyArray_Type, &u,
-				   &settings->scaling,
-				   &settings->adaptive_rho,
-				   &settings->adaptive_rho_interval,
-				   &settings->adaptive_rho_tolerance,
-				   &settings->adaptive_rho_fraction,
-				   &settings->rho,
-				   &settings->sigma,
-				   &settings->max_iter,
-				   &settings->eps_abs,
-				   &settings->eps_rel,
-				   &settings->eps_prim_inf,
-				   &settings->eps_dual_inf,
-				   &settings->alpha,
-				   &settings->delta,
-				   &settings->linsys_solver,
-				   &settings->polish,
-				   &settings->polish_refine_iter,
-				   &settings->verbose,
-				   &settings->scaled_termination,
-				   &settings->check_termination,
-				   &settings->warm_start,
-				   &settings->time_limit)) {
-    return (PyObject *) NULL;
-  }
-
-  // Create Data from parsed vectors
-  pydata = create_pydata(n, m, Px, Pi, Pp, q, Ax, Ai, Ap, l, u);
-  data = create_data(pydata);
-
-  // Perform setup and solve
-  // release the GIL
-  Py_BEGIN_ALLOW_THREADS;
-  // Create Workspace object
-  exitflag_setup = osqp_setup(&(workspace), data, settings);
-  exitflag_solve = osqp_solve(workspace);
-  // reacquire the GIL
-  Py_END_ALLOW_THREADS;
-  
-  // Cleanup data and settings
-  free_data(data, pydata);
-  c_free(settings);
-
-  // Check successful setup and solve
-  if (exitflag_setup){ // Workspace allocation error
-    PyErr_SetString(PyExc_ValueError, "Workspace allocation error!");
-    return (PyObject *) NULL;
-  }
-
-  if(exitflag_solve){
-      PyErr_SetString(PyExc_ValueError, "OSQP solve error!");
-      return (PyObject *) NULL;
-  }
-
-  // Temporary solution
-  nd[0] = (npy_intp)workspace->data->n;  // Dimensions in R^n
-  md[0] = (npy_intp)workspace->data->m;  // Dimensions in R^m
-
-  // If problem is not primal or dual infeasible store it
-  if ((workspace->info->status_val != OSQP_PRIMAL_INFEASIBLE) &&
-      (workspace->info->status_val != OSQP_PRIMAL_INFEASIBLE_INACCURATE) &&
-      (workspace->info->status_val != OSQP_DUAL_INFEASIBLE) &&
-      (workspace->info->status_val != OSQP_DUAL_INFEASIBLE_INACCURATE)){
-
-    // Primal and dual solutions
-    x = (PyObject *)PyArrayFromCArray(workspace->solution->x, nd);
-    y = (PyObject *)PyArrayFromCArray(workspace->solution->y, md);
-
-    // Infeasibility certificates -> None values
-    prim_inf_cert = PyArray_EMPTY(1, nd, NPY_OBJECT, 0);
-    dual_inf_cert = PyArray_EMPTY(1, md, NPY_OBJECT, 0);
-
-  } else if (workspace->info->status_val == OSQP_PRIMAL_INFEASIBLE ||
-	     workspace->info->status_val == OSQP_PRIMAL_INFEASIBLE_INACCURATE) {
-    // primal infeasible
-
-    // Primal and dual solution arrays -> None values
-    x = PyArray_EMPTY(1, nd, NPY_OBJECT, 0);
-    y = PyArray_EMPTY(1, md, NPY_OBJECT, 0);
-
-    // Primal infeasibility certificate
-    prim_inf_cert = (PyObject *)PyArrayFromCArray(workspace->delta_y, md);
-
-    // Dual infeasibility certificate -> None values
-    dual_inf_cert = PyArray_EMPTY(1, nd, NPY_OBJECT, 0);
-
-    // Set objective value to infinity
-    workspace->info->obj_val = NPY_INFINITY;
-
-  } else {
-    // dual infeasible
-
-    // Primal and dual solution arrays -> None values
-    x = PyArray_EMPTY(1, nd, NPY_OBJECT, 0);
-    y = PyArray_EMPTY(1, md, NPY_OBJECT, 0);
-
-    // Primal infeasibility certificate -> None values
-    prim_inf_cert = PyArray_EMPTY(1, md, NPY_OBJECT, 0);
-
-    // Dual infeasibility certificate
-    dual_inf_cert = (PyObject *)PyArrayFromCArray(workspace->delta_x, nd);
-
-    // Set objective value to -infinity
-    workspace->info->obj_val = -NPY_INFINITY;
-  }
-
-  /*  CREATE INFO OBJECT */
-  // Store status string
-  status = PyUnicode_FromString(workspace->info->status);
-
-  // Store obj_val
-  if (workspace->info->status_val == OSQP_NON_CVX) {	// non convex
-    obj_val = PyFloat_FromDouble(Py_NAN);
-  } else {
-    obj_val = PyFloat_FromDouble(workspace->info->obj_val);
-  }
-
-#ifdef PROFILING
-
-#ifdef DLONG
-
-#ifdef DFLOAT
-  argparse_string_info = "LOLLOfffffffLf";
-#else
-  argparse_string_info = "LOLLOdddddddLd";
-#endif
-
-#else
-
-#ifdef DFLOAT
-  argparse_string_info = "iOiiOfffffffif";
-#else
-  argparse_string_info = "iOiiOdddddddid";
-#endif
-
-#endif /* DLONG */
-
-  info_list = Py_BuildValue(argparse_string_info,
-			    workspace->info->iter,
-			    status,
-			    workspace->info->status_val,
-			    workspace->info->status_polish,
-			    obj_val,
-			    workspace->info->pri_res,
-			    workspace->info->dua_res,
-			    workspace->info->setup_time,
-			    workspace->info->solve_time,
-			    workspace->info->update_time,
-			    workspace->info->polish_time,
-			    workspace->info->run_time,
-			    workspace->info->rho_updates,
-			    workspace->info->rho_estimate
-			    );
-#else /* PROFILING */
-
-#ifdef DLONG
-
-#ifdef DFLOAT
-  argparse_string = "LOLLOffLf";
-#else
-  argparse_string = "LOLLOddLd";
-#endif
-
-#else
-
-#ifdef DFLOAT
-  argparse_string = "iOiiOffif";
-#else
-  argparse_string = "iOiiOddid";
-#endif
-
-#endif /* DLONG */
-
-  info_list = Py_BuildValue(argparse_string_info,
-			    workspace->info->iter,
-			    status,
-			    workspace->info->status_val,
-			    workspace->info->status_polish,
-			    obj_val,
-			    workspace->info->pri_res,
-			    workspace->info->dua_res,
-			    workspace->info->rho_updates,
-			    workspace->info->rho_estimate,
-			    );
-#endif /* PROFILING */
-
-  info = PyObject_CallObject((PyObject *) &OSQP_info_Type, info_list);
-
-  /* Release the info argument list. */
-  Py_DECREF(info_list);
-
-  /*  CREATE RESULTS OBJECT */
-  results_list = Py_BuildValue("OOOOO", x, y, prim_inf_cert, dual_inf_cert, info);
-
-  /* Call the class object. */
-  results = PyObject_CallObject((PyObject *) &OSQP_results_Type, results_list);
-
-  // Delete results list
-  Py_DECREF(results_list);
-
-  // Cleanup workspace
-  if (osqp_cleanup(workspace)) {
-    PyErr_SetString(PyExc_ValueError, "Workspace deallocation error!");
-    return (PyObject *) NULL;
-  }
-
-  // Return results    
-  return results;
-
-
-}
-
-
 static PyObject *OSQP_constant(OSQP *self, PyObject *args) {
 
     char * constant_name;  // String less than 32 chars
@@ -380,10 +89,9 @@ static PyObject *OSQP_constant(OSQP *self, PyObject *args) {
 
 
 static PyMethodDef OSQP_module_methods[] = {
-					    {"solve", (PyCFunction)OSQP_module_solve,METH_VARARGS|METH_KEYWORDS, PyDoc_STR("Setup solve and cleanup OSQP problem. This function releases GIL.")},
-					        {"constant", (PyCFunction)OSQP_constant, METH_VARARGS, PyDoc_STR("Return internal OSQP constant")},
-					    {NULL, NULL}		/* sentinel */
+	{"constant", (PyCFunction)OSQP_constant, METH_VARARGS, PyDoc_STR("Return internal OSQP constant")},
+	{NULL, NULL}		/* sentinel */
 };
 
-  
+
 #endif
diff --git a/extension/include/osqpobjectpy.h b/extension/include/osqpobjectpy.h
index 9b01643e..7f80b915 100644
--- a/extension/include/osqpobjectpy.h
+++ b/extension/include/osqpobjectpy.h
@@ -36,7 +36,7 @@ static c_int OSQP_dealloc(OSQP *self) {
 // Solve Optimization Problem
 static PyObject * OSQP_solve(OSQP *self) {
     c_int exitflag;
-    
+
     // Create status object
     PyObject * status;
 
@@ -71,7 +71,12 @@ static PyObject * OSQP_solve(OSQP *self) {
     /**
      *  Solve QP Problem
      */
+
+    // Release the GIL
+    Py_BEGIN_ALLOW_THREADS;
     exitflag = osqp_solve(self->workspace);
+    Py_END_ALLOW_THREADS;
+
     if(exitflag){
         PyErr_SetString(PyExc_ValueError, "OSQP solve error!");
         return (PyObject *) NULL;
@@ -317,7 +322,11 @@ static PyObject * OSQP_setup(OSQP *self, PyObject *args, PyObject *kwargs) {
     data = create_data(pydata);
 
     // Create Workspace object
+    // Release the GIL
+    Py_BEGIN_ALLOW_THREADS;
     exitflag = osqp_setup(&(self->workspace), data, settings);
+    Py_END_ALLOW_THREADS;
+
 
     // Cleanup data and settings
     free_data(data, pydata);
diff --git a/extension/src/osqpmodule.c b/extension/src/osqpmodule.c
index 24d4da20..b3b36870 100644
--- a/extension/src/osqpmodule.c
+++ b/extension/src/osqpmodule.c
@@ -9,10 +9,10 @@
 
 /* The PyInt variable is a PyLong in Python3.x.
  */
-#if PY_MAJOR_VERSION >= 3
-#define PyInt_AsLong PyLong_AsLong
-#define PyInt_Check PyLong_Check
-#endif
+// #if PY_MAJOR_VERSION >= 3
+// #define PyInt_AsLong PyLong_AsLong
+// #define PyInt_Check PyLong_Check
+// #endif
 
 
 // OSQP Object type
@@ -36,8 +36,7 @@ static PyTypeObject OSQP_Type;
  * Interface Methods    *
  ************************/
 
- /* Module initialization for Python 3*/
- #if PY_MAJOR_VERSION >= 3
+ /* Module initialization*/
  static struct PyModuleDef moduledef = {
      PyModuleDef_HEAD_INIT, "_osqp",       /* m_name */
      NULL,                                 /* m_doc */
@@ -48,20 +47,14 @@ static PyTypeObject OSQP_Type;
      NULL,                                 /* m_clear */
      NULL,                                 /* m_free */
  };
- #endif
-
 
 
 static PyObject * moduleinit(void){
 
     PyObject *m;
 
-    // Initialize module (no methods. all inside OSQP object)
-#if PY_MAJOR_VERSION >= 3
+    // Initialize module
     m = PyModule_Create(&moduledef);
-#else
-    m = Py_InitModule3("_osqp", OSQP_module_methods, NULL);
-#endif
 
     if (m == NULL) return NULL;
 
@@ -86,20 +79,7 @@ static PyObject * moduleinit(void){
 
 
 // Init OSQP Internal module
-#if PY_MAJOR_VERSION >= 3
 PyMODINIT_FUNC PyInit__osqp(void) {
-#else
-PyMODINIT_FUNC init_osqp(void) {
-#endif
-
     import_array(); /* for numpy arrays */
-
-    // Module initialization is not a global variable in
-    // Python 3
-#if PY_MAJOR_VERSION >= 3
     return moduleinit();
-#else
-    moduleinit();
-#endif
-
 }
diff --git a/module/__init__.py b/module/__init__.py
index 935f8b77..f216e4ce 100644
--- a/module/__init__.py
+++ b/module/__init__.py
@@ -1,3 +1,2 @@
 from osqp.interface import OSQP
-from osqp.interface import solve, solve_and_derivative
 from osqp._osqp import constant
diff --git a/module/codegen/files_to_generate/emosqpmodule.c b/module/codegen/files_to_generate/emosqpmodule.c
index 645ae077..7859154b 100644
--- a/module/codegen/files_to_generate/emosqpmodule.c
+++ b/module/codegen/files_to_generate/emosqpmodule.c
@@ -126,10 +126,10 @@ c_float toc(PyTimer* t) {
 
 /* The PyInt variable is a PyLong in Python3.x.
  */
-#if PY_MAJOR_VERSION >= 3
-#define PyInt_AsLong PyLong_AsLong
-#define PyInt_Check PyLong_Check
-#endif
+// #if PY_MAJOR_VERSION >= 3
+// #define PyInt_AsLong PyLong_AsLong
+// #define PyInt_Check PyLong_Check
+// #endif
 
 
 // Get float type from OSQP setup
@@ -664,23 +664,22 @@ static PyObject * OSQP_update_P_A(PyObject *self, PyObject *args) {
 
 
 static PyMethodDef PYTHON_EXT_NAME_methods[] = {
-    {"solve", (PyCFunction)OSQP_solve, METH_NOARGS, "Solve QP"},
-    {"update_lin_cost", (PyCFunction)OSQP_update_lin_cost, METH_VARARGS, "Update linear cost"},
-    {"update_lower_bound", (PyCFunction)OSQP_update_lower_bound, METH_VARARGS, "Update lower bound"},
-    {"update_upper_bound", (PyCFunction)OSQP_update_upper_bound, METH_VARARGS, "Update upper bound"},
-		{"update_bounds", (PyCFunction)OSQP_update_bounds, METH_VARARGS, "Update bounds"},
-		#if EMBEDDED != 1
-		{"update_P", (PyCFunction)OSQP_update_P, METH_VARARGS, "Update matrix P"},
-		{"update_A", (PyCFunction)OSQP_update_A, METH_VARARGS, "Update matrix A"},
-		{"update_P_A", (PyCFunction)OSQP_update_P_A, METH_VARARGS, "Update matrices P and A"},
-		#endif
-		{NULL, NULL, 0, NULL}
+	{"solve", (PyCFunction)OSQP_solve, METH_NOARGS, "Solve QP"},
+	{"update_lin_cost", (PyCFunction)OSQP_update_lin_cost, METH_VARARGS, "Update linear cost"},
+	{"update_lower_bound", (PyCFunction)OSQP_update_lower_bound, METH_VARARGS, "Update lower bound"},
+	{"update_upper_bound", (PyCFunction)OSQP_update_upper_bound, METH_VARARGS, "Update upper bound"},
+	{"update_bounds", (PyCFunction)OSQP_update_bounds, METH_VARARGS, "Update bounds"},
+#if EMBEDDED != 1
+	{"update_P", (PyCFunction)OSQP_update_P, METH_VARARGS, "Update matrix P"},
+	{"update_A", (PyCFunction)OSQP_update_A, METH_VARARGS, "Update matrix A"},
+	{"update_P_A", (PyCFunction)OSQP_update_P_A, METH_VARARGS, "Update matrices P and A"},
+#endif
+	{NULL, NULL, 0, NULL}
 };
 
 
 
 /* Module initialization for Python 3*/
- #if PY_MAJOR_VERSION >= 3
 static struct PyModuleDef moduledef = {
     PyModuleDef_HEAD_INIT, "PYTHON_EXT_NAME",    /* m_name */
     "Embedded OSQP solver",             /* m_doc */
@@ -691,7 +690,6 @@ static struct PyModuleDef moduledef = {
     NULL,                               /* m_clear */
     NULL,                               /* m_free */
 };
- #endif
 
 
 
@@ -699,12 +697,9 @@ static PyObject * moduleinit(void){
 
 		PyObject *m;
 
-    // Initialize module (no methods. all inside OSQP object)
-    #if PY_MAJOR_VERSION >= 3
+    // Initialize module
     m = PyModule_Create(&moduledef);
-    #else
-    m = Py_InitModule3("PYTHON_EXT_NAME", PYTHON_EXT_NAME_methods, "Embedded OSQP solver");
-    #endif
+
     if (m == NULL)
     		return NULL;
 
@@ -715,19 +710,9 @@ static PyObject * moduleinit(void){
 
 
 // Init Osqp Internal module
-#if PY_MAJOR_VERSION >= 3
 PyMODINIT_FUNC PyInit_PYTHON_EXT_NAME(void)
-#else
-PyMODINIT_FUNC initPYTHON_EXT_NAME(void)
-#endif
 {
     import_array(); /* for numpy arrays */
 
-    // Module initialization is not a global variable in
-    // Python 3
-    #if PY_MAJOR_VERSION >= 3
     return moduleinit();
-    #else
-    moduleinit();
-    #endif
 }
diff --git a/module/interface.py b/module/interface.py
index 4b837886..465d11db 100644
--- a/module/interface.py
+++ b/module/interface.py
@@ -12,6 +12,7 @@
 import osqp.utils as utils
 import sys
 
+
 class OSQP(object):
     def __init__(self):
         self._model = _osqp.OSQP()
@@ -28,6 +29,8 @@ def setup(self, P=None, q=None, A=None, l=None, u=None, **settings):
 
         solver settings can be specified as additional keyword arguments
         """
+        # TODO(bart): this will be unnecessary when the derivative will be in C
+        self._derivative_cache = {'P': P, 'q': q, 'A': A, 'l': l, 'u': u}
 
         unpacked_data, settings = utils.prepare_data(P, q, A, l, u, **settings)
         self._model.setup(*unpacked_data, **settings)
@@ -199,7 +202,12 @@ def solve(self):
         Solve QP Problem
         """
         # Solve QP
-        return self._model.solve()
+        results = self._model.solve()
+
+        # TODO(bart): this will be unnecessary when the derivative will be in C
+        self._derivative_cache['results'] = results
+
+        return results
 
     def warm_start(self, x=None, y=None):
         """
@@ -275,34 +283,23 @@ def codegen(self, folder, project_type='', parameters='vectors',
         cg.codegen(work, folder, python_ext_name, project_type,
                    embedded, force_rewrite, float_flag, long_flag)
 
+    def adjoint_derivative(self, dx=None, dy=None, dz=None,
+            P_idx=None, A_idx=None, diff_mode='full'):
 
-def solve(P=None, q=None, A=None, l=None, u=None, **settings):
-    """
-    Solve problem of the form
-
-    minimize     1/2 x' * P * x + q' * x
-    subject to   l <= A * x <= u
-
-    solver settings can be specified as additional keyword arguments.
-    This function disables the GIL because it internally performs
-    setup solve and cleanup.
-    """
-
-    unpacked_data, settings = utils.prepare_data(P, q, A, l, u, **settings)
-    return _osqp.solve(*unpacked_data, **settings)
+        P, q = self._derivative_cache['P'], self._derivative_cache['q']
+        A = self._derivative_cache['A']
+        l, u = self._derivative_cache['l'], self._derivative_cache['u']
 
+        results = self._derivative_cache['results']
 
-def solve_and_derivative(P=None, q=None, A=None, l=None, u=None, **settings):
-    unpacked_data, settings = utils.prepare_data(P, q, A, l, u, **settings)
-    result = _osqp.solve(*unpacked_data, **settings)
+        if results.info.status != "solved":
+            raise ValueError("Problem has not been solved to optimality. You cannot take derivatives")
 
-    m, n = A.shape
-    x = result.x
-    y = result.y
-    z = A.dot(x)
+        m, n = A.shape
+        x = results.x
+        y = results.y
+        z = A.dot(x)
 
-    def adjoint_derivative(dx=None, dy=None, dz=None,
-                           P_idx=None, A_idx=None, diff_mode='full'):
         if A_idx is None:
             A_idx = A.nonzero()
 
@@ -312,7 +309,7 @@ def adjoint_derivative(dx=None, dy=None, dz=None,
         if dy is not None or dz is not None:
             raise NotImplementedError
 
-        if diff_mode == 'active':
+        if diff_mode == 'qr_active':
             # Taken from https://github.com/oxfordcontrol/osqp-python/blob/0363d028b2321017049360d2eb3c0cf206028c43/modulepurepy/_osqp.py#L1717
             # Guess which linear constraints are lower-active, upper-active, free
             ind_low = np.where(z - l < - y)[0]
@@ -325,7 +322,7 @@ def adjoint_derivative(dx=None, dy=None, dz=None,
 
             # Form KKT linear system
             KKT = spa.vstack([spa.hstack([P, A_red.T]),
-                            spa.hstack([A_red, spa.csc_matrix((n_low + n_upp, n_low + n_upp))])])
+                            spa.hstack([A_red, spa.csc_matrix((n_low + n_upp, n_low + n_upp))])], format='csc')
             rhs = np.hstack([dx, np.zeros(n_low + n_upp)])
 
             # Get solution
@@ -343,7 +340,11 @@ def adjoint_derivative(dx=None, dy=None, dz=None,
                 for j in range(m)])
             du = np.hstack([r_yu[np.where(ind_upp == j)[0]] if j in ind_upp else 0
                 for j in range(m)])
-        elif diff_mode == 'full':
+        elif diff_mode == 'qr':
+            # TODO: Add something like https://github.com/oxfordcontrol/osqpth/pull/5
+            # but use slack variables too
+            raise NotImplementedError
+        elif diff_mode == 'lsqr':
             # TODO: Add something like https://github.com/oxfordcontrol/osqpth/pull/5
             # but use slack variables too
             raise NotImplementedError
@@ -358,6 +359,3 @@ def adjoint_derivative(dx=None, dy=None, dz=None,
         dA = -(y[rows] * r_x[cols] + r_y[rows] * x[cols])
         dq = -r_x
         return (dP, dq, dA, dl, du)
-
-    return result, adjoint_derivative
-
diff --git a/module/tests/basic_test.py b/module/tests/basic_test.py
index dd526fac..91737992 100644
--- a/module/tests/basic_test.py
+++ b/module/tests/basic_test.py
@@ -155,20 +155,5 @@ def test_upper_triangular_P(self):
         # Assert equal
         nptest.assert_array_almost_equal(res_default.x, res_triu.x)
         nptest.assert_array_almost_equal(res_default.y, res_triu.y)
-        nptest.assert_array_almost_equal(res_default.info.obj_val, 
+        nptest.assert_array_almost_equal(res_default.info.obj_val,
                                          res_triu.info.obj_val)
-
-
-    def test_solve_full_vs_object(self):
-
-        # Solve problem with object
-        res = self.model.solve()
-
-        # Solve problem with module function
-        res_module = osqp.solve(P=self.P, q=self.q, A=self.A, l=self.l, u=self.u,
-                                **self.opts)
-
-        # Assert close
-        nptest.assert_array_almost_equal(res.x, res_module.x)
-        nptest.assert_array_almost_equal(res.y, res_module.y)
-        nptest.assert_array_almost_equal(res.info.obj_val, res_module.info.obj_val)
diff --git a/module/tests/derivative_test.py b/module/tests/derivative_test.py
new file mode 100644
index 00000000..012b5333
--- /dev/null
+++ b/module/tests/derivative_test.py
@@ -0,0 +1,65 @@
+# Test osqp python module
+import osqp
+# import osqppurepy as osqp
+import numpy as np
+import numpy.random as npr
+from scipy import sparse
+import scipy as sp
+import numdifftools as nd
+import numpy.testing as npt
+
+# Unit Test
+import unittest
+
+
+#  diff_modes = ['qr', 'lsqr', 'qr_active']
+diff_modes = ['qr_active']
+
+ATOL = 1e-2
+RTOL = 1e-4
+
+class derivative_tests(unittest.TestCase):
+
+    def get_grads(self, n=10, m=3, P_scale=1., A_scale=1., diff_mode=diff_modes[0]):
+        npr.seed(1)
+        L = np.random.randn(n, n)
+        P = sparse.csc_matrix(L.dot(L.T))
+        x_0 = npr.randn(n)
+        s_0 = npr.rand(m)
+        A = sparse.csc_matrix(npr.randn(m, n))
+        u = A.dot(x_0) + s_0
+        l = - 10 * npr.rand(m)
+        q = npr.randn(n)
+        true_x = npr.randn(n)
+
+        # Get gradients by solving with osqp
+        m = osqp.OSQP()
+        m.setup(P, q, A, l, u, verbose=True)
+        results = m.solve()
+        x = results.x
+        grads = m.adjoint_derivative(dx=x - true_x, diff_mode=diff_mode)
+
+        return [P, q,  A, l, u, true_x], grads
+
+    def test_dl_dp(self, verbose=True):
+        n, m = 5, 5
+        for diff_mode in diff_modes:
+
+            # Get gradients
+            [P, q, A, l, u, true_x], [dP, dq, dA, dl, du] = self.get_grads(
+                n=n, m=m, P_scale=100., A_scale=100., diff_mode=diff_mode)
+            print(f'--- {diff_mode}')
+
+            def f(q):
+                m = osqp.OSQP()
+                m.setup(P, q, A, l, u, verbose=False)
+                res = m.solve()
+                x_hat = res.x
+
+                return 0.5 * np.sum(np.square(x_hat - true_x))
+
+            dq_fd = nd.Gradient(f)(q)  # Take numerical gradient of f
+            if verbose:
+                print('dq_fd: ', np.round(dq_fd, decimals=4))
+                print('dq: ', np.round(dq, decimals=4))
+            npt.assert_allclose(dq_fd, dq, rtol=RTOL, atol=ATOL)
diff --git a/module/tests/multithread_test.py b/module/tests/multithread_test.py
new file mode 100644
index 00000000..b9a3ea2d
--- /dev/null
+++ b/module/tests/multithread_test.py
@@ -0,0 +1,66 @@
+# Test osqp python module
+import osqp
+# import osqppurepy as osqp
+from multiprocessing.pool import ThreadPool
+import time
+import numpy as np
+from scipy import sparse
+import scipy as sp
+
+# Unit Test
+import unittest
+
+
+class multithread_tests(unittest.TestCase):
+
+    def test_multithread(self):
+        data = []
+
+        n_rep = 20
+
+        for i in range(n_rep):
+            m = 1000
+            n = 500
+            Ad = sparse.random(m, n, density=0.3, format='csc')
+            b = np.random.randn(m)
+
+            # OSQP data
+            P = sparse.block_diag([sparse.csc_matrix((n, n)), sparse.eye(m)], format='csc')
+            q = np.zeros(n+m)
+            A = sparse.vstack([
+                    sparse.hstack([Ad, -sparse.eye(m)]),
+                    sparse.hstack([sparse.eye(n), sparse.csc_matrix((n, m))])], format='csc')
+            l = np.hstack([b, np.zeros(n)])
+            u = np.hstack([b, np.ones(n)])
+
+            data.append((P, q, A, l, u))
+
+        def f(i):
+            P, q, A, l, u = data[i]
+            m = osqp.OSQP()
+            m.setup(P, q, A, l, u, verbose=False)
+            m.solve()
+
+        pool = ThreadPool(2)
+
+        tic = time.time()
+        for i in range(n_rep):
+            f(i)
+        t_serial = time.time() - tic
+
+        tic = time.time()
+        pool.map(f, range(n_rep))
+        t_parallel = time.time() - tic
+
+        self.assertLess(t_parallel, t_serial)
+
+
+
+
+
+
+
+
+
+
+
diff --git a/osqp_sources b/osqp_sources
index 027d4d69..621d0575 160000
--- a/osqp_sources
+++ b/osqp_sources
@@ -1 +1 @@
-Subproject commit 027d4d696e0d65ad8bb6e1a41efdd17e391ef134
+Subproject commit 621d0575dcb42168b42a097e70b2628179880654
diff --git a/requirements.txt b/requirements.txt
index 777b5a82..4059a76e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,2 @@
 numpy >= 1.7
 scipy >= 0.13.2
-future;python_version<="2.7"
diff --git a/setup.py b/setup.py
index ca33c4d9..51a9688b 100644
--- a/setup.py
+++ b/setup.py
@@ -49,10 +49,7 @@
 
 # Check if windows linux or mac to pass flag
 if system() == 'Windows':
-    if sys.version_info.major == 3:
-        cmake_args += ['-G', 'Visual Studio 14 2015']
-    else:
-        cmake_args += ['-G', 'Visual Studio 9 2008']
+    cmake_args += ['-G', 'Visual Studio 14 2015']
     # Differentiate between 32-bit and 64-bit
     if sys.maxsize // 2 ** 32 > 0:
         cmake_args[-1] += ' Win64'
@@ -81,8 +78,7 @@
 # Pass python to compiler launched from setup.py
 define_macros += [('PYTHON', None)]
 
-# Pass python version to cmake
-py_version = "%i.%i" % sys.version_info[:2]
+# Pass python include dirs to cmake
 cmake_args += ['-DPYTHON_INCLUDE_DIRS=%s' % sysconfig.get_python_inc()]
 
 
@@ -122,7 +118,7 @@
 libraries = []
 if system() == 'Linux':
     libraries += ['rt']
-if system() == 'Windows' and sys.version_info[0] == 3:
+if system() == 'Windows':
     # They moved the stdio library to another place.
     # We need to include this to fix the dependency
     libraries += ['legacy_stdio_definitions']

From 563b32508f0ae4da0fcbd355f204643eefca6e93 Mon Sep 17 00:00:00 2001
From: Bartolomeo Stellato <bartolomeo.stellato@gmail.com>
Date: Fri, 8 Nov 2019 12:06:12 -0500
Subject: [PATCH 04/19] Install numdifftools

---
 .travis.yml  | 1 +
 appveyor.yml | 1 +
 2 files changed, 2 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index 309f2a51..f2d69c94 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -52,6 +52,7 @@ install:
   - conda create -n testenv python=$PYTHON_VERSION pytest numpy
   - source activate testenv
   - conda install conda conda-verify conda-build anaconda-client twine
+  - conda install -c conda-forge numdifftools  # TODO: Delete this test dependency
   - conda info -a
   # Add MKL shared libraries (for MKL pardiso) to the path
   - |
diff --git a/appveyor.yml b/appveyor.yml
index 39e7f8fc..91a6d8bc 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -39,6 +39,7 @@ install:
   - conda config --set always_yes yes --set changeps1 no --set auto_update_conda no
   - conda create -n testenv python=%PYTHON_VERSION% pytest numpy cmake conda conda-verify conda-build anaconda-client twine
   - activate testenv
+  - conda install -c conda-forge numdifftools  # TODO: Delete this test dependency
   - conda install vs2008_express_vc_python_patch
     # Fix for 64-bit Python 2.7 builds, courtesy vs2008_express_vc_python_patch
   - call setup_x64

From a88e20346541cdc01b95a3ec9678c45f8fd5f166 Mon Sep 17 00:00:00 2001
From: Bartolomeo Stellato <bartolomeo.stellato@gmail.com>
Date: Fri, 8 Nov 2019 16:42:39 -0500
Subject: [PATCH 05/19] Added lsqr mode. All tests passing except dA

---
 module/interface.py             |  92 ++++++++++++++++------
 module/tests/derivative_test.py | 132 +++++++++++++++++++++++++++++---
 2 files changed, 190 insertions(+), 34 deletions(-)

diff --git a/module/interface.py b/module/interface.py
index 465d11db..f84c8b2e 100644
--- a/module/interface.py
+++ b/module/interface.py
@@ -40,6 +40,8 @@ def update(self, q=None, l=None, u=None,
         """
         Update OSQP problem arguments
         """
+        # TODO(bart): this will be unnecessary when the derivative will be in C
+        self._derivative_cache = None
 
         # get problem dimensions
         (n, m) = self._model.dimensions()
@@ -283,8 +285,11 @@ def codegen(self, folder, project_type='', parameters='vectors',
         cg.codegen(work, folder, python_ext_name, project_type,
                    embedded, force_rewrite, float_flag, long_flag)
 
-    def adjoint_derivative(self, dx=None, dy=None, dz=None,
-            P_idx=None, A_idx=None, diff_mode='full'):
+    def adjoint_derivative(self, dx=None, dy_u=None, dy_l=None,
+            P_idx=None, A_idx=None, diff_mode='lsqr'):
+        """
+        Compute adjoint derivative after solve.
+        """
 
         P, q = self._derivative_cache['P'], self._derivative_cache['q']
         A = self._derivative_cache['A']
@@ -299,6 +304,8 @@ def adjoint_derivative(self, dx=None, dy=None, dz=None,
         x = results.x
         y = results.y
         z = A.dot(x)
+        y_u = np.maximum(y, 0)
+        y_l = -np.minimum(y, 0)
 
         if A_idx is None:
             A_idx = A.nonzero()
@@ -306,10 +313,48 @@ def adjoint_derivative(self, dx=None, dy=None, dz=None,
         if P_idx is None:
             P_idx = P.nonzero()
 
-        if dy is not None or dz is not None:
-            raise NotImplementedError
+        if dy_u is None:
+            dy_u = np.zeros(m)
+        if dy_l is None:
+            dy_l = np.zeros(m)
+
+        if 'active' not in diff_mode:
+            # Make sure M matrix exists
+            if 'M' not in self._derivative_cache:
+
+                    M = spa.bmat([
+                        [P, A.T, -A.T],
+                        [spa.diags(y_u) @ A, spa.diags(A @ x - u), None],
+                        [-spa.diags(y_l) @ A, None, spa.diags(l - A @ x)]
+                    ])
+                    self._derivative_cache['M'] = M
+
+            # Prepare rhs
+            d_sol = np.concatenate([dx, dy_u, dy_l])
+
+            if diff_mode == 'lsqr':
 
-        if diff_mode == 'qr_active':
+                r_sol = - sla.lsqr(self._derivative_cache['M'].T, d_sol)[0]
+
+            elif diff_mode == 'qr':
+                # TODO: Add something like https://github.com/oxfordcontrol/osqpth/pull/5
+                # but use slack variables too
+                raise NotImplementedError
+            else:
+                raise RuntimeError("Unrecognized differentiation mode: {}".format(diff_mode))
+
+            r_x, r_yu, r_yl = np.split(r_sol, [n, n+m])
+
+            # Extract derivatives for the constraints
+            rows, cols = A_idx
+            dA_vals = \
+                y_u[rows] * r_x[cols] + y_u[rows] * (r_yu[rows] * x[cols]) - \
+                (y_l[rows] * r_x[cols] + y_l[rows] * (r_yl[rows] * x[cols]))
+            dA = spa.csc_matrix((dA_vals, (rows, cols)), shape=A.shape)
+            du = - y_u * r_yu
+            dl = y_l * r_yl
+
+        elif diff_mode == 'lu_active':
             # Taken from https://github.com/oxfordcontrol/osqp-python/blob/0363d028b2321017049360d2eb3c0cf206028c43/modulepurepy/_osqp.py#L1717
             # Guess which linear constraints are lower-active, upper-active, free
             ind_low = np.where(z - l < - y)[0]
@@ -320,42 +365,39 @@ def adjoint_derivative(self, dx=None, dy=None, dz=None,
             # Form A_red from the assumed active constraints
             A_red = spa.vstack([A[ind_low], A[ind_upp]])
 
+            # Form reduced dy
+            dy_red = np.concatenate([dy_l, dy_u])[np.concatenate([ind_low, ind_upp])]
+
             # Form KKT linear system
             KKT = spa.vstack([spa.hstack([P, A_red.T]),
                             spa.hstack([A_red, spa.csc_matrix((n_low + n_upp, n_low + n_upp))])], format='csc')
-            rhs = np.hstack([dx, np.zeros(n_low + n_upp)])
+            rhs = - np.hstack([dx, dy_red])
 
             # Get solution
             r_sol = sla.spsolve(KKT, rhs)
-            # r_sol = sla.lsqr(KKT, rhs)[0]
+            r_x, r_yl, r_yu =  np.split(r_sol, [n, n + n_low])
 
-            r_x =  r_sol[:n]
-            r_yl = r_sol[n:n + n_low]
-            r_yu = r_sol[n + n_low:]
             r_y = np.zeros(m)
             r_y[ind_low] = r_yl
             r_y[ind_upp] = r_yu
 
-            dl = np.hstack([r_yl[np.where(ind_low == j)[0]] if j in ind_low else 0
+
+            # Extract derivatives for the constraints A, l, u
+            dl = - np.hstack([r_yl[np.where(ind_low == j)[0]] if j in ind_low else 0
                 for j in range(m)])
-            du = np.hstack([r_yu[np.where(ind_upp == j)[0]] if j in ind_upp else 0
+            du = - np.hstack([r_yu[np.where(ind_upp == j)[0]] if j in ind_upp else 0
                 for j in range(m)])
-        elif diff_mode == 'qr':
-            # TODO: Add something like https://github.com/oxfordcontrol/osqpth/pull/5
-            # but use slack variables too
-            raise NotImplementedError
-        elif diff_mode == 'lsqr':
-            # TODO: Add something like https://github.com/oxfordcontrol/osqpth/pull/5
-            # but use slack variables too
-            raise NotImplementedError
+            rows, cols = A_idx
+            dA_vals = y[rows] * r_x[cols] + r_y[rows] * x[cols]
+            dA = spa.csc_matrix((dA_vals, (rows, cols)), shape=A.shape)
+
         else:
             raise RuntimeError("Unrecognized differentiation mode: {}".format(diff_mode))
 
-        # Extract derivatives
+        # Extract derivatives for the cost (P, q)
         rows, cols = P_idx
-        dP = -.5 * (r_x[rows] * x[cols] + r_x[cols] * x[rows])
+        dP_vals = .5 * (r_x[rows] * x[cols] + r_x[cols] * x[rows])
+        dP = spa.csc_matrix((dP_vals, P_idx), shape=P.shape)
+        dq = r_x
 
-        rows, cols = A_idx
-        dA = -(y[rows] * r_x[cols] + r_y[rows] * x[cols])
-        dq = -r_x
         return (dP, dq, dA, dl, du)
diff --git a/module/tests/derivative_test.py b/module/tests/derivative_test.py
index 012b5333..f1eff029 100644
--- a/module/tests/derivative_test.py
+++ b/module/tests/derivative_test.py
@@ -4,7 +4,6 @@
 import numpy as np
 import numpy.random as npr
 from scipy import sparse
-import scipy as sp
 import numdifftools as nd
 import numpy.testing as npt
 
@@ -12,47 +11,53 @@
 import unittest
 
 
-#  diff_modes = ['qr', 'lsqr', 'qr_active']
-diff_modes = ['qr_active']
+#  diff_modes = ['qr', 'lsqr', 'lu_active']
+diff_modes = [
+              'lsqr',
+              'lu_active'
+              ]
 
 ATOL = 1e-2
 RTOL = 1e-4
+eps_abs = 1e-06
+eps_rel = 1e-06
 
 class derivative_tests(unittest.TestCase):
 
     def get_grads(self, n=10, m=3, P_scale=1., A_scale=1., diff_mode=diff_modes[0]):
         npr.seed(1)
         L = np.random.randn(n, n)
-        P = sparse.csc_matrix(L.dot(L.T))
+        P = sparse.csc_matrix(L.dot(L.T) + 5. * sparse.eye(n))
         x_0 = npr.randn(n)
         s_0 = npr.rand(m)
         A = sparse.csc_matrix(npr.randn(m, n))
         u = A.dot(x_0) + s_0
-        l = - 10 * npr.rand(m)
+        l = -5 - 10 * npr.rand(m)
         q = npr.randn(n)
         true_x = npr.randn(n)
 
         # Get gradients by solving with osqp
         m = osqp.OSQP()
-        m.setup(P, q, A, l, u, verbose=True)
+        m.setup(P, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, verbose=False)
         results = m.solve()
+        if results.info.status != "solved":
+            raise ValueError("Problem not solved!")
         x = results.x
         grads = m.adjoint_derivative(dx=x - true_x, diff_mode=diff_mode)
 
         return [P, q,  A, l, u, true_x], grads
 
-    def test_dl_dp(self, verbose=True):
+    def test_dl_dq(self, verbose=False):
         n, m = 5, 5
         for diff_mode in diff_modes:
 
             # Get gradients
             [P, q, A, l, u, true_x], [dP, dq, dA, dl, du] = self.get_grads(
                 n=n, m=m, P_scale=100., A_scale=100., diff_mode=diff_mode)
-            print(f'--- {diff_mode}')
 
             def f(q):
                 m = osqp.OSQP()
-                m.setup(P, q, A, l, u, verbose=False)
+                m.setup(P, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, verbose=False)
                 res = m.solve()
                 x_hat = res.x
 
@@ -60,6 +65,115 @@ def f(q):
 
             dq_fd = nd.Gradient(f)(q)  # Take numerical gradient of f
             if verbose:
+                print('--- ' + diff_mode)
                 print('dq_fd: ', np.round(dq_fd, decimals=4))
                 print('dq: ', np.round(dq, decimals=4))
             npt.assert_allclose(dq_fd, dq, rtol=RTOL, atol=ATOL)
+
+
+    def test_dl_dP(self, verbose=False):
+        n, m = 10, 10
+        for diff_mode in diff_modes:
+
+            # Get gradients
+            [P, q, A, l, u, true_x], [dP, dq, dA, dl, du] = self.get_grads(
+                n=n, m=m, P_scale=100., A_scale=100., diff_mode=diff_mode)
+
+            P_idx = P.nonzero()
+
+            def f(P_val):
+                P_qp = sparse.csc_matrix((P_val, P_idx), shape=P.shape)
+                # Go from values to sparse matrix
+                m = osqp.OSQP()
+                m.setup(P_qp, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, verbose=False)
+                res = m.solve()
+                x_hat = res.x
+
+                return 0.5 * np.sum(np.square(x_hat - true_x))
+
+            dP_fd_val = nd.Gradient(f)(P.data)  # Take numerical gradient of f
+            dP_fd = sparse.csc_matrix((dP_fd_val, P_idx), shape=P.shape)
+            dP_fd = (dP_fd + dP_fd.T)/2
+            if verbose:
+                print('--- ' + diff_mode)
+                print('dP_fd: ', np.round(dP_fd.data, decimals=4))
+                print('dP: ', np.round(dP.data, decimals=4))
+
+            npt.assert_allclose(dP.todense(), dP_fd.todense(), rtol=RTOL, atol=ATOL)
+
+
+
+    def test_dl_dA(self, verbose=True):
+        n, m = 3, 3
+        for diff_mode in diff_modes:
+
+            # Get gradients
+            [P, q, A, l, u, true_x], [dP, dq, dA, dl, du] = self.get_grads(
+                n=n, m=m, P_scale=100., A_scale=100., diff_mode=diff_mode)
+
+            A_idx = A.nonzero()
+
+            def f(A_val):
+                A_qp = sparse.csc_matrix((A_val, A_idx), shape=A.shape)
+                # Go from values to sparse matrix
+                m = osqp.OSQP()
+                m.setup(P, q, A_qp, l, u, eps_abs=eps_abs, eps_rel=eps_rel, verbose=False)
+                res = m.solve()
+                x_hat = res.x
+
+                return 0.5 * np.sum(np.square(x_hat - true_x))
+
+            dA_fd_val = nd.Gradient(f)(A.data)  # Take numerical gradient of f
+            dA_fd = sparse.csc_matrix((dA_fd_val, A_idx), shape=A.shape)
+            if verbose:
+                print('--- ' + diff_mode)
+                print('dA_fd: ', np.round(dA_fd.data, decimals=4))
+                print('dA: ', np.round(dA.data, decimals=4))
+
+            npt.assert_allclose(dA.todense(), dA_fd.todense(), rtol=RTOL, atol=ATOL)
+
+    def test_dl_dl(self, verbose=True):
+        n, m = 10, 10
+        for diff_mode in diff_modes:
+
+            # Get gradients
+            [P, q, A, l, u, true_x], [dP, dq, dA, dl, du] = self.get_grads(
+                n=n, m=m, P_scale=100., A_scale=100., diff_mode=diff_mode)
+
+            def f(l):
+                m = osqp.OSQP()
+                m.setup(P, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, verbose=False)
+                res = m.solve()
+                x_hat = res.x
+
+                return 0.5 * np.sum(np.square(x_hat - true_x))
+
+            dl_fd = nd.Gradient(f)(l)  # Take numerical gradient of f
+            if verbose:
+                print('--- ' + diff_mode)
+                print('dl_fd: ', np.round(dl_fd, decimals=4))
+                print('dl: ', np.round(dl, decimals=4))
+            npt.assert_allclose(dl_fd, dl, rtol=RTOL, atol=ATOL)
+
+    def test_dl_du(self, verbose=True):
+        n, m = 5, 5
+        for diff_mode in diff_modes:
+
+            # Get gradients
+            [P, q, A, l, u, true_x], [dP, dq, dA, dl, du] = self.get_grads(
+                n=n, m=m, P_scale=100., A_scale=100., diff_mode=diff_mode)
+
+            def f(u):
+                m = osqp.OSQP()
+                m.setup(P, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, verbose=False)
+                res = m.solve()
+                x_hat = res.x
+
+                return 0.5 * np.sum(np.square(x_hat - true_x))
+
+            du_fd = nd.Gradient(f)(u)  # Take numerical gradient of f
+            if verbose:
+                print('--- ' + diff_mode)
+                print('du_fd: ', np.round(du_fd, decimals=4))
+                print('du: ', np.round(du, decimals=4))
+            npt.assert_allclose(du_fd, du, rtol=RTOL, atol=ATOL)

From f24323cd77140056bc51c7c63418cae2c0b7263d Mon Sep 17 00:00:00 2001
From: Bartolomeo Stellato <bartolomeo.stellato@gmail.com>
Date: Fri, 8 Nov 2019 18:04:37 -0500
Subject: [PATCH 06/19] ci fix

---
 .travis.yml            | 1 -
 appveyor.yml           | 1 -
 conda-recipe/meta.yaml | 1 +
 3 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index f2d69c94..309f2a51 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -52,7 +52,6 @@ install:
   - conda create -n testenv python=$PYTHON_VERSION pytest numpy
   - source activate testenv
   - conda install conda conda-verify conda-build anaconda-client twine
-  - conda install -c conda-forge numdifftools  # TODO: Delete this test dependency
   - conda info -a
   # Add MKL shared libraries (for MKL pardiso) to the path
   - |
diff --git a/appveyor.yml b/appveyor.yml
index 91a6d8bc..39e7f8fc 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -39,7 +39,6 @@ install:
   - conda config --set always_yes yes --set changeps1 no --set auto_update_conda no
   - conda create -n testenv python=%PYTHON_VERSION% pytest numpy cmake conda conda-verify conda-build anaconda-client twine
   - activate testenv
-  - conda install -c conda-forge numdifftools  # TODO: Delete this test dependency
   - conda install vs2008_express_vc_python_patch
     # Fix for 64-bit Python 2.7 builds, courtesy vs2008_express_vc_python_patch
   - call setup_x64
diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
index dfb1be8d..aeec761c 100644
--- a/conda-recipe/meta.yaml
+++ b/conda-recipe/meta.yaml
@@ -36,6 +36,7 @@ test:
   requires:
     - pytest
     - cmake
+    - numdifftools
   imports:
     - osqp
     - osqppurepy

From 1ecfbb18d8a30ec0cc9fe96fea2c84ff9ed7429d Mon Sep 17 00:00:00 2001
From: Bartolomeo Stellato <bartolomeo.stellato@gmail.com>
Date: Sat, 9 Nov 2019 11:27:09 -0500
Subject: [PATCH 07/19] Using approx_fprime from scipy. Fixed tests for dA.
 Fixed derivative cache

---
 conda-recipe/meta.yaml          |   1 -
 module/interface.py             |  37 +++++++++-
 module/tests/derivative_test.py | 125 +++++++++++++++++++++-----------
 3 files changed, 117 insertions(+), 46 deletions(-)

diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
index aeec761c..dfb1be8d 100644
--- a/conda-recipe/meta.yaml
+++ b/conda-recipe/meta.yaml
@@ -36,7 +36,6 @@ test:
   requires:
     - pytest
     - cmake
-    - numdifftools
   imports:
     - osqp
     - osqppurepy
diff --git a/module/interface.py b/module/interface.py
index f84c8b2e..c309c214 100644
--- a/module/interface.py
+++ b/module/interface.py
@@ -40,8 +40,6 @@ def update(self, q=None, l=None, u=None,
         """
         Update OSQP problem arguments
         """
-        # TODO(bart): this will be unnecessary when the derivative will be in C
-        self._derivative_cache = None
 
         # get problem dimensions
         (n, m) = self._model.dimensions()
@@ -108,6 +106,36 @@ def update(self, q=None, l=None, u=None,
         if Px is not None and Ax is not None:
             self._model.update_P_A(Px, Px_idx, len(Px), Ax, Ax_idx, len(Ax))
 
+
+        # TODO(bart): this will be unnecessary when the derivative will be in C
+        # update problem data in self._derivative_cache
+        if q is not None:
+            self._derivative_cache["q"] = q
+
+        if l is not None:
+            self._derivative_cache["l"] = l
+
+        if u is not None:
+            self._derivative_cache["u"] = u
+
+        if Px is not None:
+            if Px_idx.size == 0:
+                self._derivative_cache["P"].data = Px
+            else:
+                self._derivative_cache["P"].data[Px_idx] = Px
+
+        if Ax is not None:
+            if Ax_idx.size == 0:
+                self._derivative_cache["A"].data = Ax
+            else:
+                self._derivative_cache["A"].data[Ax_idx] = Ax
+
+        # delete results from self._derivative_cache to prohibit
+        # taking the derivative of unsolved problems
+        if "results" in self._derivative_cache.keys():
+            del self._derivative_cache["results"]
+
+
     def update_settings(self, **kwargs):
         """
         Update OSQP solver settings
@@ -295,7 +323,10 @@ def adjoint_derivative(self, dx=None, dy_u=None, dy_l=None,
         A = self._derivative_cache['A']
         l, u = self._derivative_cache['l'], self._derivative_cache['u']
 
-        results = self._derivative_cache['results']
+        try:
+            results = self._derivative_cache['results']
+        except KeyError:
+            raise ValueError("Problem has not been solved. You cannot take derivatives. Please call the solve function.")
 
         if results.info.status != "solved":
             raise ValueError("Problem has not been solved to optimality. You cannot take derivatives")
diff --git a/module/tests/derivative_test.py b/module/tests/derivative_test.py
index f1eff029..d9536d91 100644
--- a/module/tests/derivative_test.py
+++ b/module/tests/derivative_test.py
@@ -4,27 +4,29 @@
 import numpy as np
 import numpy.random as npr
 from scipy import sparse
-import numdifftools as nd
+from scipy.optimize import check_grad, approx_fprime
 import numpy.testing as npt
 
 # Unit Test
 import unittest
 
 
-#  diff_modes = ['qr', 'lsqr', 'lu_active']
 diff_modes = [
               'lsqr',
               'lu_active'
               ]
 
-ATOL = 1e-2
-RTOL = 1e-4
-eps_abs = 1e-06
-eps_rel = 1e-06
+ATOL = 1e-3
+RTOL = 1e-3
+eps_abs = 1e-08
+eps_rel = 1e-08
+max_iter = 10000
+grad_precision = 1e-05
+
 
 class derivative_tests(unittest.TestCase):
 
-    def get_grads(self, n=10, m=3, P_scale=1., A_scale=1., diff_mode=diff_modes[0]):
+    def get_prob(self, n=10, m=3, P_scale=1., A_scale=1.):
         npr.seed(1)
         L = np.random.randn(n, n)
         P = sparse.csc_matrix(L.dot(L.T) + 5. * sparse.eye(n))
@@ -36,95 +38,117 @@ def get_grads(self, n=10, m=3, P_scale=1., A_scale=1., diff_mode=diff_modes[0]):
         q = npr.randn(n)
         true_x = npr.randn(n)
 
+        return [P, q,  A, l, u, true_x]
+
+    def get_grads(self, P, q, A, l, u, true_x, diff_mode=diff_modes[0]):
         # Get gradients by solving with osqp
         m = osqp.OSQP()
-        m.setup(P, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, verbose=False)
+        m.setup(P, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, max_iter=max_iter, verbose=False)
         results = m.solve()
         if results.info.status != "solved":
             raise ValueError("Problem not solved!")
         x = results.x
         grads = m.adjoint_derivative(dx=x - true_x, diff_mode=diff_mode)
 
-        return [P, q,  A, l, u, true_x], grads
+        return grads
 
     def test_dl_dq(self, verbose=False):
         n, m = 5, 5
         for diff_mode in diff_modes:
 
-            # Get gradients
-            [P, q, A, l, u, true_x], [dP, dq, dA, dl, du] = self.get_grads(
-                n=n, m=m, P_scale=100., A_scale=100., diff_mode=diff_mode)
+            prob = self.get_prob(n=n, m=m, P_scale=100., A_scale=100.)
+            P, q, A, l, u, true_x = prob
+
+            def grad(q):
+                [dP, dq, dA, dl, du] = self.get_grads(P, q, A, l, u, true_x, diff_mode=diff_mode)
+                return dq
 
             def f(q):
                 m = osqp.OSQP()
-                m.setup(P, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, verbose=False)
+                m.setup(P, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, max_iter=max_iter, verbose=False)
                 res = m.solve()
+                if res.info.status != "solved":
+                    raise ValueError("Problem not solved!")
                 x_hat = res.x
 
                 return 0.5 * np.sum(np.square(x_hat - true_x))
 
-            dq_fd = nd.Gradient(f)(q)  # Take numerical gradient of f
+            dq = grad(q)
+            dq_fd = approx_fprime(q, f, grad_precision)
+
             if verbose:
                 print('--- ' + diff_mode)
                 print('dq_fd: ', np.round(dq_fd, decimals=4))
                 print('dq: ', np.round(dq, decimals=4))
+
             npt.assert_allclose(dq_fd, dq, rtol=RTOL, atol=ATOL)
 
 
-    def test_dl_dP(self, verbose=False):
-        n, m = 10, 10
+    def test_dl_dP(self, verbose=True):
+        n, m = 3, 3
         for diff_mode in diff_modes:
 
-            # Get gradients
-            [P, q, A, l, u, true_x], [dP, dq, dA, dl, du] = self.get_grads(
-                n=n, m=m, P_scale=100., A_scale=100., diff_mode=diff_mode)
-
+            prob = self.get_prob(n=n, m=m, P_scale=100., A_scale=100.)
+            P, q, A, l, u, true_x = prob
             P_idx = P.nonzero()
 
+            def grad(P_val):
+                P_qp = sparse.csc_matrix((P_val, P_idx), shape=P.shape)
+                [dP, dq, dA, dl, du] = self.get_grads(P_qp, q, A, l, u, true_x, diff_mode=diff_mode)
+                return dP
+
             def f(P_val):
                 P_qp = sparse.csc_matrix((P_val, P_idx), shape=P.shape)
-                # Go from values to sparse matrix
                 m = osqp.OSQP()
-                m.setup(P_qp, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, verbose=False)
+                m.setup(P_qp, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, max_iter=max_iter, verbose=False)
                 res = m.solve()
+                if res.info.status != "solved":
+                    raise ValueError("Problem not solved!")
                 x_hat = res.x
 
                 return 0.5 * np.sum(np.square(x_hat - true_x))
 
-            dP_fd_val = nd.Gradient(f)(P.data)  # Take numerical gradient of f
+            dP = grad(P.data)
+            dP_fd_val = approx_fprime(P.data, f, grad_precision)
             dP_fd = sparse.csc_matrix((dP_fd_val, P_idx), shape=P.shape)
             dP_fd = (dP_fd + dP_fd.T)/2
+
             if verbose:
                 print('--- ' + diff_mode)
                 print('dP_fd: ', np.round(dP_fd.data, decimals=4))
-                print('dP: ', np.round(dP.data, decimals=4))
+                print('dA: ', np.round(dP.data, decimals=4))
 
             npt.assert_allclose(dP.todense(), dP_fd.todense(), rtol=RTOL, atol=ATOL)
 
 
-
     def test_dl_dA(self, verbose=True):
         n, m = 3, 3
         for diff_mode in diff_modes:
 
-            # Get gradients
-            [P, q, A, l, u, true_x], [dP, dq, dA, dl, du] = self.get_grads(
-                n=n, m=m, P_scale=100., A_scale=100., diff_mode=diff_mode)
-
+            prob = self.get_prob(n=n, m=m, P_scale=100., A_scale=100.)
+            P, q, A, l, u, true_x = prob
             A_idx = A.nonzero()
 
+            def grad(A_val):
+                A_qp = sparse.csc_matrix((A_val, A_idx), shape=A.shape)
+                [dP, dq, dA, dl, du] = self.get_grads(P, q, A_qp, l, u, true_x, diff_mode=diff_mode)
+                return dA
+
             def f(A_val):
                 A_qp = sparse.csc_matrix((A_val, A_idx), shape=A.shape)
-                # Go from values to sparse matrix
                 m = osqp.OSQP()
-                m.setup(P, q, A_qp, l, u, eps_abs=eps_abs, eps_rel=eps_rel, verbose=False)
+                m.setup(P, q, A_qp, l, u, eps_abs=eps_abs, eps_rel=eps_rel, max_iter=max_iter, verbose=False)
                 res = m.solve()
+                if res.info.status != "solved":
+                    raise ValueError("Problem not solved!")
                 x_hat = res.x
 
                 return 0.5 * np.sum(np.square(x_hat - true_x))
 
-            dA_fd_val = nd.Gradient(f)(A.data)  # Take numerical gradient of f
+            dA = grad(A.data)
+            dA_fd_val = approx_fprime(A.data, f, grad_precision)
             dA_fd = sparse.csc_matrix((dA_fd_val, A_idx), shape=A.shape)
+
             if verbose:
                 print('--- ' + diff_mode)
                 print('dA_fd: ', np.round(dA_fd.data, decimals=4))
@@ -136,44 +160,61 @@ def test_dl_dl(self, verbose=True):
         n, m = 10, 10
         for diff_mode in diff_modes:
 
-            # Get gradients
-            [P, q, A, l, u, true_x], [dP, dq, dA, dl, du] = self.get_grads(
-                n=n, m=m, P_scale=100., A_scale=100., diff_mode=diff_mode)
+            prob = self.get_prob(n=n, m=m, P_scale=100., A_scale=100.)
+            P, q, A, l, u, true_x = prob
+
+            def grad(l):
+                [dP, dq, dA, dl, du] = self.get_grads(P, q, A, l, u, true_x, diff_mode=diff_mode)
+                return dl
 
             def f(l):
                 m = osqp.OSQP()
-                m.setup(P, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, verbose=False)
+                m.setup(P, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, max_iter=max_iter, verbose=False)
                 res = m.solve()
+                if res.info.status != "solved":
+                    raise ValueError("Problem not solved!")
                 x_hat = res.x
 
                 return 0.5 * np.sum(np.square(x_hat - true_x))
 
-            dl_fd = nd.Gradient(f)(l)  # Take numerical gradient of f
+            dl = grad(l)
+            dl_fd = approx_fprime(l, f, grad_precision)
+
             if verbose:
                 print('--- ' + diff_mode)
                 print('dl_fd: ', np.round(dl_fd, decimals=4))
                 print('dl: ', np.round(dl, decimals=4))
+
             npt.assert_allclose(dl_fd, dl, rtol=RTOL, atol=ATOL)
 
     def test_dl_du(self, verbose=True):
         n, m = 5, 5
         for diff_mode in diff_modes:
 
-            # Get gradients
-            [P, q, A, l, u, true_x], [dP, dq, dA, dl, du] = self.get_grads(
-                n=n, m=m, P_scale=100., A_scale=100., diff_mode=diff_mode)
+            prob = self.get_prob(n=n, m=m, P_scale=100., A_scale=100.)
+            P, q, A, l, u, true_x = prob
+
+            def grad(u):
+                [dP, dq, dA, dl, du] = self.get_grads(P, q, A, l, u, true_x, diff_mode=diff_mode)
+                return du
 
             def f(u):
                 m = osqp.OSQP()
-                m.setup(P, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, verbose=False)
+                m.setup(P, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, max_iter=max_iter, verbose=False)
                 res = m.solve()
+                if res.info.status != "solved":
+                    raise ValueError("Problem not solved!")
                 x_hat = res.x
 
                 return 0.5 * np.sum(np.square(x_hat - true_x))
 
-            du_fd = nd.Gradient(f)(u)  # Take numerical gradient of f
+            du = grad(u)
+            du_fd = approx_fprime(u, f, grad_precision)
+
             if verbose:
                 print('--- ' + diff_mode)
                 print('du_fd: ', np.round(du_fd, decimals=4))
                 print('du: ', np.round(du, decimals=4))
+
             npt.assert_allclose(du_fd, du, rtol=RTOL, atol=ATOL)
+

From 47a22d4c88e1c6872ee47fbb4c70194c38e5d432 Mon Sep 17 00:00:00 2001
From: Bartolomeo Stellato <bartolomeo.stellato@gmail.com>
Date: Sat, 9 Nov 2019 11:30:04 -0500
Subject: [PATCH 08/19] Simplified dA_vals

---
 module/interface.py             | 5 ++---
 module/tests/derivative_test.py | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/module/interface.py b/module/interface.py
index c309c214..10e0c633 100644
--- a/module/interface.py
+++ b/module/interface.py
@@ -378,9 +378,8 @@ def adjoint_derivative(self, dx=None, dy_u=None, dy_l=None,
 
             # Extract derivatives for the constraints
             rows, cols = A_idx
-            dA_vals = \
-                y_u[rows] * r_x[cols] + y_u[rows] * (r_yu[rows] * x[cols]) - \
-                (y_l[rows] * r_x[cols] + y_l[rows] * (r_yl[rows] * x[cols]))
+            dA_vals = (y_u[rows] - y_l[rows]) * r_x[cols] + \
+                (y_u[rows] * r_yu[rows] - y_l[rows] * r_yl[rows]) * x[cols]
             dA = spa.csc_matrix((dA_vals, (rows, cols)), shape=A.shape)
             du = - y_u * r_yu
             dl = y_l * r_yl
diff --git a/module/tests/derivative_test.py b/module/tests/derivative_test.py
index d9536d91..608cd01a 100644
--- a/module/tests/derivative_test.py
+++ b/module/tests/derivative_test.py
@@ -52,7 +52,7 @@ def get_grads(self, P, q, A, l, u, true_x, diff_mode=diff_modes[0]):
 
         return grads
 
-    def test_dl_dq(self, verbose=False):
+    def test_dl_dq(self, verbose=True):
         n, m = 5, 5
         for diff_mode in diff_modes:
 

From a3e094b07af9d911feeed3fa20dd5883306ecab5 Mon Sep 17 00:00:00 2001
From: Bartolomeo Stellato <bartolomeo.stellato@gmail.com>
Date: Sat, 9 Nov 2019 15:29:37 -0500
Subject: [PATCH 09/19] Removed py27 installation utils for windows

---
 appveyor.yml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/appveyor.yml b/appveyor.yml
index 39e7f8fc..fc75b454 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -39,9 +39,6 @@ install:
   - conda config --set always_yes yes --set changeps1 no --set auto_update_conda no
   - conda create -n testenv python=%PYTHON_VERSION% pytest numpy cmake conda conda-verify conda-build anaconda-client twine
   - activate testenv
-  - conda install vs2008_express_vc_python_patch
-    # Fix for 64-bit Python 2.7 builds, courtesy vs2008_express_vc_python_patch
-  - call setup_x64
   - conda info -a
 
 test_script:

From 115e28cd071fb2aaeb6013cfd0e3c11308864da4 Mon Sep 17 00:00:00 2001
From: Bartolomeo Stellato <bartolomeo.stellato@gmail.com>
Date: Tue, 12 Nov 2019 14:46:21 -0500
Subject: [PATCH 10/19] Added python 3.8

---
 .travis.yml  | 6 ++++++
 appveyor.yml | 1 +
 2 files changed, 7 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index 309f2a51..d5bf9853 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -30,6 +30,9 @@ matrix:
       - os: linux
         env:
           - PYTHON_VERSION=3.7
+      - os: linux
+        env:
+          - PYTHON_VERSION=3.8
       - os: osx
         env:
           - PYTHON_VERSION=3.5
@@ -39,6 +42,9 @@ matrix:
       - os: osx
         env:
           - PYTHON_VERSION=3.7
+      - os: osx
+        env:
+          - PYTHON_VERSION=3.8
 
 install:
   - if [[ $TRAVIS_OS_NAME == 'osx' ]]; then
diff --git a/appveyor.yml b/appveyor.yml
index fc75b454..04cbf889 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -13,6 +13,7 @@ environment:
     - PYTHON_VERSION: 3.5
     - PYTHON_VERSION: 3.6
     - PYTHON_VERSION: 3.7
+    - PYTHON_VERSION: 3.8
 
 
 init:

From 0a28ed9b484c708046e97855b5737ddb440c4ed7 Mon Sep 17 00:00:00 2001
From: Bartolomeo Stellato <bartolomeo.stellato@gmail.com>
Date: Tue, 12 Nov 2019 15:26:08 -0500
Subject: [PATCH 11/19] Revert "Added python 3.8"

This reverts commit 115e28cd071fb2aaeb6013cfd0e3c11308864da4.
---
 .travis.yml  | 6 ------
 appveyor.yml | 1 -
 2 files changed, 7 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index d5bf9853..309f2a51 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -30,9 +30,6 @@ matrix:
       - os: linux
         env:
           - PYTHON_VERSION=3.7
-      - os: linux
-        env:
-          - PYTHON_VERSION=3.8
       - os: osx
         env:
           - PYTHON_VERSION=3.5
@@ -42,9 +39,6 @@ matrix:
       - os: osx
         env:
           - PYTHON_VERSION=3.7
-      - os: osx
-        env:
-          - PYTHON_VERSION=3.8
 
 install:
   - if [[ $TRAVIS_OS_NAME == 'osx' ]]; then
diff --git a/appveyor.yml b/appveyor.yml
index 04cbf889..fc75b454 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -13,7 +13,6 @@ environment:
     - PYTHON_VERSION: 3.5
     - PYTHON_VERSION: 3.6
     - PYTHON_VERSION: 3.7
-    - PYTHON_VERSION: 3.8
 
 
 init:

From 7004b6982c2a0ab83bca3134e0c811d2c3e635bd Mon Sep 17 00:00:00 2001
From: Brandon Amos <brandon.amos.cs@gmail.com>
Date: Thu, 28 Nov 2019 14:23:18 -0800
Subject: [PATCH 12/19] Add lu diff_mode

---
 module/interface.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/module/interface.py b/module/interface.py
index 10e0c633..e8a0f472 100644
--- a/module/interface.py
+++ b/module/interface.py
@@ -364,9 +364,9 @@ def adjoint_derivative(self, dx=None, dy_u=None, dy_l=None,
             d_sol = np.concatenate([dx, dy_u, dy_l])
 
             if diff_mode == 'lsqr':
-
                 r_sol = - sla.lsqr(self._derivative_cache['M'].T, d_sol)[0]
-
+            elif diff_mode == 'lu':
+                r_sol = - sla.spsolve(self._derivative_cache['M'].T, d_sol)
             elif diff_mode == 'qr':
                 # TODO: Add something like https://github.com/oxfordcontrol/osqpth/pull/5
                 # but use slack variables too

From 30584268f031f836cd7b9ed05e3d5e6bc6ef0ea2 Mon Sep 17 00:00:00 2001
From: Bartolomeo Stellato <bartolomeo.stellato@gmail.com>
Date: Thu, 2 Jan 2020 17:00:40 +0100
Subject: [PATCH 13/19] Woring ldl with scikit-sparse

---
 module/interface.py             | 21 +++++++++++++++++++--
 module/tests/derivative_test.py |  4 +++-
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/module/interface.py b/module/interface.py
index e8a0f472..6bf955da 100644
--- a/module/interface.py
+++ b/module/interface.py
@@ -10,6 +10,7 @@
 from platform import system
 import osqp.codegen as cg
 import osqp.utils as utils
+from sksparse.cholmod import cholesky
 import sys
 
 
@@ -354,19 +355,28 @@ def adjoint_derivative(self, dx=None, dy_u=None, dy_l=None,
             if 'M' not in self._derivative_cache:
 
                     M = spa.bmat([
-                        [P, A.T, -A.T],
+                        [P, A.T @ spa.diags(y_u), -A.T @ spa.diags(y_l)],
                         [spa.diags(y_u) @ A, spa.diags(A @ x - u), None],
                         [-spa.diags(y_l) @ A, None, spa.diags(l - A @ x)]
-                    ])
+                    ]).tocsc()
                     self._derivative_cache['M'] = M
 
             # Prepare rhs
             d_sol = np.concatenate([dx, dy_u, dy_l])
 
+            # Normalized version
+            d_sol = np.concatenate([dx,
+                                    spa.diags(y_u) @ dy_u,
+                                    spa.diags(y_l) @ dy_l])
+
+
             if diff_mode == 'lsqr':
                 r_sol = - sla.lsqr(self._derivative_cache['M'].T, d_sol)[0]
             elif diff_mode == 'lu':
                 r_sol = - sla.spsolve(self._derivative_cache['M'].T, d_sol)
+            elif diff_mode == 'ldl':
+                factor = cholesky(self._derivative_cache['M'].T.tocsc())
+                r_sol = - factor(d_sol)
             elif diff_mode == 'qr':
                 # TODO: Add something like https://github.com/oxfordcontrol/osqpth/pull/5
                 # but use slack variables too
@@ -376,6 +386,13 @@ def adjoint_derivative(self, dx=None, dy_u=None, dy_l=None,
 
             r_x, r_yu, r_yl = np.split(r_sol, [n, n+m])
 
+            #  print("r_yu = ", r_yu)
+            #  print("r_yl = ", r_yl)
+
+            #  # Restore normalization
+            #  r_yu = spa.diags(y_u) @ r_yu
+            #  r_yl = spa.diags(y_l) @ r_yl
+
             # Extract derivatives for the constraints
             rows, cols = A_idx
             dA_vals = (y_u[rows] - y_l[rows]) * r_x[cols] + \
diff --git a/module/tests/derivative_test.py b/module/tests/derivative_test.py
index 608cd01a..c4437831 100644
--- a/module/tests/derivative_test.py
+++ b/module/tests/derivative_test.py
@@ -13,7 +13,9 @@
 
 diff_modes = [
               'lsqr',
-              'lu_active'
+              'lu_active',
+              'lu',
+              'ldl'
               ]
 
 ATOL = 1e-3

From 6f3e6f58bfc6794365feee6181865a4c23d816cf Mon Sep 17 00:00:00 2001
From: Bartolomeo Stellato <bartolomeo.stellato@gmail.com>
Date: Wed, 25 Mar 2020 18:39:34 -0400
Subject: [PATCH 14/19] Added qdldl-based derivatives

---
 .gitignore                      |   1 +
 .travis.yml                     |   1 +
 appveyor.yml                    |   1 +
 module/interface.py             | 226 +++++++++++++++++++-------------
 module/tests/derivative_test.py |  12 +-
 requirements.txt                |   1 +
 6 files changed, 142 insertions(+), 100 deletions(-)

diff --git a/.gitignore b/.gitignore
index b20ea41d..74b8cea3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -121,3 +121,4 @@ ENV/
 
 .gdb_history
 .cquery_cached_index/
+pip-wheel-metadata/
diff --git a/.travis.yml b/.travis.yml
index 309f2a51..c986c2a1 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -53,6 +53,7 @@ install:
   - source activate testenv
   - conda install conda conda-verify conda-build anaconda-client twine
   - conda info -a
+  - pip install qdldl
   # Add MKL shared libraries (for MKL pardiso) to the path
   - |
     MKL_PARDISO_LIB_DIR=`python -c 'import numpy.distutils.system_info as sysinfo; print(sysinfo.get_info("mkl")["library_dirs"][0])'`
diff --git a/appveyor.yml b/appveyor.yml
index fc75b454..f939f0b2 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -40,6 +40,7 @@ install:
   - conda create -n testenv python=%PYTHON_VERSION% pytest numpy cmake conda conda-verify conda-build anaconda-client twine
   - activate testenv
   - conda info -a
+  - pip install qdldl
 
 test_script:
   - conda build --python %PYTHON_VERSION% conda-recipe --output-folder conda-bld
diff --git a/module/interface.py b/module/interface.py
index 6bf955da..7ef4e8dc 100644
--- a/module/interface.py
+++ b/module/interface.py
@@ -12,6 +12,7 @@
 import osqp.utils as utils
 from sksparse.cholmod import cholesky
 import sys
+import qdldl
 
 
 class OSQP(object):
@@ -315,7 +316,7 @@ def codegen(self, folder, project_type='', parameters='vectors',
                    embedded, force_rewrite, float_flag, long_flag)
 
     def adjoint_derivative(self, dx=None, dy_u=None, dy_l=None,
-            P_idx=None, A_idx=None, diff_mode='lsqr'):
+            P_idx=None, A_idx=None):
         """
         Compute adjoint derivative after solve.
         """
@@ -350,96 +351,36 @@ def adjoint_derivative(self, dx=None, dy_u=None, dy_l=None,
         if dy_l is None:
             dy_l = np.zeros(m)
 
-        if 'active' not in diff_mode:
-            # Make sure M matrix exists
-            if 'M' not in self._derivative_cache:
-
-                    M = spa.bmat([
-                        [P, A.T @ spa.diags(y_u), -A.T @ spa.diags(y_l)],
-                        [spa.diags(y_u) @ A, spa.diags(A @ x - u), None],
-                        [-spa.diags(y_l) @ A, None, spa.diags(l - A @ x)]
-                    ]).tocsc()
-                    self._derivative_cache['M'] = M
-
-            # Prepare rhs
-            d_sol = np.concatenate([dx, dy_u, dy_l])
-
-            # Normalized version
-            d_sol = np.concatenate([dx,
-                                    spa.diags(y_u) @ dy_u,
-                                    spa.diags(y_l) @ dy_l])
-
-
-            if diff_mode == 'lsqr':
-                r_sol = - sla.lsqr(self._derivative_cache['M'].T, d_sol)[0]
-            elif diff_mode == 'lu':
-                r_sol = - sla.spsolve(self._derivative_cache['M'].T, d_sol)
-            elif diff_mode == 'ldl':
-                factor = cholesky(self._derivative_cache['M'].T.tocsc())
-                r_sol = - factor(d_sol)
-            elif diff_mode == 'qr':
-                # TODO: Add something like https://github.com/oxfordcontrol/osqpth/pull/5
-                # but use slack variables too
-                raise NotImplementedError
-            else:
-                raise RuntimeError("Unrecognized differentiation mode: {}".format(diff_mode))
-
-            r_x, r_yu, r_yl = np.split(r_sol, [n, n+m])
-
-            #  print("r_yu = ", r_yu)
-            #  print("r_yl = ", r_yl)
-
-            #  # Restore normalization
-            #  r_yu = spa.diags(y_u) @ r_yu
-            #  r_yl = spa.diags(y_l) @ r_yl
-
-            # Extract derivatives for the constraints
-            rows, cols = A_idx
-            dA_vals = (y_u[rows] - y_l[rows]) * r_x[cols] + \
-                (y_u[rows] * r_yu[rows] - y_l[rows] * r_yl[rows]) * x[cols]
-            dA = spa.csc_matrix((dA_vals, (rows, cols)), shape=A.shape)
-            du = - y_u * r_yu
-            dl = y_l * r_yl
-
-        elif diff_mode == 'lu_active':
-            # Taken from https://github.com/oxfordcontrol/osqp-python/blob/0363d028b2321017049360d2eb3c0cf206028c43/modulepurepy/_osqp.py#L1717
-            # Guess which linear constraints are lower-active, upper-active, free
-            ind_low = np.where(z - l < - y)[0]
-            ind_upp = np.where(u - z < y)[0]
-            n_low = len(ind_low)
-            n_upp = len(ind_upp)
-
-            # Form A_red from the assumed active constraints
-            A_red = spa.vstack([A[ind_low], A[ind_upp]])
-
-            # Form reduced dy
-            dy_red = np.concatenate([dy_l, dy_u])[np.concatenate([ind_low, ind_upp])]
-
-            # Form KKT linear system
-            KKT = spa.vstack([spa.hstack([P, A_red.T]),
-                            spa.hstack([A_red, spa.csc_matrix((n_low + n_upp, n_low + n_upp))])], format='csc')
-            rhs = - np.hstack([dx, dy_red])
-
-            # Get solution
-            r_sol = sla.spsolve(KKT, rhs)
-            r_x, r_yl, r_yu =  np.split(r_sol, [n, n + n_low])
-
-            r_y = np.zeros(m)
-            r_y[ind_low] = r_yl
-            r_y[ind_upp] = r_yu
-
-
-            # Extract derivatives for the constraints A, l, u
-            dl = - np.hstack([r_yl[np.where(ind_low == j)[0]] if j in ind_low else 0
-                for j in range(m)])
-            du = - np.hstack([r_yu[np.where(ind_upp == j)[0]] if j in ind_upp else 0
-                for j in range(m)])
-            rows, cols = A_idx
-            dA_vals = y[rows] * r_x[cols] + r_y[rows] * x[cols]
-            dA = spa.csc_matrix((dA_vals, (rows, cols)), shape=A.shape)
-
-        else:
-            raise RuntimeError("Unrecognized differentiation mode: {}".format(diff_mode))
+        # Make sure M matrix exists
+        if 'M' not in self._derivative_cache:
+            # Multiply second-third column by diag(y_u) and diag(y_l)
+            # to make the matrix symmetric
+            M = spa.bmat([
+                [P, A.T @ spa.diags(y_u), -A.T @ spa.diags(y_l)],
+                [spa.diags(y_u) @ A, spa.diags(A @ x - u), None],
+                [-spa.diags(y_l) @ A, None, spa.diags(l - A @ x)]
+            ]).tocsc()
+            self._derivative_cache['M'] = M
+
+        # Prepare rhs
+        d_sol = np.concatenate([dx, dy_u, dy_l])
+
+        # Normalized version to compensate for diag(y_u) and diag(y_l) above
+        d_sol = np.concatenate([dx,
+                                spa.diags(y_u) @ dy_u,
+                                spa.diags(y_l) @ dy_l])
+
+        r_sol = - qdldl.Solver(self._derivative_cache['M']).solve(d_sol)
+
+        r_x, r_yu, r_yl = np.split(r_sol, [n, n+m])
+
+        # Extract derivatives for the constraints
+        rows, cols = A_idx
+        dA_vals = (y_u[rows] - y_l[rows]) * r_x[cols] + \
+            (y_u[rows] * r_yu[rows] - y_l[rows] * r_yl[rows]) * x[cols]
+        dA = spa.csc_matrix((dA_vals, (rows, cols)), shape=A.shape)
+        du = - y_u * r_yu
+        dl = y_l * r_yl
 
         # Extract derivatives for the cost (P, q)
         rows, cols = P_idx
@@ -448,3 +389,106 @@ def adjoint_derivative(self, dx=None, dy_u=None, dy_l=None,
         dq = r_x
 
         return (dP, dq, dA, dl, du)
+
+        #  if 'active' not in diff_mode:
+        #      # Make sure M matrix exists
+        #      if 'M' not in self._derivative_cache:
+        #
+        #          # Multiply second-third column by diag(y_u) and diag(y_l)
+        #          # to make the matrix symmetric
+        #          M = spa.bmat([
+        #              [P, A.T @ spa.diags(y_u), -A.T @ spa.diags(y_l)],
+        #              [spa.diags(y_u) @ A, spa.diags(A @ x - u), None],
+        #              [-spa.diags(y_l) @ A, None, spa.diags(l - A @ x)]
+        #          ]).tocsc()
+        #          self._derivative_cache['M'] = M
+        #
+        #      # Prepare rhs
+        #      d_sol = np.concatenate([dx, dy_u, dy_l])
+        #
+        #      # Normalized version to compensate for diag(y_u) and diag(y_l) above
+        #      d_sol = np.concatenate([dx,
+        #                              spa.diags(y_u) @ dy_u,
+        #                              spa.diags(y_l) @ dy_l])
+        #
+        #
+        #      if diff_mode == 'qdldl':
+        #          r_sol = - qdldl.Solver(self._derivative_cache['M']).solve(d_sol)
+        #      elif diff_mode == 'lsqr':
+        #          r_sol = - sla.lsqr(self._derivative_cache['M'].T, d_sol)[0]
+        #      elif diff_mode == 'lu':
+        #          r_sol = - sla.spsolve(self._derivative_cache['M'].T, d_sol)
+        #      elif diff_mode == 'ldl':
+        #          factor = cholesky(self._derivative_cache['M'].T.tocsc())
+        #          r_sol = - factor(d_sol)
+        #      elif diff_mode == 'qr':
+        #          # TODO: Add something like https://github.com/oxfordcontrol/osqpth/pull/5
+        #          # but use slack variables too
+        #          raise NotImplementedError
+        #      else:
+        #          raise RuntimeError("Unrecognized differentiation mode: {}".format(diff_mode))
+        #
+        #      r_x, r_yu, r_yl = np.split(r_sol, [n, n+m])
+        #
+        #      #  print("r_yu = ", r_yu)
+        #      #  print("r_yl = ", r_yl)
+        #
+        #      #  # Restore normalization
+        #      #  r_yu = spa.diags(y_u) @ r_yu
+        #      #  r_yl = spa.diags(y_l) @ r_yl
+        #
+        #      # Extract derivatives for the constraints
+        #      rows, cols = A_idx
+        #      dA_vals = (y_u[rows] - y_l[rows]) * r_x[cols] + \
+        #          (y_u[rows] * r_yu[rows] - y_l[rows] * r_yl[rows]) * x[cols]
+        #      dA = spa.csc_matrix((dA_vals, (rows, cols)), shape=A.shape)
+        #      du = - y_u * r_yu
+        #      dl = y_l * r_yl
+        #
+        #  elif diff_mode == 'lu_active':
+        #      # Taken from https://github.com/oxfordcontrol/osqp-python/blob/0363d028b2321017049360d2eb3c0cf206028c43/modulepurepy/_osqp.py#L1717
+        #      # Guess which linear constraints are lower-active, upper-active, free
+        #      ind_low = np.where(z - l < - y)[0]
+        #      ind_upp = np.where(u - z < y)[0]
+        #      n_low = len(ind_low)
+        #      n_upp = len(ind_upp)
+        #
+        #      # Form A_red from the assumed active constraints
+        #      A_red = spa.vstack([A[ind_low], A[ind_upp]])
+        #
+        #      # Form reduced dy
+        #      dy_red = np.concatenate([dy_l, dy_u])[np.concatenate([ind_low, ind_upp])]
+        #
+        #      # Form KKT linear system
+        #      KKT = spa.vstack([spa.hstack([P, A_red.T]),
+        #                      spa.hstack([A_red, spa.csc_matrix((n_low + n_upp, n_low + n_upp))])], format='csc')
+        #      rhs = - np.hstack([dx, dy_red])
+        #
+        #      # Get solution
+        #      r_sol = sla.spsolve(KKT, rhs)
+        #      r_x, r_yl, r_yu =  np.split(r_sol, [n, n + n_low])
+        #
+        #      r_y = np.zeros(m)
+        #      r_y[ind_low] = r_yl
+        #      r_y[ind_upp] = r_yu
+        #
+        #
+        #      # Extract derivatives for the constraints A, l, u
+        #      dl = - np.hstack([r_yl[np.where(ind_low == j)[0]] if j in ind_low else 0
+        #          for j in range(m)])
+        #      du = - np.hstack([r_yu[np.where(ind_upp == j)[0]] if j in ind_upp else 0
+        #          for j in range(m)])
+        #      rows, cols = A_idx
+        #      dA_vals = y[rows] * r_x[cols] + r_y[rows] * x[cols]
+        #      dA = spa.csc_matrix((dA_vals, (rows, cols)), shape=A.shape)
+        #
+        #  else:
+        #      raise RuntimeError("Unrecognized differentiation mode: {}".format(diff_mode))
+        #
+        #  # Extract derivatives for the cost (P, q)
+        #  rows, cols = P_idx
+        #  dP_vals = .5 * (r_x[rows] * x[cols] + r_x[cols] * x[rows])
+        #  dP = spa.csc_matrix((dP_vals, P_idx), shape=P.shape)
+        #  dq = r_x
+        #
+        #  return (dP, dq, dA, dl, du)
diff --git a/module/tests/derivative_test.py b/module/tests/derivative_test.py
index c4437831..13069637 100644
--- a/module/tests/derivative_test.py
+++ b/module/tests/derivative_test.py
@@ -11,17 +11,12 @@
 import unittest
 
 
-diff_modes = [
-              'lsqr',
-              'lu_active',
-              'lu',
-              'ldl'
-              ]
+npr.seed(1)
 
 ATOL = 1e-3
 RTOL = 1e-3
-eps_abs = 1e-08
-eps_rel = 1e-08
+eps_abs = 1e-10
+eps_rel = 1e-10
 max_iter = 10000
 grad_precision = 1e-05
 
@@ -29,7 +24,6 @@
 class derivative_tests(unittest.TestCase):
 
     def get_prob(self, n=10, m=3, P_scale=1., A_scale=1.):
-        npr.seed(1)
         L = np.random.randn(n, n)
         P = sparse.csc_matrix(L.dot(L.T) + 5. * sparse.eye(n))
         x_0 = npr.randn(n)
diff --git a/requirements.txt b/requirements.txt
index 4059a76e..a78b5571 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
 numpy >= 1.7
 scipy >= 0.13.2
+qdldl

From 4e679cbc920640d4b767a4f5b5f6cba7171d5942 Mon Sep 17 00:00:00 2001
From: Bartolomeo Stellato <bartolomeo.stellato@gmail.com>
Date: Fri, 27 Mar 2020 10:04:41 -0400
Subject: [PATCH 15/19] remove sksparse

---
 module/interface.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/module/interface.py b/module/interface.py
index 7ef4e8dc..62b92683 100644
--- a/module/interface.py
+++ b/module/interface.py
@@ -10,7 +10,6 @@
 from platform import system
 import osqp.codegen as cg
 import osqp.utils as utils
-from sksparse.cholmod import cholesky
 import sys
 import qdldl
 

From f49b62176fa70329676a4e5593a218ea3cab6025 Mon Sep 17 00:00:00 2001
From: Bartolomeo Stellato <bartolomeo.stellato@gmail.com>
Date: Tue, 28 Jul 2020 19:36:20 -0400
Subject: [PATCH 16/19] No need to install pip qdldl anymore

---
 .travis.yml  | 1 -
 appveyor.yml | 1 -
 2 files changed, 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index c986c2a1..309f2a51 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -53,7 +53,6 @@ install:
   - source activate testenv
   - conda install conda conda-verify conda-build anaconda-client twine
   - conda info -a
-  - pip install qdldl
   # Add MKL shared libraries (for MKL pardiso) to the path
   - |
     MKL_PARDISO_LIB_DIR=`python -c 'import numpy.distutils.system_info as sysinfo; print(sysinfo.get_info("mkl")["library_dirs"][0])'`
diff --git a/appveyor.yml b/appveyor.yml
index f939f0b2..fc75b454 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -40,7 +40,6 @@ install:
   - conda create -n testenv python=%PYTHON_VERSION% pytest numpy cmake conda conda-verify conda-build anaconda-client twine
   - activate testenv
   - conda info -a
-  - pip install qdldl
 
 test_script:
   - conda build --python %PYTHON_VERSION% conda-recipe --output-folder conda-bld

From efc24a1ba8883b84fb722e64cbb8cf11444b286e Mon Sep 17 00:00:00 2001
From: Bartolomeo Stellato <bartolomeo.stellato@gmail.com>
Date: Tue, 28 Jul 2020 19:53:38 -0400
Subject: [PATCH 17/19] minor setup.py fixes

---
 conda-recipe/meta.yaml |  3 ++-
 module/interface.py    | 15 +++++++--------
 setup.py               | 43 +++++++++++++++++++++++++++++-------------
 3 files changed, 39 insertions(+), 22 deletions(-)

diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
index dfb1be8d..4c06a113 100644
--- a/conda-recipe/meta.yaml
+++ b/conda-recipe/meta.yaml
@@ -23,6 +23,7 @@ requirements:
     - python {{ python }}
     - numpy {{ numpy }}
     - pip
+    - qdldl
   build:
     - {{ compiler('c') }}
     - cmake
@@ -30,7 +31,7 @@ requirements:
     - python
     - {{ pin_compatible('numpy') }}
     - scipy >=0.13.2
-    - future
+    - qdldl
 
 test:
   requires:
diff --git a/module/interface.py b/module/interface.py
index 62b92683..d5ef56f1 100644
--- a/module/interface.py
+++ b/module/interface.py
@@ -315,7 +315,7 @@ def codegen(self, folder, project_type='', parameters='vectors',
                    embedded, force_rewrite, float_flag, long_flag)
 
     def adjoint_derivative(self, dx=None, dy_u=None, dy_l=None,
-            P_idx=None, A_idx=None):
+                           P_idx=None, A_idx=None):
         """
         Compute adjoint derivative after solve.
         """
@@ -327,15 +327,17 @@ def adjoint_derivative(self, dx=None, dy_u=None, dy_l=None,
         try:
             results = self._derivative_cache['results']
         except KeyError:
-            raise ValueError("Problem has not been solved. You cannot take derivatives. Please call the solve function.")
+            raise ValueError("Problem has not been solved. "
+                             "You cannot take derivatives. "
+                             "Please call the solve function.")
 
         if results.info.status != "solved":
-            raise ValueError("Problem has not been solved to optimality. You cannot take derivatives")
+            raise ValueError("Problem has not been solved to optimality. "
+                             "You cannot take derivatives")
 
         m, n = A.shape
         x = results.x
         y = results.y
-        z = A.dot(x)
         y_u = np.maximum(y, 0)
         y_l = -np.minimum(y, 0)
 
@@ -361,10 +363,7 @@ def adjoint_derivative(self, dx=None, dy_u=None, dy_l=None,
             ]).tocsc()
             self._derivative_cache['M'] = M
 
-        # Prepare rhs
-        d_sol = np.concatenate([dx, dy_u, dy_l])
-
-        # Normalized version to compensate for diag(y_u) and diag(y_l) above
+        # Normalized rhs to compensate for diag(y_u) and diag(y_l) above
         d_sol = np.concatenate([dx,
                                 spa.diags(y_u) @ dy_u,
                                 spa.diags(y_l) @ dy_l])
diff --git a/setup.py b/setup.py
index 51a9688b..e4871dca 100644
--- a/setup.py
+++ b/setup.py
@@ -8,7 +8,6 @@
 from shutil import copyfile, copy
 from subprocess import call, check_output
 
-import numpy
 from setuptools import setup, Extension
 from setuptools.command.build_ext import build_ext
 
@@ -39,7 +38,7 @@
 
 # necessary to remove OSQP args before passing to setup:
 if OSQP_ARG_MARK in sys.argv:
-  sys.argv = sys.argv[0:sys.argv.index(OSQP_ARG_MARK)]
+    sys.argv = sys.argv[0:sys.argv.index(OSQP_ARG_MARK)]
 
 # Add parameters to cmake_args and define_macros
 cmake_args = ["-DUNITTESTS=OFF"]
@@ -72,7 +71,8 @@
           "Remove long integers for numpy compatibility. See:\n" +
           " - https://github.com/numpy/numpy/issues/5906\n" +
           " - https://github.com/ContinuumIO/anaconda-issues/issues/3823\n" +
-          "You can reenable long integers by passing: --osqp --long argument.\n")
+          "You can reenable long integers by passing: "
+          "--osqp --long argument.\n")
     cmake_args += ['-DDLONG=OFF']
 
 # Pass python to compiler launched from setup.py
@@ -88,7 +88,16 @@
 osqp_build_dir = os.path.join(osqp_dir, 'build')
 qdldl_dir = os.path.join(osqp_dir, 'lin_sys', 'direct', 'qdldl')
 
+
 # Interface files
+class get_numpy_include(object):
+    """Returns Numpy's include path with lazy import.
+    """
+    def __str__(self):
+        import numpy
+        return numpy.get_include()
+
+
 include_dirs = [
     os.path.join(osqp_dir, 'include'),      # osqp.h
     os.path.join(qdldl_dir),                # qdldl_interface header to
@@ -96,7 +105,7 @@
     os.path.join(qdldl_dir, "qdldl_sources",
                             "include"),     # qdldl includes for file types
     os.path.join('extension', 'include'),   # auxiliary .h files
-    numpy.get_include()]                    # numpy header files
+    get_numpy_include()]                    # numpy header files
 
 sources_files = glob(os.path.join('extension', 'src', '*.c'))
 
@@ -145,7 +154,8 @@
            for f in os.listdir(qdldl_dir)
            if f.endswith('.c')]
 cfiles += [os.path.join(qdldl_dir, 'qdldl_sources', 'src', f)
-           for f in os.listdir(os.path.join(qdldl_dir, 'qdldl_sources', 'src'))]
+           for f in os.listdir(os.path.join(qdldl_dir, 'qdldl_sources',
+                                            'src'))]
 osqp_codegen_sources_c_dir = os.path.join(osqp_codegen_sources_dir, 'src')
 if os.path.exists(osqp_codegen_sources_c_dir):  # Create destination directory
     sh.rmtree(osqp_codegen_sources_c_dir)
@@ -156,14 +166,16 @@
 # List with OSQP H files
 hfiles = [os.path.join(osqp_dir, 'include', f)
           for f in os.listdir(os.path.join(osqp_dir, 'include'))
-          if f.endswith('.h') and f not in ('qdldl_types.h', 'osqp_configure.h',
+          if f.endswith('.h') and f not in ('qdldl_types.h',
+                                            'osqp_configure.h',
                                             'cs.h', 'ctrlc.h', 'polish.h',
                                             'lin_sys.h')]
 hfiles += [os.path.join(qdldl_dir, f)
            for f in os.listdir(qdldl_dir)
            if f.endswith('.h')]
 hfiles += [os.path.join(qdldl_dir, 'qdldl_sources', 'include', f)
-           for f in os.listdir(os.path.join(qdldl_dir, 'qdldl_sources', 'include'))
+           for f in os.listdir(os.path.join(qdldl_dir, 'qdldl_sources',
+                                            'include'))
            if f.endswith('.h')]
 osqp_codegen_sources_h_dir = os.path.join(osqp_codegen_sources_dir, 'include')
 if os.path.exists(osqp_codegen_sources_h_dir):  # Create destination directory
@@ -174,17 +186,21 @@
 
 # List with OSQP configure files
 configure_files = [os.path.join(osqp_dir, 'configure', 'osqp_configure.h.in'),
-                   os.path.join(qdldl_dir, 'qdldl_sources', 'configure', 'qdldl_types.h.in')]
-osqp_codegen_sources_configure_dir = os.path.join(osqp_codegen_sources_dir, 'configure')
-if os.path.exists(osqp_codegen_sources_configure_dir):  # Create destination directory
+                   os.path.join(qdldl_dir, 'qdldl_sources', 'configure',
+                                'qdldl_types.h.in')]
+osqp_codegen_sources_configure_dir = os.path.join(osqp_codegen_sources_dir,
+                                                  'configure')
+if os.path.exists(osqp_codegen_sources_configure_dir):
     sh.rmtree(osqp_codegen_sources_configure_dir)
 os.makedirs(osqp_codegen_sources_configure_dir)
 for f in configure_files:  # Copy configure files
     copy(f, osqp_codegen_sources_configure_dir)
 
 # Copy cmake files
-copy(os.path.join(osqp_dir, 'src',     'CMakeLists.txt'), osqp_codegen_sources_c_dir)
-copy(os.path.join(osqp_dir, 'include', 'CMakeLists.txt'), osqp_codegen_sources_h_dir)
+copy(os.path.join(osqp_dir, 'src',     'CMakeLists.txt'),
+     osqp_codegen_sources_c_dir)
+copy(os.path.join(osqp_dir, 'include', 'CMakeLists.txt'),
+     osqp_codegen_sources_h_dir)
 
 
 class build_ext_osqp(build_ext):
@@ -239,6 +255,7 @@ def readme():
     with open('README.rst') as f:
         return f.read()
 
+
 with open('requirements.txt') as f:
     requirements = f.read().splitlines()
 
@@ -251,7 +268,7 @@ def readme():
       package_dir={'osqp': 'module',
                    'osqppurepy': 'modulepurepy'},
       include_package_data=True,  # Include package data from MANIFEST.in
-      setup_requires=["numpy >= 1.7"],
+      setup_requires=["numpy >= 1.7", "qdldl"],
       install_requires=requirements,
       license='Apache 2.0',
       url="https://osqp.org/",

From 129a2c17aff7ac351a9b2b9040c617862b050216 Mon Sep 17 00:00:00 2001
From: Bartolomeo Stellato <bartolomeo.stellato@gmail.com>
Date: Fri, 7 Aug 2020 00:20:19 -0400
Subject: [PATCH 18/19] Added symmetrization + iterative refinement

---
 .gitignore                      |   1 +
 conda-recipe/meta.yaml          |   4 +-
 module/interface.py             | 153 +++++----------------
 module/tests/derivative_test.py | 227 +++++++++++++++-----------------
 4 files changed, 146 insertions(+), 239 deletions(-)

diff --git a/.gitignore b/.gitignore
index 74b8cea3..f536e2d9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -122,3 +122,4 @@ ENV/
 .gdb_history
 .cquery_cached_index/
 pip-wheel-metadata/
+scratch/
diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
index 4c06a113..5ece40c0 100644
--- a/conda-recipe/meta.yaml
+++ b/conda-recipe/meta.yaml
@@ -23,7 +23,7 @@ requirements:
     - python {{ python }}
     - numpy {{ numpy }}
     - pip
-    - qdldl
+    - qdldl-python
   build:
     - {{ compiler('c') }}
     - cmake
@@ -31,7 +31,7 @@ requirements:
     - python
     - {{ pin_compatible('numpy') }}
     - scipy >=0.13.2
-    - qdldl
+    - qdldl-python
 
 test:
   requires:
diff --git a/module/interface.py b/module/interface.py
index d5ef56f1..cf0cdde0 100644
--- a/module/interface.py
+++ b/module/interface.py
@@ -6,7 +6,7 @@
 import osqp._osqp as _osqp  # Internal low level module
 import numpy as np
 import scipy.sparse as spa
-import scipy.sparse.linalg as sla
+from warnings import warn
 from platform import system
 import osqp.codegen as cg
 import osqp.utils as utils
@@ -136,7 +136,6 @@ def update(self, q=None, l=None, u=None,
         if "results" in self._derivative_cache.keys():
             del self._derivative_cache["results"]
 
-
     def update_settings(self, **kwargs):
         """
         Update OSQP solver settings
@@ -314,6 +313,25 @@ def codegen(self, folder, project_type='', parameters='vectors',
         cg.codegen(work, folder, python_ext_name, project_type,
                    embedded, force_rewrite, float_flag, long_flag)
 
+    def derivative_iterative_refinement(self, rhs, max_iter=20, tol=1e-12):
+        M = self._derivative_cache['M']
+
+        # Prefactor
+        solver = self._derivative_cache['solver']
+
+        sol = solver.solve(rhs)
+        for k in range(max_iter):
+            delta_sol = solver.solve(rhs - M @ sol)
+            sol = sol + delta_sol
+
+            if np.linalg.norm(M @ sol - rhs) < tol:
+                break
+
+        if k == max_iter - 1:
+            warn("max_iter iterative refinement reached.")
+
+        return sol
+
     def adjoint_derivative(self, dx=None, dy_u=None, dy_l=None,
                            P_idx=None, A_idx=None):
         """
@@ -354,31 +372,33 @@ def adjoint_derivative(self, dx=None, dy_u=None, dy_l=None,
 
         # Make sure M matrix exists
         if 'M' not in self._derivative_cache:
-            # Multiply second-third column by diag(y_u) and diag(y_l)
+            # Multiply second-third row by diag(y_u)^-1 and diag(y_l)^-1
             # to make the matrix symmetric
+            inv_dia_y_u = spa.diags(np.reciprocal(y_u + 1e-20))
+            inv_dia_y_l = spa.diags(np.reciprocal(y_l + 1e-20))
             M = spa.bmat([
-                [P, A.T @ spa.diags(y_u), -A.T @ spa.diags(y_l)],
-                [spa.diags(y_u) @ A, spa.diags(A @ x - u), None],
-                [-spa.diags(y_l) @ A, None, spa.diags(l - A @ x)]
-            ]).tocsc()
+                [P,            A.T,                  -A.T],
+                [A, spa.diags(A @ x - u) @ inv_dia_y_u, None],
+                [-A, None, spa.diags(l - A @ x) @ inv_dia_y_l]
+            ], format='csc')
+            delta = spa.bmat([[0.001 * spa.eye(n), None],
+                              [None, -0.001 * spa.eye(2 * m)]], format='csc')
             self._derivative_cache['M'] = M
+            self._derivative_cache['solver'] = qdldl.Solver(M + delta)
 
-        # Normalized rhs to compensate for diag(y_u) and diag(y_l) above
-        d_sol = np.concatenate([dx,
-                                spa.diags(y_u) @ dy_u,
-                                spa.diags(y_l) @ dy_l])
+        rhs = - np.concatenate([dx, dy_u, dy_l])
 
-        r_sol = - qdldl.Solver(self._derivative_cache['M']).solve(d_sol)
+        r_sol = self.derivative_iterative_refinement(rhs)
 
         r_x, r_yu, r_yl = np.split(r_sol, [n, n+m])
 
         # Extract derivatives for the constraints
         rows, cols = A_idx
         dA_vals = (y_u[rows] - y_l[rows]) * r_x[cols] + \
-            (y_u[rows] * r_yu[rows] - y_l[rows] * r_yl[rows]) * x[cols]
+            (r_yu[rows] - r_yl[rows]) * x[cols]
         dA = spa.csc_matrix((dA_vals, (rows, cols)), shape=A.shape)
-        du = - y_u * r_yu
-        dl = y_l * r_yl
+        du = - r_yu
+        dl = r_yl
 
         # Extract derivatives for the cost (P, q)
         rows, cols = P_idx
@@ -387,106 +407,3 @@ def adjoint_derivative(self, dx=None, dy_u=None, dy_l=None,
         dq = r_x
 
         return (dP, dq, dA, dl, du)
-
-        #  if 'active' not in diff_mode:
-        #      # Make sure M matrix exists
-        #      if 'M' not in self._derivative_cache:
-        #
-        #          # Multiply second-third column by diag(y_u) and diag(y_l)
-        #          # to make the matrix symmetric
-        #          M = spa.bmat([
-        #              [P, A.T @ spa.diags(y_u), -A.T @ spa.diags(y_l)],
-        #              [spa.diags(y_u) @ A, spa.diags(A @ x - u), None],
-        #              [-spa.diags(y_l) @ A, None, spa.diags(l - A @ x)]
-        #          ]).tocsc()
-        #          self._derivative_cache['M'] = M
-        #
-        #      # Prepare rhs
-        #      d_sol = np.concatenate([dx, dy_u, dy_l])
-        #
-        #      # Normalized version to compensate for diag(y_u) and diag(y_l) above
-        #      d_sol = np.concatenate([dx,
-        #                              spa.diags(y_u) @ dy_u,
-        #                              spa.diags(y_l) @ dy_l])
-        #
-        #
-        #      if diff_mode == 'qdldl':
-        #          r_sol = - qdldl.Solver(self._derivative_cache['M']).solve(d_sol)
-        #      elif diff_mode == 'lsqr':
-        #          r_sol = - sla.lsqr(self._derivative_cache['M'].T, d_sol)[0]
-        #      elif diff_mode == 'lu':
-        #          r_sol = - sla.spsolve(self._derivative_cache['M'].T, d_sol)
-        #      elif diff_mode == 'ldl':
-        #          factor = cholesky(self._derivative_cache['M'].T.tocsc())
-        #          r_sol = - factor(d_sol)
-        #      elif diff_mode == 'qr':
-        #          # TODO: Add something like https://github.com/oxfordcontrol/osqpth/pull/5
-        #          # but use slack variables too
-        #          raise NotImplementedError
-        #      else:
-        #          raise RuntimeError("Unrecognized differentiation mode: {}".format(diff_mode))
-        #
-        #      r_x, r_yu, r_yl = np.split(r_sol, [n, n+m])
-        #
-        #      #  print("r_yu = ", r_yu)
-        #      #  print("r_yl = ", r_yl)
-        #
-        #      #  # Restore normalization
-        #      #  r_yu = spa.diags(y_u) @ r_yu
-        #      #  r_yl = spa.diags(y_l) @ r_yl
-        #
-        #      # Extract derivatives for the constraints
-        #      rows, cols = A_idx
-        #      dA_vals = (y_u[rows] - y_l[rows]) * r_x[cols] + \
-        #          (y_u[rows] * r_yu[rows] - y_l[rows] * r_yl[rows]) * x[cols]
-        #      dA = spa.csc_matrix((dA_vals, (rows, cols)), shape=A.shape)
-        #      du = - y_u * r_yu
-        #      dl = y_l * r_yl
-        #
-        #  elif diff_mode == 'lu_active':
-        #      # Taken from https://github.com/oxfordcontrol/osqp-python/blob/0363d028b2321017049360d2eb3c0cf206028c43/modulepurepy/_osqp.py#L1717
-        #      # Guess which linear constraints are lower-active, upper-active, free
-        #      ind_low = np.where(z - l < - y)[0]
-        #      ind_upp = np.where(u - z < y)[0]
-        #      n_low = len(ind_low)
-        #      n_upp = len(ind_upp)
-        #
-        #      # Form A_red from the assumed active constraints
-        #      A_red = spa.vstack([A[ind_low], A[ind_upp]])
-        #
-        #      # Form reduced dy
-        #      dy_red = np.concatenate([dy_l, dy_u])[np.concatenate([ind_low, ind_upp])]
-        #
-        #      # Form KKT linear system
-        #      KKT = spa.vstack([spa.hstack([P, A_red.T]),
-        #                      spa.hstack([A_red, spa.csc_matrix((n_low + n_upp, n_low + n_upp))])], format='csc')
-        #      rhs = - np.hstack([dx, dy_red])
-        #
-        #      # Get solution
-        #      r_sol = sla.spsolve(KKT, rhs)
-        #      r_x, r_yl, r_yu =  np.split(r_sol, [n, n + n_low])
-        #
-        #      r_y = np.zeros(m)
-        #      r_y[ind_low] = r_yl
-        #      r_y[ind_upp] = r_yu
-        #
-        #
-        #      # Extract derivatives for the constraints A, l, u
-        #      dl = - np.hstack([r_yl[np.where(ind_low == j)[0]] if j in ind_low else 0
-        #          for j in range(m)])
-        #      du = - np.hstack([r_yu[np.where(ind_upp == j)[0]] if j in ind_upp else 0
-        #          for j in range(m)])
-        #      rows, cols = A_idx
-        #      dA_vals = y[rows] * r_x[cols] + r_y[rows] * x[cols]
-        #      dA = spa.csc_matrix((dA_vals, (rows, cols)), shape=A.shape)
-        #
-        #  else:
-        #      raise RuntimeError("Unrecognized differentiation mode: {}".format(diff_mode))
-        #
-        #  # Extract derivatives for the cost (P, q)
-        #  rows, cols = P_idx
-        #  dP_vals = .5 * (r_x[rows] * x[cols] + r_x[cols] * x[rows])
-        #  dP = spa.csc_matrix((dP_vals, P_idx), shape=P.shape)
-        #  dq = r_x
-        #
-        #  return (dP, dq, dA, dl, du)
diff --git a/module/tests/derivative_test.py b/module/tests/derivative_test.py
index 13069637..2bff645c 100644
--- a/module/tests/derivative_test.py
+++ b/module/tests/derivative_test.py
@@ -4,7 +4,7 @@
 import numpy as np
 import numpy.random as npr
 from scipy import sparse
-from scipy.optimize import check_grad, approx_fprime
+from scipy.optimize import approx_fprime
 import numpy.testing as npt
 
 # Unit Test
@@ -18,7 +18,7 @@
 eps_abs = 1e-10
 eps_rel = 1e-10
 max_iter = 10000
-grad_precision = 1e-05
+grad_precision = 1e-08
 
 
 class derivative_tests(unittest.TestCase):
@@ -36,7 +36,7 @@ def get_prob(self, n=10, m=3, P_scale=1., A_scale=1.):
 
         return [P, q,  A, l, u, true_x]
 
-    def get_grads(self, P, q, A, l, u, true_x, diff_mode=diff_modes[0]):
+    def get_grads(self, P, q, A, l, u, true_x):
         # Get gradients by solving with osqp
         m = osqp.OSQP()
         m.setup(P, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, max_iter=max_iter, verbose=False)
@@ -44,173 +44,162 @@ def get_grads(self, P, q, A, l, u, true_x, diff_mode=diff_modes[0]):
         if results.info.status != "solved":
             raise ValueError("Problem not solved!")
         x = results.x
-        grads = m.adjoint_derivative(dx=x - true_x, diff_mode=diff_mode)
+        grads = m.adjoint_derivative(dx=x - true_x)
 
         return grads
 
     def test_dl_dq(self, verbose=True):
         n, m = 5, 5
-        for diff_mode in diff_modes:
 
-            prob = self.get_prob(n=n, m=m, P_scale=100., A_scale=100.)
-            P, q, A, l, u, true_x = prob
+        prob = self.get_prob(n=n, m=m, P_scale=100., A_scale=100.)
+        P, q, A, l, u, true_x = prob
 
-            def grad(q):
-                [dP, dq, dA, dl, du] = self.get_grads(P, q, A, l, u, true_x, diff_mode=diff_mode)
-                return dq
+        def grad(q):
+            [dP, dq, dA, dl, du] = self.get_grads(P, q, A, l, u, true_x)
+            return dq
 
-            def f(q):
-                m = osqp.OSQP()
-                m.setup(P, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, max_iter=max_iter, verbose=False)
-                res = m.solve()
-                if res.info.status != "solved":
-                    raise ValueError("Problem not solved!")
-                x_hat = res.x
+        def f(q):
+            m = osqp.OSQP()
+            m.setup(P, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, max_iter=max_iter, verbose=False)
+            res = m.solve()
+            if res.info.status != "solved":
+                raise ValueError("Problem not solved!")
+            x_hat = res.x
 
-                return 0.5 * np.sum(np.square(x_hat - true_x))
+            return 0.5 * np.sum(np.square(x_hat - true_x))
 
-            dq = grad(q)
-            dq_fd = approx_fprime(q, f, grad_precision)
+        dq = grad(q)
+        dq_fd = approx_fprime(q, f, grad_precision)
 
-            if verbose:
-                print('--- ' + diff_mode)
-                print('dq_fd: ', np.round(dq_fd, decimals=4))
-                print('dq: ', np.round(dq, decimals=4))
-
-            npt.assert_allclose(dq_fd, dq, rtol=RTOL, atol=ATOL)
+        if verbose:
+            print('dq_fd: ', np.round(dq_fd, decimals=4))
+            print('dq: ', np.round(dq, decimals=4))
 
+        npt.assert_allclose(dq_fd, dq, rtol=RTOL, atol=ATOL)
 
     def test_dl_dP(self, verbose=True):
         n, m = 3, 3
-        for diff_mode in diff_modes:
 
-            prob = self.get_prob(n=n, m=m, P_scale=100., A_scale=100.)
-            P, q, A, l, u, true_x = prob
-            P_idx = P.nonzero()
+        prob = self.get_prob(n=n, m=m, P_scale=100., A_scale=100.)
+        P, q, A, l, u, true_x = prob
+        P_idx = P.nonzero()
 
-            def grad(P_val):
-                P_qp = sparse.csc_matrix((P_val, P_idx), shape=P.shape)
-                [dP, dq, dA, dl, du] = self.get_grads(P_qp, q, A, l, u, true_x, diff_mode=diff_mode)
-                return dP
+        def grad(P_val):
+            P_qp = sparse.csc_matrix((P_val, P_idx), shape=P.shape)
+            [dP, dq, dA, dl, du] = self.get_grads(P_qp, q, A, l, u, true_x)
+            return dP
 
-            def f(P_val):
-                P_qp = sparse.csc_matrix((P_val, P_idx), shape=P.shape)
-                m = osqp.OSQP()
-                m.setup(P_qp, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, max_iter=max_iter, verbose=False)
-                res = m.solve()
-                if res.info.status != "solved":
-                    raise ValueError("Problem not solved!")
-                x_hat = res.x
+        def f(P_val):
+            P_qp = sparse.csc_matrix((P_val, P_idx), shape=P.shape)
+            m = osqp.OSQP()
+            m.setup(P_qp, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, max_iter=max_iter, verbose=False)
+            res = m.solve()
+            if res.info.status != "solved":
+                raise ValueError("Problem not solved!")
+            x_hat = res.x
 
-                return 0.5 * np.sum(np.square(x_hat - true_x))
+            return 0.5 * np.sum(np.square(x_hat - true_x))
 
-            dP = grad(P.data)
-            dP_fd_val = approx_fprime(P.data, f, grad_precision)
-            dP_fd = sparse.csc_matrix((dP_fd_val, P_idx), shape=P.shape)
-            dP_fd = (dP_fd + dP_fd.T)/2
+        dP = grad(P.data)
+        dP_fd_val = approx_fprime(P.data, f, grad_precision)
+        dP_fd = sparse.csc_matrix((dP_fd_val, P_idx), shape=P.shape)
+        dP_fd = (dP_fd + dP_fd.T)/2
 
-            if verbose:
-                print('--- ' + diff_mode)
-                print('dP_fd: ', np.round(dP_fd.data, decimals=4))
-                print('dA: ', np.round(dP.data, decimals=4))
+        if verbose:
+            print('dP_fd: ', np.round(dP_fd.data, decimals=4))
+            print('dA: ', np.round(dP.data, decimals=4))
 
-            npt.assert_allclose(dP.todense(), dP_fd.todense(), rtol=RTOL, atol=ATOL)
+        npt.assert_allclose(dP.todense(), dP_fd.todense(), rtol=RTOL, atol=ATOL)
 
 
     def test_dl_dA(self, verbose=True):
         n, m = 3, 3
-        for diff_mode in diff_modes:
 
-            prob = self.get_prob(n=n, m=m, P_scale=100., A_scale=100.)
-            P, q, A, l, u, true_x = prob
-            A_idx = A.nonzero()
+        prob = self.get_prob(n=n, m=m, P_scale=100., A_scale=100.)
+        P, q, A, l, u, true_x = prob
+        A_idx = A.nonzero()
 
-            def grad(A_val):
-                A_qp = sparse.csc_matrix((A_val, A_idx), shape=A.shape)
-                [dP, dq, dA, dl, du] = self.get_grads(P, q, A_qp, l, u, true_x, diff_mode=diff_mode)
-                return dA
+        def grad(A_val):
+            A_qp = sparse.csc_matrix((A_val, A_idx), shape=A.shape)
+            [dP, dq, dA, dl, du] = self.get_grads(P, q, A_qp, l, u, true_x)
+            return dA
 
-            def f(A_val):
-                A_qp = sparse.csc_matrix((A_val, A_idx), shape=A.shape)
-                m = osqp.OSQP()
-                m.setup(P, q, A_qp, l, u, eps_abs=eps_abs, eps_rel=eps_rel, max_iter=max_iter, verbose=False)
-                res = m.solve()
-                if res.info.status != "solved":
-                    raise ValueError("Problem not solved!")
-                x_hat = res.x
+        def f(A_val):
+            A_qp = sparse.csc_matrix((A_val, A_idx), shape=A.shape)
+            m = osqp.OSQP()
+            m.setup(P, q, A_qp, l, u, eps_abs=eps_abs, eps_rel=eps_rel, max_iter=max_iter, verbose=False)
+            res = m.solve()
+            if res.info.status != "solved":
+                raise ValueError("Problem not solved!")
+            x_hat = res.x
 
-                return 0.5 * np.sum(np.square(x_hat - true_x))
+            return 0.5 * np.sum(np.square(x_hat - true_x))
 
-            dA = grad(A.data)
-            dA_fd_val = approx_fprime(A.data, f, grad_precision)
-            dA_fd = sparse.csc_matrix((dA_fd_val, A_idx), shape=A.shape)
+        dA = grad(A.data)
+        dA_fd_val = approx_fprime(A.data, f, grad_precision)
+        dA_fd = sparse.csc_matrix((dA_fd_val, A_idx), shape=A.shape)
 
-            if verbose:
-                print('--- ' + diff_mode)
-                print('dA_fd: ', np.round(dA_fd.data, decimals=4))
-                print('dA: ', np.round(dA.data, decimals=4))
+        if verbose:
+            print('dA_fd: ', np.round(dA_fd.data, decimals=4))
+            print('dA: ', np.round(dA.data, decimals=4))
 
-            npt.assert_allclose(dA.todense(), dA_fd.todense(), rtol=RTOL, atol=ATOL)
+        npt.assert_allclose(dA.todense(), dA_fd.todense(), rtol=RTOL, atol=ATOL)
 
     def test_dl_dl(self, verbose=True):
         n, m = 10, 10
-        for diff_mode in diff_modes:
 
-            prob = self.get_prob(n=n, m=m, P_scale=100., A_scale=100.)
-            P, q, A, l, u, true_x = prob
+        prob = self.get_prob(n=n, m=m, P_scale=100., A_scale=100.)
+        P, q, A, l, u, true_x = prob
 
-            def grad(l):
-                [dP, dq, dA, dl, du] = self.get_grads(P, q, A, l, u, true_x, diff_mode=diff_mode)
-                return dl
+        def grad(l):
+            [dP, dq, dA, dl, du] = self.get_grads(P, q, A, l, u, true_x)
+            return dl
 
-            def f(l):
-                m = osqp.OSQP()
-                m.setup(P, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, max_iter=max_iter, verbose=False)
-                res = m.solve()
-                if res.info.status != "solved":
-                    raise ValueError("Problem not solved!")
-                x_hat = res.x
+        def f(l):
+            m = osqp.OSQP()
+            m.setup(P, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, max_iter=max_iter, verbose=False)
+            res = m.solve()
+            if res.info.status != "solved":
+                raise ValueError("Problem not solved!")
+            x_hat = res.x
 
-                return 0.5 * np.sum(np.square(x_hat - true_x))
+            return 0.5 * np.sum(np.square(x_hat - true_x))
 
-            dl = grad(l)
-            dl_fd = approx_fprime(l, f, grad_precision)
+        dl = grad(l)
+        dl_fd = approx_fprime(l, f, grad_precision)
 
-            if verbose:
-                print('--- ' + diff_mode)
-                print('dl_fd: ', np.round(dl_fd, decimals=4))
-                print('dl: ', np.round(dl, decimals=4))
+        if verbose:
+            print('dl_fd: ', np.round(dl_fd, decimals=4))
+            print('dl: ', np.round(dl, decimals=4))
 
-            npt.assert_allclose(dl_fd, dl, rtol=RTOL, atol=ATOL)
+        npt.assert_allclose(dl_fd, dl, rtol=RTOL, atol=ATOL)
 
     def test_dl_du(self, verbose=True):
         n, m = 5, 5
-        for diff_mode in diff_modes:
 
-            prob = self.get_prob(n=n, m=m, P_scale=100., A_scale=100.)
-            P, q, A, l, u, true_x = prob
+        prob = self.get_prob(n=n, m=m, P_scale=100., A_scale=100.)
+        P, q, A, l, u, true_x = prob
 
-            def grad(u):
-                [dP, dq, dA, dl, du] = self.get_grads(P, q, A, l, u, true_x, diff_mode=diff_mode)
-                return du
+        def grad(u):
+            [dP, dq, dA, dl, du] = self.get_grads(P, q, A, l, u, true_x)
+            return du
 
-            def f(u):
-                m = osqp.OSQP()
-                m.setup(P, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, max_iter=max_iter, verbose=False)
-                res = m.solve()
-                if res.info.status != "solved":
-                    raise ValueError("Problem not solved!")
-                x_hat = res.x
+        def f(u):
+            m = osqp.OSQP()
+            m.setup(P, q, A, l, u, eps_abs=eps_abs, eps_rel=eps_rel, max_iter=max_iter, verbose=False)
+            res = m.solve()
+            if res.info.status != "solved":
+                raise ValueError("Problem not solved!")
+            x_hat = res.x
 
-                return 0.5 * np.sum(np.square(x_hat - true_x))
+            return 0.5 * np.sum(np.square(x_hat - true_x))
 
-            du = grad(u)
-            du_fd = approx_fprime(u, f, grad_precision)
+        du = grad(u)
+        du_fd = approx_fprime(u, f, grad_precision)
 
-            if verbose:
-                print('--- ' + diff_mode)
-                print('du_fd: ', np.round(du_fd, decimals=4))
-                print('du: ', np.round(du, decimals=4))
+        if verbose:
+            print('du_fd: ', np.round(du_fd, decimals=4))
+            print('du: ', np.round(du, decimals=4))
 
-            npt.assert_allclose(du_fd, du, rtol=RTOL, atol=ATOL)
+        npt.assert_allclose(du_fd, du, rtol=RTOL, atol=ATOL)
 

From 46dff11df697249f3ad3f06ef8d057379887164e Mon Sep 17 00:00:00 2001
From: Bartolomeo Stellato <bartolomeo.stellato@gmail.com>
Date: Fri, 7 Aug 2020 00:31:14 -0400
Subject: [PATCH 19/19] ci

---
 .travis.yml                          | 2 ++
 appveyor.yml                         | 4 ++--
 conda-recipe/conda_build_config.yaml | 4 ++--
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 309f2a51..7c062f65 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -49,6 +49,8 @@ install:
   - bash miniconda.sh -b -p $HOME/miniconda
   - source $HOME/miniconda/bin/activate
   - conda config --set always_yes yes --set changeps1 no --set auto_update_conda no
+  - conda config --add channels conda-forge
+  - conda config --set channel_priority strict
   - conda create -n testenv python=$PYTHON_VERSION pytest numpy
   - source activate testenv
   - conda install conda conda-verify conda-build anaconda-client twine
diff --git a/appveyor.yml b/appveyor.yml
index fc75b454..6dfe580a 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -1,6 +1,5 @@
 platform:
   - x64
-  - x86
 
 image: Visual Studio 2015
 
@@ -35,7 +34,8 @@ install:
   - if "%PLATFORM%"=="x64" set MINICONDA=C:\Miniconda3-x64
   # Load the conda root environment, configure and install some packages
   - '"%MINICONDA%\Scripts\activate.bat"'
-  - conda config --append channels conda-forge
+  - conda config --add channels conda-forge
+  - conda config --set channel_priority strict
   - conda config --set always_yes yes --set changeps1 no --set auto_update_conda no
   - conda create -n testenv python=%PYTHON_VERSION% pytest numpy cmake conda conda-verify conda-build anaconda-client twine
   - activate testenv
diff --git a/conda-recipe/conda_build_config.yaml b/conda-recipe/conda_build_config.yaml
index b43b49a6..70916fa3 100644
--- a/conda-recipe/conda_build_config.yaml
+++ b/conda-recipe/conda_build_config.yaml
@@ -1,5 +1,5 @@
-numpy:
-  - 1.11
+# numpy:
+#   - 1.11
 
 CONDA_BUILD_SYSROOT:
   - /var/tmp/MacOSX-SDKs/MacOSX10.9.sdk   # [osx]