diff --git a/skglm/experimental/sqrt_lasso.py b/skglm/experimental/sqrt_lasso.py index 97c10105d..55ba908ae 100644 --- a/skglm/experimental/sqrt_lasso.py +++ b/skglm/experimental/sqrt_lasso.py @@ -101,10 +101,13 @@ class SqrtLasso(LinearModel, RegressorMixin): verbose : bool, default False Amount of verbosity. 0/False is silent. + + fit_intercept : bool, default True + Whether or not to fit an intercept. """ def __init__(self, alpha=1., max_iter=100, max_pn_iter=100, p0=10, - tol=1e-4, verbose=0): + tol=1e-4, verbose=0, fit_intercept=True): super().__init__() self.alpha = alpha self.max_iter = max_iter @@ -113,6 +116,7 @@ def __init__(self, alpha=1., max_iter=100, max_pn_iter=100, p0=10, self.p0 = p0 self.tol = tol self.verbose = verbose + self.fit_intercept = fit_intercept def fit(self, X, y): """Fit the model according to the given training data. @@ -132,7 +136,10 @@ def fit(self, X, y): Fitted estimator. """ self.coef_ = self.path(X, y, alphas=[self.alpha])[1][0] - self.intercept_ = 0.  # TODO handle fit_intercept + if self.fit_intercept: + self.intercept_, self.coef_ = self.coef_[-1], self.coef_[:-1] + else: + self.intercept_ = 0. 
return self def path(self, X, y, alphas=None, eps=1e-3, n_alphas=10): @@ -169,7 +176,7 @@ def path(self, X, y, alphas=None, eps=1e-3, n_alphas=10): if not hasattr(self, "solver_"): self.solver_ = ProxNewton( tol=self.tol, max_iter=self.max_iter, verbose=self.verbose, - fit_intercept=False) + fit_intercept=self.fit_intercept) # build path if alphas is None: alpha_max = norm(X.T @ y, ord=np.inf) / (np.sqrt(len(y)) * norm(y)) @@ -182,7 +189,7 @@ def path(self, X, y, alphas=None, eps=1e-3, n_alphas=10): sqrt_quadratic = compiled_clone(SqrtQuadratic()) l1_penalty = compiled_clone(L1(1.)) # alpha is set along the path - coefs = np.zeros((n_alphas, n_features)) + coefs = np.zeros((n_alphas, n_features + self.fit_intercept)) for i in range(n_alphas): if self.verbose: @@ -193,12 +200,14 @@ def path(self, X, y, alphas=None, eps=1e-3, n_alphas=10): l1_penalty.alpha = alphas[i] # no warm start for the first alpha - coef_init = coefs[i].copy() if i else np.zeros(n_features) + coef_init = coefs[i].copy() if i else np.zeros(n_features + + self.fit_intercept) try: coef, _, _ = self.solver_.solve( X, y, sqrt_quadratic, l1_penalty, - w_init=coef_init, Xw_init=X @ coef_init) + w_init=coef_init, Xw_init=X @ coef_init[:-1] + coef_init[-1] + if self.fit_intercept else X @ coef_init) coefs[i] = coef except ValueError as val_exception: # make sure to catch residual error @@ -209,7 +218,8 @@ def path(self, X, y, alphas=None, eps=1e-3, n_alphas=10): # save coef despite not converging # coef_init holds a ref to coef coef = coef_init - res_norm = norm(y - X @ coef) + X_coef = X @ coef[:-1] + coef[-1] if self.fit_intercept else X @ coef + res_norm = norm(y - X_coef) warnings.warn( f"Small residuals prevented the solver from converging " f"at alpha={alphas[i]:.2e} (residuals' norm: {res_norm:.4e}). 
" diff --git a/skglm/experimental/tests/test_sqrt_lasso.py b/skglm/experimental/tests/test_sqrt_lasso.py index 91722abea..ae0d77935 100644 --- a/skglm/experimental/tests/test_sqrt_lasso.py +++ b/skglm/experimental/tests/test_sqrt_lasso.py @@ -16,7 +16,10 @@ def test_alpha_max(): sqrt_lasso = SqrtLasso(alpha=alpha_max).fit(X, y) - np.testing.assert_equal(sqrt_lasso.coef_, 0) + if sqrt_lasso.fit_intercept: + np.testing.assert_equal(sqrt_lasso.coef_[:-1], 0) + else: + np.testing.assert_equal(sqrt_lasso.coef_, 0) def test_vs_statsmodels(): @@ -31,7 +34,7 @@ def test_vs_statsmodels(): n_alphas = 3 alphas = alpha_max * np.geomspace(1, 1e-2, n_alphas+1)[1:] - sqrt_lasso = SqrtLasso(tol=1e-9) + sqrt_lasso = SqrtLasso(tol=1e-9, fit_intercept=False) coefs_skglm = sqrt_lasso.path(X, y, alphas)[1] coefs_statsmodels = np.zeros((len(alphas), n_features)) @@ -54,7 +57,7 @@ def test_prox_newton_cp(): alpha_max = norm(X.T @ y, ord=np.inf) / norm(y) alpha = alpha_max / 10 - clf = SqrtLasso(alpha=alpha, tol=1e-12).fit(X, y) + clf = SqrtLasso(alpha=alpha, fit_intercept=False, tol=1e-12).fit(X, y) w, _, _ = _chambolle_pock_sqrt(X, y, alpha, max_iter=1000) np.testing.assert_allclose(clf.coef_, w) @@ -70,7 +73,7 @@ def test_PDCD_WS(with_dual_init): dual_init = y / norm(y) if with_dual_init else None w = PDCD_WS(dual_init=dual_init).solve(X, y, SqrtQuadratic(), L1(alpha))[0] - clf = SqrtLasso(alpha=alpha, tol=1e-12).fit(X, y) + clf = SqrtLasso(alpha=alpha, fit_intercept=False, tol=1e-12).fit(X, y) np.testing.assert_allclose(clf.coef_, w, atol=1e-6)