diff --git a/skglm/experimental/sqrt_lasso.py b/skglm/experimental/sqrt_lasso.py index 97c10105d..55ba908ae 100644 --- a/skglm/experimental/sqrt_lasso.py +++ b/skglm/experimental/sqrt_lasso.py @@ -101,10 +101,13 @@ class SqrtLasso(LinearModel, RegressorMixin): verbose : bool, default False Amount of verbosity. 0/False is silent. + + fit_intercept : bool, default True + Whether or not to fit an intercept. """ def __init__(self, alpha=1., max_iter=100, max_pn_iter=100, p0=10, - tol=1e-4, verbose=0): + tol=1e-4, verbose=0, fit_intercept=True): super().__init__() self.alpha = alpha self.max_iter = max_iter @@ -113,6 +116,7 @@ def __init__(self, alpha=1., max_iter=100, max_pn_iter=100, p0=10, self.p0 = p0 self.tol = tol self.verbose = verbose + self.fit_intercept = fit_intercept def fit(self, X, y): """Fit the model according to the given training data. @@ -132,7 +136,10 @@ def fit(self, X, y): Fitted estimator. """ self.coef_ = self.path(X, y, alphas=[self.alpha])[1][0] - self.intercept_ = 0.  # TODO handle fit_intercept + if self.fit_intercept: + self.intercept_, self.coef_ = self.coef_[-1], self.coef_[:-1] + else: + self.intercept_ = 0. 
return self def path(self, X, y, alphas=None, eps=1e-3, n_alphas=10): @@ -169,7 +176,7 @@ def path(self, X, y, alphas=None, eps=1e-3, n_alphas=10): if not hasattr(self, "solver_"): self.solver_ = ProxNewton( tol=self.tol, max_iter=self.max_iter, verbose=self.verbose, - fit_intercept=False) + fit_intercept=self.fit_intercept) # build path if alphas is None: alpha_max = norm(X.T @ y, ord=np.inf) / (np.sqrt(len(y)) * norm(y)) @@ -182,7 +189,7 @@ def path(self, X, y, alphas=None, eps=1e-3, n_alphas=10): sqrt_quadratic = compiled_clone(SqrtQuadratic()) l1_penalty = compiled_clone(L1(1.)) # alpha is set along the path - coefs = np.zeros((n_alphas, n_features)) + coefs = np.zeros((n_alphas, n_features + self.fit_intercept)) for i in range(n_alphas): if self.verbose: @@ -193,12 +200,14 @@ def path(self, X, y, alphas=None, eps=1e-3, n_alphas=10): l1_penalty.alpha = alphas[i] # no warm start for the first alpha - coef_init = coefs[i].copy() if i else np.zeros(n_features) + coef_init = coefs[i].copy() if i else np.zeros(n_features + + self.fit_intercept) try: coef, _, _ = self.solver_.solve( X, y, sqrt_quadratic, l1_penalty, - w_init=coef_init, Xw_init=X @ coef_init) + w_init=coef_init, Xw_init=X @ coef_init[:-1] + coef_init[-1] + if self.fit_intercept else X @ coef_init) coefs[i] = coef except ValueError as val_exception: # make sure to catch residual error @@ -209,7 +218,8 @@ def path(self, X, y, alphas=None, eps=1e-3, n_alphas=10): # save coef despite not converging # coef_init holds a ref to coef coef = coef_init - res_norm = norm(y - X @ coef) + X_coef = X @ coef[:-1] + coef[-1] if self.fit_intercept else X @ coef + res_norm = norm(y - X_coef) warnings.warn( f"Small residuals prevented the solver from converging " f"at alpha={alphas[i]:.2e} (residuals' norm: {res_norm:.4e}). 
" diff --git a/skglm/experimental/tests/test_sqrt_lasso.py b/skglm/experimental/tests/test_sqrt_lasso.py index 91722abea..ae0d77935 100644 --- a/skglm/experimental/tests/test_sqrt_lasso.py +++ b/skglm/experimental/tests/test_sqrt_lasso.py @@ -16,7 +16,10 @@ def test_alpha_max(): sqrt_lasso = SqrtLasso(alpha=alpha_max).fit(X, y) - np.testing.assert_equal(sqrt_lasso.coef_, 0) + if sqrt_lasso.fit_intercept: + np.testing.assert_equal(sqrt_lasso.coef_[:-1], 0) + else: + np.testing.assert_equal(sqrt_lasso.coef_, 0) def test_vs_statsmodels(): @@ -31,7 +34,7 @@ def test_vs_statsmodels(): n_alphas = 3 alphas = alpha_max * np.geomspace(1, 1e-2, n_alphas+1)[1:] - sqrt_lasso = SqrtLasso(tol=1e-9) + sqrt_lasso = SqrtLasso(tol=1e-9, fit_intercept=False) coefs_skglm = sqrt_lasso.path(X, y, alphas)[1] coefs_statsmodels = np.zeros((len(alphas), n_features)) @@ -54,7 +57,7 @@ def test_prox_newton_cp(): alpha_max = norm(X.T @ y, ord=np.inf) / norm(y) alpha = alpha_max / 10 - clf = SqrtLasso(alpha=alpha, tol=1e-12).fit(X, y) + clf = SqrtLasso(alpha=alpha, fit_intercept=False, tol=1e-12).fit(X, y) w, _, _ = _chambolle_pock_sqrt(X, y, alpha, max_iter=1000) np.testing.assert_allclose(clf.coef_, w) @@ -70,7 +73,7 @@ def test_PDCD_WS(with_dual_init): dual_init = y / norm(y) if with_dual_init else None w = PDCD_WS(dual_init=dual_init).solve(X, y, SqrtQuadratic(), L1(alpha))[0] - clf = SqrtLasso(alpha=alpha, tol=1e-12).fit(X, y) + clf = SqrtLasso(alpha=alpha, fit_intercept=False, tol=1e-12).fit(X, y) np.testing.assert_allclose(clf.coef_, w, atol=1e-6)