add UT + rename to QuadraticHessian + rm estimator class
mathurinm committed Nov 7, 2024
1 parent ec4be21 commit f2ac94e
Showing 5 changed files with 40 additions and 65 deletions.
3 changes: 2 additions & 1 deletion doc/api.rst
@@ -68,6 +68,7 @@ Datafits
Poisson
Quadratic
QuadraticGroup
+   QuadraticHessian
QuadraticSVC
WeightedQuadratic

@@ -102,4 +103,4 @@ Experimental
PDCD_WS
Pinball
SqrtQuadratic
-SqrtLasso
+SqrtLasso
4 changes: 2 additions & 2 deletions skglm/datafits/__init__.py
@@ -1,6 +1,6 @@
from .base import BaseDatafit, BaseMultitaskDatafit
from .single_task import (Quadratic, QuadraticSVC, Logistic, Huber, Poisson, Gamma,
-                          Cox, WeightedQuadratic, HessianQuadratic,)
+                          Cox, WeightedQuadratic, QuadraticHessian,)
from .multi_task import QuadraticMultiTask
from .group import QuadraticGroup, LogisticGroup

@@ -10,5 +10,5 @@
Quadratic, QuadraticSVC, Logistic, Huber, Poisson, Gamma, Cox,
QuadraticMultiTask,
QuadraticGroup, LogisticGroup, WeightedQuadratic,
-    HessianQuadratic
+    QuadraticHessian
]
19 changes: 9 additions & 10 deletions skglm/datafits/single_task.py
@@ -240,15 +240,16 @@ def intercept_update_step(self, y, Xw):
return np.sum(self.sample_weights * (Xw - y)) / self.sample_weights.sum()


-class HessianQuadratic(BaseDatafit):
-    r"""_summary_
+class QuadraticHessian(BaseDatafit):
+    r"""Quadratic datafit where we pass the Hessian A directly.

    The datafit reads:

-    .. math:: 1 / 2 x^\\top A x + \\langle b, x \\rangle
-
-    for A symmetric
+    .. math:: 1 / 2 x^\top A x + \langle b, x \rangle
+
+    For a symmetric A. Up to a constant, it is the same as a Quadratic, with
+    :math:`A = 1 / n_\text{samples} X^\top X` and
+    :math:`b = - 1 / n_\text{samples} X^\top y`. When the Hessian is
+    available, this datafit is more efficient than using Quadratic.
    """

def __init__(self):
@@ -264,7 +265,7 @@ def get_lipschitz(self, A, b):
n_features = A.shape[0]
lipschitz = np.zeros(n_features, dtype=A.dtype)
for j in range(n_features):
-        lipschitz[j] = np.sqrt((A[:, j]**2).sum())
+        lipschitz[j] = A[j, j]
return lipschitz
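The new value is exact rather than conservative: for :math:`f(x) = 1/2 x^\top A x + \langle b, x \rangle`, the second derivative along coordinate :math:`j` is constant,

.. math:: \partial^2 f / \partial x_j^2 = A_{jj} \le \lVert A_{:, j} \rVert_2

(the inequality holds because a convex quadratic has a positive semi-definite Hessian, hence :math:`A_{jj} \ge 0`). The previous column-norm value was thus a valid but looser Lipschitz constant, which forces smaller coordinate descent steps.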

def gradient_scalar(self, A, b, w, Ax, j):
@@ -887,8 +888,7 @@ def _A_dot_vec(self, vec):
for idx in range(n_H):
current_H_idx = self.H_indices[self.H_indptr[idx]: self.H_indptr[idx+1]]
size_current_H = current_H_idx.shape[0]
-        frac_range = np.arange(
-            size_current_H, dtype=vec.dtype) / size_current_H
+        frac_range = np.arange(size_current_H, dtype=vec.dtype) / size_current_H

sum_vec_H = np.sum(vec[current_H_idx])
out[current_H_idx] = sum_vec_H * frac_range
@@ -903,8 +903,7 @@ def _AT_dot_vec(self, vec):
for idx in range(n_H):
current_H_idx = self.H_indices[self.H_indptr[idx]: self.H_indptr[idx+1]]
size_current_H = current_H_idx.shape[0]
-        frac_range = np.arange(
-            size_current_H, dtype=vec.dtype) / size_current_H
+        frac_range = np.arange(size_current_H, dtype=vec.dtype) / size_current_H

weighted_sum_vec_H = vec[current_H_idx] @ frac_range
out[current_H_idx] = weighted_sum_vec_H * np.ones(size_current_H)
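For context, _A_dot_vec and _AT_dot_vec apply an implicit block matrix, defined group by group through the CSR-style H_indptr/H_indices arrays, without ever materializing it. A minimal NumPy sketch with made-up group indices (not the skglm API) that checks the update shown above against an explicit dense matrix:

import numpy as np

# Hypothetical tie-group layout: groups {0, 1, 2} and {3, 4}.
H_indptr = np.array([0, 3, 5])
H_indices = np.array([0, 1, 2, 3, 4])
vec = np.random.default_rng(0).standard_normal(5)

out = np.zeros_like(vec)
A_explicit = np.zeros((5, 5))  # dense counterpart, built block by block
for idx in range(len(H_indptr) - 1):
    g = H_indices[H_indptr[idx]:H_indptr[idx + 1]]
    frac_range = np.arange(g.shape[0], dtype=vec.dtype) / g.shape[0]
    out[g] = np.sum(vec[g]) * frac_range  # the _A_dot_vec update
    A_explicit[np.ix_(g, g)] = np.outer(frac_range, np.ones(g.shape[0]))

np.testing.assert_allclose(out, A_explicit @ vec)
# _AT_dot_vec's update, out[g] = (vec[g] @ frac_range) * ones, matches
# A_explicit.T @ vec in the same way.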
59 changes: 8 additions & 51 deletions skglm/estimators.py
@@ -21,7 +21,7 @@
from skglm.utils.jit_compilation import compiled_clone
from skglm.solvers import AndersonCD, MultiTaskBCD, GroupBCD
from skglm.datafits import (Cox, Quadratic, Logistic, QuadraticSVC,
-                            QuadraticMultiTask, QuadraticGroup, HessianQuadratic)
+                            QuadraticMultiTask, QuadraticGroup, QuadraticHessian)
from skglm.penalties import (L1, WeightedL1, L1_plus_L2, L2, WeightedGroupL2,
MCPenalty, WeightedMCPenalty, IndicatorBox, L2_1)
from skglm.utils.data import grp_converter
@@ -126,8 +126,7 @@ def _glm_fit(X, y, model, datafit, penalty, solver):
w = np.zeros(n_features + fit_intercept, dtype=X_.dtype)
Xw = np.zeros(n_samples, dtype=X_.dtype)
else: # multitask
-        w = np.zeros((n_features + fit_intercept,
-                      y.shape[1]), dtype=X_.dtype)
+        w = np.zeros((n_features + fit_intercept, y.shape[1]), dtype=X_.dtype)
Xw = np.zeros(y.shape, dtype=X_.dtype)

# check consistency of weights for WeightedL1
@@ -450,42 +449,6 @@ def path(self, X, y, alphas, coef_init=None, return_n_iter=True, **params):
return solver.path(X, y, datafit, penalty, alphas, coef_init, return_n_iter)


-class L1PenalizedQP(BaseEstimator):
-    def __init__(self, alpha=1., max_iter=50, max_epochs=50_000, p0=10, verbose=0,
-                 tol=1e-4, positive=False, fit_intercept=True, warm_start=False,
-                 ws_strategy="subdiff"):
-        super().__init__()
-        self.alpha = alpha
-        self.tol = tol
-        self.max_iter = max_iter
-        self.max_epochs = max_epochs
-        self.p0 = p0
-        self.ws_strategy = ws_strategy
-        self.positive = positive
-        self.fit_intercept = fit_intercept
-        self.warm_start = warm_start
-        self.verbose = verbose
-
-    def fit(self, A, b):
-        """Fit the model according to the given training data.
-
-        Parameters
-        ----------
-        A : array-like, shape (n_features, n_features)
-
-        b : array-like, shape (n_samples,)
-
-        Returns
-        -------
-        self :
-            Fitted estimator.
-        """
-        solver = AndersonCD(
-            self.max_iter, self.max_epochs, self.p0, tol=self.tol,
-            ws_strategy=self.ws_strategy, fit_intercept=self.fit_intercept,
-            warm_start=self.warm_start, verbose=self.verbose)
-        return _glm_fit(A, b, self, HessianQuadratic(), L1(self.alpha, self.positive), solver)
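With the class removed, the same L1-penalized QP fit remains available through the public GeneralizedLinearEstimator, as the new unit test below exercises. A minimal sketch (the data and the alpha value are illustrative, not part of the commit):

import numpy as np
from skglm import GeneralizedLinearEstimator
from skglm.datafits import QuadraticHessian
from skglm.penalties import L1
from skglm.solvers import AndersonCD

# Build a symmetric Hessian A and linear term b from illustrative data.
rng = np.random.default_rng(0)
X = rng.standard_normal((20, 10))
y = rng.standard_normal(20)
A = X.T @ X / X.shape[0]
b = -X.T @ y / X.shape[0]

# Equivalent of the removed L1PenalizedQP(alpha=0.1).fit(A, b):
qp = GeneralizedLinearEstimator(
    datafit=QuadraticHessian(),
    penalty=L1(alpha=0.1),
    solver=AndersonCD(fit_intercept=False),
).fit(A, b)
print(qp.coef_)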


class WeightedLasso(LinearModel, RegressorMixin):
r"""WeightedLasso estimator based on Celer solver and primal extrapolation.
@@ -613,8 +576,7 @@ def path(self, X, y, alphas, coef_init=None, return_n_iter=True, **params):
raise ValueError("The number of weights must match the number of \
features. Got %s, expected %s." % (
len(weights), X.shape[1]))
-        penalty = compiled_clone(WeightedL1(
-            self.alpha, weights, self.positive))
+        penalty = compiled_clone(WeightedL1(self.alpha, weights, self.positive))
datafit = compiled_clone(Quadratic(), to_float32=X.dtype == np.float32)
solver = AndersonCD(
self.max_iter, self.max_epochs, self.p0, tol=self.tol,
@@ -952,8 +914,7 @@ def path(self, X, y, alphas, coef_init=None, return_n_iter=True, **params):
f"Got {len(self.weights)}, expected {X.shape[1]}."
)
penalty = compiled_clone(
-            WeightedMCPenalty(self.alpha, self.gamma,
-                              self.weights, self.positive)
+            WeightedMCPenalty(self.alpha, self.gamma, self.weights, self.positive)
)
datafit = compiled_clone(Quadratic(), to_float32=X.dtype == np.float32)
solver = AndersonCD(
@@ -1348,8 +1309,7 @@ def fit(self, X, y):
# copy/paste from https://github.com/scikit-learn/scikit-learn/blob/ \
# 23ff51c07ebc03c866984e93c921a8993e96d1f9/sklearn/utils/ \
# estimator_checks.py#L3886
-            raise ValueError(
-                "requires y to be passed, but the target y is None")
+            raise ValueError("requires y to be passed, but the target y is None")
y = check_array(
y,
accept_sparse=False,
@@ -1390,8 +1350,7 @@ def fit(self, X, y):

# init solver
if self.l1_ratio == 0.:
-            solver = LBFGS(max_iter=self.max_iter,
-                           tol=self.tol, verbose=self.verbose)
+            solver = LBFGS(max_iter=self.max_iter, tol=self.tol, verbose=self.verbose)
else:
solver = ProxNewton(
max_iter=self.max_iter, tol=self.tol, verbose=self.verbose,
@@ -1529,8 +1488,7 @@ def fit(self, X, Y):
if not self.warm_start or not hasattr(self, "coef_"):
self.coef_ = None

-        datafit_jit = compiled_clone(
-            QuadraticMultiTask(), X.dtype == np.float32)
+        datafit_jit = compiled_clone(QuadraticMultiTask(), X.dtype == np.float32)
penalty_jit = compiled_clone(L2_1(self.alpha), X.dtype == np.float32)

solver = MultiTaskBCD(
@@ -1710,8 +1668,7 @@ def fit(self, X, y):
"The total number of group members must equal the number of features. "
f"Got {n_features}, expected {X.shape[1]}.")

-        weights = np.ones(
-            len(group_sizes)) if self.weights is None else self.weights
+        weights = np.ones(len(group_sizes)) if self.weights is None else self.weights
group_penalty = WeightedGroupL2(alpha=self.alpha, grp_ptr=grp_ptr,
grp_indices=grp_indices, weights=weights,
positive=self.positive)
20 changes: 19 additions & 1 deletion skglm/tests/test_datafits.py
@@ -6,7 +6,7 @@
from numpy.testing import assert_allclose, assert_array_less

from skglm.datafits import (Huber, Logistic, Poisson, Gamma, Cox, WeightedQuadratic,
-                            Quadratic,)
+                            Quadratic, QuadraticHessian)
from skglm.penalties import L1, WeightedL1
from skglm.solvers import AndersonCD, ProxNewton
from skglm import GeneralizedLinearEstimator
@@ -219,5 +219,23 @@ def test_sample_weights(fit_intercept):
# np.testing.assert_equal(n_iter, n_iter_overs)


+def test_HessianQuadratic():
+    n_samples = 20
+    n_features = 10
+    X, y, _ = make_correlated_data(
+        n_samples=n_samples, n_features=n_features, random_state=0)
+    A = X.T @ X / n_samples
+    b = -X.T @ y / n_samples
+    alpha = np.max(np.abs(b)) / 10
+
+    pen = L1(alpha)
+    solv = AndersonCD(warm_start=False, verbose=2, fit_intercept=False)
+    lasso = GeneralizedLinearEstimator(Quadratic(), pen, solv).fit(X, y)
+    qpl1 = GeneralizedLinearEstimator(QuadraticHessian(), pen, solv).fit(A, b)
+
+    np.testing.assert_allclose(lasso.coef_, qpl1.coef_)
+    # check that the match is not trivial because alpha is so high that coef is 0
+    np.testing.assert_array_less(0.1, np.max(np.abs(qpl1.coef_)))

if __name__ == '__main__':
pass
