From 29c3f6892387c738bed56e8e75ca42ab0c25499e Mon Sep 17 00:00:00 2001
From: Matteo Interlandi
Date: Fri, 4 Nov 2022 14:31:51 -0700
Subject: [PATCH] added support for Tweedie, Poisson and Gamma regressors
 (#650)

---
 .../_linear_implementations.py               |  6 +++-
 .../ml/operator_converters/sklearn/linear.py | 10 +++++-
 hummingbird/ml/supported.py                  |  9 +++++
 tests/test_sklearn_linear_converter.py       | 33 +++++++++++++++++++
 4 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/hummingbird/ml/operator_converters/_linear_implementations.py b/hummingbird/ml/operator_converters/_linear_implementations.py
index 80c34b2b4..3418696f2 100644
--- a/hummingbird/ml/operator_converters/_linear_implementations.py
+++ b/hummingbird/ml/operator_converters/_linear_implementations.py
@@ -32,7 +32,9 @@ def __init__(
         self.multi_class = multi_class
         self.regression = is_linear_regression
         self.classification = not is_linear_regression
-        self.loss = loss if loss is not None else "log"
+        self.loss = loss
+        if self.loss is None and self.classification:
+            self.loss = "log"
 
         self.perform_class_select = False
         if min(classes) != 0 or max(classes) != len(classes) - 1:
@@ -48,6 +50,8 @@ def forward(self, x):
         if self.multi_class == "multinomial":
             output = torch.softmax(output, dim=1)
         elif self.regression:
+            if self.loss == "log":
+                return torch.exp(output)
             return output
         else:
             if self.loss == "modified_huber":
diff --git a/hummingbird/ml/operator_converters/sklearn/linear.py b/hummingbird/ml/operator_converters/sklearn/linear.py
index 232476520..bb3d21a7d 100644
--- a/hummingbird/ml/operator_converters/sklearn/linear.py
+++ b/hummingbird/ml/operator_converters/sklearn/linear.py
@@ -10,6 +10,7 @@
 import numpy as np
 
 from onnxconverter_common.registration import register_converter
+from sklearn._loss.link import LogLink
 
 from .._linear_implementations import LinearModel
 
@@ -81,6 +82,7 @@ def convert_sklearn_linear_regression_model(operator, device, extra_config):
     """
     assert operator is not None, "Cannot convert None operator"
 
+    loss = None
     coefficients = operator.raw_operator.coef_.transpose().astype("float32")
     if len(coefficients.shape) == 1:
         coefficients = coefficients.reshape(-1, 1)
@@ -91,7 +93,10 @@ def convert_sklearn_linear_regression_model(operator, device, extra_config):
     else:
         intercepts = intercepts.reshape(1, -1).astype("float32")
 
-    return LinearModel(operator, coefficients, intercepts, device, is_linear_regression=True)
+    if hasattr(operator.raw_operator, "_base_loss") and type(operator.raw_operator._base_loss.link) == LogLink:
+        loss = "log"
+
+    return LinearModel(operator, coefficients, intercepts, device, loss=loss, is_linear_regression=True)
 
 
 register_converter("SklearnLinearRegression", convert_sklearn_linear_regression_model)
@@ -104,3 +109,6 @@
 register_converter("SklearnSGDClassifier", convert_sklearn_linear_model)
 register_converter("SklearnLogisticRegressionCV", convert_sklearn_linear_model)
 register_converter("SklearnRidgeCV", convert_sklearn_linear_regression_model)
+register_converter("SklearnTweedieRegressor", convert_sklearn_linear_regression_model)
+register_converter("SklearnPoissonRegressor", convert_sklearn_linear_regression_model)
+register_converter("SklearnGammaRegressor", convert_sklearn_linear_regression_model)
diff --git a/hummingbird/ml/supported.py b/hummingbird/ml/supported.py
index 086748f19..03264b1e8 100644
--- a/hummingbird/ml/supported.py
+++ b/hummingbird/ml/supported.py
@@ -21,6 +21,7 @@
 ExtraTreesClassifier,
 ExtraTreesRegressor,
 FastICA,
+GammaRegressor,
 GaussianNB,
 GradientBoostingClassifier,
 GradientBoostingRegressor,
@@ -51,6 +52,7 @@
 OneHotEncoder,
 PCA,
 PLSRegression,
+PoissonRegressor,
 PolynomialFeatures,
 RandomForestClassifier,
 RandomForestRegressor,
@@ -64,6 +66,7 @@
 TreeEnsembleClassifier,
 TreeEnsembleRegressor,
 TruncatedSVD,
+TweedieRegressor,
 VarianceThreshold,
 
 **Supported Operators (LGBM)**
@@ -145,6 +148,9 @@ def _build_sklearn_operator_list():
         ElasticNet,
         Ridge,
         Lasso,
+        TweedieRegressor,
+        PoissonRegressor,
+        GammaRegressor,
     )
 
     # SVM-based models
@@ -223,6 +229,9 @@ def _build_sklearn_operator_list():
         Lasso,
         ElasticNet,
         Ridge,
+        TweedieRegressor,
+        PoissonRegressor,
+        GammaRegressor,
         # Clustering
         KMeans,
         MeanShift,
diff --git a/tests/test_sklearn_linear_converter.py b/tests/test_sklearn_linear_converter.py
index b31c5ddc2..d85da913a 100644
--- a/tests/test_sklearn_linear_converter.py
+++ b/tests/test_sklearn_linear_converter.py
@@ -16,6 +16,9 @@
     Lasso,
     ElasticNet,
     Ridge,
+    TweedieRegressor,
+    PoissonRegressor,
+    GammaRegressor,
 )
 
 from sklearn import datasets
@@ -495,6 +498,36 @@ def test_lr_tvm(self):
 
         np.testing.assert_allclose(model.predict(X), tvm_model.predict(X), rtol=1e-6, atol=1e-3)
 
+    def test_tweedie_regressor(self):
+        clf = TweedieRegressor()
+        X = [[1, 2], [2, 3], [3, 4], [4, 3]]
+        y = [2, 3.5, 5, 5.5]
+
+        clf.fit(X, y)
+        hb_model = hummingbird.ml.convert(clf, "torch")
+
+        np.testing.assert_allclose(clf.predict([[1, 1], [3, 4]]), hb_model.predict([[1, 1], [3, 4]]), rtol=1e-6, atol=1e-3)
+
+    def test_poisson_regressor(self):
+        clf = PoissonRegressor()
+        X = [[1, 2], [2, 3], [3, 4], [4, 3]]
+        y = [12, 17, 22, 21]
+
+        clf.fit(X, y)
+        hb_model = hummingbird.ml.convert(clf, "torch")
+
+        np.testing.assert_allclose(clf.predict([[1, 1], [3, 4]]), hb_model.predict([[1, 1], [3, 4]]), rtol=1e-6, atol=1e-3)
+
+    def test_gamma_regressor(self):
+        clf = GammaRegressor()
+        X = [[1, 2], [2, 3], [3, 4], [4, 3]]
+        y = [19, 26, 33, 30]
+
+        clf.fit(X, y)
+        hb_model = hummingbird.ml.convert(clf, "torch")
+
+        np.testing.assert_allclose(clf.predict([[1, 1], [3, 4]]), hb_model.predict([[1, 1], [3, 4]]), rtol=1e-6, atol=1e-3)
+
 
 if __name__ == "__main__":
     unittest.main()
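
For reference, a minimal sketch of the behavior this patch enables, assuming scikit-learn >= 1.1 (where the GLM regressors expose `_base_loss` and its `LogLink`). PoissonRegressor and GammaRegressor always use a log link (TweedieRegressor's link depends on its `power` parameter), so scikit-learn's predict() returns exp(X @ coef_ + intercept_); the patched LinearModel.forward reproduces this by applying torch.exp to the linear output. Variable names below are illustrative, not part of the patch.

    import numpy as np
    import hummingbird.ml
    from sklearn.linear_model import PoissonRegressor

    X = np.array([[1, 2], [2, 3], [3, 4], [4, 3]], dtype=np.float64)
    y = np.array([12, 17, 22, 21], dtype=np.float64)

    clf = PoissonRegressor().fit(X, y)

    # For a log-link GLM, scikit-learn's predict is exp(X @ coef_ + intercept_).
    manual = np.exp(X @ clf.coef_ + clf.intercept_)
    np.testing.assert_allclose(clf.predict(X), manual, rtol=1e-6)

    # The converted torch model should match: the converter detects the LogLink
    # through _base_loss, passes loss="log", and forward() applies torch.exp.
    hb_model = hummingbird.ml.convert(clf, "torch")
    np.testing.assert_allclose(clf.predict(X), hb_model.predict(X), rtol=1e-6, atol=1e-3)

Note that the `loss is None and self.classification` guard in LinearModel.__init__ is what keeps identity-link regressors (e.g., LinearRegression, or TweedieRegressor with power=0) on the plain linear path: only classifiers still default to "log".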