diff --git a/CHANGELOG.md b/CHANGELOG.md
index 69f84d398..41392b949 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -30,7 +30,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 -
 - Add docstring warning about handling non-regressors (including target) to children of `WindowStatisticsTransform` ([#474](https://github.com/etna-team/etna/pull/474))
 - Add parameter `missing_mode` into `MSE` metric ([#515](https://github.com/etna-team/etna/pull/515))
--
+- Add parameter `missing_mode` into `MAE` metric ([#523](https://github.com/etna-team/etna/pull/523))
 -
 -
 -
diff --git a/etna/metrics/__init__.py b/etna/metrics/__init__.py
index 508f50c04..48d33d098 100644
--- a/etna/metrics/__init__.py
+++ b/etna/metrics/__init__.py
@@ -1,6 +1,5 @@
 """Module with metrics of forecasting quality."""
 
-from sklearn.metrics import mean_absolute_error as mae
 from sklearn.metrics import mean_squared_log_error as msle
 from sklearn.metrics import median_absolute_error as medae
 from sklearn.metrics import r2_score
diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py
index e3966c597..7c185150d 100644
--- a/etna/metrics/functional_metrics.py
+++ b/etna/metrics/functional_metrics.py
@@ -6,7 +6,6 @@ from typing import Union
 
 import numpy as np
-from sklearn.metrics import mean_absolute_error as mae
 from sklearn.metrics import mean_squared_error as mse_sklearn
 from sklearn.metrics import mean_squared_log_error as msle
 from sklearn.metrics import median_absolute_error as medae
@@ -88,6 +87,52 @@ def mse(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Arr
     return result
 
 
+def mae(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
+    """Mean absolute error with missing values handling.
+
+    .. math::
+        MAE(y\_true, y\_pred) = \\frac{\\sum_{i=1}^{n}{\\mid y\_true_i - y\_pred_i \\mid}}{n}
+
+    NaN values are ignored during computation. If all values are NaN, the result is NaN.
+
+    Parameters
+    ----------
+    y_true:
+        array-like of shape (n_samples,) or (n_samples, n_outputs)
+
+        Ground truth (correct) target values.
+
+    y_pred:
+        array-like of shape (n_samples,) or (n_samples, n_outputs)
+
+        Estimated target values.
+
+    multioutput:
+        Defines how to aggregate multiple output values
+        (see :py:class:`~etna.metrics.functional_metrics.FunctionalMetricMultioutput`).
+
+    Returns
+    -------
+    :
+        A non-negative floating point value (the best value is 0.0), or an array of floating point values,
+        one for each individual target.
+    """
+    y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
+
+    if len(y_true_array.shape) != len(y_pred_array.shape):
+        raise ValueError("Shapes of the labels must be the same")
+
+    axis = _get_axis_by_multioutput(multioutput)
+
+    with warnings.catch_warnings():
+        # suppress the "Mean of empty slice" warning raised when all values are NaN
+        warnings.filterwarnings(
+            message="Mean of empty slice",
+            action="ignore",
+        )
+        result = np.nanmean(np.abs(y_true_array - y_pred_array), axis=axis)
+
+    return result
+
+
 def mape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput: str = "joint") -> ArrayLike:
     """Mean absolute percentage error.
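The hand-rolled `mae` above replaces `sklearn.metrics.mean_absolute_error` so that NaNs can be skipped instead of raising. A minimal sketch of the intended semantics, using only names defined in the hunk above:

```python
import numpy as np

from etna.metrics.functional_metrics import mae

# NaN pairs are dropped: only |1 - 3| and |3 - 2| remain -> (2 + 1) / 2 = 1.5
mae(y_true=np.array([1.0, np.nan, 3.0]), y_pred=np.array([3.0, 1.0, 2.0]))  # 1.5

# "raw_values" returns one value per output column instead of aggregating
mae(
    y_true=np.array([[1.0, 3.0], [2.0, 4.0], [3.0, 5.0]]),
    y_pred=np.array([[3.0, 5.0], [1.0, 2.0], [2.0, 4.0]]),
    multioutput="raw_values",
)  # array([4/3, 5/3])

# All-NaN input yields NaN rather than an exception
mae(y_true=np.array([np.nan, np.nan]), y_pred=np.array([1.0, 2.0]))  # nan
```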
diff --git a/etna/metrics/metrics.py b/etna/metrics/metrics.py
index e6b3e82bd..8e165d199 100644
--- a/etna/metrics/metrics.py
+++ b/etna/metrics/metrics.py
@@ -16,29 +16,47 @@ from etna.metrics.functional_metrics import wape
 
 
-class MAE(Metric):
+class MAE(MetricWithMissingHandling):
     """Mean absolute error metric with multi-segment computation support.
 
     .. math::
         MAE(y\_true, y\_pred) = \\frac{\\sum_{i=1}^{n}{\\mid y\_true_i - y\_pred_i \\mid}}{n}
 
+    This metric can handle missing values with parameter ``missing_mode``.
+    In ``ignore`` mode, if all the values to aggregate are missing, the result will be ``None``.
+
     Notes
     -----
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = "per-segment", **kwargs):
+    def __init__(self, mode: str = "per-segment", missing_mode: str = "error", **kwargs):
         """Init metric.
 
         Parameters
         ----------
-        mode: 'macro' or 'per-segment'
-            metrics aggregation mode
+        mode:
+            "macro" or "per-segment", way to aggregate metric values over segments:
+
+            * if "macro" -- computes average value over segments
+
+            * if "per-segment" -- does not aggregate metrics
+
+            See :py:class:`~etna.metrics.base.MetricAggregationMode`.
+
+        missing_mode:
+            mode of handling missing values (see :py:class:`~etna.metrics.base.MetricMissingMode`)
         kwargs:
             metric's computation arguments
         """
         mae_per_output = partial(mae, multioutput="raw_values")
-        super().__init__(mode=mode, metric_fn=mae_per_output, metric_fn_signature="matrix_to_array", **kwargs)
+        super().__init__(
+            mode=mode,
+            metric_fn=mae_per_output,
+            metric_fn_signature="matrix_to_array",
+            missing_mode=missing_mode,
+            **kwargs,
+        )
 
     @property
     def greater_is_better(self) -> bool:
@@ -83,8 +101,8 @@ def __init__(self, mode: str = "per-segment", missing_mode: str = "error", **kwa
         super().__init__(
             mode=mode,
             metric_fn=mse_per_output,
-            missing_mode=missing_mode,
             metric_fn_signature="matrix_to_array",
+            missing_mode=missing_mode,
             **kwargs,
         )
 
diff --git a/tests/test_metrics/test_functional_metrics.py b/tests/test_metrics/test_functional_metrics.py
index b4dca0017..2c0a237c5 100644
--- a/tests/test_metrics/test_functional_metrics.py
+++ b/tests/test_metrics/test_functional_metrics.py
@@ -187,6 +187,68 @@ def test_mse_ok(y_true, y_pred, multioutput, expected):
     npt.assert_allclose(result, expected)
 
 
+@pytest.mark.parametrize(
+    "y_true, y_pred, multioutput, expected",
+    [
+        # 1d
+        (np.array([1.0]), np.array([1.0]), "joint", 0.0),
+        (np.array([1.0, 2.0, 3.0]), np.array([3.0, 1.0, 2.0]), "joint", 4 / 3),
+        (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, 2.0]), "joint", 1.5),
+        (np.array([1.0, 2.0, 3.0]), np.array([3.0, np.NaN, 2.0]), "joint", 1.5),
+        (np.array([1.0, np.NaN, 3.0]), np.array([3.0, np.NaN, 2.0]), "joint", 1.5),
+        (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, np.NaN]), "joint", 2.0),
+        (np.array([1.0, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, 2.0]), "joint", np.NaN),
+        # 2d
+        (np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T, "joint", 1.5),
+        (
+            np.array([[1.0, np.NaN, 3.0], [3.0, 4.0, np.NaN]]).T,
+            np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T,
+            "joint",
+            2.0,
+        ),
+        (
+            np.array([[np.NaN, np.NaN, np.NaN], [3.0, 4.0, 5.0]]).T,
+            np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T,
+            "joint",
+            1.5,
+        ),
+        (
+            np.array([[np.NaN, np.NaN, np.NaN], [np.NaN, np.NaN, np.NaN]]).T,
+            np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T,
+            "joint",
+            np.NaN,
+        ),
+        (
+            np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T,
+            np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T,
+            "raw_values",
+            np.array([4 / 3, 5 / 3]),
+        ),
+        (
+            np.array([[1.0, np.NaN, 3.0], [3.0, 4.0, np.NaN]]).T,
+            np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T,
+            "raw_values",
+            np.array([2.0, 2.0]),
+        ),
+        (
+            np.array([[np.NaN, np.NaN, np.NaN], [3.0, 4.0, 5.0]]).T,
+            np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T,
+            "raw_values",
+            np.array([np.NaN, 1.5]),
+        ),
+        (
+            np.array([[np.NaN, np.NaN, np.NaN], [np.NaN, np.NaN, np.NaN]]).T,
+            np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T,
+            "raw_values",
+            np.array([np.NaN, np.NaN]),
+        ),
+    ],
+)
+def test_mae_ok(y_true, y_pred, multioutput, expected):
+    result = mae(y_true=y_true, y_pred=y_pred, multioutput=multioutput)
+    npt.assert_allclose(result, expected)
+
+
 @pytest.mark.parametrize(
     "y_true, y_pred, multioutput, expected",
     [
diff --git a/tests/test_metrics/test_metrics.py b/tests/test_metrics/test_metrics.py
index f22835175..7661f587a 100644
--- a/tests/test_metrics/test_metrics.py
+++ b/tests/test_metrics/test_metrics.py
@@ -39,10 +39,11 @@
 @pytest.mark.parametrize(
     "metric, expected_repr",
     (
-        (MAE(), "MAE(mode = 'per-segment', )"),
-        (MAE(mode="macro"), "MAE(mode = 'macro', )"),
+        (MAE(), "MAE(mode = 'per-segment', missing_mode = 'error', )"),
+        (MAE(mode="macro"), "MAE(mode = 'macro', missing_mode = 'error', )"),
+        (MAE(missing_mode="ignore"), "MAE(mode = 'per-segment', missing_mode = 'ignore', )"),
+        (MAE(mode="macro", missing_mode="ignore"), "MAE(mode = 'macro', missing_mode = 'ignore', )"),
         (MSE(), "MSE(mode = 'per-segment', missing_mode = 'error', )"),
-        (MSE(missing_mode="ignore"), "MSE(mode = 'per-segment', missing_mode = 'ignore', )"),
         (RMSE(), "RMSE(mode = 'per-segment', )"),
         (MedAE(), "MedAE(mode = 'per-segment', )"),
         (MSLE(), "MSLE(mode = 'per-segment', )"),
@@ -178,7 +179,7 @@ def test_invalid_nans_pred(metric_class, train_test_dfs):
 @pytest.mark.parametrize(
     "metric",
     (
-        MAE(),
+        MAE(missing_mode="error"),
         MSE(missing_mode="error"),
         RMSE(),
         MedAE(),
@@ -202,7 +203,7 @@
 @pytest.mark.parametrize(
     "metric",
-    (MSE(missing_mode="ignore"), MissingCounter()),
+    (MSE(missing_mode="ignore"), MAE(missing_mode="ignore"), MissingCounter()),
 )
 def test_invalid_single_nan_ignore(metric, train_test_dfs):
     """Check metrics behavior in case of ignoring one nan in true values."""
@@ -217,7 +218,11 @@
 @pytest.mark.parametrize(
     "metric, expected_type",
-    ((MSE(mode="per-segment", missing_mode="ignore"), type(None)), (MissingCounter(mode="per-segment"), float)),
+    (
+        (MSE(mode="per-segment", missing_mode="ignore"), type(None)),
+        (MAE(mode="per-segment", missing_mode="ignore"), type(None)),
+        (MissingCounter(mode="per-segment"), float),
+    ),
 )
 def test_invalid_segment_nans_ignore_per_segment(metric, expected_type, train_test_dfs):
     """Check per-segment metrics behavior in case of ignoring segment of all nans in true values."""
@@ -238,7 +243,7 @@
 @pytest.mark.parametrize(
     "metric",
-    (MSE(mode="macro", missing_mode="ignore"), MissingCounter(mode="macro")),
+    (MSE(mode="macro", missing_mode="ignore"), MAE(mode="macro", missing_mode="ignore"), MissingCounter(mode="macro")),
 )
 def test_invalid_segment_nans_ignore_macro(metric, train_test_dfs):
     """Check macro metrics behavior in case of ignoring segment of all nans in true values."""
@@ -250,7 +255,11 @@
 @pytest.mark.parametrize(
     "metric, expected_type",
-    ((MSE(mode="macro", missing_mode="ignore"), type(None)), (MissingCounter(mode="macro"), float)),
+    (
+        (MSE(mode="macro", missing_mode="ignore"), type(None)),
+        (MAE(mode="macro", missing_mode="ignore"), type(None)),
+        (MissingCounter(mode="macro"), float),
+    ),
 )
 def test_invalid_all_nans_ignore_macro(metric, expected_type, train_test_dfs):
     """Check macro metrics behavior in case of all nan values in true values."""
diff --git a/tests/test_metrics/test_metrics_utils.py b/tests/test_metrics/test_metrics_utils.py
index 8872ad7af..49c7f4cb5 100644
--- a/tests/test_metrics/test_metrics_utils.py
+++ b/tests/test_metrics/test_metrics_utils.py
@@ -14,8 +14,8 @@ def test_compute_metrics(train_test_dfs: Tuple[TSDataset, TSDataset]):
     forecast_df, true_df = train_test_dfs
     metrics = [MAE("per-segment"), MAE(mode="macro"), MSE("per-segment"), MAPE(mode="macro", eps=1e-5)]
     expected_keys = [
-        "MAE(mode = 'per-segment', )",
-        "MAE(mode = 'macro', )",
+        "MAE(mode = 'per-segment', missing_mode = 'error', )",
+        "MAE(mode = 'macro', missing_mode = 'error', )",
         "MSE(mode = 'per-segment', missing_mode = 'error', )",
         "MAPE(mode = 'macro', eps = 1e-05, )",
     ]
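End to end, the new parameter is used like this (a sketch; `true_ts` and `forecast_ts` are hypothetical `TSDataset` objects, everything else follows the diff):

```python
from etna.metrics import MAE

# Default keeps the old strict behavior: NaNs in y_true raise an error.
strict_metric = MAE(mode="per-segment")

# "ignore" skips NaNs; per-segment, a segment whose true values are all
# missing yields None, and in "macro" mode such segments are excluded
# from the average (the result is None only if every segment is fully missing).
tolerant_metric = MAE(mode="macro", missing_mode="ignore")

# result = tolerant_metric(y_true=true_ts, y_pred=forecast_ts)  # hypothetical datasets
```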