diff --git a/CHANGELOG.md b/CHANGELOG.md index e44d0ff32..dc8f73cc3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,7 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - - - Add docstring warning about handling non-regressors (including target) to children of `WindowStatisticsTransform` ([#474](https://github.com/etna-team/etna/pull/474)) -- +- Add parameter `missing_mode` into `MSE` metric ([#515](https://github.com/etna-team/etna/pull/515)) - - - diff --git a/etna/metrics/__init__.py b/etna/metrics/__init__.py index 24f2e679a..38235e2fd 100644 --- a/etna/metrics/__init__.py +++ b/etna/metrics/__init__.py @@ -1,7 +1,6 @@ """Module with metrics of forecasting quality.""" from sklearn.metrics import mean_absolute_error as mae -from sklearn.metrics import mean_squared_error as mse from sklearn.metrics import mean_squared_log_error as msle from sklearn.metrics import median_absolute_error as medae from sklearn.metrics import r2_score diff --git a/etna/metrics/base.py b/etna/metrics/base.py index 0dfc0304e..32373a145 100644 --- a/etna/metrics/base.py +++ b/etna/metrics/base.py @@ -128,7 +128,7 @@ class Metric(AbstractMetric, BaseMixin): def __init__( self, metric_fn: MetricFunction, - mode: str = MetricAggregationMode.per_segment, + mode: str = "per-segment", metric_fn_signature: str = "array_to_scalar", **kwargs, ): @@ -146,6 +146,8 @@ def __init__( * if "per-segment" -- does not aggregate metrics + See :py:class:`~etna.metrics.base.MetricAggregationMode`. + metric_fn_signature: type of signature of ``metric_fn`` (see :py:class:`~etna.metrics.base.MetricFunctionSignature`) kwargs: @@ -385,7 +387,7 @@ class MetricWithMissingHandling(Metric): def __init__( self, metric_fn: MetricFunction, - mode: str = MetricAggregationMode.per_segment, + mode: str = "per-segment", metric_fn_signature: str = "array_to_scalar", missing_mode: str = "error", **kwargs, @@ -404,6 +406,8 @@ def __init__( * if "per-segment" -- does not aggregate metrics + See :py:class:`~etna.metrics.base.MetricAggregationMode`. + metric_fn_signature: type of signature of ``metric_fn`` (see :py:class:`~etna.metrics.base.MetricFunctionSignature`) missing_mode: @@ -421,7 +425,8 @@ def __init__( If non-existent ``missing_mode`` is used. """ super().__init__(metric_fn=metric_fn, mode=mode, metric_fn_signature=metric_fn_signature, **kwargs) - self.missing_mode = MetricMissingMode(missing_mode) + self.missing_mode = missing_mode + self._missing_mode_enum = MetricMissingMode(missing_mode) def _validate_nans(self, y_true: TSDataset, y_pred: TSDataset): """Check that ``y_true`` and ``y_pred`` doesn't have NaNs depending on ``missing_mode``. @@ -442,7 +447,7 @@ def _validate_nans(self, y_true: TSDataset, y_pred: TSDataset): df_pred = y_pred.df.loc[:, pd.IndexSlice[:, "target"]] df_true_isna_sum = df_true.isna().sum() - if self.missing_mode is MetricMissingMode.error and (df_true_isna_sum > 0).any(): + if self._missing_mode_enum is MetricMissingMode.error and (df_true_isna_sum > 0).any(): error_segments = set(df_true_isna_sum[df_true_isna_sum > 0].index.droplevel("feature").tolist()) raise ValueError(f"There are NaNs in y_true! Segments with NaNs: {reprlib.repr(error_segments)}.") diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py index 404f1a757..7feb319ad 100644 --- a/etna/metrics/functional_metrics.py +++ b/etna/metrics/functional_metrics.py @@ -1,3 +1,4 @@ +import warnings from enum import Enum from functools import partial from typing import Optional @@ -6,7 +7,7 @@ import numpy as np from sklearn.metrics import mean_absolute_error as mae -from sklearn.metrics import mean_squared_error as mse +from sklearn.metrics import mean_squared_error as mse_sklearn from sklearn.metrics import mean_squared_log_error as msle from sklearn.metrics import median_absolute_error as medae from sklearn.metrics import r2_score @@ -41,11 +42,59 @@ def _get_axis_by_multioutput(multioutput: str) -> Optional[int]: assert_never(multioutput_enum) +def mse(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike: + """Mean squared error with missing values handling. + + .. math:: + MSE(y\_true, y\_pred) = \\frac{\\sum_{i=1}^{n}{(y\_true_i - y\_pred_i)^2}}{n} + + The nans are ignored during computation. If all values are nans, the result is NaN. + + Parameters + ---------- + y_true: + array-like of shape (n_samples,) or (n_samples, n_outputs) + + Ground truth (correct) target values. + + y_pred: + array-like of shape (n_samples,) or (n_samples, n_outputs) + + Estimated target values. + + multioutput: + Defines aggregating of multiple output values + (see :py:class:`~etna.metrics.functional_metrics.FunctionalMetricMultioutput`). + + Returns + ------- + : + A non-negative floating point value (the best value is 0.0), or an array of floating point values, + one for each individual target. + """ + y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred) + + if len(y_true_array.shape) != len(y_pred_array.shape): + raise ValueError("Shapes of the labels must be the same") + + axis = _get_axis_by_multioutput(multioutput) + with warnings.catch_warnings(): + # this helps to prevent warning in case of all nans + warnings.filterwarnings( + message="Mean of empty slice", + action="ignore", + ) + result = np.nanmean((y_true_array - y_pred_array) ** 2, axis=axis) + return result + + def mape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput: str = "joint") -> ArrayLike: """Mean absolute percentage error. - `Wikipedia entry on the Mean absolute percentage error - `_ + .. math:: + MAPE(y\_true, y\_pred) = \\frac{1}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \epsilon} + + `Scale-dependent errors `_ Parameters ---------- @@ -88,11 +137,8 @@ def mape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput: def smape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput: str = "joint") -> ArrayLike: """Symmetric mean absolute percentage error. - `Wikipedia entry on the Symmetric mean absolute percentage error - `_ - .. math:: - SMAPE = \dfrac{100}{n}\sum_{t=1}^{n}\dfrac{|ytrue_{t}-ypred_{t}|}{(|ypred_{t}|+|ytrue_{t}|) / 2} + SMAPE(y\_true, y\_pred) = \\frac{2 \\cdot 100 \\%}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \\mid y\_pred_i \\mid} Parameters ---------- @@ -136,7 +182,7 @@ def sign(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar """Sign error metric. .. math:: - Sign(y\_true, y\_pred) = \\frac{1}{n}\\cdot\\sum_{i=0}^{n - 1}{sign(y\_true_i - y\_pred_i)} + Sign(y\_true, y\_pred) = \\frac{1}{n}\\cdot\\sum_{i=1}^{n}{sign(y\_true_i - y\_pred_i)} Parameters ---------- @@ -173,6 +219,9 @@ def sign(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar def max_deviation(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike: """Max Deviation metric. + .. math:: + MaxDeviation(y\_true, y\_pred) = \\max_{1 \\le j \\le n} | y_j |, where \\, y_j = \\sum_{i=1}^{j}{y\_pred_i - y\_true_i} + Parameters ---------- y_true: @@ -206,14 +255,14 @@ def max_deviation(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "join return np.max(np.abs(prefix_error_sum), axis=axis) -rmse = partial(mse, squared=False) +rmse = partial(mse_sklearn, squared=False) def wape(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike: """Weighted average percentage Error metric. .. math:: - WAPE(y\_true, y\_pred) = \\frac{\\sum_{i=0}^{n} |y\_true_i - y\_pred_i|}{\\sum_{i=0}^{n}|y\\_true_i|} + WAPE(y\_true, y\_pred) = \\frac{\\sum_{i=1}^{n} |y\_true_i - y\_pred_i|}{\\sum_{i=1}^{n}|y\\_true_i|} Parameters ---------- diff --git a/etna/metrics/intervals_metrics.py b/etna/metrics/intervals_metrics.py index 6e70525af..10284460e 100644 --- a/etna/metrics/intervals_metrics.py +++ b/etna/metrics/intervals_metrics.py @@ -9,7 +9,6 @@ from etna.datasets import TSDataset from etna.metrics.base import Metric -from etna.metrics.base import MetricAggregationMode from etna.metrics.functional_metrics import ArrayLike @@ -56,7 +55,7 @@ class Coverage(Metric, _IntervalsMetricMixin): def __init__( self, quantiles: Optional[Tuple[float, float]] = None, - mode: str = MetricAggregationMode.per_segment, + mode: str = "per-segment", upper_name: Optional[str] = None, lower_name: Optional[str] = None, **kwargs, @@ -67,8 +66,14 @@ def __init__( ---------- quantiles: lower and upper quantiles - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. upper_name: name of column with upper border of the interval lower_name: @@ -169,7 +174,7 @@ class Width(Metric, _IntervalsMetricMixin): def __init__( self, quantiles: Optional[Tuple[float, float]] = None, - mode: str = MetricAggregationMode.per_segment, + mode: str = "per-segment", upper_name: Optional[str] = None, lower_name: Optional[str] = None, **kwargs, @@ -180,8 +185,14 @@ def __init__( ---------- quantiles: lower and upper quantiles - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. upper_name: name of column with upper border of the interval lower_name: diff --git a/etna/metrics/metrics.py b/etna/metrics/metrics.py index d5388d7ce..67088a811 100644 --- a/etna/metrics/metrics.py +++ b/etna/metrics/metrics.py @@ -1,7 +1,7 @@ from functools import partial from etna.metrics.base import Metric -from etna.metrics.base import MetricAggregationMode +from etna.metrics.base import MetricWithMissingHandling from etna.metrics.functional_metrics import mae from etna.metrics.functional_metrics import mape from etna.metrics.functional_metrics import max_deviation @@ -26,7 +26,7 @@ class MAE(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs): + def __init__(self, mode: str = "per-segment", **kwargs): """Init metric. Parameters @@ -45,29 +45,47 @@ def greater_is_better(self) -> bool: return False -class MSE(Metric): +class MSE(MetricWithMissingHandling): """Mean squared error metric with multi-segment computation support. .. math:: MSE(y\_true, y\_pred) = \\frac{\\sum_{i=1}^{n}{(y\_true_i - y\_pred_i)^2}}{n} + This metric can handle missing values with parameter ``missing_mode``. + If there are too many of them in ``ignore`` mode, the result will be ``None``. + Notes ----- You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs): + def __init__(self, mode: str = "per-segment", missing_mode: str = "error", **kwargs): """Init metric. Parameters ---------- - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. + + missing_mode: + mode of handling missing values (see :py:class:`~etna.metrics.base.MetricMissingMode`) kwargs: metric's computation arguments """ mse_per_output = partial(mse, multioutput="raw_values") - super().__init__(mode=mode, metric_fn=mse_per_output, metric_fn_signature="matrix_to_array", **kwargs) + super().__init__( + mode=mode, + metric_fn=mse_per_output, + missing_mode=missing_mode, + metric_fn_signature="matrix_to_array", + **kwargs, + ) @property def greater_is_better(self) -> bool: @@ -86,13 +104,19 @@ class RMSE(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs): + def __init__(self, mode: str = "per-segment", **kwargs): """Init metric. Parameters ---------- - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. kwargs: metric's computation arguments """ @@ -115,13 +139,19 @@ class R2(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs): + def __init__(self, mode: str = "per-segment", **kwargs): """Init metric. Parameters ---------- - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. kwargs: metric's computation arguments """ @@ -145,13 +175,19 @@ class MAPE(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs): + def __init__(self, mode: str = "per-segment", **kwargs): """Init metric. Parameters ---------- - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. kwargs: metric's computation arguments """ @@ -175,13 +211,19 @@ class SMAPE(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs): + def __init__(self, mode: str = "per-segment", **kwargs): """Init metric. Parameters ---------- - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. kwargs: metric's computation arguments """ @@ -205,13 +247,19 @@ class MedAE(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs): + def __init__(self, mode: str = "per-segment", **kwargs): """Init metric. Parameters ---------- - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. kwargs: metric's computation arguments """ @@ -235,13 +283,19 @@ class MSLE(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs): + def __init__(self, mode: str = "per-segment", **kwargs): """Init metric. Parameters ---------- - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. kwargs: metric's computation arguments @@ -266,13 +320,19 @@ class Sign(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs): + def __init__(self, mode: str = "per-segment", **kwargs): """Init metric. Parameters ---------- - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. kwargs: metric's computation arguments """ @@ -296,13 +356,19 @@ class MaxDeviation(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs): + def __init__(self, mode: str = "per-segment", **kwargs): """Init metric. Parameters ---------- - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. kwargs: metric's computation arguments """ @@ -320,18 +386,25 @@ class WAPE(Metric): .. math:: WAPE(y\_true, y\_pred) = \\frac{\\sum_{i=1}^{n} |y\_true_i - y\_pred_i|}{\\sum_{i=1}^{n}|y\\_true_i|} + Notes ----- You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs): + def __init__(self, mode: str = "per-segment", **kwargs): """Init metric. Parameters ---------- - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. kwargs: metric's computation arguments """ diff --git a/tests/test_metrics/test_functional_metrics.py b/tests/test_metrics/test_functional_metrics.py index f9198cf11..ba0e2436c 100644 --- a/tests/test_metrics/test_functional_metrics.py +++ b/tests/test_metrics/test_functional_metrics.py @@ -1,3 +1,4 @@ +import numpy as np import numpy.testing as npt import pytest @@ -58,6 +59,7 @@ def test_mle_metric_exception(y_true_1d, y_pred_1d): @pytest.mark.parametrize( "metric", ( + mse, mape, smape, sign, @@ -116,3 +118,65 @@ def test_all_2d_metrics_joint(metric, right_metrics_value, y_true_2d, y_pred_2d) ) def test_all_2d_metrics_per_output(metric, params, right_metrics_value, y_true_2d, y_pred_2d): npt.assert_almost_equal(metric(y_true_2d, y_pred_2d, **params), right_metrics_value) + + +@pytest.mark.parametrize( + "y_true, y_pred, multioutput, expected", + [ + # 1d + (np.array([1.0]), np.array([1.0]), "joint", 0.0), + (np.array([1.0, 2.0, 3.0]), np.array([3.0, 1.0, 2.0]), "joint", 2.0), + (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, 2.0]), "joint", 2.5), + (np.array([1.0, 2.0, 3.0]), np.array([3.0, np.NaN, 2.0]), "joint", 2.5), + (np.array([1.0, np.NaN, 3.0]), np.array([3.0, np.NaN, 2.0]), "joint", 2.5), + (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, np.NaN]), "joint", 4.0), + (np.array([1.0, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, 2.0]), "joint", np.NaN), + # 2d + (np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T, "joint", 2.5), + ( + np.array([[1.0, np.NaN, 3.0], [3.0, 4.0, np.NaN]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "joint", + 4.0, + ), + ( + np.array([[np.NaN, np.NaN, np.NaN], [3.0, 4.0, 5.0]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "joint", + 2.5, + ), + ( + np.array([[np.NaN, np.NaN, np.NaN], [np.NaN, np.NaN, np.NaN]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "joint", + np.NaN, + ), + ( + np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, + np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T, + "raw_values", + np.array([2.0, 3.0]), + ), + ( + np.array([[1.0, np.NaN, 3.0], [3.0, 4.0, np.NaN]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "raw_values", + np.array([4.0, 4.0]), + ), + ( + np.array([[np.NaN, np.NaN, np.NaN], [3.0, 4.0, 5.0]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "raw_values", + np.array([np.NaN, 2.5]), + ), + ( + np.array([[np.NaN, np.NaN, np.NaN], [np.NaN, np.NaN, np.NaN]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "raw_values", + np.array([np.NaN, np.NaN]), + ), + ], +) +def test_values_ok(y_true, y_pred, multioutput, expected): + result = mse(y_true=y_true, y_pred=y_pred, multioutput=multioutput) + npt.assert_allclose(result, expected) diff --git a/tests/test_metrics/test_metrics.py b/tests/test_metrics/test_metrics.py index 1d02d5b98..58074ead9 100644 --- a/tests/test_metrics/test_metrics.py +++ b/tests/test_metrics/test_metrics.py @@ -35,31 +35,28 @@ @pytest.mark.parametrize( - "metric_class, metric_class_repr, metric_params, param_repr", + "metric, expected_repr", ( - (MAE, "MAE", {}, ""), - (MSE, "MSE", {}, ""), - (RMSE, "RMSE", {}, ""), - (MedAE, "MedAE", {}, ""), - (MSLE, "MSLE", {}, ""), - (MAPE, "MAPE", {}, ""), - (SMAPE, "SMAPE", {}, ""), - (R2, "R2", {}, ""), - (Sign, "Sign", {}, ""), - (MaxDeviation, "MaxDeviation", {}, ""), - (DummyMetric, "DummyMetric", {"alpha": 1.0}, "alpha = 1.0, "), - (WAPE, "WAPE", {}, ""), + (MAE(), "MAE(mode = 'per-segment', )"), + (MAE(mode="macro"), "MAE(mode = 'macro', )"), + (MSE(), "MSE(mode = 'per-segment', missing_mode = 'error', )"), + (MSE(missing_mode="ignore"), "MSE(mode = 'per-segment', missing_mode = 'ignore', )"), + (RMSE(), "RMSE(mode = 'per-segment', )"), + (MedAE(), "MedAE(mode = 'per-segment', )"), + (MSLE(), "MSLE(mode = 'per-segment', )"), + (MAPE(), "MAPE(mode = 'per-segment', )"), + (SMAPE(), "SMAPE(mode = 'per-segment', )"), + (R2(), "R2(mode = 'per-segment', )"), + (Sign(), "Sign(mode = 'per-segment', )"), + (MaxDeviation(), "MaxDeviation(mode = 'per-segment', )"), + (DummyMetric(), "DummyMetric(mode = 'per-segment', alpha = 1.0, )"), + (WAPE(), "WAPE(mode = 'per-segment', )"), ), ) -def test_repr(metric_class, metric_class_repr, metric_params, param_repr): +def test_repr(metric, expected_repr): """Check metrics __repr__ method""" - metric_mode = "per-segment" - kwargs = {**metric_params, "kwarg_1": "value_1", "kwarg_2": "value_2"} - kwargs_repr = param_repr + "kwarg_1 = 'value_1', kwarg_2 = 'value_2'" - metric = metric_class(mode=metric_mode, **kwargs) metric_repr = metric.__repr__() - true_repr = f"{metric_class_repr}(mode = '{metric_mode}', {kwargs_repr}, )" - assert metric_repr == true_repr + assert metric_repr == expected_repr @pytest.mark.parametrize( @@ -168,17 +165,86 @@ def test_invalid_nans_pred(metric_class, train_test_dfs): @pytest.mark.parametrize( - "metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric, WAPE) + "metric", + ( + MAE(), + MSE(missing_mode="error"), + RMSE(), + MedAE(), + MSLE(), + MAPE(), + SMAPE(), + R2(), + Sign(), + MaxDeviation(), + DummyMetric(), + WAPE(), + ), ) -def test_invalid_nans_true(metric_class, train_test_dfs): +def test_invalid_nans_true(metric, train_test_dfs): """Check metrics behavior in case of nans in true values.""" forecast_df, true_df = train_test_dfs true_df.df.iloc[0, 0] = np.NaN - metric = metric_class() with pytest.raises(ValueError, match="There are NaNs in y_true"): _ = metric(y_true=true_df, y_pred=forecast_df) +@pytest.mark.parametrize( + "metric", + (MSE(missing_mode="ignore"),), +) +def test_invalid_single_nan_ignore(metric, train_test_dfs): + """Check metrics behavior in case of ignoring one nan in true values.""" + forecast_df, true_df = train_test_dfs + true_df.df.iloc[0, 0] = np.NaN + value = metric(y_true=true_df, y_pred=forecast_df) + assert isinstance(value, dict) + segments = set(forecast_df.df.columns.get_level_values("segment").unique().tolist()) + assert value.keys() == segments + assert all(isinstance(cur_value, float) for cur_value in value.values()) + + +@pytest.mark.parametrize( + "metric", + (MSE(mode="per-segment", missing_mode="ignore"),), +) +def test_invalid_segment_nans_ignore_per_segment(metric, train_test_dfs): + """Check per-segment metrics behavior in case of ignoring segment of all nans in true values.""" + forecast_df, true_df = train_test_dfs + true_df.df.iloc[:, 0] = np.NaN + value = metric(y_true=true_df, y_pred=forecast_df) + assert isinstance(value, dict) + segments = set(forecast_df.df.columns.get_level_values("segment").unique().tolist()) + assert value.keys() == segments + empty_segment = true_df.df.columns.get_level_values("segment").unique()[0] + assert all(isinstance(cur_value, float) for cur_segment, cur_value in value.items() if cur_segment != empty_segment) + assert value[empty_segment] is None + + +@pytest.mark.parametrize( + "metric", + (MSE(mode="macro", missing_mode="ignore"),), +) +def test_invalid_segment_nans_ignore_macro(metric, train_test_dfs): + """Check macro metrics behavior in case of ignoring segment of all nans in true values.""" + forecast_df, true_df = train_test_dfs + true_df.df.iloc[:, 0] = np.NaN + value = metric(y_true=true_df, y_pred=forecast_df) + assert isinstance(value, float) + + +@pytest.mark.parametrize( + "metric", + (MSE(mode="macro", missing_mode="ignore"),), +) +def test_invalid_all_nans_ignore_macro(metric, train_test_dfs): + """Check macro metrics behavior in case of all nan values in true values.""" + forecast_df, true_df = train_test_dfs + true_df.df.iloc[:, :] = np.NaN + value = metric(y_true=true_df, y_pred=forecast_df) + assert value is None + + @pytest.mark.parametrize( "metric_class, metric_fn", ( diff --git a/tests/test_metrics/test_metrics_utils.py b/tests/test_metrics/test_metrics_utils.py index d8123081e..8872ad7af 100644 --- a/tests/test_metrics/test_metrics_utils.py +++ b/tests/test_metrics/test_metrics_utils.py @@ -16,7 +16,7 @@ def test_compute_metrics(train_test_dfs: Tuple[TSDataset, TSDataset]): expected_keys = [ "MAE(mode = 'per-segment', )", "MAE(mode = 'macro', )", - "MSE(mode = 'per-segment', )", + "MSE(mode = 'per-segment', missing_mode = 'error', )", "MAPE(mode = 'macro', eps = 1e-05, )", ] result = compute_metrics(metrics=metrics, y_true=true_df, y_pred=forecast_df) diff --git a/tests/utils.py b/tests/utils.py index e9c6f0d5e..85fa5b06c 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -92,7 +92,7 @@ class DummyMetric(Metric): We change the name property here. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, alpha: float = 1.0, **kwargs): + def __init__(self, mode: str = MetricAggregationMode.per_segment.value, alpha: float = 1.0, **kwargs): self.alpha = alpha super().__init__(mode=mode, metric_fn=create_dummy_functional_metric(alpha), **kwargs)