From 064e6455fad96cc3d117dd1207413ddce78a4f2e Mon Sep 17 00:00:00 2001 From: d-a-bunin <142778107+d-a-bunin@users.noreply.github.com> Date: Wed, 18 Dec 2024 11:22:03 +0300 Subject: [PATCH] Update `Sign`, `WAPE`, `MaxDeviation` to handle missing values (#530) --- CHANGELOG.md | 1 + etna/metrics/functional_metrics.py | 60 ++++- etna/metrics/metrics.py | 51 +++- tests/test_metrics/test_functional_metrics.py | 223 ++++++++++++++++++ tests/test_metrics/test_metrics.py | 24 +- 5 files changed, 338 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e3d5c379..be2ffddbf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add parameter `missing_mode` into `MSE` metric ([#515](https://github.com/etna-team/etna/pull/515)) - Add parameter `missing_mode` into `MAE` metric ([#523](https://github.com/etna-team/etna/pull/523)) - Add parameter `missing_mode` into `MAPE` and `SMAPE` metrics ([#524](https://github.com/etna-team/etna/pull/524)) +- Add parameter `missing_mode` into `Sign`, `WAPE` and `MaxDeviation` metrics ([#530](https://github.com/etna-team/etna/pull/530)) - - Update `aggregate_metrics_df` to work with `None` values ([#522](https://github.com/etna-team/etna/pull/522)) - diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py index 2b3693f91..d6e086e94 100644 --- a/etna/metrics/functional_metrics.py +++ b/etna/metrics/functional_metrics.py @@ -195,6 +195,8 @@ def smape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput: .. math:: SMAPE(y\_true, y\_pred) = \\frac{2 \\cdot 100 \\%}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \\mid y\_pred_i \\mid} + The nans are ignored during computation. If all values are nans, the result is NaN. 
+ Parameters ---------- y_true: @@ -247,6 +249,8 @@ def sign(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar .. math:: Sign(y\_true, y\_pred) = \\frac{1}{n}\\cdot\\sum_{i=1}^{n}{sign(y\_true_i - y\_pred_i)} + The nans are ignored during computation. If all values are nans, the result is NaN. + Parameters ---------- y_true: @@ -275,8 +279,15 @@ def sign(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar raise ValueError("Shapes of the labels must be the same") axis = _get_axis_by_multioutput(multioutput) + with warnings.catch_warnings(): + # this helps to prevent warning in case of all nans + warnings.filterwarnings( + message="Mean of empty slice", + action="ignore", + ) + result = np.nanmean(np.sign(y_true_array - y_pred_array), axis=axis) - return np.mean(np.sign(y_true_array - y_pred_array), axis=axis) + return result def max_deviation(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike: @@ -285,6 +296,8 @@ def max_deviation(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "join .. math:: MaxDeviation(y\_true, y\_pred) = \\max_{1 \\le j \\le n} | y_j |, where \\, y_j = \\sum_{i=1}^{j}{y\_pred_i - y\_true_i} + The nans are ignored during computation. If all values are nans, the result is NaN. 
+ Parameters ---------- y_true: @@ -313,9 +326,15 @@ def max_deviation(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "join raise ValueError("Shapes of the labels must be the same") axis = _get_axis_by_multioutput(multioutput) - - prefix_error_sum = np.cumsum(y_pred_array - y_true_array, axis=axis) - return np.max(np.abs(prefix_error_sum), axis=axis) + diff = y_pred_array - y_true_array + prefix_error_sum = np.nancumsum(diff, axis=axis) + isnan = np.all(np.isnan(diff), axis=axis) + result = np.max(np.abs(prefix_error_sum), axis=axis) + result = np.where(isnan, np.nan, result) + if result.ndim == 0: + # joint mode yields a 0-d array: unwrap it into a plain float + return result.item() + return result # type: ignore rmse = partial(mse_sklearn, squared=False) @@ -327,6 +346,8 @@ def wape(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar .. math:: WAPE(y\_true, y\_pred) = \\frac{\\sum_{i=1}^{n} |y\_true_i - y\_pred_i|}{\\sum_{i=1}^{n}|y\\_true_i|} + The nans are ignored during computation. If all values are nans, the result is NaN. 
+ Parameters ---------- y_true: @@ -355,8 +376,35 @@ def wape(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar raise ValueError("Shapes of the labels must be the same") axis = _get_axis_by_multioutput(multioutput) - - return np.sum(np.abs(y_true_array - y_pred_array), axis=axis) / np.sum(np.abs(y_true_array), axis=axis) # type: ignore + diff = y_true_array - y_pred_array + numerator = np.nansum(np.abs(diff), axis=axis) + isnan = np.isnan(diff) + denominator = np.nansum(np.abs(y_true_array * (~isnan)), axis=axis) + with warnings.catch_warnings(): + # silence numpy divide warnings raised when the denominator is zero + warnings.filterwarnings( + message="invalid value encountered in scalar divide", + action="ignore", + ) + warnings.filterwarnings( + message="invalid value encountered in divide", + action="ignore", + ) + warnings.filterwarnings( + message="divide by zero encountered in scalar divide", + action="ignore", + ) + warnings.filterwarnings( + message="divide by zero encountered in divide", + action="ignore", + ) + isnan = np.all(isnan, axis=axis) + result = np.where(denominator == 0, np.nan, numerator / denominator) + result = np.where(isnan, np.nan, result) + if result.ndim == 0: + # joint mode yields a 0-d array: unwrap it into a plain float + return result.item() + return result # type: ignore def count_missing_values(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike: diff --git a/etna/metrics/metrics.py b/etna/metrics/metrics.py index 9e6eef2dd..640e2cedf 100644 --- a/etna/metrics/metrics.py +++ b/etna/metrics/metrics.py @@ -350,18 +350,21 @@ def greater_is_better(self) -> bool: return False -class Sign(Metric): +class Sign(MetricWithMissingHandling): """Sign error metric with multi-segment computation support. .. math:: Sign(y\_true, y\_pred) = \\frac{1}{n}\\cdot\\sum_{i=1}^{n}{sign(y\_true_i - y\_pred_i)} + This metric can handle missing values with parameter ``missing_mode``. + If there are too many of them in ``ignore`` mode, the result will be ``None``. 
+ Notes ----- You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = "per-segment", **kwargs): + def __init__(self, mode: str = "per-segment", missing_mode: str = "error", **kwargs): """Init metric. Parameters @@ -374,11 +377,19 @@ def __init__(self, mode: str = "per-segment", **kwargs): * if "per-segment" -- does not aggregate metrics See :py:class:`~etna.metrics.base.MetricAggregationMode`. + missing_mode: + mode of handling missing values (see :py:class:`~etna.metrics.base.MetricMissingMode`) kwargs: metric's computation arguments """ sign_per_output = partial(sign, multioutput="raw_values") - super().__init__(mode=mode, metric_fn=sign_per_output, metric_fn_signature="matrix_to_array", **kwargs) + super().__init__( + mode=mode, + metric_fn=sign_per_output, + metric_fn_signature="matrix_to_array", + missing_mode=missing_mode, + **kwargs, + ) @property def greater_is_better(self) -> None: @@ -386,18 +397,21 @@ def greater_is_better(self) -> None: return None -class MaxDeviation(Metric): +class MaxDeviation(MetricWithMissingHandling): """Max Deviation metric with multi-segment computation support (maximum deviation value of cumulative sums). .. math:: MaxDeviation(y\_true, y\_pred) = \\max_{1 \\le j \\le n} | y_j |, where \\, y_j = \\sum_{i=1}^{j}{y\_pred_i - y\_true_i} + This metric can handle missing values with parameter ``missing_mode``. + If there are too many of them in ``ignore`` mode, the result will be ``None``. + Notes ----- You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = "per-segment", **kwargs): + def __init__(self, mode: str = "per-segment", missing_mode: str = "error", **kwargs): """Init metric. Parameters @@ -410,11 +424,19 @@ def __init__(self, mode: str = "per-segment", **kwargs): * if "per-segment" -- does not aggregate metrics See :py:class:`~etna.metrics.base.MetricAggregationMode`. 
+ missing_mode: + mode of handling missing values (see :py:class:`~etna.metrics.base.MetricMissingMode`) kwargs: metric's computation arguments """ max_deviation_per_output = partial(max_deviation, multioutput="raw_values") - super().__init__(mode=mode, metric_fn=max_deviation_per_output, metric_fn_signature="matrix_to_array", **kwargs) + super().__init__( + mode=mode, + metric_fn=max_deviation_per_output, + metric_fn_signature="matrix_to_array", + missing_mode=missing_mode, + **kwargs, + ) @property def greater_is_better(self) -> bool: @@ -422,18 +444,21 @@ def greater_is_better(self) -> bool: return False -class WAPE(Metric): +class WAPE(MetricWithMissingHandling): """Weighted average percentage Error metric with multi-segment computation support. .. math:: WAPE(y\_true, y\_pred) = \\frac{\\sum_{i=1}^{n} |y\_true_i - y\_pred_i|}{\\sum_{i=1}^{n}|y\\_true_i|} + This metric can handle missing values with parameter ``missing_mode``. + If there are too many of them in ``ignore`` mode, the result will be ``None``. + Notes ----- You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = "per-segment", **kwargs): + def __init__(self, mode: str = "per-segment", missing_mode: str = "error", **kwargs): """Init metric. Parameters @@ -446,11 +471,19 @@ def __init__(self, mode: str = "per-segment", **kwargs): * if "per-segment" -- does not aggregate metrics See :py:class:`~etna.metrics.base.MetricAggregationMode`. 
+ missing_mode: + mode of handling missing values (see :py:class:`~etna.metrics.base.MetricMissingMode`) kwargs: metric's computation arguments """ wape_per_output = partial(wape, multioutput="raw_values") - super().__init__(mode=mode, metric_fn=wape_per_output, metric_fn_signature="matrix_to_array", **kwargs) + super().__init__( + mode=mode, + metric_fn=wape_per_output, + metric_fn_signature="matrix_to_array", + missing_mode=missing_mode, + **kwargs, + ) @property def greater_is_better(self) -> bool: diff --git a/tests/test_metrics/test_functional_metrics.py b/tests/test_metrics/test_functional_metrics.py index 81d634342..cfe87866e 100644 --- a/tests/test_metrics/test_functional_metrics.py +++ b/tests/test_metrics/test_functional_metrics.py @@ -136,6 +136,9 @@ def test_all_2d_metrics_per_output(metric, params, right_metrics_value, y_true_2 (np.array([1.0, np.NaN, 3.0]), np.array([3.0, np.NaN, 2.0]), "joint", 2.5), (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, np.NaN]), "joint", 4.0), (np.array([1.0, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, 2.0]), "joint", np.NaN), + (np.array([np.NaN, np.NaN, np.NaN]), np.array([3.0, 1.0, 2.0]), "joint", np.NaN), + (np.array([1.0, 2.0, 3.0]), np.array([np.NaN, np.NaN, np.NaN]), "joint", np.NaN), + (np.array([np.NaN, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, np.NaN]), "joint", np.NaN), # 2d (np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T, "joint", 2.5), ( @@ -198,6 +201,9 @@ def test_mse_ok(y_true, y_pred, multioutput, expected): (np.array([1.0, np.NaN, 3.0]), np.array([3.0, np.NaN, 2.0]), "joint", 1.5), (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, np.NaN]), "joint", 2.0), (np.array([1.0, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, 2.0]), "joint", np.NaN), + (np.array([np.NaN, np.NaN, np.NaN]), np.array([3.0, 1.0, 2.0]), "joint", np.NaN), + (np.array([1.0, 2.0, 3.0]), np.array([np.NaN, np.NaN, np.NaN]), "joint", np.NaN), + (np.array([np.NaN, np.NaN, np.NaN]), 
np.array([np.NaN, np.NaN, np.NaN]), "joint", np.NaN), # 2d (np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T, "joint", 1.5), ( @@ -261,6 +267,7 @@ def test_mae_ok(y_true, y_pred, multioutput, expected): (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, np.NaN]), "joint", 1.0), (np.array([1.0, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, 2.0]), "joint", 2.0), (np.array([np.NaN, np.NaN, np.NaN]), np.array([3.0, 1.0, 2.0]), "joint", 3.0), + (np.array([np.NaN, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, np.NaN]), "joint", 3.0), # 2d (np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T, "joint", 0.0), ( @@ -323,6 +330,9 @@ def test_count_missing_values_ok(y_true, y_pred, multioutput, expected): (np.array([1.0, np.NaN, 3.0]), np.array([3.0, np.NaN, 2.0]), "joint", 7.0 / 6.0 * 100), (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, np.NaN]), "joint", 2.0 * 100), (np.array([1.0, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, 2.0]), "joint", np.NaN), + (np.array([np.NaN, np.NaN, np.NaN]), np.array([3.0, 1.0, 2.0]), "joint", np.NaN), + (np.array([1.0, 2.0, 3.0]), np.array([np.NaN, np.NaN, np.NaN]), "joint", np.NaN), + (np.array([np.NaN, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, np.NaN]), "joint", np.NaN), # 2d ( np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, @@ -390,6 +400,9 @@ def test_mape_ok(y_true, y_pred, multioutput, expected): (np.array([1.0, np.NaN, 3.0]), np.array([3.0, np.NaN, 2.0]), "joint", 7.0 / 10.0 * 100), (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, np.NaN]), "joint", 1.0 * 100), (np.array([1.0, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, 2.0]), "joint", np.NaN), + (np.array([np.NaN, np.NaN, np.NaN]), np.array([3.0, 1.0, 2.0]), "joint", np.NaN), + (np.array([1.0, 2.0, 3.0]), np.array([np.NaN, np.NaN, np.NaN]), "joint", np.NaN), + (np.array([np.NaN, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, np.NaN]), "joint", np.NaN), # 2d ( np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 
5.0]]).T, @@ -444,3 +457,213 @@ def test_mape_ok(y_true, y_pred, multioutput, expected): def test_smape_ok(y_true, y_pred, multioutput, expected): result = smape(y_true=y_true, y_pred=y_pred, multioutput=multioutput) npt.assert_allclose(result, expected) + + +@pytest.mark.parametrize( + "y_true, y_pred, multioutput, expected", + [ + # 1d + (np.array([1.0]), np.array([1.0]), "joint", 0.0), + (np.array([1.0, 2.0, 3.0]), np.array([3.0, 1.0, 2.0]), "joint", 1 / 3), + (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, 2.0]), "joint", 0.0), + (np.array([1.0, 2.0, 3.0]), np.array([3.0, np.NaN, 2.0]), "joint", 0.0), + (np.array([1.0, np.NaN, 3.0]), np.array([3.0, np.NaN, 2.0]), "joint", 0.0), + (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, np.NaN]), "joint", -1.0), + (np.array([1.0, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, 2.0]), "joint", np.NaN), + (np.array([np.NaN, np.NaN, np.NaN]), np.array([3.0, 1.0, 2.0]), "joint", np.NaN), + (np.array([1.0, 2.0, 3.0]), np.array([np.NaN, np.NaN, np.NaN]), "joint", np.NaN), + (np.array([np.NaN, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, np.NaN]), "joint", np.NaN), + # 2d + ( + np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, + np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T, + "joint", + 1 / 3, + ), + ( + np.array([[1.0, np.NaN, 3.0], [3.0, 4.0, np.NaN]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "joint", + -1.0, + ), + ( + np.array([[np.NaN, np.NaN, np.NaN], [3.0, 4.0, 5.0]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "joint", + 0.0, + ), + ( + np.array([[np.NaN, np.NaN, np.NaN], [np.NaN, np.NaN, np.NaN]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "joint", + np.NaN, + ), + ( + np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, + np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T, + "raw_values", + np.array([1 / 3, 1 / 3]), + ), + ( + np.array([[1.0, np.NaN, 3.0], [3.0, 4.0, np.NaN]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "raw_values", + np.array([-1.0, 
-1.0]), + ), + ( + np.array([[np.NaN, np.NaN, np.NaN], [3.0, 4.0, 5.0]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "raw_values", + np.array([np.NaN, 0.0]), + ), + ( + np.array([[np.NaN, np.NaN, np.NaN], [np.NaN, np.NaN, np.NaN]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "raw_values", + np.array([np.NaN, np.NaN]), + ), + ], +) +def test_sign_ok(y_true, y_pred, multioutput, expected): + result = sign(y_true=y_true, y_pred=y_pred, multioutput=multioutput) + npt.assert_allclose(result, expected) + + +@pytest.mark.parametrize( + "y_true, y_pred, multioutput, expected", + [ + # 1d + (np.array([1.0]), np.array([1.0]), "joint", 0.0), + (np.array([1.0, 2.0, 3.0]), np.array([3.0, 1.0, 2.0]), "joint", 4 / 6), + (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, 2.0]), "joint", 3 / 4), + (np.array([1.0, 2.0, 3.0]), np.array([3.0, np.NaN, 2.0]), "joint", 3 / 4), + (np.array([1.0, np.NaN, 3.0]), np.array([3.0, np.NaN, 2.0]), "joint", 3 / 4), + (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, np.NaN]), "joint", 2 / 1), + (np.array([1.0, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, 2.0]), "joint", np.NaN), + (np.array([np.NaN, np.NaN, np.NaN]), np.array([3.0, 1.0, 2.0]), "joint", np.NaN), + (np.array([1.0, 2.0, 3.0]), np.array([np.NaN, np.NaN, np.NaN]), "joint", np.NaN), + (np.array([np.NaN, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, np.NaN]), "joint", np.NaN), + # 2d + ( + np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, + np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T, + "joint", + 9 / 18, + ), + ( + np.array([[1.0, np.NaN, 3.0], [3.0, 4.0, np.NaN]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "joint", + 4 / 4, + ), + ( + np.array([[np.NaN, np.NaN, np.NaN], [3.0, 4.0, 5.0]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "joint", + 3 / 8, + ), + ( + np.array([[np.NaN, np.NaN, np.NaN], [np.NaN, np.NaN, np.NaN]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "joint", + np.NaN, + ), + ( + 
np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, + np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T, + "raw_values", + np.array([4 / 6, 5 / 12]), + ), + ( + np.array([[1.0, np.NaN, 3.0], [3.0, 4.0, np.NaN]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "raw_values", + np.array([2 / 1, 2 / 3]), + ), + ( + np.array([[np.NaN, np.NaN, np.NaN], [3.0, 4.0, 5.0]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "raw_values", + np.array([np.NaN, 3 / 8]), + ), + ( + np.array([[np.NaN, np.NaN, np.NaN], [np.NaN, np.NaN, np.NaN]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "raw_values", + np.array([np.NaN, np.NaN]), + ), + ], +) +def test_wape_ok(y_true, y_pred, multioutput, expected): + result = wape(y_true=y_true, y_pred=y_pred, multioutput=multioutput) + npt.assert_allclose(result, expected) + + +@pytest.mark.parametrize( + "y_true, y_pred, multioutput, expected", + [ + # 1d + (np.array([1.0]), np.array([1.0]), "joint", 0.0), + (np.array([1.0, 2.0, 3.0]), np.array([3.0, 1.0, 2.0]), "joint", 2.0), + (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, 2.0]), "joint", 2.0), + (np.array([1.0, 2.0, 3.0]), np.array([3.0, np.NaN, 2.0]), "joint", 2.0), + (np.array([1.0, np.NaN, 3.0]), np.array([3.0, np.NaN, 2.0]), "joint", 2.0), + (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, np.NaN]), "joint", 2.0), + (np.array([1.0, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, 2.0]), "joint", np.NaN), + (np.array([np.NaN, np.NaN, np.NaN]), np.array([3.0, 1.0, 2.0]), "joint", np.NaN), + (np.array([1.0, 2.0, 3.0]), np.array([np.NaN, np.NaN, np.NaN]), "joint", np.NaN), + (np.array([np.NaN, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, np.NaN]), "joint", np.NaN), + # 2d + ( + np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, + np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T, + "joint", + 4.0, + ), + ( + np.array([[1.0, np.NaN, 3.0], [3.0, 4.0, np.NaN]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "joint", + 4.0, + ), + ( + np.array([[np.NaN, 
np.NaN, np.NaN], [3.0, 4.0, 5.0]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "joint", + 2.0, + ), + ( + np.array([[np.NaN, np.NaN, np.NaN], [np.NaN, np.NaN, np.NaN]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "joint", + np.NaN, + ), + ( + np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, + np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T, + "raw_values", + np.array([2.0, 2.0]), + ), + ( + np.array([[1.0, np.NaN, 3.0], [3.0, 4.0, np.NaN]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "raw_values", + np.array([2.0, 2.0]), + ), + ( + np.array([[np.NaN, np.NaN, np.NaN], [3.0, 4.0, 5.0]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "raw_values", + np.array([np.NaN, 2.0]), + ), + ( + np.array([[np.NaN, np.NaN, np.NaN], [np.NaN, np.NaN, np.NaN]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "raw_values", + np.array([np.NaN, np.NaN]), + ), + ], +) +def test_max_deviation(y_true, y_pred, multioutput, expected): + result = max_deviation(y_true=y_true, y_pred=y_pred, multioutput=multioutput) + npt.assert_allclose(result, expected) diff --git a/tests/test_metrics/test_metrics.py b/tests/test_metrics/test_metrics.py index bdc232e2c..e70559926 100644 --- a/tests/test_metrics/test_metrics.py +++ b/tests/test_metrics/test_metrics.py @@ -50,10 +50,10 @@ (MAPE(), "MAPE(mode = 'per-segment', missing_mode = 'error', )"), (SMAPE(), "SMAPE(mode = 'per-segment', missing_mode = 'error', )"), (R2(), "R2(mode = 'per-segment', )"), - (Sign(), "Sign(mode = 'per-segment', )"), - (MaxDeviation(), "MaxDeviation(mode = 'per-segment', )"), + (Sign(), "Sign(mode = 'per-segment', missing_mode = 'error', )"), + (MaxDeviation(), "MaxDeviation(mode = 'per-segment', missing_mode = 'error', )"), (DummyMetric(), "DummyMetric(mode = 'per-segment', alpha = 1.0, )"), - (WAPE(), "WAPE(mode = 'per-segment', )"), + (WAPE(), "WAPE(mode = 'per-segment', missing_mode = 'error', )"), (MissingCounter(), "MissingCounter(mode = 
'per-segment', )"), ), ) @@ -187,10 +187,10 @@ def test_invalid_nans_pred(metric_class, train_test_dfs): MAPE(missing_mode="error"), SMAPE(missing_mode="error"), R2(), - Sign(), - MaxDeviation(), + Sign(missing_mode="error"), + MaxDeviation(missing_mode="error"), DummyMetric(), - WAPE(), + WAPE(missing_mode="error"), ), ) def test_invalid_nans_true(metric, train_test_dfs): @@ -208,6 +208,9 @@ def test_invalid_nans_true(metric, train_test_dfs): MAE(missing_mode="ignore"), MAPE(missing_mode="ignore"), SMAPE(missing_mode="ignore"), + Sign(missing_mode="ignore"), + WAPE(missing_mode="ignore"), + MaxDeviation(missing_mode="ignore"), MissingCounter(), ), ) @@ -229,6 +232,9 @@ def test_invalid_single_nan_ignore(metric, train_test_dfs): (MAE(mode="per-segment", missing_mode="ignore"), type(None)), (MAPE(mode="per-segment", missing_mode="ignore"), type(None)), (SMAPE(mode="per-segment", missing_mode="ignore"), type(None)), + (Sign(mode="per-segment", missing_mode="ignore"), type(None)), + (WAPE(mode="per-segment", missing_mode="ignore"), type(None)), + (MaxDeviation(mode="per-segment", missing_mode="ignore"), type(None)), (MissingCounter(mode="per-segment"), float), ), ) @@ -256,6 +262,9 @@ def test_invalid_segment_nans_ignore_per_segment(metric, expected_type, train_te MAE(mode="macro", missing_mode="ignore"), MAPE(mode="macro", missing_mode="ignore"), SMAPE(mode="macro", missing_mode="ignore"), + Sign(mode="macro", missing_mode="ignore"), + WAPE(mode="macro", missing_mode="ignore"), + MaxDeviation(mode="macro", missing_mode="ignore"), MissingCounter(mode="macro"), ), ) @@ -274,6 +283,9 @@ def test_invalid_segment_nans_ignore_macro(metric, train_test_dfs): (MAE(mode="macro", missing_mode="ignore"), type(None)), (MAPE(mode="macro", missing_mode="ignore"), type(None)), (SMAPE(mode="macro", missing_mode="ignore"), type(None)), + (Sign(mode="macro", missing_mode="ignore"), type(None)), + (WAPE(mode="macro", missing_mode="ignore"), type(None)), + (MaxDeviation(mode="macro", 
missing_mode="ignore"), type(None)), (MissingCounter(mode="macro"), float), ), )