Skip to content

Commit

Permalink
Add mean_squared_error functional metric (#515)
Browse files Browse the repository at this point in the history
  • Loading branch information
d-a-bunin authored Dec 6, 2024
1 parent 0b1b29d commit a7e20f4
Show file tree
Hide file tree
Showing 10 changed files with 349 additions and 82 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
-
-
- Add docstring warning about handling non-regressors (including target) to children of `WindowStatisticsTransform` ([#474](https://github.com/etna-team/etna/pull/474))
-
- Add parameter `missing_mode` into `MSE` metric ([#515](https://github.com/etna-team/etna/pull/515))
-
-
-
Expand Down
1 change: 0 additions & 1 deletion etna/metrics/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Module with metrics of forecasting quality."""

from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_squared_log_error as msle
from sklearn.metrics import median_absolute_error as medae
from sklearn.metrics import r2_score
Expand Down
13 changes: 9 additions & 4 deletions etna/metrics/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ class Metric(AbstractMetric, BaseMixin):
def __init__(
self,
metric_fn: MetricFunction,
mode: str = MetricAggregationMode.per_segment,
mode: str = "per-segment",
metric_fn_signature: str = "array_to_scalar",
**kwargs,
):
Expand All @@ -146,6 +146,8 @@ def __init__(
* if "per-segment" -- does not aggregate metrics
See :py:class:`~etna.metrics.base.MetricAggregationMode`.
metric_fn_signature:
type of signature of ``metric_fn`` (see :py:class:`~etna.metrics.base.MetricFunctionSignature`)
kwargs:
Expand Down Expand Up @@ -385,7 +387,7 @@ class MetricWithMissingHandling(Metric):
def __init__(
self,
metric_fn: MetricFunction,
mode: str = MetricAggregationMode.per_segment,
mode: str = "per-segment",
metric_fn_signature: str = "array_to_scalar",
missing_mode: str = "error",
**kwargs,
Expand All @@ -404,6 +406,8 @@ def __init__(
* if "per-segment" -- does not aggregate metrics
See :py:class:`~etna.metrics.base.MetricAggregationMode`.
metric_fn_signature:
type of signature of ``metric_fn`` (see :py:class:`~etna.metrics.base.MetricFunctionSignature`)
missing_mode:
Expand All @@ -421,7 +425,8 @@ def __init__(
If non-existent ``missing_mode`` is used.
"""
super().__init__(metric_fn=metric_fn, mode=mode, metric_fn_signature=metric_fn_signature, **kwargs)
self.missing_mode = MetricMissingMode(missing_mode)
self.missing_mode = missing_mode
self._missing_mode_enum = MetricMissingMode(missing_mode)

def _validate_nans(self, y_true: TSDataset, y_pred: TSDataset):
"""Check that ``y_true`` and ``y_pred`` don't have NaNs depending on ``missing_mode``.
Expand All @@ -442,7 +447,7 @@ def _validate_nans(self, y_true: TSDataset, y_pred: TSDataset):
df_pred = y_pred.df.loc[:, pd.IndexSlice[:, "target"]]

df_true_isna_sum = df_true.isna().sum()
if self.missing_mode is MetricMissingMode.error and (df_true_isna_sum > 0).any():
if self._missing_mode_enum is MetricMissingMode.error and (df_true_isna_sum > 0).any():
error_segments = set(df_true_isna_sum[df_true_isna_sum > 0].index.droplevel("feature").tolist())
raise ValueError(f"There are NaNs in y_true! Segments with NaNs: {reprlib.repr(error_segments)}.")

Expand Down
69 changes: 59 additions & 10 deletions etna/metrics/functional_metrics.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
from enum import Enum
from functools import partial
from typing import Optional
Expand All @@ -6,7 +7,7 @@

import numpy as np
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_squared_error as mse_sklearn
from sklearn.metrics import mean_squared_log_error as msle
from sklearn.metrics import median_absolute_error as medae
from sklearn.metrics import r2_score
Expand Down Expand Up @@ -41,11 +42,59 @@ def _get_axis_by_multioutput(multioutput: str) -> Optional[int]:
assert_never(multioutput_enum)


def mse(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
    """Mean squared error with missing values handling.

    .. math::
        MSE(y\\_true, y\\_pred) = \\frac{\\sum_{i=1}^{n}{(y\\_true_i - y\\_pred_i)^2}}{n}

    The nans are ignored during computation. If all values are nans, the result is NaN.

    Parameters
    ----------
    y_true:
        array-like of shape (n_samples,) or (n_samples, n_outputs)

        Ground truth (correct) target values.

    y_pred:
        array-like of shape (n_samples,) or (n_samples, n_outputs)

        Estimated target values.

    multioutput:
        Defines how multiple output values are aggregated
        (see :py:class:`~etna.metrics.functional_metrics.FunctionalMetricMultioutput`).

    Returns
    -------
    :
        A non-negative floating point value (the best value is 0.0), or an array of floating point values,
        one for each individual target.

    Raises
    ------
    ValueError:
        If ``y_true`` and ``y_pred`` have different shapes.
    """
    y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)

    # Compare the full shapes, not only the number of dimensions: arrays of equal rank
    # but different sizes (e.g. (3, 1) vs (3, 2)) would otherwise be silently broadcast
    # by the subtraction below and produce a meaningless value.
    if y_true_array.shape != y_pred_array.shape:
        raise ValueError("Shapes of the labels must be the same")

    # The reduction axis is derived from ``multioutput`` by a helper defined elsewhere in this module.
    axis = _get_axis_by_multioutput(multioutput)
    with warnings.catch_warnings():
        # np.nanmean emits "Mean of empty slice" when every value along the axis is NaN;
        # NaN is the documented result in that case, so the warning is suppressed locally.
        warnings.filterwarnings(
            message="Mean of empty slice",
            action="ignore",
        )
        # A squared error is NaN whenever either operand is NaN, so such pairs are skipped by nanmean.
        result = np.nanmean((y_true_array - y_pred_array) ** 2, axis=axis)
    return result


def mape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput: str = "joint") -> ArrayLike:
"""Mean absolute percentage error.
`Wikipedia entry on the Mean absolute percentage error
<https://en.wikipedia.org/wiki/Mean_absolute_percentage_error>`_
.. math::
MAPE(y\_true, y\_pred) = \\frac{1}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \epsilon}
`Scale-dependent errors <https://otexts.com/fpp3/accuracy.html#scale-dependent-errors>`_
Parameters
----------
Expand Down Expand Up @@ -88,11 +137,8 @@ def mape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput:
def smape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput: str = "joint") -> ArrayLike:
"""Symmetric mean absolute percentage error.
`Wikipedia entry on the Symmetric mean absolute percentage error
<https://en.wikipedia.org/wiki/Symmetric_mean_absolute_percentage_error>`_
.. math::
SMAPE = \dfrac{100}{n}\sum_{t=1}^{n}\dfrac{|ytrue_{t}-ypred_{t}|}{(|ypred_{t}|+|ytrue_{t}|) / 2}
SMAPE(y\_true, y\_pred) = \\frac{2 \\cdot 100 \\%}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \\mid y\_pred_i \\mid}
Parameters
----------
Expand Down Expand Up @@ -136,7 +182,7 @@ def sign(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar
"""Sign error metric.
.. math::
Sign(y\_true, y\_pred) = \\frac{1}{n}\\cdot\\sum_{i=0}^{n - 1}{sign(y\_true_i - y\_pred_i)}
Sign(y\_true, y\_pred) = \\frac{1}{n}\\cdot\\sum_{i=1}^{n}{sign(y\_true_i - y\_pred_i)}
Parameters
----------
Expand Down Expand Up @@ -173,6 +219,9 @@ def sign(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar
def max_deviation(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
"""Max Deviation metric.
.. math::
MaxDeviation(y\_true, y\_pred) = \\max_{1 \\le j \\le n} | y_j |, where \\, y_j = \\sum_{i=1}^{j}{y\_pred_i - y\_true_i}
Parameters
----------
y_true:
Expand Down Expand Up @@ -206,14 +255,14 @@ def max_deviation(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "join
return np.max(np.abs(prefix_error_sum), axis=axis)


rmse = partial(mse, squared=False)
rmse = partial(mse_sklearn, squared=False)


def wape(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
"""Weighted average percentage Error metric.
.. math::
WAPE(y\_true, y\_pred) = \\frac{\\sum_{i=0}^{n} |y\_true_i - y\_pred_i|}{\\sum_{i=0}^{n}|y\\_true_i|}
WAPE(y\_true, y\_pred) = \\frac{\\sum_{i=1}^{n} |y\_true_i - y\_pred_i|}{\\sum_{i=1}^{n}|y\\_true_i|}
Parameters
----------
Expand Down
25 changes: 18 additions & 7 deletions etna/metrics/intervals_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

from etna.datasets import TSDataset
from etna.metrics.base import Metric
from etna.metrics.base import MetricAggregationMode
from etna.metrics.functional_metrics import ArrayLike


Expand Down Expand Up @@ -56,7 +55,7 @@ class Coverage(Metric, _IntervalsMetricMixin):
def __init__(
self,
quantiles: Optional[Tuple[float, float]] = None,
mode: str = MetricAggregationMode.per_segment,
mode: str = "per-segment",
upper_name: Optional[str] = None,
lower_name: Optional[str] = None,
**kwargs,
Expand All @@ -67,8 +66,14 @@ def __init__(
----------
quantiles:
lower and upper quantiles
mode: 'macro' or 'per-segment'
metrics aggregation mode
mode:
"macro" or "per-segment", way to aggregate metric values over segments:
* if "macro" computes average value
* if "per-segment" -- does not aggregate metrics
See :py:class:`~etna.metrics.base.MetricAggregationMode`.
upper_name:
name of column with upper border of the interval
lower_name:
Expand Down Expand Up @@ -169,7 +174,7 @@ class Width(Metric, _IntervalsMetricMixin):
def __init__(
self,
quantiles: Optional[Tuple[float, float]] = None,
mode: str = MetricAggregationMode.per_segment,
mode: str = "per-segment",
upper_name: Optional[str] = None,
lower_name: Optional[str] = None,
**kwargs,
Expand All @@ -180,8 +185,14 @@ def __init__(
----------
quantiles:
lower and upper quantiles
mode: 'macro' or 'per-segment'
metrics aggregation mode
mode:
"macro" or "per-segment", way to aggregate metric values over segments:
* if "macro" computes average value
* if "per-segment" -- does not aggregate metrics
See :py:class:`~etna.metrics.base.MetricAggregationMode`.
upper_name:
name of column with upper border of the interval
lower_name:
Expand Down
Loading

0 comments on commit a7e20f4

Please sign in to comment.