Skip to content

Commit

Permalink
Update Sign, WAPE, MaxDeviation to handle missing values (#530)
Browse files Browse the repository at this point in the history
  • Loading branch information
d-a-bunin authored Dec 18, 2024
1 parent 5b17421 commit 064e645
Show file tree
Hide file tree
Showing 5 changed files with 338 additions and 21 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add parameter `missing_mode` into `MSE` metric ([#515](https://github.com/etna-team/etna/pull/515))
- Add parameter `missing_mode` into `MAE` metric ([#523](https://github.com/etna-team/etna/pull/523))
- Add parameter `missing_mode` into `MAPE` and `SMAPE` metrics ([#524](https://github.com/etna-team/etna/pull/524))
- Add parameter `missing_mode` into `Sign`, `WAPE` and `MaxDeviation` metrics ([#530](https://github.com/etna-team/etna/pull/530))
-
- Update `aggregate_metrics_df` to work with `None` values ([#522](https://github.com/etna-team/etna/pull/522))
-
Expand Down
60 changes: 54 additions & 6 deletions etna/metrics/functional_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,8 @@ def smape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput:
.. math::
SMAPE(y\_true, y\_pred) = \\frac{2 \\cdot 100 \\%}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \\mid y\_pred_i \\mid}
NaN values are ignored during computation. If all values are NaN, the result is NaN.
Parameters
----------
y_true:
Expand Down Expand Up @@ -247,6 +249,8 @@ def sign(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar
.. math::
Sign(y\_true, y\_pred) = \\frac{1}{n}\\cdot\\sum_{i=1}^{n}{sign(y\_true_i - y\_pred_i)}
NaN values are ignored during computation. If all values are NaN, the result is NaN.
Parameters
----------
y_true:
Expand Down Expand Up @@ -275,8 +279,15 @@ def sign(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar
raise ValueError("Shapes of the labels must be the same")

axis = _get_axis_by_multioutput(multioutput)
with warnings.catch_warnings():
# this helps to prevent warning in case of all nans
warnings.filterwarnings(
message="Mean of empty slice",
action="ignore",
)
result = np.nanmean(np.sign(y_true_array - y_pred_array), axis=axis)

return np.mean(np.sign(y_true_array - y_pred_array), axis=axis)
return result


def max_deviation(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
Expand All @@ -285,6 +296,8 @@ def max_deviation(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "join
.. math::
MaxDeviation(y\_true, y\_pred) = \\max_{1 \\le j \\le n} | y_j |, where \\, y_j = \\sum_{i=1}^{j}{y\_pred_i - y\_true_i}
NaN values are ignored during computation. If all values are NaN, the result is NaN.
Parameters
----------
y_true:
Expand Down Expand Up @@ -313,9 +326,15 @@ def max_deviation(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "join
raise ValueError("Shapes of the labels must be the same")

axis = _get_axis_by_multioutput(multioutput)

prefix_error_sum = np.cumsum(y_pred_array - y_true_array, axis=axis)
return np.max(np.abs(prefix_error_sum), axis=axis)
diff = y_pred_array - y_true_array
prefix_error_sum = np.nancumsum(diff, axis=axis)
isnan = np.all(np.isnan(diff), axis=axis)
result = np.max(np.abs(prefix_error_sum), axis=axis)
result = np.where(isnan, np.NaN, result)
try:
return result.item()
except ValueError as e:
return result # type: ignore


rmse = partial(mse_sklearn, squared=False)
Expand All @@ -327,6 +346,8 @@ def wape(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar
.. math::
WAPE(y\_true, y\_pred) = \\frac{\\sum_{i=1}^{n} |y\_true_i - y\_pred_i|}{\\sum_{i=1}^{n}|y\\_true_i|}
NaN values are ignored during computation. If all values are NaN, the result is NaN.
Parameters
----------
y_true:
Expand Down Expand Up @@ -355,8 +376,35 @@ def wape(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar
raise ValueError("Shapes of the labels must be the same")

axis = _get_axis_by_multioutput(multioutput)

return np.sum(np.abs(y_true_array - y_pred_array), axis=axis) / np.sum(np.abs(y_true_array), axis=axis) # type: ignore
diff = y_true_array - y_pred_array
numerator = np.nansum(np.abs(diff), axis=axis)
isnan = np.isnan(diff)
denominator = np.nansum(np.abs(y_true_array * (~isnan)), axis=axis)
with warnings.catch_warnings():
# this helps to prevent warning in case of all nans
warnings.filterwarnings(
message="invalid value encountered in scalar divide",
action="ignore",
)
warnings.filterwarnings(
message="invalid value encountered in divide",
action="ignore",
)
warnings.filterwarnings(
message="divide by zero encountered in scalar divide",
action="ignore",
)
warnings.filterwarnings(
message="divide by zero encountered in divide",
action="ignore",
)
isnan = np.all(isnan, axis=axis)
result = np.where(denominator == 0, np.NaN, numerator / denominator)
result = np.where(isnan, np.NaN, result)
try:
return result.item()
except ValueError as e:
return result # type: ignore


def count_missing_values(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
Expand Down
51 changes: 42 additions & 9 deletions etna/metrics/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,18 +350,21 @@ def greater_is_better(self) -> bool:
return False


class Sign(Metric):
class Sign(MetricWithMissingHandling):
"""Sign error metric with multi-segment computation support.
.. math::
Sign(y\_true, y\_pred) = \\frac{1}{n}\\cdot\\sum_{i=1}^{n}{sign(y\_true_i - y\_pred_i)}
This metric can handle missing values with parameter ``missing_mode``.
If there are too many of them in ``ignore`` mode, the result will be ``None``.
Notes
-----
You can read more about logic of multi-segment metrics in Metric docs.
"""

def __init__(self, mode: str = "per-segment", **kwargs):
def __init__(self, mode: str = "per-segment", missing_mode: str = "error", **kwargs):
"""Init metric.
Parameters
Expand All @@ -374,30 +377,41 @@ def __init__(self, mode: str = "per-segment", **kwargs):
* if "per-segment" -- does not aggregate metrics
See :py:class:`~etna.metrics.base.MetricAggregationMode`.
missing_mode:
mode of handling missing values (see :py:class:`~etna.metrics.base.MetricMissingMode`)
kwargs:
metric's computation arguments
"""
sign_per_output = partial(sign, multioutput="raw_values")
super().__init__(mode=mode, metric_fn=sign_per_output, metric_fn_signature="matrix_to_array", **kwargs)
super().__init__(
mode=mode,
metric_fn=sign_per_output,
metric_fn_signature="matrix_to_array",
missing_mode=missing_mode,
**kwargs,
)

@property
def greater_is_better(self) -> None:
"""Whether higher metric value is better."""
return None


class MaxDeviation(Metric):
class MaxDeviation(MetricWithMissingHandling):
"""Max Deviation metric with multi-segment computation support (maximum deviation value of cumulative sums).
.. math::
MaxDeviation(y\_true, y\_pred) = \\max_{1 \\le j \\le n} | y_j |, where \\, y_j = \\sum_{i=1}^{j}{y\_pred_i - y\_true_i}
This metric can handle missing values with parameter ``missing_mode``.
If there are too many of them in ``ignore`` mode, the result will be ``None``.
Notes
-----
You can read more about logic of multi-segment metrics in Metric docs.
"""

def __init__(self, mode: str = "per-segment", **kwargs):
def __init__(self, mode: str = "per-segment", missing_mode: str = "error", **kwargs):
"""Init metric.
Parameters
Expand All @@ -410,30 +424,41 @@ def __init__(self, mode: str = "per-segment", **kwargs):
* if "per-segment" -- does not aggregate metrics
See :py:class:`~etna.metrics.base.MetricAggregationMode`.
missing_mode:
mode of handling missing values (see :py:class:`~etna.metrics.base.MetricMissingMode`)
kwargs:
metric's computation arguments
"""
max_deviation_per_output = partial(max_deviation, multioutput="raw_values")
super().__init__(mode=mode, metric_fn=max_deviation_per_output, metric_fn_signature="matrix_to_array", **kwargs)
super().__init__(
mode=mode,
metric_fn=max_deviation_per_output,
metric_fn_signature="matrix_to_array",
missing_mode=missing_mode,
**kwargs,
)

@property
def greater_is_better(self) -> bool:
"""Whether higher metric value is better."""
return False


class WAPE(Metric):
class WAPE(MetricWithMissingHandling):
"""Weighted average percentage Error metric with multi-segment computation support.
.. math::
WAPE(y\_true, y\_pred) = \\frac{\\sum_{i=1}^{n} |y\_true_i - y\_pred_i|}{\\sum_{i=1}^{n}|y\\_true_i|}
This metric can handle missing values with parameter ``missing_mode``.
If there are too many of them in ``ignore`` mode, the result will be ``None``.
Notes
-----
You can read more about logic of multi-segment metrics in Metric docs.
"""

def __init__(self, mode: str = "per-segment", **kwargs):
def __init__(self, mode: str = "per-segment", missing_mode: str = "error", **kwargs):
"""Init metric.
Parameters
Expand All @@ -446,11 +471,19 @@ def __init__(self, mode: str = "per-segment", **kwargs):
* if "per-segment" -- does not aggregate metrics
See :py:class:`~etna.metrics.base.MetricAggregationMode`.
missing_mode:
mode of handling missing values (see :py:class:`~etna.metrics.base.MetricMissingMode`)
kwargs:
metric's computation arguments
"""
wape_per_output = partial(wape, multioutput="raw_values")
super().__init__(mode=mode, metric_fn=wape_per_output, metric_fn_signature="matrix_to_array", **kwargs)
super().__init__(
mode=mode,
metric_fn=wape_per_output,
metric_fn_signature="matrix_to_array",
missing_mode=missing_mode,
**kwargs,
)

@property
def greater_is_better(self) -> bool:
Expand Down
Loading

0 comments on commit 064e645

Please sign in to comment.