etna-team · DanilSmorchkov · Dec 23, 2024 · Dec 19, 2024 · Dec 23, 2024 · Dec 23, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -39,7 +39,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - 
 - Rework validation of `FoldMask` to not fail on tail nans ([#536](https://github.com/etna-team/etna/pull/536))
 - Add parameter `missing_mode` into `R2` and `MedAE` metrics ([#537](https://github.com/etna-team/etna/pull/537))
-- 
+- Add parameer `missing_mode` into `RMSE` and `MSLE` metrics ([#542](https://github.com/etna-team/etna/pull/542))
 - 
 
 ### Fixed

diff --git a/etna/metrics/__init__.py b/etna/metrics/__init__.py
@@ -1,7 +1,5 @@
 """Module with metrics of forecasting quality."""
 
-from sklearn.metrics import mean_squared_log_error as msle
-
 from etna.metrics.base import Metric
 from etna.metrics.base import MetricAggregationMode
 from etna.metrics.base import MetricMissingMode

diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py
@@ -1,13 +1,10 @@
 import warnings
 from enum import Enum
-from functools import partial
 from typing import Optional
 from typing import Sequence
 from typing import Union
 
 import numpy as np
-from sklearn.metrics import mean_squared_error as mse_sklearn
-from sklearn.metrics import mean_squared_log_error as msle
 from typing_extensions import assert_never
 
 ArrayLike = Union[float, Sequence[float], Sequence[Sequence[float]]]
@@ -132,10 +129,10 @@ def mae(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Arr
 
 
 def mape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput: str = "joint") -> ArrayLike:
-    """Mean absolute percentage error.
+    """Mean absolute percentage error with missing values handling.
 
     .. math::
-       MAPE(y\_true, y\_pred) = \\frac{1}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \epsilon}
+        MAPE(y\_true, y\_pred) = \\frac{1}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \epsilon}
 
     `Scale-dependent errors <https://otexts.com/fpp3/accuracy.html#scale-dependent-errors>`_
 
@@ -188,10 +185,10 @@ def mape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput:
 
 
 def smape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput: str = "joint") -> ArrayLike:
-    """Symmetric mean absolute percentage error.
+    """Symmetric mean absolute percentage error with missing values handling.
 
     .. math::
-       SMAPE(y\_true, y\_pred) = \\frac{2 \\cdot 100 \\%}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \\mid y\_pred_i \\mid}
+        SMAPE(y\_true, y\_pred) = \\frac{2 \\cdot 100 \\%}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \\mid y\_pred_i \\mid}
 
     The nans are ignored during computation. If all values are nans, the result is NaN.
 
@@ -457,7 +454,103 @@ def max_deviation(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "join
         return result
 
 
-rmse = partial(mse_sklearn, squared=False)
+def rmse(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
+    """Root mean squared error with missing values handling
+
+    .. math::
+        RMSE(y\_true, y\_pred) = \\sqrt\\frac{\\sum_{i=1}^{n}{(y\_true_i - y\_pred_i)^2}}{n}
+
+    The nans are ignored during computation. If all values are nans, the result is NaN.
+
+    Parameters
+    ----------
+    y_true:
+        array-like of shape (n_samples,) or (n_samples, n_outputs)
+
+        Ground truth (correct) target values.
+
+    y_pred:
+        array-like of shape (n_samples,) or (n_samples, n_outputs)
+
+        Estimated target values.
+
+    multioutput:
+        Defines aggregating of multiple output values
+        (see :py:class:`~etna.metrics.functional_metrics.FunctionalMetricMultioutput`).
+
+    Returns
+    -------
+    :
+        A non-negative floating point value (the best value is 0.0), or an array of floating point values,
+        one for each individual target.
+    """
+    y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
+
+    if len(y_true_array.shape) != len(y_pred_array.shape):
+        raise ValueError("Shapes of the labels must be the same")
+
+    axis = _get_axis_by_multioutput(multioutput)
+
+    with warnings.catch_warnings():
+        # this helps to prevent warning in case of all nans
+        warnings.filterwarnings(
+            message="Mean of empty slice",
+            action="ignore",
+        )
+        result = np.nanmean((y_true_array - y_pred_array) ** 2, axis=axis)
+
+    return np.sqrt(result)
+
+
+def msle(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
+    """Mean squared logarithmic error with missing values handling
+
+    .. math::
+        MSLE(y\_true, y\_pred) = \\frac{1}{n}\\cdot\\sum_{i=1}^{n}{(log(1 + y\_true_i) - log(1 + y\_pred_i))^2}
+
+    The nans are ignored during computation. If all values are nans, the result is NaN.
+
+    Parameters
+    ----------
+    y_true:
+        array-like of shape (n_samples,) or (n_samples, n_outputs)
+
+        Ground truth (correct) target values.
+
+    y_pred:
+        array-like of shape (n_samples,) or (n_samples, n_outputs)
+
+        Estimated target values.
+
+    multioutput:
+        Defines aggregating of multiple output values
+        (see :py:class:`~etna.metrics.functional_metrics.FunctionalMetricMultioutput`).
+
+    Returns
+    -------
+    :
+        A non-negative floating point value (the best value is 0.0), or an array of floating point values,
+        one for each individual target.
+    """
+    y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
+
+    if len(y_true_array.shape) != len(y_pred_array.shape):
+        raise ValueError("Shapes of the labels must be the same")
+
+    if (y_true_array < 0).any() or (y_pred_array < 0).any():
+        raise ValueError("Mean Squared Logarithmic Error cannot be used when targets contain negative values.")
+
+    axis = _get_axis_by_multioutput(multioutput)
+
+    with warnings.catch_warnings():
+        # this helps to prevent warning in case of all nans
+        warnings.filterwarnings(
+            message="Mean of empty slice",
+            action="ignore",
+        )
+        result = np.nanmean((np.log1p(y_true_array) - np.log1p(y_pred_array)) ** 2, axis=axis)
+
+    return result
 
 
 def wape(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:

diff --git a/etna/metrics/metrics.py b/etna/metrics/metrics.py
@@ -112,18 +112,21 @@ def greater_is_better(self) -> bool:
         return False
 
 
-class RMSE(Metric):
+class RMSE(MetricWithMissingHandling):
     """Root mean squared error metric with multi-segment computation support.
 
     .. math::
         RMSE(y\_true, y\_pred) = \\sqrt\\frac{\\sum_{i=1}^{n}{(y\_true_i - y\_pred_i)^2}}{n}
 
+    This metric can handle missing values with parameter ``missing_mode``.
+    If there are too many of them in ``ignore`` mode, the result will be ``None``.
+
     Notes
     -----
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = "per-segment", **kwargs):
+    def __init__(self, mode: str = "per-segment", missing_mode="error", **kwargs):
         """Init metric.
 
         Parameters
@@ -136,11 +139,20 @@ def __init__(self, mode: str = "per-segment", **kwargs):
             * if "per-segment" -- does not aggregate metrics
 
             See :py:class:`~etna.metrics.base.MetricAggregationMode`.
+
+        missing_mode:
+            mode of handling missing values (see :py:class:`~etna.metrics.base.MetricMissingMode`)
         kwargs:
             metric's computation arguments
         """
         rmse_per_output = partial(rmse, multioutput="raw_values")
-        super().__init__(mode=mode, metric_fn=rmse_per_output, metric_fn_signature="matrix_to_array", **kwargs)
+        super().__init__(
+            mode=mode,
+            metric_fn=rmse_per_output,
+            metric_fn_signature="matrix_to_array",
+            missing_mode=missing_mode,
+            **kwargs,
+        )
 
     @property
     def greater_is_better(self) -> bool:
@@ -199,7 +211,7 @@ class MAPE(MetricWithMissingHandling):
     """Mean absolute percentage error metric with multi-segment computation support.
 
     .. math::
-       MAPE(y\_true, y\_pred) = \\frac{1}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \epsilon}
+        MAPE(y\_true, y\_pred) = \\frac{1}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \epsilon}
 
     This metric can handle missing values with parameter ``missing_mode``.
     If there are too many of them in ``ignore`` mode, the result will be ``None``.
@@ -246,7 +258,7 @@ class SMAPE(MetricWithMissingHandling):
     """Symmetric mean absolute percentage error metric with multi-segment computation support.
 
     .. math::
-       SMAPE(y\_true, y\_pred) = \\frac{2 \\cdot 100 \\%}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \\mid y\_pred_i \\mid}
+        SMAPE(y\_true, y\_pred) = \\frac{2 \\cdot 100 \\%}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \\mid y\_pred_i \\mid}
 
     This metric can handle missing values with parameter ``missing_mode``.
     If there are too many of them in ``ignore`` mode, the result will be ``None``.
@@ -336,18 +348,21 @@ def greater_is_better(self) -> bool:
         return False
 
 
-class MSLE(Metric):
+class MSLE(MetricWithMissingHandling):
     """Mean squared logarithmic error metric with multi-segment computation support.
 
     .. math::
-       MSLE(y\_true, y\_pred) = \\frac{1}{n}\\cdot\\sum_{i=1}^{n}{(ln(1 + y\_true_i) - ln(1 + y\_pred_i))^2}
+        MSLE(y\_true, y\_pred) = \\frac{1}{n}\\cdot\\sum_{i=1}^{n}{(ln(1 + y\_true_i) - ln(1 + y\_pred_i))^2}
+
+    This metric can handle missing values with parameter ``missing_mode``.
+    If there are too many of them in ``ignore`` mode, the result will be ``None``.
 
     Notes
     -----
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = "per-segment", **kwargs):
+    def __init__(self, mode: str = "per-segment", missing_mode="error", **kwargs):
         """Init metric.
 
         Parameters
@@ -360,12 +375,21 @@ def __init__(self, mode: str = "per-segment", **kwargs):
             * if "per-segment" -- does not aggregate metrics
 
             See :py:class:`~etna.metrics.base.MetricAggregationMode`.
+
+        missing_mode:
+            mode of handling missing values (see :py:class:`~etna.metrics.base.MetricMissingMode`)
         kwargs:
             metric's computation arguments
 
         """
         msle_per_output = partial(msle, multioutput="raw_values")
-        super().__init__(mode=mode, metric_fn=msle_per_output, metric_fn_signature="matrix_to_array", **kwargs)
+        super().__init__(
+            mode=mode,
+            metric_fn=msle_per_output,
+            metric_fn_signature="matrix_to_array",
+            missing_mode=missing_mode,
+            **kwargs,
+        )
 
     @property
     def greater_is_better(self) -> bool: