etna-team · DanilSmorchkov · Dec 23, 2024 · Dec 19, 2024 · Dec 23, 2024 · Dec 23, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -41,6 +41,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Rework validation of `FoldMask` to not fail on tail nans ([#536](https://github.com/etna-team/etna/pull/536))
 - Add parameter `missing_mode` into `R2` and `MedAE` metrics ([#537](https://github.com/etna-team/etna/pull/537))
 - Update `analysis.forecast.plots.plot_metric_per_segment` to handle `None` from metrics ([#540](https://github.com/etna-team/etna/pull/540))
+- Add parameter `missing_mode` into `RMSE` and `MSLE` metrics ([#542](https://github.com/etna-team/etna/pull/542))
 - 
 - 
 - 

diff --git a/etna/metrics/__init__.py b/etna/metrics/__init__.py
@@ -1,7 +1,5 @@
 """Module with metrics of forecasting quality."""
 
-from sklearn.metrics import mean_squared_log_error as msle
-
 from etna.metrics.base import Metric
 from etna.metrics.base import MetricAggregationMode
 from etna.metrics.base import MetricMissingMode

diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py
@@ -1,13 +1,10 @@
 import warnings
 from enum import Enum
-from functools import partial
 from typing import Optional
 from typing import Sequence
 from typing import Union
 
 import numpy as np
-from sklearn.metrics import mean_squared_error as mse_sklearn
-from sklearn.metrics import mean_squared_log_error as msle
 from typing_extensions import assert_never
 
 ArrayLike = Union[float, Sequence[float], Sequence[Sequence[float]]]
@@ -68,6 +65,12 @@
     :
         A non-negative floating point value (the best value is 0.0), or an array of floating point values,
         one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the shapes of the input arrays do not match.
     """
     y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
 
@@ -114,6 +117,12 @@
     :
         A non-negative floating point value (the best value is 0.0), or an array of floating point values,
         one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the shapes of the input arrays do not match.
     """
     y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
 
@@ -132,10 +141,10 @@
 
 
 def mape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput: str = "joint") -> ArrayLike:
-    """Mean absolute percentage error.
+    """Mean absolute percentage error with missing values handling.
 
     .. math::
-       MAPE(y\_true, y\_pred) = \\frac{1}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \epsilon}
+        MAPE(y\_true, y\_pred) = \\frac{1}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \epsilon}
 
     `Scale-dependent errors <https://otexts.com/fpp3/accuracy.html#scale-dependent-errors>`_
 
@@ -166,6 +175,12 @@
     :
         A non-negative floating point value (the best value is 0.0), or an array of floating point values,
         one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the shapes of the input arrays do not match.
     """
     y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
 
@@ -188,10 +203,10 @@
 
 
 def smape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput: str = "joint") -> ArrayLike:
-    """Symmetric mean absolute percentage error.
+    """Symmetric mean absolute percentage error with missing values handling.
 
     .. math::
-       SMAPE(y\_true, y\_pred) = \\frac{2 \\cdot 100 \\%}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \\mid y\_pred_i \\mid}
+        SMAPE(y\_true, y\_pred) = \\frac{2 \\cdot 100 \\%}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \\mid y\_pred_i \\mid}
 
     The nans are ignored during computation. If all values are nans, the result is NaN.
 
@@ -220,6 +235,12 @@
     :
         A non-negative floating point value (the best value is 0.0), or an array of floating point values,
         one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the shapes of the input arrays do not match.
     """
     y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
 
@@ -270,6 +291,12 @@
     :
         A floating point value, or an array of floating point values,
         one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the shapes of the input arrays do not match.
     """
     y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
 
@@ -346,6 +373,12 @@
     :
         A non-negative floating point value (the best value is 0.0), or an array of floating point values,
         one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the shapes of the input arrays do not match.
     """
     y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
 
@@ -392,6 +425,12 @@
     :
         A floating point value, or an array of floating point values,
         one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the shapes of the input arrays do not match.
     """
     y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
 
@@ -439,6 +478,12 @@
     :
         A non-negative floating point value (the best value is 0.0), or an array of floating point values,
         one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the shapes of the input arrays do not match.
     """
     y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
 
@@ -457,7 +502,105 @@
         return result
 
 
-rmse = partial(mse_sklearn, squared=False)
+def rmse(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
+    """Root mean squared error with missing values handling.
+
+    .. math::
+        RMSE(y\_true, y\_pred) = \\sqrt{\\frac{\\sum_{i=1}^{n}{(y\_true_i - y\_pred_i)^2}}{n}}
+
+    The nans are ignored during computation. If all values are nans, the result is NaN.
+
+    Parameters
+    ----------
+    y_true:
+        array-like of shape (n_samples,) or (n_samples, n_outputs)
+
+        Ground truth (correct) target values.
+
+    y_pred:
+        array-like of shape (n_samples,) or (n_samples, n_outputs)
+
+        Estimated target values.
+
+    multioutput:
+        Defines how multiple output values are aggregated
+        (see :py:class:`~etna.metrics.functional_metrics.FunctionalMetricMultioutput`).
+
+    Returns
+    -------
+    :
+        A non-negative floating point value (the best value is 0.0), or an array of floating point values,
+        one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the shapes of the input arrays do not match.
+    """
+    mse_result = mse(y_true=y_true, y_pred=y_pred, multioutput=multioutput)
+    result = np.sqrt(mse_result)  # square root of the MSE value(s) returned by mse
+
+    return result  # type: ignore
+
+
+def msle(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
+    """Mean squared logarithmic error with missing values handling.
+
+    .. math::
+        MSLE(y\_true, y\_pred) = \\frac{1}{n}\\cdot\\sum_{i=1}^{n}{(\\log(1 + y\_true_i) - \\log(1 + y\_pred_i))^2}
+
+    The nans are ignored during computation. If all values are nans, the result is NaN.
+
+    Parameters
+    ----------
+    y_true:
+        array-like of shape (n_samples,) or (n_samples, n_outputs)
+
+        Ground truth (correct) target values.
+
+    y_pred:
+        array-like of shape (n_samples,) or (n_samples, n_outputs)
+
+        Estimated target values.
+
+    multioutput:
+        Defines how multiple output values are aggregated
+        (see :py:class:`~etna.metrics.functional_metrics.FunctionalMetricMultioutput`).
+
+    Returns
+    -------
+    :
+        A non-negative floating point value (the best value is 0.0), or an array of floating point values,
+        one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the input arrays have different numbers of dimensions.
+    ValueError:
+        If input arrays contain negative values.
+    """
+    y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
+
+    if len(y_true_array.shape) != len(y_pred_array.shape):  # compares the number of dimensions only
+        raise ValueError("Shapes of the labels must be the same")
+
+    if (y_true_array < 0).any() or (y_pred_array < 0).any():  # NaN < 0 is False, so NaNs pass this check
+        raise ValueError("Mean squared logarithmic error cannot be used when targets contain negative values.")
+
+    axis = _get_axis_by_multioutput(multioutput)
+
+    with warnings.catch_warnings():
+        # nanmean warns with "Mean of empty slice" when every value is NaN; silence it, the NaN result is intended
+        warnings.filterwarnings(
+            message="Mean of empty slice",
+            action="ignore",
+        )
+        result = np.nanmean((np.log1p(y_true_array) - np.log1p(y_pred_array)) ** 2, axis=axis)
+
+    return result
 
 
 def wape(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
@@ -489,6 +632,12 @@
     :
         A non-negative floating point value (the best value is 0.0), or an array of floating point values,
         one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the shapes of the input arrays do not match.
     """
     y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
 
@@ -554,6 +703,12 @@
     :
         A floating point value, or an array of floating point values,
         one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the shapes of the input arrays do not match.
     """
     y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)