From 67e80fc9ed4ee47ae65209d5ada44d907f5c9c97 Mon Sep 17 00:00:00 2001
From: Danil Smorchkov <Danil Smorchkov>
Date: Thu, 19 Dec 2024 18:22:55 +0300
Subject: [PATCH 01/12] update rmse and msle

---
 etna/metrics/__init__.py           |   2 +-
 etna/metrics/functional_metrics.py | 111 ++++++++++++++++++++++++++---
 etna/metrics/metrics.py            |  41 ++++++++---
 3 files changed, 136 insertions(+), 18 deletions(-)
diff --git a/etna/metrics/__init__.py b/etna/metrics/__init__.py
index 48d33d098..0a7d0ccb5 100644
--- a/etna/metrics/__init__.py
+++ b/etna/metrics/__init__.py
@@ -1,6 +1,5 @@
 """Module with metrics of forecasting quality."""
 
-from sklearn.metrics import mean_squared_log_error as msle
 from sklearn.metrics import median_absolute_error as medae
 from sklearn.metrics import r2_score
 
@@ -19,6 +18,7 @@
 from etna.metrics.functional_metrics import sign
 from etna.metrics.functional_metrics import smape
 from etna.metrics.functional_metrics import wape
+from etna.metrics.functional_metrics import msle
 from etna.metrics.intervals_metrics import Coverage
 from etna.metrics.intervals_metrics import Width
 from etna.metrics.metrics import MAE
diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py
index d6e086e94..4b60458e0 100644
--- a/etna/metrics/functional_metrics.py
+++ b/etna/metrics/functional_metrics.py
@@ -1,13 +1,10 @@
 import warnings
 from enum import Enum
-from functools import partial
 from typing import Optional
 from typing import Sequence
 from typing import Union
 
 import numpy as np
-from sklearn.metrics import mean_squared_error as mse_sklearn
-from sklearn.metrics import mean_squared_log_error as msle
 from sklearn.metrics import median_absolute_error as medae
 from sklearn.metrics import r2_score
 from typing_extensions import assert_never
@@ -134,10 +131,10 @@ def mae(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Arr
 
 
 def mape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput: str = "joint") -> ArrayLike:
-    """Mean absolute percentage error.
+    """Mean absolute percentage error with missing values handling.
 
     .. math::
-       MAPE(y\_true, y\_pred) = \\frac{1}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \epsilon}
+        MAPE(y\_true, y\_pred) = \\frac{1}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \epsilon}
 
     `Scale-dependent errors <https://otexts.com/fpp3/accuracy.html#scale-dependent-errors>`_
 
@@ -190,10 +187,10 @@ def mape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput:
 
 
 def smape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput: str = "joint") -> ArrayLike:
-    """Symmetric mean absolute percentage error.
+    """Symmetric mean absolute percentage error with missing values handling.
 
     .. math::
-       SMAPE(y\_true, y\_pred) = \\frac{2 \\cdot 100 \\%}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \\mid y\_pred_i \\mid}
+        SMAPE(y\_true, y\_pred) = \\frac{2 \\cdot 100 \\%}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \\mid y\_pred_i \\mid}
 
     The nans are ignored during computation. If all values are nans, the result is NaN.
 
@@ -337,7 +334,105 @@ def max_deviation(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "join
         return result  # type: ignore
 
 
-rmse = partial(mse_sklearn, squared=False)
+def rmse(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
+    """Root mean squared error with missing values handling.
+
+    .. math::
+        RMSE(y\_true, y\_pred) = \\sqrt\\frac{\\sum_{i=1}^{n}{(y\_true_i - y\_pred_i)^2}}{n}
+
+    The nans are ignored during computation. If all values are nans, the result is NaN.
+
+    Parameters
+    ----------
+    y_true:
+        array-like of shape (n_samples,) or (n_samples, n_outputs)
+
+        Ground truth (correct) target values.
+
+    y_pred:
+        array-like of shape (n_samples,) or (n_samples, n_outputs)
+
+        Estimated target values.
+
+    multioutput:
+        Defines aggregating of multiple output values
+        (see :py:class:`~etna.metrics.functional_metrics.FunctionalMetricMultioutput`).
+
+    Returns
+    -------
+    :
+        A non-negative floating point value (the best value is 0.0), or an array of floating point values,
+        one for each individual target.
+    """
+    y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
+
+    if len(y_true_array.shape) != len(y_pred_array.shape):
+        raise ValueError("Shapes of the labels must be the same")
+
+    axis = _get_axis_by_multioutput(multioutput)
+
+    with warnings.catch_warnings():
+        # this helps to prevent warning in case of all nans
+        warnings.filterwarnings(
+            message="Mean of empty slice",
+            action="ignore",
+        )
+        result = np.nanmean((y_true_array - y_pred_array) ** 2, axis=axis)
+
+    return np.sqrt(result)
+
+
+def msle(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
+    """Mean squared logarithmic error with missing values handling.
+
+    .. math::
+        MSLE(y\_true, y\_pred) = \\frac{1}{n}\\cdot\\sum_{i=1}^{n}{(ln(1 + y\_true_i) - ln(1 + y\_pred_i))^2}
+
+    The nans are ignored during computation. If all values are nans, the result is NaN.
+
+    Parameters
+    ----------
+    y_true:
+        array-like of shape (n_samples,) or (n_samples, n_outputs)
+
+        Ground truth (correct) target values.
+
+    y_pred:
+        array-like of shape (n_samples,) or (n_samples, n_outputs)
+
+        Estimated target values.
+
+    multioutput:
+        Defines aggregating of multiple output values
+        (see :py:class:`~etna.metrics.functional_metrics.FunctionalMetricMultioutput`).
+
+    Returns
+    -------
+    :
+        A non-negative floating point value (the best value is 0.0), or an array of floating point values,
+        one for each individual target.
+    """
+    y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
+
+    if len(y_true_array.shape) != len(y_pred_array.shape):
+        raise ValueError("Shapes of the labels must be the same")
+
+    if (y_true_array < 0).any() or (y_pred_array < 0).any():
+        raise ValueError(
+            "Mean Squared Logarithmic Error cannot be used when targets contain negative values."
+        )
+
+    axis = _get_axis_by_multioutput(multioutput)
+
+    with warnings.catch_warnings():
+        # this helps to prevent warning in case of all nans
+        warnings.filterwarnings(
+            message="Mean of empty slice",
+            action="ignore",
+        )
+        result = np.nanmean((np.log1p(y_true_array) - np.log1p(y_pred_array)) ** 2, axis=axis)
+
+    return result
 
 
 def wape(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
diff --git a/etna/metrics/metrics.py b/etna/metrics/metrics.py
index 640e2cedf..b42bc51d6 100644
--- a/etna/metrics/metrics.py
+++ b/etna/metrics/metrics.py
@@ -112,18 +112,21 @@ def greater_is_better(self) -> bool:
         return False
 
 
-class RMSE(Metric):
+class RMSE(MetricWithMissingHandling):
     """Root mean squared error metric with multi-segment computation support.
 
     .. math::
         RMSE(y\_true, y\_pred) = \\sqrt\\frac{\\sum_{i=1}^{n}{(y\_true_i - y\_pred_i)^2}}{n}
 
+    This metric can handle missing values with parameter ``missing_mode``.
+    If there are too many of them in ``ignore`` mode, the result will be ``None``.
+
     Notes
     -----
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = "per-segment", **kwargs):
+    def __init__(self, mode: str = "per-segment", missing_mode='error', **kwargs):
         """Init metric.
 
         Parameters
@@ -136,11 +139,19 @@ def __init__(self, mode: str = "per-segment", **kwargs):
             * if "per-segment" -- does not aggregate metrics
 
             See :py:class:`~etna.metrics.base.MetricAggregationMode`.
+
+        missing_mode:
+            mode of handling missing values (see :py:class:`~etna.metrics.base.MetricMissingMode`)
         kwargs:
             metric's computation arguments
         """
         rmse_per_output = partial(rmse, multioutput="raw_values")
-        super().__init__(mode=mode, metric_fn=rmse_per_output, metric_fn_signature="matrix_to_array", **kwargs)
+        super().__init__(
+            mode=mode,
+            metric_fn=rmse_per_output,
+            metric_fn_signature="matrix_to_array",
+            missing_mode=missing_mode,
+            **kwargs)
 
     @property
     def greater_is_better(self) -> bool:
@@ -187,7 +198,7 @@ class MAPE(MetricWithMissingHandling):
     """Mean absolute percentage error metric with multi-segment computation support.
 
     .. math::
-       MAPE(y\_true, y\_pred) = \\frac{1}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \epsilon}
+        MAPE(y\_true, y\_pred) = \\frac{1}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \epsilon}
 
     This metric can handle missing values with parameter ``missing_mode``.
     If there are too many of them in ``ignore`` mode, the result will be ``None``.
@@ -234,7 +245,7 @@ class SMAPE(MetricWithMissingHandling):
     """Symmetric mean absolute percentage error metric with multi-segment computation support.
 
     .. math::
-       SMAPE(y\_true, y\_pred) = \\frac{2 \\cdot 100 \\%}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \\mid y\_pred_i \\mid}
+        SMAPE(y\_true, y\_pred) = \\frac{2 \\cdot 100 \\%}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \\mid y\_pred_i \\mid}
 
     This metric can handle missing values with parameter ``missing_mode``.
     If there are too many of them in ``ignore`` mode, the result will be ``None``.
@@ -313,18 +324,21 @@ def greater_is_better(self) -> bool:
         return False
 
 
-class MSLE(Metric):
+class MSLE(MetricWithMissingHandling):
     """Mean squared logarithmic error metric with multi-segment computation support.
 
     .. math::
-       MSLE(y\_true, y\_pred) = \\frac{1}{n}\\cdot\\sum_{i=1}^{n}{(ln(1 + y\_true_i) - ln(1 + y\_pred_i))^2}
+        MSLE(y\_true, y\_pred) = \\frac{1}{n}\\cdot\\sum_{i=1}^{n}{(ln(1 + y\_true_i) - ln(1 + y\_pred_i))^2}
+
+    This metric can handle missing values with parameter ``missing_mode``.
+    If there are too many of them in ``ignore`` mode, the result will be ``None``.
 
     Notes
     -----
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = "per-segment", **kwargs):
+    def __init__(self, mode: str = "per-segment", missing_mode='error', **kwargs):
         """Init metric.
 
         Parameters
@@ -337,12 +351,21 @@ def __init__(self, mode: str = "per-segment", **kwargs):
             * if "per-segment" -- does not aggregate metrics
 
             See :py:class:`~etna.metrics.base.MetricAggregationMode`.
+
+        missing_mode:
+            mode of handling missing values (see :py:class:`~etna.metrics.base.MetricMissingMode`)
         kwargs:
             metric's computation arguments
 
         """
         msle_per_output = partial(msle, multioutput="raw_values")
-        super().__init__(mode=mode, metric_fn=msle_per_output, metric_fn_signature="matrix_to_array", **kwargs)
+        super().__init__(
+            mode=mode,
+            metric_fn=msle_per_output,
+            metric_fn_signature="matrix_to_array",
+            missing_mode=missing_mode,
+            **kwargs
+        )
 
     @property
     def greater_is_better(self) -> bool:

From a462921140ddcd46934878907b06c1fd02ffb9f2 Mon Sep 17 00:00:00 2001
From: Danil Smorchkov <Danil Smorchkov>
Date: Mon, 23 Dec 2024 10:26:21 +0300
Subject: [PATCH 02/12] modify tests

---
 tests/test_metrics/test_functional_metrics.py | 188 ++++++++++++++++++
 tests/test_metrics/test_metrics.py            |  16 +-
 2 files changed, 200 insertions(+), 4 deletions(-)

diff --git a/tests/test_metrics/test_functional_metrics.py b/tests/test_metrics/test_functional_metrics.py
index cfe87866e..5be038e1c 100644
--- a/tests/test_metrics/test_functional_metrics.py
+++ b/tests/test_metrics/test_functional_metrics.py
@@ -667,3 +667,191 @@ def test_wape_ok(y_true, y_pred, multioutput, expected):
 def test_max_deviation(y_true, y_pred, multioutput, expected):
     result = max_deviation(y_true=y_true, y_pred=y_pred, multioutput=multioutput)
     npt.assert_allclose(result, expected)
+
+
+@pytest.mark.parametrize(
+    "y_true, y_pred, multioutput, expected",
+    [
+        # 1d
+        (np.array([1.0]), np.array([1.0]), "joint", 0.0),
+        (np.array([1.0, 2.0, 3.0]), np.array([3.0, 1.0, 2.0]), "joint", np.sqrt(2.0)),
+        (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, 2.0]), "joint", np.sqrt(2.5)),
+        (np.array([1.0, 2.0, 3.0]), np.array([3.0, np.NaN, 2.0]), "joint", np.sqrt(2.5)),
+        (np.array([1.0, np.NaN, 3.0]), np.array([3.0, np.NaN, 2.0]), "joint", np.sqrt(2.5)),
+        (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, np.NaN]), "joint", 2.0),
+        (np.array([1.0, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, 2.0]), "joint", np.NaN),
+        (np.array([np.NaN, np.NaN, np.NaN]), np.array([3.0, 1.0, 2.0]), "joint", np.NaN),
+        (np.array([1.0, 2.0, 3.0]), np.array([np.NaN, np.NaN, np.NaN]), "joint", np.NaN),
+        (np.array([np.NaN, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, np.NaN]), "joint", np.NaN),
+        # 2d
+        (np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T, "joint", np.sqrt(2.5)),
+        (
+            np.array([[1.0, np.NaN, 3.0], [3.0, 4.0, np.NaN]]).T,
+            np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T,
+            "joint",
+            2.0,
+        ),
+        (
+            np.array([[np.NaN, np.NaN, np.NaN], [3.0, 4.0, 5.0]]).T,
+            np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T,
+            "joint",
+            np.sqrt(2.5),
+        ),
+        (
+            np.array([[np.NaN, np.NaN, np.NaN], [np.NaN, np.NaN, np.NaN]]).T,
+            np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T,
+            "joint",
+            np.NaN,
+        ),
+        (
+            np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T,
+            np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T,
+            "raw_values",
+            np.sqrt(np.array([2.0, 3.0])),
+        ),
+        (
+            np.array([[1.0, np.NaN, 3.0], [3.0, 4.0, np.NaN]]).T,
+            np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T,
+            "raw_values",
+            np.array([2.0, 2.0]),
+        ),
+        (
+            np.array([[np.NaN, np.NaN, np.NaN], [3.0, 4.0, 5.0]]).T,
+            np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T,
+            "raw_values",
+            np.sqrt(np.array([np.NaN, 2.5])),
+        ),
+        (
+            np.array([[np.NaN, np.NaN, np.NaN], [np.NaN, np.NaN, np.NaN]]).T,
+            np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T,
+            "raw_values",
+            np.array([np.NaN, np.NaN]),
+        ),
+    ],
+)
+def test_rmse_ok(y_true, y_pred, multioutput, expected):
+    result = rmse(y_true=y_true, y_pred=y_pred, multioutput=multioutput)
+    npt.assert_allclose(result, expected)
+
+
+@pytest.mark.parametrize(
+    "y_true, y_pred, multioutput, expected",
+    [
+        # 1d
+        (np.array([1.0]), np.array([1.0]), "joint", 0.0),
+        (
+                np.array([1.0, 2.0, 3.0]),
+                np.array([3.0, 1.0, 2.0]),
+                "joint",
+                1 / 3 * (np.log(1 / 2) ** 2 + np.log(3 / 2) ** 2 + np.log(4 / 3) ** 2)
+        ),
+        (
+                np.array([1.0, np.NaN, 3.0]),
+                np.array([3.0, 1.0, 2.0]),
+                "joint",
+                0.5 * (np.log(1 / 2) ** 2 + np.log(4 / 3) ** 2)
+        ),
+        (
+                np.array([1.0, 2.0, 3.0]),
+                np.array([3.0, np.NaN, 2.0]),
+                "joint",
+                0.5 * (np.log(1 / 2) ** 2 + np.log(4 / 3) ** 2)
+        ),
+        (
+                np.array([1.0, np.NaN, 3.0]),
+                np.array([3.0, np.NaN, 2.0]),
+                "joint",
+                0.5 * (np.log(1 / 2) ** 2 + np.log(4 / 3) ** 2)
+        ),
+        (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, np.NaN]), "joint", (np.log1p(1.) - np.log1p(3.)) ** 2),
+        (np.array([1.0, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, 2.0]), "joint", np.NaN),
+        (np.array([np.NaN, np.NaN, np.NaN]), np.array([3.0, 1.0, 2.0]), "joint", np.NaN),
+        (np.array([1.0, 2.0, 3.0]), np.array([np.NaN, np.NaN, np.NaN]), "joint", np.NaN),
+        (np.array([np.NaN, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, np.NaN]), "joint", np.NaN),
+        # 2d
+        (
+                np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T,
+                np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T,
+                "joint",
+                1 / 6 * (np.log(1 / 2) ** 2 + np.log(3 / 2) ** 2 + np.log(4 / 3) ** 2 +
+                         np.log(2 / 3) ** 2 + np.log(5 / 3) ** 2 + np.log(6 / 5) ** 2)
+        ),
+        (
+            np.array([[1.0, np.NaN, 3.0], [3.0, 4.0, np.NaN]]).T,
+            np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T,
+            "joint",
+            0.5 * (np.log(1 / 2) ** 2 + np.log(2 / 3) ** 2),
+        ),
+        (
+            np.array([[np.NaN, np.NaN, np.NaN], [3.0, 4.0, 5.0]]).T,
+            np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T,
+            "joint",
+            0.5 * (np.log(2 / 3) ** 2 + np.log(6 / 5) ** 2),
+        ),
+        (
+            np.array([[np.NaN, np.NaN, np.NaN], [np.NaN, np.NaN, np.NaN]]).T,
+            np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T,
+            "joint",
+            np.NaN,
+        ),
+        (
+            np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T,
+            np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T,
+            "raw_values",
+            np.array([
+                1 / 3 * (np.log(1 / 2) ** 2 + np.log(3 / 2) ** 2 + np.log(4 / 3) ** 2),
+                1 / 3 * (np.log(2 / 3) ** 2 + np.log(5 / 3) ** 2 + np.log(6 / 5) ** 2)]),
+        ),
+        (
+            np.array([[1.0, np.NaN, 3.0], [3.0, 4.0, np.NaN]]).T,
+            np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T,
+            "raw_values",
+            np.array([np.log(1 / 2) ** 2, np.log(2 / 3) ** 2]),
+        ),
+        (
+            np.array([[np.NaN, np.NaN, np.NaN], [3.0, 4.0, 5.0]]).T,
+            np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T,
+            "raw_values",
+            np.array([np.NaN, 0.5 * (np.log(2 / 3) ** 2 + np.log(6 / 5) ** 2)]),
+        ),
+        (
+            np.array([[np.NaN, np.NaN, np.NaN], [np.NaN, np.NaN, np.NaN]]).T,
+            np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T,
+            "raw_values",
+            np.array([np.NaN, np.NaN]),
+        ),
+    ],
+)
+def test_msle_ok(y_true, y_pred, multioutput, expected):
+    result = msle(y_true=y_true, y_pred=y_pred, multioutput=multioutput)
+    npt.assert_allclose(result, expected)
+
+@pytest.mark.parametrize(
+    "y_true, y_pred, multioutput",
+    [
+        # 1d
+        (np.array([1.0, 2.0, -3.0]), np.array([3.0, 2.0, 1.0]), "joint"),
+        (np.array([1.0, 2.0, 3.0]), np.array([-3.0, 2.0, 1.0]), "joint"),
+        (np.array([1.0, -2.0, 3.0]), np.array([3.0, 2.0, -1.0]), "joint"),
+        # 2d
+        (np.array([[1.0, 2.0, -3.0], [3.0, 4.0, 5.0]]).T, np.array([[3.0, 2.0, 1.0], [5.0, 4.0, 3.0]]).T, "joint"),
+        (np.array([[1.0, 2.0, 3.0], [-3.0, 4.0, 5.0]]).T, np.array([[3.0, 2.0, 1.0], [5.0, 4.0, 3.0]]).T, "joint"),
+        (np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, np.array([[3.0, 2.0, -1.0], [5.0, 4.0, 3.0]]).T, "joint"),
+        (np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, np.array([[3.0, 2.0, 1.0], [-5.0, 4.0, 3.0]]).T, "joint"),
+        (np.array([[1.0, 2.0, -3.0], [3.0, 4.0, -5.0]]).T, np.array([[3.0, -2.0, 1.0], [5.0, 4.0, -3.0]]).T, "joint"),
+
+        (
+                np.array([[1.0, 2.0, -3.0], [3.0, 4.0, -5.0]]).T,
+                np.array([[3.0, 2.0, 1.0], [5.0, 4.0, 3.0]]).T,
+                "raw_values"
+        ),
+        (
+            np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T,
+            np.array([[3.0, -2.0, 1.0], [-5.0, 4.0, 3.0]]).T,
+            "raw_values"
+        )
+    ]
+)
+def test_msle_negative(y_true, y_pred, multioutput):
+    with pytest.raises(ValueError):
+        msle(y_true=y_true, y_pred=y_pred, multioutput=multioutput)
diff --git a/tests/test_metrics/test_metrics.py b/tests/test_metrics/test_metrics.py
index e70559926..d58afb7ab 100644
--- a/tests/test_metrics/test_metrics.py
+++ b/tests/test_metrics/test_metrics.py
@@ -44,9 +44,9 @@
         (MAE(missing_mode="ignore"), "MAE(mode = 'per-segment', missing_mode = 'ignore', )"),
         (MAE(mode="macro", missing_mode="ignore"), "MAE(mode = 'macro', missing_mode = 'ignore', )"),
         (MSE(), "MSE(mode = 'per-segment', missing_mode = 'error', )"),
-        (RMSE(), "RMSE(mode = 'per-segment', )"),
+        (RMSE(), "RMSE(mode = 'per-segment', missing_mode = 'error', )"),
         (MedAE(), "MedAE(mode = 'per-segment', )"),
-        (MSLE(), "MSLE(mode = 'per-segment', )"),
+        (MSLE(), "MSLE(mode = 'per-segment', missing_mode = 'error', )"),
         (MAPE(), "MAPE(mode = 'per-segment', missing_mode = 'error', )"),
         (SMAPE(), "SMAPE(mode = 'per-segment', missing_mode = 'error', )"),
         (R2(), "R2(mode = 'per-segment', )"),
@@ -181,9 +181,9 @@ def test_invalid_nans_pred(metric_class, train_test_dfs):
     (
         MAE(missing_mode="error"),
         MSE(missing_mode="error"),
-        RMSE(),
+        RMSE(missing_mode="error"),
         MedAE(),
-        MSLE(),
+        MSLE(missing_mode="error"),
         MAPE(missing_mode="error"),
         SMAPE(missing_mode="error"),
         R2(),
@@ -212,6 +212,8 @@ def test_invalid_nans_true(metric, train_test_dfs):
         WAPE(missing_mode="ignore"),
         MaxDeviation(missing_mode="ignore"),
         MissingCounter(),
+        RMSE(missing_mode="ignore"),
+        MSLE(missing_mode="ignore")
     ),
 )
 def test_invalid_single_nan_ignore(metric, train_test_dfs):
@@ -235,6 +237,8 @@ def test_invalid_single_nan_ignore(metric, train_test_dfs):
         (Sign(mode="per-segment", missing_mode="ignore"), type(None)),
         (WAPE(mode="per-segment", missing_mode="ignore"), type(None)),
         (MaxDeviation(mode="per-segment", missing_mode="ignore"), type(None)),
+        (RMSE(mode="per-segment", missing_mode="ignore"), type(None)),
+        (MSLE(mode="per-segment", missing_mode="ignore"), type(None)),
         (MissingCounter(mode="per-segment"), float),
     ),
 )
@@ -265,6 +269,8 @@ def test_invalid_segment_nans_ignore_per_segment(metric, expected_type, train_te
         Sign(mode="macro", missing_mode="ignore"),
         WAPE(mode="macro", missing_mode="ignore"),
         MaxDeviation(mode="macro", missing_mode="ignore"),
+        RMSE(mode="macro", missing_mode="ignore"),
+        MSLE(mode="macro", missing_mode="ignore"),
         MissingCounter(mode="macro"),
     ),
 )
@@ -286,6 +292,8 @@ def test_invalid_segment_nans_ignore_macro(metric, train_test_dfs):
         (Sign(mode="macro", missing_mode="ignore"), type(None)),
         (WAPE(mode="macro", missing_mode="ignore"), type(None)),
         (MaxDeviation(mode="macro", missing_mode="ignore"), type(None)),
+        (RMSE(mode="macro", missing_mode="ignore"), type(None)),
+        (MSLE(mode="macro", missing_mode="ignore"), type(None)),
         (MissingCounter(mode="macro"), float),
     ),
 )

From 3f9b39dbad39cda30bfdf9759ef8ecf9a519cd3e Mon Sep 17 00:00:00 2001
From: Danil Smorchkov <Danil Smorchkov>
Date: Mon, 23 Dec 2024 10:45:57 +0300
Subject: [PATCH 03/12] lint

---
 etna/metrics/functional_metrics.py            |  4 +-
 etna/metrics/metrics.py                       |  9 +-
 tests/test_metrics/test_functional_metrics.py | 82 +++++++++++--------
 tests/test_metrics/test_metrics.py            |  2 +-
 4 files changed, 56 insertions(+), 41 deletions(-)

diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py
index 4b60458e0..15185496d 100644
--- a/etna/metrics/functional_metrics.py
+++ b/etna/metrics/functional_metrics.py
@@ -418,9 +418,7 @@ def msle(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar
         raise ValueError("Shapes of the labels must be the same")
 
     if (y_true_array < 0).any() or (y_pred_array < 0).any():
-        raise ValueError(
-            "Mean Squared Logarithmic Error cannot be used when targets contain negative values."
-        )
+        raise ValueError("Mean Squared Logarithmic Error cannot be used when targets contain negative values.")
 
     axis = _get_axis_by_multioutput(multioutput)
 
diff --git a/etna/metrics/metrics.py b/etna/metrics/metrics.py
index b42bc51d6..54b354724 100644
--- a/etna/metrics/metrics.py
+++ b/etna/metrics/metrics.py
@@ -126,7 +126,7 @@ class RMSE(MetricWithMissingHandling):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = "per-segment", missing_mode='error', **kwargs):
+    def __init__(self, mode: str = "per-segment", missing_mode="error", **kwargs):
         """Init metric.
 
         Parameters
@@ -151,7 +151,8 @@ def __init__(self, mode: str = "per-segment", missing_mode='error', **kwargs):
             metric_fn=rmse_per_output,
             metric_fn_signature="matrix_to_array",
             missing_mode=missing_mode,
-            **kwargs)
+            **kwargs,
+        )
 
     @property
     def greater_is_better(self) -> bool:
@@ -338,7 +339,7 @@ class MSLE(MetricWithMissingHandling):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = "per-segment", missing_mode='error', **kwargs):
+    def __init__(self, mode: str = "per-segment", missing_mode="error", **kwargs):
         """Init metric.
 
         Parameters
@@ -364,7 +365,7 @@ def __init__(self, mode: str = "per-segment", missing_mode='error', **kwargs):
             metric_fn=msle_per_output,
             metric_fn_signature="matrix_to_array",
             missing_mode=missing_mode,
-            **kwargs
+            **kwargs,
         )
 
     @property
diff --git a/tests/test_metrics/test_functional_metrics.py b/tests/test_metrics/test_functional_metrics.py
index 5be038e1c..3992a22a0 100644
--- a/tests/test_metrics/test_functional_metrics.py
+++ b/tests/test_metrics/test_functional_metrics.py
@@ -684,7 +684,12 @@ def test_max_deviation(y_true, y_pred, multioutput, expected):
         (np.array([1.0, 2.0, 3.0]), np.array([np.NaN, np.NaN, np.NaN]), "joint", np.NaN),
         (np.array([np.NaN, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, np.NaN]), "joint", np.NaN),
         # 2d
-        (np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T, "joint", np.sqrt(2.5)),
+        (
+            np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T,
+            np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T,
+            "joint",
+            np.sqrt(2.5),
+        ),
         (
             np.array([[1.0, np.NaN, 3.0], [3.0, 4.0, np.NaN]]).T,
             np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T,
@@ -740,41 +745,49 @@ def test_mse_ok(y_true, y_pred, multioutput, expected):
         # 1d
         (np.array([1.0]), np.array([1.0]), "joint", 0.0),
         (
-                np.array([1.0, 2.0, 3.0]),
-                np.array([3.0, 1.0, 2.0]),
-                "joint",
-                1 / 3 * (np.log(1 / 2) ** 2 + np.log(3 / 2) ** 2 + np.log(4 / 3) ** 2)
+            np.array([1.0, 2.0, 3.0]),
+            np.array([3.0, 1.0, 2.0]),
+            "joint",
+            1 / 3 * (np.log(1 / 2) ** 2 + np.log(3 / 2) ** 2 + np.log(4 / 3) ** 2),
         ),
         (
-                np.array([1.0, np.NaN, 3.0]),
-                np.array([3.0, 1.0, 2.0]),
-                "joint",
-                0.5 * (np.log(1 / 2) ** 2 + np.log(4 / 3) ** 2)
+            np.array([1.0, np.NaN, 3.0]),
+            np.array([3.0, 1.0, 2.0]),
+            "joint",
+            0.5 * (np.log(1 / 2) ** 2 + np.log(4 / 3) ** 2),
         ),
         (
-                np.array([1.0, 2.0, 3.0]),
-                np.array([3.0, np.NaN, 2.0]),
-                "joint",
-                0.5 * (np.log(1 / 2) ** 2 + np.log(4 / 3) ** 2)
+            np.array([1.0, 2.0, 3.0]),
+            np.array([3.0, np.NaN, 2.0]),
+            "joint",
+            0.5 * (np.log(1 / 2) ** 2 + np.log(4 / 3) ** 2),
         ),
         (
-                np.array([1.0, np.NaN, 3.0]),
-                np.array([3.0, np.NaN, 2.0]),
-                "joint",
-                0.5 * (np.log(1 / 2) ** 2 + np.log(4 / 3) ** 2)
+            np.array([1.0, np.NaN, 3.0]),
+            np.array([3.0, np.NaN, 2.0]),
+            "joint",
+            0.5 * (np.log(1 / 2) ** 2 + np.log(4 / 3) ** 2),
         ),
-        (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, np.NaN]), "joint", (np.log1p(1.) - np.log1p(3.)) ** 2),
+        (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, np.NaN]), "joint", (np.log1p(1.0) - np.log1p(3.0)) ** 2),
         (np.array([1.0, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, 2.0]), "joint", np.NaN),
         (np.array([np.NaN, np.NaN, np.NaN]), np.array([3.0, 1.0, 2.0]), "joint", np.NaN),
         (np.array([1.0, 2.0, 3.0]), np.array([np.NaN, np.NaN, np.NaN]), "joint", np.NaN),
         (np.array([np.NaN, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, np.NaN]), "joint", np.NaN),
         # 2d
         (
-                np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T,
-                np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T,
-                "joint",
-                1 / 6 * (np.log(1 / 2) ** 2 + np.log(3 / 2) ** 2 + np.log(4 / 3) ** 2 +
-                         np.log(2 / 3) ** 2 + np.log(5 / 3) ** 2 + np.log(6 / 5) ** 2)
+            np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T,
+            np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T,
+            "joint",
+            1
+            / 6
+            * (
+                np.log(1 / 2) ** 2
+                + np.log(3 / 2) ** 2
+                + np.log(4 / 3) ** 2
+                + np.log(2 / 3) ** 2
+                + np.log(5 / 3) ** 2
+                + np.log(6 / 5) ** 2
+            ),
         ),
         (
             np.array([[1.0, np.NaN, 3.0], [3.0, 4.0, np.NaN]]).T,
@@ -798,9 +811,12 @@ def test_mse_ok(y_true, y_pred, multioutput, expected):
             np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T,
             np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T,
             "raw_values",
-            np.array([
-                1 / 3 * (np.log(1 / 2) ** 2 + np.log(3 / 2) ** 2 + np.log(4 / 3) ** 2),
-                1 / 3 * (np.log(2 / 3) ** 2 + np.log(5 / 3) ** 2 + np.log(6 / 5) ** 2)]),
+            np.array(
+                [
+                    1 / 3 * (np.log(1 / 2) ** 2 + np.log(3 / 2) ** 2 + np.log(4 / 3) ** 2),
+                    1 / 3 * (np.log(2 / 3) ** 2 + np.log(5 / 3) ** 2 + np.log(6 / 5) ** 2),
+                ]
+            ),
         ),
         (
             np.array([[1.0, np.NaN, 3.0], [3.0, 4.0, np.NaN]]).T,
@@ -826,6 +842,7 @@ def test_msle_ok(y_true, y_pred, multioutput, expected):
     result = msle(y_true=y_true, y_pred=y_pred, multioutput=multioutput)
     npt.assert_allclose(result, expected)
 
+
 @pytest.mark.parametrize(
     "y_true, y_pred, multioutput",
     [
@@ -839,18 +856,17 @@ def test_msle_ok(y_true, y_pred, multioutput, expected):
         (np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, np.array([[3.0, 2.0, -1.0], [5.0, 4.0, 3.0]]).T, "joint"),
         (np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, np.array([[3.0, 2.0, 1.0], [-5.0, 4.0, 3.0]]).T, "joint"),
         (np.array([[1.0, 2.0, -3.0], [3.0, 4.0, -5.0]]).T, np.array([[3.0, -2.0, 1.0], [5.0, 4.0, -3.0]]).T, "joint"),
-
         (
-                np.array([[1.0, 2.0, -3.0], [3.0, 4.0, -5.0]]).T,
-                np.array([[3.0, 2.0, 1.0], [5.0, 4.0, 3.0]]).T,
-                "raw_values"
+            np.array([[1.0, 2.0, -3.0], [3.0, 4.0, -5.0]]).T,
+            np.array([[3.0, 2.0, 1.0], [5.0, 4.0, 3.0]]).T,
+            "raw_values",
         ),
         (
             np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T,
             np.array([[3.0, -2.0, 1.0], [-5.0, 4.0, 3.0]]).T,
-            "raw_values"
-        )
-    ]
+            "raw_values",
+        ),
+    ],
 )
 def test_msle_negative(y_true, y_pred, multioutput):
     with pytest.raises(ValueError):
diff --git a/tests/test_metrics/test_metrics.py b/tests/test_metrics/test_metrics.py
index d58afb7ab..0ef17d0c6 100644
--- a/tests/test_metrics/test_metrics.py
+++ b/tests/test_metrics/test_metrics.py
@@ -213,7 +213,7 @@ def test_invalid_nans_true(metric, train_test_dfs):
         MaxDeviation(missing_mode="ignore"),
         MissingCounter(),
         RMSE(missing_mode="ignore"),
-        MSLE(missing_mode="ignore")
+        MSLE(missing_mode="ignore"),
     ),
 )
 def test_invalid_single_nan_ignore(metric, train_test_dfs):

From e2e6324b4edadf5f0665b6170a9d6cd3572e942d Mon Sep 17 00:00:00 2001
From: Danil Smorchkov <Danil Smorchkov>
Date: Mon, 23 Dec 2024 11:09:14 +0300
Subject: [PATCH 04/12] change changelog and lint

---
 CHANGELOG.md             | 2 +-
 etna/metrics/__init__.py | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e43952a0f..f7f772590 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -39,7 +39,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - 
 - Rework validation of `FoldMask` to not fail on tail nans ([#536](https://github.com/etna-team/etna/pull/536))
 - Add parameter `missing_mode` into `R2` and `MedAE` metrics ([#537](https://github.com/etna-team/etna/pull/537))
-- 
+- Add parameer `missing_mode` into `RMSE` and `MSLE` metrics ([#542](https://github.com/etna-team/etna/pull/542))
 - 
 
 ### Fixed
diff --git a/etna/metrics/__init__.py b/etna/metrics/__init__.py
index f852d205d..6f86914f0 100644
--- a/etna/metrics/__init__.py
+++ b/etna/metrics/__init__.py
@@ -15,7 +15,6 @@
 from etna.metrics.functional_metrics import sign
 from etna.metrics.functional_metrics import smape
 from etna.metrics.functional_metrics import wape
-from etna.metrics.functional_metrics import msle
 from etna.metrics.intervals_metrics import Coverage
 from etna.metrics.intervals_metrics import Width
 from etna.metrics.metrics import MAE
@@ -30,4 +29,4 @@
 from etna.metrics.metrics import MedAE
 from etna.metrics.metrics import MissingCounter
 from etna.metrics.metrics import Sign
-from etna.metrics.utils import compute_metrics
+from etna.metrics.utils import compute_metrics
\ No newline at end of file

From 1fc1be21749dee3ae781fb598149b39a9068938a Mon Sep 17 00:00:00 2001
From: Danil Smorchkov <Danil Smorchkov>
Date: Mon, 23 Dec 2024 11:16:04 +0300
Subject: [PATCH 05/12] lint: restore trailing newline in etna/metrics/__init__.py

---
 etna/metrics/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/etna/metrics/__init__.py b/etna/metrics/__init__.py
index 6f86914f0..f9f5878e0 100644
--- a/etna/metrics/__init__.py
+++ b/etna/metrics/__init__.py
@@ -29,4 +29,4 @@
 from etna.metrics.metrics import MedAE
 from etna.metrics.metrics import MissingCounter
 from etna.metrics.metrics import Sign
-from etna.metrics.utils import compute_metrics
\ No newline at end of file
+from etna.metrics.utils import compute_metrics

From 9da1a6637f9650eba547129169bcda37acc5cfa0 Mon Sep 17 00:00:00 2001
From: Danil Smorchkov <Danil Smorchkov>
Date: Mon, 23 Dec 2024 11:23:21 +0300
Subject: [PATCH 06/12] lint: punctuate rmse/msle docstrings, drop Metric import

---
 etna/metrics/functional_metrics.py | 4 ++--
 etna/metrics/metrics.py            | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py
index 60d72195e..b537262e1 100644
--- a/etna/metrics/functional_metrics.py
+++ b/etna/metrics/functional_metrics.py
@@ -455,7 +455,7 @@ def max_deviation(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "join
 
 
 def rmse(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
-    """Root mean squared error with missing values handling
+    """Root mean squared error with missing values handling.
 
     .. math::
         RMSE(y\_true, y\_pred) = \\sqrt\\frac{\\sum_{i=1}^{n}{(y\_true_i - y\_pred_i)^2}}{n}
@@ -503,7 +503,7 @@ def rmse(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar
 
 
 def msle(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
-    """Mean squared logarithmic error with missing values handling
+    """Mean squared logarithmic error with missing values handling.
 
     .. math::
         MSLE(y\_true, y\_pred) = \\frac{1}{n}\\cdot\\sum_{i=1}^{n}{(log(1 + y\_true_i) - log(1 + y\_pred_i))^2}
diff --git a/etna/metrics/metrics.py b/etna/metrics/metrics.py
index d31bef0ea..943450e71 100644
--- a/etna/metrics/metrics.py
+++ b/etna/metrics/metrics.py
@@ -1,6 +1,5 @@
 from functools import partial
 
-from etna.metrics.base import Metric
 from etna.metrics.base import MetricWithMissingHandling
 from etna.metrics.functional_metrics import count_missing_values
 from etna.metrics.functional_metrics import mae

From 2a26a83996bbdfd340d8879df44cb303fb62d2e7 Mon Sep 17 00:00:00 2001
From: Danil Smorchkov <Danil Smorchkov>
Date: Mon, 23 Dec 2024 12:03:02 +0300
Subject: [PATCH 07/12] address review comments: reuse mse in rmse, document msle errors

---
 CHANGELOG.md                                  |  2 +-
 etna/metrics/functional_metrics.py            | 24 +++++++------------
 tests/test_metrics/test_functional_metrics.py | 18 +++++++-------
 3 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f7f772590..1bfdf70ba 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -39,7 +39,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - 
 - Rework validation of `FoldMask` to not fail on tail nans ([#536](https://github.com/etna-team/etna/pull/536))
 - Add parameter `missing_mode` into `R2` and `MedAE` metrics ([#537](https://github.com/etna-team/etna/pull/537))
-- Add parameer `missing_mode` into `RMSE` and `MSLE` metrics ([#542](https://github.com/etna-team/etna/pull/542))
+- Add parameter `missing_mode` into `RMSE` and `MSLE` metrics ([#542](https://github.com/etna-team/etna/pull/542))
 - 
 
 ### Fixed
diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py
index b537262e1..2718837bf 100644
--- a/etna/metrics/functional_metrics.py
+++ b/etna/metrics/functional_metrics.py
@@ -484,22 +484,9 @@ def rmse(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar
         A non-negative floating point value (the best value is 0.0), or an array of floating point values,
         one for each individual target.
     """
-    y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
-
-    if len(y_true_array.shape) != len(y_pred_array.shape):
-        raise ValueError("Shapes of the labels must be the same")
-
-    axis = _get_axis_by_multioutput(multioutput)
-
-    with warnings.catch_warnings():
-        # this helps to prevent warning in case of all nans
-        warnings.filterwarnings(
-            message="Mean of empty slice",
-            action="ignore",
-        )
-        result = np.nanmean((y_true_array - y_pred_array) ** 2, axis=axis)
+    mse_result = mse(y_true=y_true, y_pred=y_pred, multioutput=multioutput)
 
-    return np.sqrt(result)
+    return np.sqrt(mse_result)
 
 
 def msle(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
@@ -531,6 +518,13 @@ def msle(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar
     :
         A non-negative floating point value (the best value is 0.0), or an array of floating point values,
         one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the shapes of the input arrays do not match.
+        If input arrays contain negative values.
     """
     y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
 
diff --git a/tests/test_metrics/test_functional_metrics.py b/tests/test_metrics/test_functional_metrics.py
index c6e964b0d..64777c01b 100644
--- a/tests/test_metrics/test_functional_metrics.py
+++ b/tests/test_metrics/test_functional_metrics.py
@@ -994,15 +994,11 @@ def test_msle_ok(y_true, y_pred, multioutput, expected):
     "y_true, y_pred, multioutput",
     [
         # 1d
-        (np.array([1.0, 2.0, -3.0]), np.array([3.0, 2.0, 1.0]), "joint"),
-        (np.array([1.0, 2.0, 3.0]), np.array([-3.0, 2.0, 1.0]), "joint"),
-        (np.array([1.0, -2.0, 3.0]), np.array([3.0, 2.0, -1.0]), "joint"),
+        (np.array([-1.0, 2.0, -3.0]), np.array([3.0, 2.0, 1.0]), "joint"),
+        (np.array([1.0, 2.0, 3.0]), np.array([-3.0, 2.0, -1.0]), "joint"),
         # 2d
-        (np.array([[1.0, 2.0, -3.0], [3.0, 4.0, 5.0]]).T, np.array([[3.0, 2.0, 1.0], [5.0, 4.0, 3.0]]).T, "joint"),
-        (np.array([[1.0, 2.0, 3.0], [-3.0, 4.0, 5.0]]).T, np.array([[3.0, 2.0, 1.0], [5.0, 4.0, 3.0]]).T, "joint"),
-        (np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, np.array([[3.0, 2.0, -1.0], [5.0, 4.0, 3.0]]).T, "joint"),
-        (np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, np.array([[3.0, 2.0, 1.0], [-5.0, 4.0, 3.0]]).T, "joint"),
-        (np.array([[1.0, 2.0, -3.0], [3.0, 4.0, -5.0]]).T, np.array([[3.0, -2.0, 1.0], [5.0, 4.0, -3.0]]).T, "joint"),
+        (np.array([[1.0, 2.0, -3.0], [3.0, -4.0, 5.0]]).T, np.array([[3.0, 2.0, 1.0], [5.0, 4.0, 3.0]]).T, "joint"),
+        (np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, np.array([[3.0, 2.0, -1.0], [-5.0, 4.0, -3.0]]).T, "joint"),
         (
             np.array([[1.0, 2.0, -3.0], [3.0, 4.0, -5.0]]).T,
             np.array([[3.0, 2.0, 1.0], [5.0, 4.0, 3.0]]).T,
@@ -1010,11 +1006,13 @@ def test_msle_ok(y_true, y_pred, multioutput, expected):
         ),
         (
             np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T,
-            np.array([[3.0, -2.0, 1.0], [-5.0, 4.0, 3.0]]).T,
+            np.array([[3.0, -2.0, 1.0], [-5.0, 4.0, -3.0]]).T,
             "raw_values",
         ),
     ],
 )
 def test_msle_negative(y_true, y_pred, multioutput):
-    with pytest.raises(ValueError):
+    with pytest.raises(
+        ValueError, match="Mean Squared Logarithmic Error cannot be used when targets contain negative values."
+    ):
         msle(y_true=y_true, y_pred=y_pred, multioutput=multioutput)

From 1dd99a7b6703627e4e2f0cb198c0e586076dcd28 Mon Sep 17 00:00:00 2001
From: Danil Smorchkov <Danil Smorchkov>
Date: Mon, 23 Dec 2024 12:37:39 +0300
Subject: [PATCH 08/12] fix typing

---
 etna/metrics/functional_metrics.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py
index 2718837bf..27e104f5f 100644
--- a/etna/metrics/functional_metrics.py
+++ b/etna/metrics/functional_metrics.py
@@ -484,9 +484,10 @@ def rmse(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar
         A non-negative floating point value (the best value is 0.0), or an array of floating point values,
         one for each individual target.
     """
-    mse_result = mse(y_true=y_true, y_pred=y_pred, multioutput=multioutput)
+    mse_result = np.asarray(mse(y_true=y_true, y_pred=y_pred, multioutput=multioutput))
+    result = np.sqrt(mse_result)
 
-    return np.sqrt(mse_result)
+    return result
 
 
 def msle(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:

From 7b3278eca235685565cfcb03b1aee678a6983ce7 Mon Sep 17 00:00:00 2001
From: Danil Smorchkov <Danil Smorchkov>
Date: Mon, 23 Dec 2024 13:24:58 +0300
Subject: [PATCH 09/12] msle: split ValueError docs, lowercase error message

---
 etna/metrics/functional_metrics.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py
index 27e104f5f..fb3526cba 100644
--- a/etna/metrics/functional_metrics.py
+++ b/etna/metrics/functional_metrics.py
@@ -525,6 +525,7 @@ def msle(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar
     :
     ValueError:
         If the shapes of the input arrays do not match.
+    ValueError:
         If input arrays contain negative values.
     """
     y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
@@ -533,7 +534,7 @@ def msle(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar
         raise ValueError("Shapes of the labels must be the same")
 
     if (y_true_array < 0).any() or (y_pred_array < 0).any():
-        raise ValueError("Mean Squared Logarithmic Error cannot be used when targets contain negative values.")
+        raise ValueError("Mean squared logarithmic error cannot be used when targets contain negative values.")
 
     axis = _get_axis_by_multioutput(multioutput)
 

From b64bf5132c641dff3525fc8fa72cd596ae6c7f88 Mon Sep 17 00:00:00 2001
From: Danil Smorchkov <Danil Smorchkov>
Date: Mon, 23 Dec 2024 15:07:56 +0300
Subject: [PATCH 10/12] document ValueError in metric docstrings, fix rmse test name

---
 etna/metrics/functional_metrics.py            | 60 +++++++++++++++++++
 tests/test_metrics/test_functional_metrics.py |  6 +-
 2 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py
index fb3526cba..223c8cc07 100644
--- a/etna/metrics/functional_metrics.py
+++ b/etna/metrics/functional_metrics.py
@@ -65,6 +65,12 @@ def mse(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Arr
     :
         A non-negative floating point value (the best value is 0.0), or an array of floating point values,
         one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the shapes of the input arrays do not match.
     """
     y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
 
@@ -111,6 +117,12 @@ def mae(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Arr
     :
         A non-negative floating point value (the best value is 0.0), or an array of floating point values,
         one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the shapes of the input arrays do not match.
     """
     y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
 
@@ -163,6 +175,12 @@ def mape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput:
     :
         A non-negative floating point value (the best value is 0.0), or an array of floating point values,
         one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the shapes of the input arrays do not match.
     """
     y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
 
@@ -217,6 +235,12 @@ def smape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput:
     :
         A non-negative floating point value (the best value is 0.0), or an array of floating point values,
         one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the shapes of the input arrays do not match.
     """
     y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
 
@@ -267,6 +291,12 @@ def r2_score(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -
     :
         A floating point value, or an array of floating point values,
         one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the shapes of the input arrays do not match.
     """
     y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
 
@@ -343,6 +373,12 @@ def medae(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> A
     :
         A non-negative floating point value (the best value is 0.0), or an array of floating point values,
         one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the shapes of the input arrays do not match.
     """
     y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
 
@@ -389,6 +425,12 @@ def sign(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar
     :
         A floating point value, or an array of floating point values,
         one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the shapes of the input arrays do not match.
     """
     y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
 
@@ -483,6 +525,12 @@ def rmse(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar
     :
         A non-negative floating point value (the best value is 0.0), or an array of floating point values,
         one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the shapes of the input arrays do not match.
     """
     mse_result = np.asarray(mse(y_true=y_true, y_pred=y_pred, multioutput=multioutput))
     result = np.sqrt(mse_result)
@@ -578,6 +626,12 @@ def wape(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar
     :
         A non-negative floating point value (the best value is 0.0), or an array of floating point values,
         one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the shapes of the input arrays do not match.
     """
     y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
 
@@ -643,6 +697,12 @@ def count_missing_values(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str
     :
         A floating point value, or an array of floating point values,
         one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the shapes of the input arrays do not match.
     """
     y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
 
diff --git a/tests/test_metrics/test_functional_metrics.py b/tests/test_metrics/test_functional_metrics.py
index 64777c01b..bbe85a778 100644
--- a/tests/test_metrics/test_functional_metrics.py
+++ b/tests/test_metrics/test_functional_metrics.py
@@ -881,8 +881,9 @@ def test_madae_ok(y_true, y_pred, multioutput, expected):
         ),
     ],
 )
-def test_mse_ok(y_true, y_pred, multioutput, expected):
+def test_rmse_ok(y_true, y_pred, multioutput, expected):
     result = rmse(y_true=y_true, y_pred=y_pred, multioutput=multioutput)
+    assert np.shape(result) == np.shape(expected)
     npt.assert_allclose(result, expected)
 
 
@@ -987,6 +988,7 @@ def test_mse_ok(y_true, y_pred, multioutput, expected):
 )
 def test_msle_ok(y_true, y_pred, multioutput, expected):
     result = msle(y_true=y_true, y_pred=y_pred, multioutput=multioutput)
+    assert np.shape(result) == np.shape(expected)
     npt.assert_allclose(result, expected)
 
 
@@ -1013,6 +1015,6 @@ def test_msle_ok(y_true, y_pred, multioutput, expected):
 )
 def test_msle_negative(y_true, y_pred, multioutput):
     with pytest.raises(
-        ValueError, match="Mean Squared Logarithmic Error cannot be used when targets contain negative values."
+        ValueError, match="Mean squared logarithmic error cannot be used when targets contain negative values."
     ):
         msle(y_true=y_true, y_pred=y_pred, multioutput=multioutput)

From 20aa4f42668f519a919bdf9ff6c9b71f8d9270cc Mon Sep 17 00:00:00 2001
From: Danil Smorchkov <Danil Smorchkov>
Date: Mon, 23 Dec 2024 15:11:32 +0300
Subject: [PATCH 11/12] document ValueError in max_deviation docstring

---
 etna/metrics/functional_metrics.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py
index 223c8cc07..57304d901 100644
--- a/etna/metrics/functional_metrics.py
+++ b/etna/metrics/functional_metrics.py
@@ -478,6 +478,12 @@ def max_deviation(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "join
     :
         A non-negative floating point value (the best value is 0.0), or an array of floating point values,
         one for each individual target.
+
+    Raises
+    ------
+    :
+    ValueError:
+        If the shapes of the input arrays do not match.
     """
     y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
 

From b9b63bd7ae7eb1aeb24df213c93cbf0efb4df8dd Mon Sep 17 00:00:00 2001
From: Danil Smorchkov <Danil Smorchkov>
Date: Mon, 23 Dec 2024 15:53:51 +0300
Subject: [PATCH 12/12] remove np.asarray

---
 etna/metrics/functional_metrics.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py
index 57304d901..9e4a5b31f 100644
--- a/etna/metrics/functional_metrics.py
+++ b/etna/metrics/functional_metrics.py
@@ -538,10 +538,10 @@ def rmse(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar
     ValueError:
         If the shapes of the input arrays do not match.
     """
-    mse_result = np.asarray(mse(y_true=y_true, y_pred=y_pred, multioutput=multioutput))
+    mse_result = mse(y_true=y_true, y_pred=y_pred, multioutput=multioutput)
     result = np.sqrt(mse_result)
 
-    return result
+    return result  # type: ignore
 
 
 def msle(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike: