etna-team · brsnw250 · Dec 23, 2024 · Dec 20, 2024 · Dec 20, 2024 · Dec 23, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -33,6 +33,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Add parameter `missing_mode` into `MAE` metric ([#523](https://github.com/etna-team/etna/pull/523))
 - Add parameter `missing_mode` into `MAPE` and `SMAPE` metrics ([#524](https://github.com/etna-team/etna/pull/524))
 - Add parameter `missing_mode` into `Sign`, `WAPE` and `MaxDeviation` metrics ([#530](https://github.com/etna-team/etna/pull/530))
+- Add parameter `missing_mode` into `Coverage` and `Width` metrics ([#541](https://github.com/etna-team/etna/pull/541))
 - 
 - Update `aggregate_metrics_df` to work with `None` values ([#522](https://github.com/etna-team/etna/pull/522))
 - 

diff --git a/etna/metrics/intervals_metrics.py b/etna/metrics/intervals_metrics.py
@@ -1,3 +1,5 @@
+import warnings
+from reprlib import repr
 from typing import Dict
 from typing import Optional
 from typing import Sequence
@@ -8,7 +10,7 @@
 import pandas as pd
 
 from etna.datasets import TSDataset
-from etna.metrics.base import Metric
+from etna.metrics.base import MetricWithMissingHandling
 from etna.metrics.functional_metrics import ArrayLike
 
 
@@ -34,17 +36,36 @@
             if not borders_presented:
                 raise ValueError("Provided intervals borders names must be in dataset!")
 
+            else:
+                missing_per_segment = ts.loc[:, pd.IndexSlice[:, list(borders_set)]].isna().any()
+                if missing_per_segment.any():
+                    raise ValueError(
+                        "Provided intervals borders contain missing values! "
+                        f"Series with missing values {repr(missing_per_segment[missing_per_segment].index.tolist())}"
+                    )
+
         else:
             if not quantiles_presented:
                 raise ValueError("All quantiles must be presented in the dataset!")
 
+            else:
+                missing_per_segment = ts.loc[:, pd.IndexSlice[:, list(quantiles_set)]].isna().any()
+                if missing_per_segment.any():
+                    raise ValueError(
+                        "Quantiles contain missing values! "
+                        f"Series with missing values {repr(missing_per_segment[missing_per_segment].index.tolist())}"
+                    )
+
 
-class Coverage(Metric, _IntervalsMetricMixin):
+class Coverage(MetricWithMissingHandling, _IntervalsMetricMixin):
     """Coverage metric for prediction intervals - precenteage of samples in the interval ``[lower quantile, upper quantile]``.
 
     .. math::
         Coverage(y\_true, y\_pred) = \\frac{\\sum_{i=1}^{n}{[ y\_true_i \\ge y\_pred_i^{lower\_quantile}] * [y\_true_i \\le y\_pred_i^{upper\_quantile}] }}{n}
 
+    This metric can handle missing values with parameter ``missing_mode``.
+    If there are too many of them in ``ignore`` mode, the result will be ``None``.
+
     Notes
     -----
     Works just if ``quantiles`` presented in ``y_pred``
@@ -58,6 +79,7 @@
         mode: str = "per-segment",
         upper_name: Optional[str] = None,
         lower_name: Optional[str] = None,
+        missing_mode: str = "error",
         **kwargs,
     ):
         """Init metric.
@@ -78,6 +100,8 @@
             name of column with upper border of the interval
         lower_name:
             name of column with lower border of the interval
+        missing_mode:
+            mode of handling missing values (see :py:class:`~etna.metrics.base.MetricMissingMode`)
         kwargs:
             metric's computation arguments
         """
@@ -96,7 +120,7 @@
         if quantiles is None and lower_name is None:
             quantiles = (0.025, 0.975)
 
-        super().__init__(mode=mode, metric_fn=dummy, **kwargs)
+        super().__init__(mode=mode, metric_fn=dummy, missing_mode=missing_mode, **kwargs)
         self.quantiles = sorted(quantiles if quantiles is not None else tuple())
         self.upper_name = upper_name
         self.lower_name = lower_name
@@ -136,7 +160,7 @@
             lower_border = f"target_{self.quantiles[0]:.4g}"
             upper_border = f"target_{self.quantiles[1]:.4g}"
 
-        df_true = y_true[:, :, "target"].sort_index(axis=1)
+        df_true = y_true.df.loc[:, pd.IndexSlice[:, "target"]].sort_index(axis=1)
 
         intervals_df: pd.DataFrame = y_pred.get_prediction_intervals()
         df_pred_lower = intervals_df.loc[:, pd.IndexSlice[:, lower_border]].sort_index(axis=1)
@@ -146,8 +170,19 @@
 
         upper_quantile_flag = df_true.values <= df_pred_upper.values
         lower_quantile_flag = df_true.values >= df_pred_lower.values
-        values = np.mean(upper_quantile_flag * lower_quantile_flag, axis=0)
-        metrics_per_segment = dict(zip(segments, values))
+
+        nan_mask = np.isnan(df_true.values) | np.isnan(df_pred_upper.values) | np.isnan(df_pred_lower.values)
+        in_bounds = (upper_quantile_flag * lower_quantile_flag).astype(float)
+        in_bounds[nan_mask] = np.NaN
+
+        with warnings.catch_warnings():
+            warnings.filterwarnings(
+                message="Mean of empty slice",
+                action="ignore",
+            )
+            values = np.nanmean(in_bounds, axis=0)
+
+        metrics_per_segment = dict(zip(segments, (None if np.isnan(x) else x for x in values)))
 
         metrics = self._aggregate_metrics(metrics_per_segment)
         return metrics
@@ -158,12 +193,15 @@
         return None
 
 
-class Width(Metric, _IntervalsMetricMixin):
+class Width(MetricWithMissingHandling, _IntervalsMetricMixin):
     """Mean width of prediction intervals.
 
     .. math::
         Width(y\_true, y\_pred) = \\frac{\\sum_{i=1}^{n}\\mid y\_pred_i^{upper\_quantile} - y\_pred_i^{lower\_quantile} \\mid}{n}
 
+    This metric can handle missing values with parameter ``missing_mode``.
+    If there are too many of them in ``ignore`` mode, the result will be ``None``.
+
     Notes
     -----
     Works just if quantiles presented in ``y_pred``.
@@ -177,6 +215,7 @@
         mode: str = "per-segment",
         upper_name: Optional[str] = None,
         lower_name: Optional[str] = None,
+        missing_mode: str = "error",
         **kwargs,
     ):
         """Init metric.
@@ -197,6 +236,8 @@
             name of column with upper border of the interval
         lower_name:
             name of column with lower border of the interval
+        missing_mode:
+            mode of handling missing values (see :py:class:`~etna.metrics.base.MetricMissingMode`)
         kwargs:
             metric's computation arguments
         """
@@ -215,7 +256,7 @@
         if quantiles is None and lower_name is None:
             quantiles = (0.025, 0.975)
 
-        super().__init__(mode=mode, metric_fn=dummy, **kwargs)
+        super().__init__(mode=mode, metric_fn=dummy, missing_mode=missing_mode, **kwargs)
         self.quantiles = sorted(quantiles if quantiles is not None else tuple())
         self.upper_name = upper_name
         self.lower_name = lower_name
@@ -255,16 +296,22 @@
             lower_border = f"target_{self.quantiles[0]:.4g}"
             upper_border = f"target_{self.quantiles[1]:.4g}"
 
-        df_true = y_true[:, :, "target"].sort_index(axis=1)
+        df_true = y_true.df.loc[:, pd.IndexSlice[:, "target"]].sort_index(axis=1)
 
         intervals_df: pd.DataFrame = y_pred.get_prediction_intervals()
         df_pred_lower = intervals_df.loc[:, pd.IndexSlice[:, lower_border]].sort_index(axis=1)
         df_pred_upper = intervals_df.loc[:, pd.IndexSlice[:, upper_border]].sort_index(axis=1)
 
         segments = df_true.columns.get_level_values("segment").unique()
 
-        values = np.mean(np.abs(df_pred_upper.values - df_pred_lower.values), axis=0)
-        metrics_per_segment = dict(zip(segments, values))
+        with warnings.catch_warnings():
+            warnings.filterwarnings(
+                message="Mean of empty slice",
+                action="ignore",
+            )
+            values = np.nanmean(np.abs(df_pred_upper.values - df_pred_lower.values), axis=0)
+
+        metrics_per_segment = dict(zip(segments, (None if np.isnan(x) else x for x in values)))
 
         metrics = self._aggregate_metrics(metrics_per_segment)
         return metrics