Update aggregate_metrics_df to work with None values (#522)
d-a-bunin authored Dec 17, 2024
1 parent 904e626 commit 5b17421
Showing 9 changed files with 454 additions and 52 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -33,7 +33,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add parameter `missing_mode` into `MAE` metric ([#523](https://github.com/etna-team/etna/pull/523))
- Add parameter `missing_mode` into `MAPE` and `SMAPE` metrics ([#524](https://github.com/etna-team/etna/pull/524))
-
-
- Update `aggregate_metrics_df` to work with `None` values ([#522](https://github.com/etna-team/etna/pull/522))
-
-
-
12 changes: 10 additions & 2 deletions etna/auto/auto.py
@@ -484,7 +484,11 @@ def _objective(trial: Trial) -> float:
            for metric in aggregated_metrics:
                trial.set_user_attr(metric, aggregated_metrics[metric])

-           return aggregated_metrics[f"{target_metric.name}_{metric_aggregation}"]
+           result_value = aggregated_metrics[f"{target_metric.name}_{metric_aggregation}"]
+           if result_value is None:
+               raise ValueError("Metric value is None! It should be float for optimization.")
+
+           return result_value

        return _objective

@@ -809,7 +813,11 @@ def _objective(trial: Trial) -> float:
            for metric in aggregated_metrics:
                trial.set_user_attr(metric, aggregated_metrics[metric])

-           return aggregated_metrics[f"{target_metric.name}_{metric_aggregation}"]
+           result_value = aggregated_metrics[f"{target_metric.name}_{metric_aggregation}"]
+           if result_value is None:
+               raise ValueError("Metric value is None! It should be float for optimization.")
+
+           return result_value

        return _objective

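Both hunks above add the same guard. An Optuna objective has to return a real number, and since the aggregation can now produce None (for example, when a metric is missing in every fold), raising early with a clear message beats the opaque failure Optuna would produce later. A minimal standalone sketch of the pattern; the helper name extract_objective_value is hypothetical and not part of the commit:

from typing import Dict, Optional


def extract_objective_value(aggregated_metrics: Dict[str, Optional[float]], key: str) -> float:
    # Mirrors the guard in _objective: never hand None to the optimizer.
    result_value = aggregated_metrics[key]
    if result_value is None:
        raise ValueError("Metric value is None! It should be float for optimization.")
    return result_value


print(extract_objective_value({"MAE_mean": 1.5}, "MAE_mean"))  # 1.5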
99 changes: 83 additions & 16 deletions etna/metrics/utils.py
@@ -1,3 +1,4 @@
+import warnings
from typing import Callable
from typing import Dict
from typing import List
@@ -37,32 +38,97 @@ def compute_metrics(
    return metrics_values


def mean_agg():
    """Mean for pandas agg."""

    def func(x: pd.Series):
        with warnings.catch_warnings():
            # this helps to prevent warning in case of all nans
            warnings.filterwarnings(
                message="Mean of empty slice",
                action="ignore",
            )
            return np.nanmean(a=x.values)

    func.__name__ = "mean"
    return func


def median_agg():
    """Median for pandas agg."""

    def func(x: pd.Series):
        with warnings.catch_warnings():
            # this helps to prevent warning in case of all nans
            warnings.filterwarnings(
                message="All-NaN slice encountered",
                action="ignore",
            )
            return np.nanmedian(a=x.values)

    func.__name__ = "median"
    return func


def std_agg():
    """Std for pandas agg."""

    def func(x: pd.Series):
        with warnings.catch_warnings():
            # this helps to prevent warning in case of all nans
            warnings.filterwarnings(
                message="Degrees of freedom <= 0",
                action="ignore",
            )
            return np.nanstd(a=x.values)

    func.__name__ = "std"
    return func


def notna_size_agg():
    """Size of not-na elements for pandas agg."""

    def func(x: pd.Series):
        return len(x) - pd.isna(x.values).sum()

    func.__name__ = "notna_size"
    return func


def percentile(n: int):
    """Percentile for pandas agg."""

-   def percentile_(x):
-       return np.nanpercentile(a=x.values, q=n)
+   def func(x: pd.Series):
+       with warnings.catch_warnings():
+           # this helps to prevent warning in case of all nans
+           warnings.filterwarnings(
+               message="All-NaN slice encountered",
+               action="ignore",
+           )
+           return np.nanpercentile(a=x.values, q=n)

-   percentile_.__name__ = f"percentile_{n}"
-   return percentile_
+   func.__name__ = f"percentile_{n}"
+   return func


MetricAggregationStatistics = Literal[
-   "median", "mean", "std", "percentile_5", "percentile_25", "percentile_75", "percentile_95"
+   "median", "mean", "std", "notna_size", "percentile_5", "percentile_25", "percentile_75", "percentile_95"
]

METRICS_AGGREGATION_MAP: Dict[MetricAggregationStatistics, Union[str, Callable]] = {
-   "median": "median",
-   "mean": "mean",
-   "std": "std",
+   "median": median_agg(),
+   "mean": mean_agg(),
+   "std": std_agg(),
+   "notna_size": notna_size_agg(),
    "percentile_5": percentile(5),
    "percentile_25": percentile(25),
    "percentile_75": percentile(75),
    "percentile_95": percentile(95),
}


-def aggregate_metrics_df(metrics_df: pd.DataFrame) -> Dict[str, float]:
+def aggregate_metrics_df(metrics_df: pd.DataFrame) -> Dict[str, Optional[float]]:
"""Aggregate metrics in :py:meth:`log_backtest_metrics` method.
Parameters
Expand All @@ -74,7 +140,7 @@ def aggregate_metrics_df(metrics_df: pd.DataFrame) -> Dict[str, float]:
if "fold_number" in metrics_df.columns:
metrics_dict = (
metrics_df.groupby("segment")
.mean()
.mean(numeric_only=False)
.reset_index()
.drop(["segment", "fold_number"], axis=1)
.apply(list(METRICS_AGGREGATION_MAP.values()))
@@ -85,10 +151,11 @@ def aggregate_metrics_df(metrics_df: pd.DataFrame) -> Dict[str, float]:
    else:
        metrics_dict = metrics_df.drop(["segment"], axis=1).apply(list(METRICS_AGGREGATION_MAP.values())).to_dict()

-   metrics_dict_wide = {
-       f"{metrics_key}_{statistics_key}": value
-       for metrics_key, values in metrics_dict.items()
-       for statistics_key, value in values.items()
-   }
+   cur_dict = {}
+   for metrics_key, values in metrics_dict.items():
+       for statistics_key, value in values.items():
+           new_key = f"{metrics_key}_{statistics_key}"
+           new_value = value if not pd.isna(value) else None
+           cur_dict[new_key] = new_value

-   return metrics_dict_wide
+   return cur_dict
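A small usage sketch of the reworked helper; the metric values are made up, and only numpy, pandas, and the function above are assumed:

import numpy as np
import pandas as pd

from etna.metrics.utils import aggregate_metrics_df

metrics_df = pd.DataFrame(
    {
        "segment": ["segment_1", "segment_2"],
        "MAE": [1.5, np.nan],  # partially missing
        "MSE": [np.nan, np.nan],  # fully missing
    }
)

result = aggregate_metrics_df(metrics_df)
# NaN-aware statistics skip missing segments: result["MAE_mean"] == 1.5
# Fully missing metrics now aggregate to None instead of NaN:
# result["MSE_mean"] is None, while result["MSE_notna_size"] == 0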
4 changes: 3 additions & 1 deletion etna/pipeline/base.py
@@ -856,7 +856,9 @@ def _get_backtest_metrics(self, aggregate_metrics: bool = False) -> pd.DataFrame
        metrics_df.sort_values(["segment", self._fold_column], inplace=True)

        if aggregate_metrics:
-           metrics_df = metrics_df.groupby("segment").mean().reset_index().drop(self._fold_column, axis=1)
+           metrics_df = (
+               metrics_df.groupby("segment").mean(numeric_only=False).reset_index().drop(self._fold_column, axis=1)
+           )

        return metrics_df

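For context, a toy illustration of the aggregation step above; the fold column is assumed to be named "fold_number" (the value of self._fold_column), and the MAE numbers are made up:

import numpy as np
import pandas as pd

metrics_df = pd.DataFrame(
    {
        "segment": ["segment_1", "segment_1", "segment_2", "segment_2"],
        "fold_number": [0, 1, 0, 1],
        "MAE": [1.0, 3.0, np.nan, np.nan],
    }
)

aggregated = metrics_df.groupby("segment").mean(numeric_only=False).reset_index().drop("fold_number", axis=1)
# segment_1 -> MAE 2.0 (NaN-skipping mean over folds)
# segment_2 -> MAE NaN (every fold is missing), later surfaced as None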
102 changes: 102 additions & 0 deletions tests/test_auto/conftest.py
@@ -1,11 +1,14 @@
from os import unlink

import numpy as np
import pandas as pd
import pytest
from optuna.storages import RDBStorage
from optuna.trial import TrialState
from typing_extensions import NamedTuple

from etna.auto.utils import config_hash
from etna.datasets import TSDataset
from etna.models import NaiveModel
from etna.pipeline import Pipeline

@@ -35,3 +38,102 @@ class Trial(NamedTuple):
    fail_trials = [Trial(user_attrs={}, state=TrialState.FAIL)]

    return complete_trials + complete_trials[:3] + fail_trials


@pytest.fixture
def ts_with_fold_missing_tail(random_seed) -> TSDataset:
    periods = 100
    df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
    df1["segment"] = "segment_1"
    df1["target"] = np.random.uniform(10, 20, size=periods)
    df1.loc[df1.index[-7:], "target"] = np.NaN

    df2 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
    df2["segment"] = "segment_2"
    df2["target"] = np.random.uniform(-15, 5, size=periods)
    df2.loc[df2.index[-7:], "target"] = np.NaN

    df = pd.concat([df1, df2]).reset_index(drop=True)
    df = TSDataset.to_dataset(df)
    tsds = TSDataset(df, freq="D")

    return tsds


@pytest.fixture
def ts_with_fold_missing_middle(random_seed) -> TSDataset:
    periods = 100
    df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
    df1["segment"] = "segment_1"
    df1["target"] = np.random.uniform(10, 20, size=periods)
    df1.loc[df1.index[-14:-7], "target"] = np.NaN

    df2 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
    df2["segment"] = "segment_2"
    df2["target"] = np.random.uniform(-15, 5, size=periods)
    df2.loc[df2.index[-14:-7], "target"] = np.NaN

    df = pd.concat([df1, df2]).reset_index(drop=True)
    df = TSDataset.to_dataset(df)
    tsds = TSDataset(df, freq="D")

    return tsds


@pytest.fixture
def ts_with_all_folds_missing_one_segment(random_seed) -> TSDataset:
    periods = 100
    df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
    df1["segment"] = "segment_1"
    df1["target"] = np.random.uniform(10, 20, size=periods)
    df1.loc[df1.index[-40:], "target"] = np.NaN

    df2 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
    df2["segment"] = "segment_2"
    df2["target"] = np.random.uniform(-15, 5, size=periods)

    df = pd.concat([df1, df2]).reset_index(drop=True)
    df = TSDataset.to_dataset(df)
    tsds = TSDataset(df, freq="D")

    return tsds


@pytest.fixture
def ts_with_all_folds_missing_all_segments(random_seed) -> TSDataset:
    periods = 100
    df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
    df1["segment"] = "segment_1"
    df1["target"] = np.random.uniform(10, 20, size=periods)
    df1.loc[df1.index[-40:], "target"] = np.NaN

    df2 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
    df2["segment"] = "segment_2"
    df2["target"] = np.random.uniform(-15, 5, size=periods)
    df2.loc[df2.index[-40:], "target"] = np.NaN

    df = pd.concat([df1, df2]).reset_index(drop=True)
    df = TSDataset.to_dataset(df)
    tsds = TSDataset(df, freq="D")

    return tsds


@pytest.fixture
def ts_with_few_missing(random_seed) -> TSDataset:
    periods = 100
    df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
    df1["segment"] = "segment_1"
    df1["target"] = np.random.uniform(10, 20, size=periods)
    df1.loc[df1.index[-4:-2], "target"] = np.NaN

    df2 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
    df2["segment"] = "segment_2"
    df2["target"] = np.random.uniform(-15, 5, size=periods)
    df2.loc[df2.index[-12:-10], "target"] = np.NaN

    df = pd.concat([df1, df2]).reset_index(drop=True)
    df = TSDataset.to_dataset(df)
    tsds = TSDataset(df, freq="D")

    return tsds
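How these fixtures get exercised lies outside this diff, but a hypothetical backtest sketch shows the intent; MAE's missing_mode parameter comes from #523 in the same release, and the backtest call signature is assumed from the etna 2.x API:

from etna.metrics import MAE
from etna.models import NaiveModel
from etna.pipeline import Pipeline

pipeline = Pipeline(model=NaiveModel(lag=1), horizon=7)
metrics_df, _, _ = pipeline.backtest(
    ts=ts_with_all_folds_missing_one_segment,
    metrics=[MAE(missing_mode="ignore")],
    n_folds=3,
)
# segment_1 has only NaN targets in every test fold, so its MAE is NaN in
# metrics_df and aggregates to None in aggregate_metrics_df.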
