diff --git a/CHANGELOG.md b/CHANGELOG.md
index ff88a71ca..43e02c68d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Add config for Codecov to control CI ([#80](https://github.com/etna-team/etna/pull/80))
 - Add `EventTransform` ([#78](https://github.com/etna-team/etna/pull/78))
 - `NaiveVariancePredictionIntervals` method for prediction quantiles estimation ([#109](https://github.com/etna-team/etna/pull/109))
+- Update interval metrics to work with arbitrary interval bounds ([#113](https://github.com/etna-team/etna/pull/113))
 
 ### Changed
 -
diff --git a/etna/metrics/intervals_metrics.py b/etna/metrics/intervals_metrics.py
index d8c2937dc..ebd2f4a84 100644
--- a/etna/metrics/intervals_metrics.py
+++ b/etna/metrics/intervals_metrics.py
@@ -1,9 +1,11 @@
 from typing import Dict
+from typing import Optional
 from typing import Sequence
 from typing import Tuple
 from typing import Union
 
 import numpy as np
+import pandas as pd
 
 from etna.datasets import TSDataset
 from etna.metrics.base import Metric
@@ -15,15 +17,30 @@ def dummy(y_true: ArrayLike, y_pred: ArrayLike) -> ArrayLike:
     return np.nan
 
 
-class _QuantileMetricMixin:
-    def _validate_tsdataset_quantiles(self, ts: TSDataset, quantiles: Sequence[float]) -> None:
-        """Check if quantiles presented in y_pred."""
-        features = set(ts.df.columns.get_level_values("feature"))
-        for quantile in quantiles:
-            assert f"target_{quantile:.4g}" in features, f"Quantile {quantile} is not presented in tsdataset."
+class _IntervalsMetricMixin:
+    def _validate_tsdataset_intervals(
+        self, ts: TSDataset, quantiles: Sequence[float], upper_name: Optional[str], lower_name: Optional[str]
+    ) -> None:
+        """Check that the interval borders are present in ``y_pred``."""
+        ts_intervals = set(ts.prediction_intervals_names)
+        borders_set = {upper_name, lower_name}
+        borders_presented = borders_set.issubset(ts_intervals)
 
-class Coverage(Metric, _QuantileMetricMixin):
+        quantiles_set = {f"target_{quantile:.4g}" for quantile in quantiles}
+        quantiles_presented = quantiles_set.issubset(ts_intervals)
+        quantiles_presented &= len(quantiles_set) > 0
+
+        if upper_name is not None and lower_name is not None:
+            if not borders_presented:
+                raise ValueError("Provided intervals borders names must be in dataset!")
+
+        else:
+            if not quantiles_presented:
+                raise ValueError("All quantiles must be presented in the dataset!")
+
+
+class Coverage(Metric, _IntervalsMetricMixin):
     """Coverage metric for prediction intervals - precenteage of samples in the interval ``[lower quantile, upper quantile]``.
 
     .. math::
@@ -32,10 +49,17 @@ class Coverage(Metric, _QuantileMetricMixin):
     Notes
     -----
     Works just if ``quantiles`` presented in ``y_pred``
+
+    When ``quantiles``, ``upper_name`` and ``lower_name`` are all set to ``None``, the 0.025 and 0.975 quantiles are used.
     """
 
     def __init__(
-        self, quantiles: Tuple[float, float] = (0.025, 0.975), mode: str = MetricAggregationMode.per_segment, **kwargs
+        self,
+        quantiles: Optional[Tuple[float, float]] = None,
+        mode: str = MetricAggregationMode.per_segment,
+        upper_name: Optional[str] = None,
+        lower_name: Optional[str] = None,
+        **kwargs,
     ):
         """Init metric.
 
@@ -45,11 +69,32 @@ def __init__(
             lower and upper quantiles
         mode: 'macro' or 'per-segment'
             metrics aggregation mode
+        upper_name:
+            name of column with upper border of the interval
+        lower_name:
+            name of column with lower border of the interval
         kwargs:
             metric's computation arguments
         """
+        if (lower_name is None) ^ (upper_name is None):
+            raise ValueError("Both `lower_name` and `upper_name` must be set if using names to specify borders!")
+
+        if not (quantiles is None or lower_name is None):
+            raise ValueError(
+                "Both `quantiles` and border names are specified. Use only one way to set interval borders!"
+            )
+
+        if quantiles is not None and len(quantiles) != 2:
+            raise ValueError(f"Expected tuple with two values for `quantiles` parameter, got {len(quantiles)}")
+
+        # default behavior
+        if quantiles is None and lower_name is None:
+            quantiles = (0.025, 0.975)
+
         super().__init__(mode=mode, metric_fn=dummy, **kwargs)
-        self.quantiles = quantiles
+        self.quantiles = sorted(quantiles if quantiles is not None else tuple())
+        self.upper_name = upper_name
+        self.lower_name = lower_name
 
     def __call__(self, y_true: TSDataset, y_pred: TSDataset) -> Union[float, Dict[str, float]]:
         """
@@ -74,11 +119,23 @@ def __call__(self, y_true: TSDataset, y_pred: TSDataset) -> Union[float, Dict[st
         self._validate_target_columns(y_true=y_true, y_pred=y_pred)
         self._validate_index(y_true=y_true, y_pred=y_pred)
         self._validate_nans(y_true=y_true, y_pred=y_pred)
-        self._validate_tsdataset_quantiles(ts=y_pred, quantiles=self.quantiles)
+        self._validate_tsdataset_intervals(
+            ts=y_pred, quantiles=self.quantiles, lower_name=self.lower_name, upper_name=self.upper_name
+        )
+
+        if self.upper_name is not None:
+            lower_border = self.lower_name
+            upper_border = self.upper_name
+
+        else:
+            lower_border = f"target_{self.quantiles[0]:.4g}"
+            upper_border = f"target_{self.quantiles[1]:.4g}"
 
         df_true = y_true[:, :, "target"].sort_index(axis=1)
-        df_pred_lower = y_pred[:, :, f"target_{self.quantiles[0]:.4g}"].sort_index(axis=1)
-        df_pred_upper = y_pred[:, :, f"target_{self.quantiles[1]:.4g}"].sort_index(axis=1)
+
+        intervals_df: pd.DataFrame = y_pred.get_prediction_intervals()
+        df_pred_lower = intervals_df.loc[:, pd.IndexSlice[:, lower_border]].sort_index(axis=1)
+        df_pred_upper = intervals_df.loc[:, pd.IndexSlice[:, upper_border]].sort_index(axis=1)
 
         segments = df_true.columns.get_level_values("segment").unique()
 
@@ -96,7 +153,7 @@ def greater_is_better(self) -> None:
         return None
 
 
-class Width(Metric, _QuantileMetricMixin):
+class Width(Metric, _IntervalsMetricMixin):
     """Mean width of prediction intervals.
 
     .. math::
@@ -104,11 +161,18 @@ class Width(Metric, _QuantileMetricMixin):
 
     Notes
     -----
-    Works just if quantiles presented in ``y_pred``
+    Works only if quantiles are presented in ``y_pred``.
+
+    When ``quantiles``, ``upper_name`` and ``lower_name`` are all set to ``None``, the 0.025 and 0.975 quantiles are used.
     """
 
     def __init__(
-        self, quantiles: Tuple[float, float] = (0.025, 0.975), mode: str = MetricAggregationMode.per_segment, **kwargs
+        self,
+        quantiles: Optional[Tuple[float, float]] = None,
+        mode: str = MetricAggregationMode.per_segment,
+        upper_name: Optional[str] = None,
+        lower_name: Optional[str] = None,
+        **kwargs,
     ):
         """Init metric.
 
@@ -118,11 +182,32 @@ def __init__(
             lower and upper quantiles
         mode: 'macro' or 'per-segment'
            metrics aggregation mode
+        upper_name:
+            name of column with upper border of the interval
+        lower_name:
+            name of column with lower border of the interval
         kwargs:
             metric's computation arguments
         """
+        if (lower_name is None) ^ (upper_name is None):
+            raise ValueError("Both `lower_name` and `upper_name` must be set if using names to specify borders!")
+
+        if not (quantiles is None or lower_name is None):
+            raise ValueError(
+                "Both `quantiles` and border names are specified. Use only one way to set interval borders!"
+            )
+
+        if quantiles is not None and len(quantiles) != 2:
+            raise ValueError(f"Expected tuple with two values for `quantiles` parameter, got {len(quantiles)}")
+
+        # default behavior
+        if quantiles is None and lower_name is None:
+            quantiles = (0.025, 0.975)
+
         super().__init__(mode=mode, metric_fn=dummy, **kwargs)
-        self.quantiles = quantiles
+        self.quantiles = sorted(quantiles if quantiles is not None else tuple())
+        self.upper_name = upper_name
+        self.lower_name = lower_name
 
     def __call__(self, y_true: TSDataset, y_pred: TSDataset) -> Union[float, Dict[str, float]]:
         """
@@ -147,11 +232,23 @@ def __call__(self, y_true: TSDataset, y_pred: TSDataset) -> Union[float, Dict[st
         self._validate_target_columns(y_true=y_true, y_pred=y_pred)
         self._validate_index(y_true=y_true, y_pred=y_pred)
         self._validate_nans(y_true=y_true, y_pred=y_pred)
-        self._validate_tsdataset_quantiles(ts=y_pred, quantiles=self.quantiles)
+        self._validate_tsdataset_intervals(
+            ts=y_pred, quantiles=self.quantiles, lower_name=self.lower_name, upper_name=self.upper_name
+        )
+
+        if self.upper_name is not None:
+            lower_border = self.lower_name
+            upper_border = self.upper_name
+
+        else:
+            lower_border = f"target_{self.quantiles[0]:.4g}"
+            upper_border = f"target_{self.quantiles[1]:.4g}"
 
         df_true = y_true[:, :, "target"].sort_index(axis=1)
-        df_pred_lower = y_pred[:, :, f"target_{self.quantiles[0]:.4g}"].sort_index(axis=1)
-        df_pred_upper = y_pred[:, :, f"target_{self.quantiles[1]:.4g}"].sort_index(axis=1)
+
+        intervals_df: pd.DataFrame = y_pred.get_prediction_intervals()
+        df_pred_lower = intervals_df.loc[:, pd.IndexSlice[:, lower_border]].sort_index(axis=1)
+        df_pred_upper = intervals_df.loc[:, pd.IndexSlice[:, upper_border]].sort_index(axis=1)
 
         segments = df_true.columns.get_level_values("segment").unique()
 
diff --git a/tests/test_metrics/test_intervals_metrics.py b/tests/test_metrics/test_intervals_metrics.py
index 9595e0d53..3a372f7ca 100644
--- a/tests/test_metrics/test_intervals_metrics.py
+++ b/tests/test_metrics/test_intervals_metrics.py
@@ -1,3 +1,4 @@
+import pandas as pd
 import pytest
 
 from etna.datasets import TSDataset
@@ -5,34 +6,120 @@
 from etna.metrics import Width
 
 
+def get_datasets_with_intervals(df, lower_name, upper_name):
+    tsdf = TSDataset.to_dataset(df)
+    ts_train = TSDataset(df=tsdf, freq="H")
+
+    ts_test = TSDataset(df=tsdf.copy(), freq="H")
+
+    intervals_df = df.rename({"target": lower_name}, axis=1)
+    intervals_df[upper_name] = intervals_df[lower_name]
+    intervals_df = TSDataset.to_dataset(df=intervals_df)
+
+    intervals_df.loc[:, pd.IndexSlice["segment_1", lower_name]] = (
+        intervals_df.loc[:, pd.IndexSlice["segment_1", lower_name]] + 1
+    )
+    intervals_df.loc[:, pd.IndexSlice["segment_1", upper_name]] = (
+        intervals_df.loc[:, pd.IndexSlice["segment_1", upper_name]] + 2
+    )
+
+    ts_test.add_prediction_intervals(prediction_intervals_df=intervals_df)
+    return ts_train, ts_test
+
+
 @pytest.fixture
 def tsdataset_with_zero_width_quantiles(example_df):
-    ts_train = TSDataset.to_dataset(example_df)
-    ts_train = TSDataset(ts_train, freq="H")
-    example_df["target_0.025"] = example_df["target"]
-    example_df["target_0.975"] = example_df["target"]
-    ts_test = TSDataset.to_dataset(example_df)
-    ts_test = TSDataset(ts_test, freq="H")
+    df = TSDataset.to_dataset(example_df)
+    ts_train = TSDataset(df, freq="H")
+
+    ts_test = TSDataset(df.copy(), freq="H")
+
+    intervals_df = pd.concat(
+        [
+            df.rename({"target": "target_0.025"}, axis=1, level="feature"),
+            df.rename({"target": "target_0.975"}, axis=1, level="feature"),
+        ],
+        axis=1,
+    )
+    ts_test.add_prediction_intervals(prediction_intervals_df=intervals_df)
+
     return ts_train, ts_test
 
 
 @pytest.fixture
-def tsdataset_with_different_width_and_shifted_quantiles(example_df):
+def tsdataset_with_lower_upper_borders(example_df):
+    return get_datasets_with_intervals(df=example_df, lower_name="target_lower", upper_name="target_upper")
+
 
-    ts_train = TSDataset.to_dataset(example_df)
-    ts_train = TSDataset(ts_train, freq="H")
-    example_df["target_0.025"] = example_df["target"]
-    example_df["target_0.975"] = example_df["target"]
+@pytest.fixture
+def tsdataset_with_quantiles_and_lower_upper_borders(example_df):
+    train_ts, test_ts = get_datasets_with_intervals(df=example_df, lower_name="target_lower", upper_name="target_upper")
 
-    segment_one_index = example_df[lambda x: x.segment == "segment_1"].index
+    intervals_df = test_ts.get_prediction_intervals()
+    test_ts.drop_prediction_intervals()
 
-    example_df.loc[segment_one_index, "target_0.025"] = example_df.loc[segment_one_index, "target_0.025"] + 1
-    example_df.loc[segment_one_index, "target_0.975"] = example_df.loc[segment_one_index, "target_0.975"] + 2
+    intervals_df = pd.concat(
+        [
+            intervals_df,
+            intervals_df.rename(
+                {"target_lower": "target_0.025", "target_upper": "target_0.975"}, axis=1, level="feature"
+            ),
+        ],
+        axis=1,
+    )
 
-    ts_test = TSDataset.to_dataset(example_df)
-    ts_test = TSDataset(ts_test, freq="H")
-    return ts_train, ts_test
+    test_ts.add_prediction_intervals(prediction_intervals_df=intervals_df)
+    return train_ts, test_ts
+
+
+@pytest.mark.parametrize("metric_class", (Coverage, Width))
+@pytest.mark.parametrize("upper_name,lower_name", ((None, "name"), ("name", None)))
+def test_single_border_name_set_error(metric_class, upper_name, lower_name):
+    with pytest.raises(ValueError, match="Both `lower_name` and `upper_name` must be set"):
+        _ = metric_class(lower_name=lower_name, upper_name=upper_name)
+
+
+@pytest.mark.parametrize("metric_class", (Coverage, Width))
+@pytest.mark.parametrize("quantiles,upper_name,lower_name", (((0.025, 0.975), "target_upper", "target_lower"),))
+def test_interval_names_and_quantiles_set_error(
+    tsdataset_with_lower_upper_borders, metric_class, quantiles, upper_name, lower_name
+):
+    with pytest.raises(ValueError, match="Both `quantiles` and border names are specified"):
+        _ = metric_class(quantiles=quantiles, lower_name=lower_name, upper_name=upper_name)
+
+
+@pytest.mark.parametrize("metric_class", (Coverage, Width))
+@pytest.mark.parametrize(
+    "upper_name,lower_name", (("name1", "name2"), ("target_upper", "name"), ("name", "target_lower"))
+)
+def test_interval_names_not_in_dataset_error(tsdataset_with_lower_upper_borders, metric_class, upper_name, lower_name):
+    train_ts, test_ts = tsdataset_with_lower_upper_borders
+    metric = metric_class(quantiles=None, lower_name=lower_name, upper_name=upper_name)
+    with pytest.raises(ValueError, match="Provided intervals borders names must be in dataset!"):
+        _ = metric(y_true=train_ts, y_pred=test_ts)
+
+
+@pytest.mark.parametrize("metric_class", (Coverage, Width))
+@pytest.mark.parametrize("quantiles", ((0.025,), tuple(), (0.1, 0.2, 0.3)))
+def test_quantiles_invalid_size_error(tsdataset_with_lower_upper_borders, metric_class, quantiles):
+    with pytest.raises(ValueError, match="Expected tuple with two values"):
+        _ = metric_class(quantiles=quantiles, lower_name=None, upper_name=None)
+
+
+@pytest.mark.parametrize("metric_class", (Coverage, Width))
+@pytest.mark.parametrize("quantiles", ((0.025, 0.975), (0.1, 0.5)))
+def test_quantiles_not_presented_error(tsdataset_with_lower_upper_borders, metric_class, quantiles):
+    train_ts, test_ts = tsdataset_with_lower_upper_borders
+    metric = metric_class(quantiles=quantiles, lower_name=None, upper_name=None)
+    with pytest.raises(ValueError, match="All quantiles must be presented in the dataset!"):
+        _ = metric(y_true=train_ts, y_pred=test_ts)
+
+
+@pytest.mark.parametrize("metric_class", (Coverage, Width))
+def test_no_intervals_error(example_tsds, metric_class):
+    with pytest.raises(ValueError, match="All quantiles must be presented in the dataset!"):
+        _ = metric_class()(y_true=example_tsds, y_pred=example_tsds)
 
 
 def test_width_metric_with_zero_width_quantiles(tsdataset_with_zero_width_quantiles):
@@ -45,23 +132,47 @@ def test_width_metric_with_zero_width_quantiles(tsdataset_with_zero_width_quanti
         assert width_metric[segment] == expected_metric
 
 
-def test_width_metric_with_different_width_and_shifted_quantiles(tsdataset_with_different_width_and_shifted_quantiles):
-    ts_train, ts_test = tsdataset_with_different_width_and_shifted_quantiles
+@pytest.mark.parametrize(
+    "quantiles,lower_name,upper_name",
+    (
+        (None, "target_0.025", "target_0.975"),
+        (None, "target_lower", "target_upper"),
+        ((0.025, 0.975), None, None),
+        (None, None, None),
+    ),
+)
+def test_width_metric_with_different_width_and_shifted_quantiles(
+    tsdataset_with_quantiles_and_lower_upper_borders, quantiles, lower_name, upper_name
+):
+    ts_train, ts_test = tsdataset_with_quantiles_and_lower_upper_borders
     expected_metric = {"segment_1": 1.0, "segment_2": 0.0}
-    width_metric = Width(mode="per-segment")(ts_train, ts_test)
+    width_metric = Width(mode="per-segment", quantiles=quantiles, lower_name=lower_name, upper_name=upper_name)(
+        ts_train, ts_test
+    )
 
     for segment in width_metric:
         assert width_metric[segment] == expected_metric[segment]
 
 
+@pytest.mark.parametrize(
+    "quantiles,lower_name,upper_name",
+    (
+        (None, "target_0.025", "target_0.975"),
+        (None, "target_lower", "target_upper"),
+        ((0.025, 0.975), None, None),
+        (None, None, None),
+    ),
+)
 def test_coverage_metric_with_different_width_and_shifted_quantiles(
-    tsdataset_with_different_width_and_shifted_quantiles,
+    tsdataset_with_quantiles_and_lower_upper_borders, quantiles, lower_name, upper_name
 ):
-    ts_train, ts_test = tsdataset_with_different_width_and_shifted_quantiles
+    ts_train, ts_test = tsdataset_with_quantiles_and_lower_upper_borders
     expected_metric = {"segment_1": 0.0, "segment_2": 1.0}
-    coverage_metric = Coverage(mode="per-segment")(ts_train, ts_test)
+    coverage_metric = Coverage(mode="per-segment", quantiles=quantiles, lower_name=lower_name, upper_name=upper_name)(
+        ts_train, ts_test
+    )
 
     for segment in coverage_metric:
         assert coverage_metric[segment] == expected_metric[segment]
@@ -70,7 +181,7 @@ def test_coverage_metric_with_different_width_and_shifted_quantiles(
 
 
 @pytest.mark.parametrize("metric", [Coverage(quantiles=(0.1, 0.3)), Width(quantiles=(0.1, 0.3))])
 def test_using_not_presented_quantiles(metric, tsdataset_with_zero_width_quantiles):
     ts_train, ts_test = tsdataset_with_zero_width_quantiles
-    with pytest.raises(AssertionError, match="Quantile .* is not presented in tsdataset."):
+    with pytest.raises(ValueError, match="All quantiles must be presented in the dataset!"):
         _ = metric(ts_train, ts_test)
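
Usage sketch (editor's addition, not part of the patch): the snippet below shows the two ways of pointing the updated `Coverage` and `Width` metrics at interval borders, using only the API visible in this diff (`TSDataset.to_dataset`, `add_prediction_intervals`, and the new `lower_name`/`upper_name` parameters). The toy data and the border names `target_lower`/`target_upper` are illustrative, not part of the library.

```python
import numpy as np
import pandas as pd

from etna.datasets import TSDataset
from etna.metrics import Coverage
from etna.metrics import Width

# Toy long-format data with two segments (illustrative only).
timestamps = pd.date_range("2021-01-01", periods=24, freq="H")
df = pd.concat(
    [
        pd.DataFrame({"timestamp": timestamps, "segment": "segment_1", "target": np.arange(24, dtype=float)}),
        pd.DataFrame({"timestamp": timestamps, "segment": "segment_2", "target": np.arange(24, dtype=float) * 2}),
    ]
)
wide_df = TSDataset.to_dataset(df)
ts_true = TSDataset(df=wide_df, freq="H")
ts_pred = TSDataset(df=wide_df.copy(), freq="H")

# Register arbitrary interval borders (target +- 1) on the "forecast" dataset.
intervals_df = pd.concat(
    [
        wide_df.rename({"target": "target_lower"}, axis=1, level="feature") - 1,
        wide_df.rename({"target": "target_upper"}, axis=1, level="feature") + 1,
    ],
    axis=1,
)
ts_pred.add_prediction_intervals(prediction_intervals_df=intervals_df)

# New way: reference the interval borders by column name.
coverage = Coverage(lower_name="target_lower", upper_name="target_upper", mode="per-segment")
width = Width(lower_name="target_lower", upper_name="target_upper", mode="per-segment")
print(coverage(y_true=ts_true, y_pred=ts_pred))  # 1.0 per segment: every true value lies inside the band
print(width(y_true=ts_true, y_pred=ts_pred))  # 2.0 per segment: constant band width

# Old way: quantile columns, e.g. Coverage(quantiles=(0.025, 0.975)), provided
# that "target_0.025"/"target_0.975" are present among the prediction intervals.
```

Passing both `quantiles` and border names raises a `ValueError`, as does setting only one of `lower_name`/`upper_name`; with everything left as `None` the metrics fall back to the 0.025 and 0.975 quantiles.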