Skip to content

Commit

Permalink
Create EventTransform (#78)
Browse files Browse the repository at this point in the history
  • Loading branch information
egoriyaa authored Sep 15, 2023
1 parent adfd18c commit c14b46d
Show file tree
Hide file tree
Showing 9 changed files with 439 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add error page into documentation ([#57](https://github.com/etna-team/etna/pull/57))
- Add `LimitTransform` ([#63](https://github.com/etna-team/etna/pull/63))
- Add config for Codecov to control CI ([#80](https://github.com/etna-team/etna/pull/80))
- Add `EventTransform` ([#78](https://github.com/etna-team/etna/pull/78))

### Changed
-
Expand Down
1 change: 1 addition & 0 deletions docs/source/api_reference/transforms.rst
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ Transforms to work with time-related features:
SpecialDaysTransform
HolidayTransform
FourierTransform
EventTransform

Shift transforms:

Expand Down
1 change: 1 addition & 0 deletions etna/transforms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
from etna.transforms.outliers import MedianOutliersTransform
from etna.transforms.outliers import PredictionIntervalOutliersTransform
from etna.transforms.timestamp import DateFlagsTransform
from etna.transforms.timestamp import EventTransform
from etna.transforms.timestamp import FourierTransform
from etna.transforms.timestamp import HolidayTransform
from etna.transforms.timestamp import SpecialDaysTransform
Expand Down
1 change: 1 addition & 0 deletions etna/transforms/timestamp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from etna.transforms.timestamp.date_flags import DateFlagsTransform
from etna.transforms.timestamp.event import EventTransform
from etna.transforms.timestamp.fourier import FourierTransform
from etna.transforms.timestamp.holiday import HolidayTransform
from etna.transforms.timestamp.special_days import SpecialDaysTransform
Expand Down
196 changes: 196 additions & 0 deletions etna/transforms/timestamp/event.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
from enum import Enum
from typing import Dict
from typing import List
from typing import Optional

import numpy as np
import pandas as pd

from etna.datasets import TSDataset
from etna.distributions import BaseDistribution
from etna.distributions import CategoricalDistribution
from etna.distributions import IntDistribution
from etna.transforms.base import IrreversibleTransform


class ImputerMode(str, Enum):
    """Enum with the supported modes of marking events.

    Members are plain strings, so a member compares equal to its value
    (e.g. ``ImputerMode.binary == "binary"``).
    """

    binary = "binary"
    distance = "distance"

    @classmethod
    def _missing_(cls, value):
        """Handle a lookup of a value that matches no member.

        Parameters
        ----------
        value:
            value that was passed to the enum constructor.

        Raises
        ------
        NotImplementedError:
            Always; the message lists every supported mode.
        """
        supported_modes = ", ".join(repr(member.value) for member in cls)
        raise NotImplementedError(f"{value} is not a valid {cls.__name__}. Supported modes: {supported_modes}")


class EventTransform(IrreversibleTransform):
    """EventTransform marks the timestamps right before and right after events of a binary column.

    Two columns are created from ``in_column``:

    * ``'{out_column}_pre'`` is filled on up to ``n_pre`` timestamps that precede an event;
    * ``'{out_column}_post'`` is filled on up to ``n_post`` timestamps that follow an event.

    The value written inside these windows depends on ``mode``:

    * In `'binary'` mode it is ``1``.
    * In `'distance'` mode it is :math:`1 / x`, where x is the number of rows between the current timestamp
      and the nearest event in the corresponding direction.

    Timestamps of the events themselves and timestamps outside of the windows get ``0``.

    Examples
    --------
    >>> from copy import deepcopy
    >>> import numpy as np
    >>> import pandas as pd
    >>> from etna.datasets import generate_const_df
    >>> from etna.datasets import TSDataset
    >>> from etna.transforms import EventTransform
    >>>
    >>> df = generate_const_df(start_time="2020-01-01", periods=5, freq="D", scale=1, n_segments=1)
    >>> df_exog = generate_const_df(start_time="2020-01-01", periods=10, freq="D", scale=1, n_segments=1)
    >>> df_exog.rename(columns={"target": "holiday"}, inplace=True)
    >>> df_exog["holiday"] = np.array([0, 0, 1, 0, 0, 0, 0, 1, 1, 0])
    >>> df = TSDataset.to_dataset(df)
    >>> df_exog = TSDataset.to_dataset(df_exog)
    >>> ts = TSDataset(df, freq="D", df_exog=df_exog, known_future="all")
    >>> transform = EventTransform(in_column='holiday', out_column='holiday', n_pre=1, n_post=1)
    >>> transform.fit_transform(deepcopy(ts))
    segment segment_0
    feature holiday holiday_post holiday_pre target
    timestamp
    2020-01-01 0 0.0 0.0 1.0
    2020-01-02 0 0.0 1.0 1.0
    2020-01-03 1 0.0 0.0 1.0
    2020-01-04 0 1.0 0.0 1.0
    2020-01-05 0 0.0 0.0 1.0
    >>> transform = EventTransform(in_column='holiday', out_column='holiday', n_pre=2, n_post=2, mode='distance')
    >>> transform.fit_transform(deepcopy(ts))
    segment segment_0
    feature holiday holiday_post holiday_pre target
    timestamp
    2020-01-01 0 0.0 0.5 1.0
    2020-01-02 0 0.0 1.0 1.0
    2020-01-03 1 0.0 0.0 1.0
    2020-01-04 0 1.0 0.0 1.0
    2020-01-05 0 0.5 0.0 1.0
    """

    def __init__(self, in_column: str, out_column: str, n_pre: int, n_post: int, mode: str = ImputerMode.binary):
        """
        Init EventTransform.

        Parameters
        ----------
        in_column:
            binary column with event indicator.
        out_column:
            base for creating out columns names - '{out_column}_pre' and '{out_column}_post'
        n_pre:
            size of the window before the event that is marked in the '{out_column}_pre' column.
        n_post:
            size of the window after the event that is marked in the '{out_column}_post' column.
        mode:
            mode of marking events:

            - `'binary'`: timestamps inside the window are marked with ``1``;

            - `'distance'`: timestamps inside the window are marked with ``1 / distance`` to the event;

        Raises
        ------
        ValueError:
            ``n_pre`` or ``n_post`` values are less than one.
        NotImplementedError:
            Given ``mode`` value is not supported.
        """
        # Window sizes are validated first, so nothing is initialized for invalid arguments.
        if any(window < 1 for window in (n_pre, n_post)):
            raise ValueError(f"`n_pre` and `n_post` must be greater than zero, given {n_pre} and {n_post}")

        super().__init__(required_features=[in_column])
        self.in_column = in_column
        self.out_column = out_column
        self.n_pre = n_pre
        self.n_post = n_post
        # Conversion to the enum also validates the value of ``mode``.
        self.mode = ImputerMode(mode)
        # Stays ``None`` until ``fit`` is called.
        self.in_column_regressor: Optional[bool] = None

    def fit(self, ts: TSDataset) -> "EventTransform":
        """Fit the transform.

        Remembers whether ``in_column`` is listed in ``ts.regressors`` and then runs the base class fit.

        Parameters
        ----------
        ts:
            dataset to fit the transform on.

        Returns
        -------
        :
            the fitted transform itself
        """
        self.in_column_regressor = self.in_column in ts.regressors
        super().fit(ts)
        return self

    def _fit(self, df: pd.DataFrame):
        """Do nothing; the method exists only for compatibility.

        Parameters
        ----------
        df:
            dataframe with data.
        """

    def _compute_event_column(self, df: pd.DataFrame, column: str, max_distance: int) -> pd.DataFrame:
        """Compute one of the event columns.

        Parameters
        ----------
        df:
            dataframe with the binary indicator values.
        column:
            suffix of the resulting column; ``"pre"`` searches for the nearest event in the rows below,
            any other value searches in the rows above.
        max_distance:
            largest distance (in rows) to the event that is still marked.

        Returns
        -------
        :
            float dataframe where ``in_column`` is renamed to ``'{out_column}_{column}'``
        """
        n_rows = len(df)
        n_columns = len(df.columns)

        # Every cell holds the 1-based position of its row.
        positions = df.copy()
        positions[:] = np.repeat((np.arange(n_rows) + 1).reshape(-1, 1), n_columns, axis=1)

        # Keep positions only on event rows, then spread them towards the rows that should see the event.
        event_positions = positions.copy()
        event_positions.mask(df != 1, None, inplace=True)
        if column == "pre":
            nearest_event = event_positions.bfill()
        else:
            nearest_event = event_positions.ffill()
        # Rows without an event in the searched direction fall back to their own position, i.e. zero gap.
        nearest_event = nearest_event.fillna(positions)
        gap = (nearest_event - positions).abs()

        # The marker has to be computed from the raw gaps, before they are zeroed out below.
        if self.mode == "binary":
            marker = 1
        else:
            marker = 1 / gap

        gap.mask(gap > max_distance, 0, inplace=True)
        inside_window = (gap >= 1) & (gap <= max_distance)
        result = gap.mask(inside_window, marker).astype(float)

        result.rename(columns={self.in_column: f"{self.out_column}_{column}"}, inplace=True, level="feature")
        return result

    def _transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add marked timestamps before and after events to the dataframe.

        Parameters
        ----------
        df:
            dataframe with data to transform.

        Returns
        -------
        :
            transformed dataframe

        Raises
        ------
        ValueError:
            Some values of the given dataframe are different from 0 and 1.
        """
        observed_values = set(df.values.reshape(-1))
        if not observed_values <= {0, 1}:
            raise ValueError("Input columns must be binary")

        before_events = self._compute_event_column(df, column="pre", max_distance=self.n_pre)
        after_events = self._compute_event_column(df, column="post", max_distance=self.n_post)
        return pd.concat([df, before_events, after_events], axis=1)

    def get_regressors_info(self) -> List[str]:
        """Return the list with regressors created by the transform.

        Returns
        -------
        :
            names of both created columns if ``in_column`` was a regressor during ``fit``, empty list otherwise

        Raises
        ------
        ValueError:
            The transform is not fitted yet.
        """
        if self.in_column_regressor is None:
            raise ValueError("Fit the transform to get the correct regressors info!")
        if not self.in_column_regressor:
            return []
        return [self.out_column + suffix for suffix in ("_pre", "_post")]

    def params_to_tune(self) -> Dict[str, BaseDistribution]:
        """Get default grid for tuning hyperparameters.

        This grid tunes parameters: ``n_pre``, ``n_post``, ``mode``.
        The upper bounds for ``n_pre`` and ``n_post`` are the current values of these parameters.
        Other parameters are expected to be set by the user.

        Returns
        -------
        :
            Grid to tune.
        """
        grid: Dict[str, BaseDistribution] = {}
        for name, upper_bound in (("n_pre", self.n_pre), ("n_post", self.n_post)):
            grid[name] = IntDistribution(low=1, high=upper_bound)
        grid["mode"] = CategoricalDistribution(["binary", "distance"])
        return grid


# Public API of this module: only the transform is listed, so ``ImputerMode`` is not star-exported.
__all__ = ["EventTransform"]
15 changes: 15 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -803,3 +803,18 @@ def total_level_constant_forecast_with_target_components(hierarchical_structure)
ts = TSDataset(df=df, freq="D", hierarchical_structure=hierarchical_structure)
ts.add_target_components(target_components_df=target_components_df)
return ts


@pytest.fixture
def ts_with_binary_exog() -> TSDataset:
    """Create a daily dataset with 3 segments and a random 0/1 exogenous column ``holiday``.

    The target part has 100 timestamps, the exogenous part has 10 timestamps more;
    all exogenous columns are declared as known in the future.
    """
    n_segments = 3
    target_periods = 100
    exog_periods = target_periods + 10

    target_long = generate_const_df(
        start_time="2020-01-01", periods=target_periods, freq="D", scale=1, n_segments=n_segments
    )
    exog_long = generate_const_df(
        start_time="2020-01-01", periods=exog_periods, freq="D", scale=1, n_segments=n_segments
    )
    exog_long.rename(columns={"target": "holiday"}, inplace=True)
    # One random indicator value per row of the long-format exogenous dataframe.
    exog_long["holiday"] = np.random.choice([0, 1], size=exog_periods * n_segments)

    return TSDataset(
        TSDataset.to_dataset(target_long),
        freq="D",
        df_exog=TSDataset.to_dataset(exog_long),
        known_future="all",
    )
35 changes: 35 additions & 0 deletions tests/test_transforms/test_inference/test_inverse_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from etna.transforms import DensityOutliersTransform
from etna.transforms import DeseasonalityTransform
from etna.transforms import DifferencingTransform
from etna.transforms import EventTransform
from etna.transforms import FilterFeaturesTransform
from etna.transforms import FourierTransform
from etna.transforms import GaleShapleyFeatureSelectionTransform
Expand Down Expand Up @@ -225,6 +226,11 @@ def _test_inverse_transform_train_subset_segments(self, ts, transform, segments)
(HolidayTransform(mode="category"), "regular_ts"),
(SpecialDaysTransform(), "regular_ts"),
(TimeFlagsTransform(), "regular_ts"),
(EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1), "ts_with_binary_exog"),
(
EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1, mode="distance"),
"ts_with_binary_exog",
),
],
)
def test_inverse_transform_train_subset_segments(self, transform, dataset_name, request):
Expand Down Expand Up @@ -436,6 +442,11 @@ def _test_inverse_transform_future_subset_segments(self, ts, transform, segments
(HolidayTransform(mode="category"), "regular_ts"),
(SpecialDaysTransform(), "regular_ts"),
(TimeFlagsTransform(), "regular_ts"),
(EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1), "ts_with_binary_exog"),
(
EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1, mode="distance"),
"ts_with_binary_exog",
),
],
)
def test_inverse_transform_future_subset_segments(self, transform, dataset_name, request):
Expand Down Expand Up @@ -669,6 +680,12 @@ def _test_inverse_transform_train_new_segments(self, ts, transform, train_segmen
"regular_ts",
{},
),
(EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1), "ts_with_binary_exog", {}),
(
EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1, mode="distance"),
"ts_with_binary_exog",
{},
),
],
)
def test_inverse_transform_train_new_segments(self, transform, dataset_name, expected_changes, request):
Expand Down Expand Up @@ -1005,6 +1022,12 @@ def _test_inverse_transform_future_new_segments(self, ts, transform, train_segme
"regular_ts",
{},
),
(EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1), "ts_with_binary_exog", {}),
(
EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1, mode="distance"),
"ts_with_binary_exog",
{},
),
],
)
def test_inverse_transform_future_new_segments(self, transform, dataset_name, expected_changes, request):
Expand Down Expand Up @@ -1493,6 +1516,12 @@ def _test_inverse_transform_future_with_target(
{},
),
(SpecialDaysTransform(), "regular_ts", {}),
(EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1), "ts_with_binary_exog", {}),
(
EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1, mode="distance"),
"ts_with_binary_exog",
{},
),
],
)
def test_inverse_transform_future_with_target(self, transform, dataset_name, expected_changes, request):
Expand Down Expand Up @@ -1920,6 +1949,12 @@ def _test_inverse_transform_future_without_target(
{},
),
(SpecialDaysTransform(), "regular_ts", {}),
(EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1), "ts_with_binary_exog", {}),
(
EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1, mode="distance"),
"ts_with_binary_exog",
{},
),
],
)
def test_inverse_transform_future_without_target(self, transform, dataset_name, expected_changes, request):
Expand Down
Loading

0 comments on commit c14b46d

Please sign in to comment.