Skip to content

Commit

Permalink
Improve sample_acf and sample_pacf plots (#1004)
Browse files Browse the repository at this point in the history
* improve acf and pacf plots

* update EDA notebook

* fix comments

* fix test

* some fix

* fix moment with NaN

* fix NaN moment and changelog

* Modify warning ignore instruction, add autoclose fixture for plots

* Fix pacf warning

* fix in docs

Co-authored-by: ext.ytarasyuk <[email protected]>
Co-authored-by: d.a.bunin <[email protected]>
  • Loading branch information
3 people authored Nov 25, 2022
1 parent 554d4ea commit cfbfb01
Show file tree
Hide file tree
Showing 6 changed files with 167 additions and 435 deletions.
4 changes: 2 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
-
- Add python 3.10 support ([#1005](https://github.com/tinkoff-ai/etna/pull/1005))
-
-
- Add `plot_change_points_interactive` ([#988](https://github.com/tinkoff-ai/etna/pull/988))
- Add `experimental` module with `TimeSeriesBinaryClassifier` and `PredictabilityAnalyzer` ([#985](https://github.com/tinkoff-ai/etna/pull/985))
- Inference track results: add `predict` method to pipelines, teach some models to work with context, change hierarchy of base models, update notebook examples ([#979](https://github.com/tinkoff-ai/etna/pull/979))
Expand All @@ -21,7 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed
-
- Change returned model in get_model of BATSModel, TBATSModel ([#987](https://github.com/tinkoff-ai/etna/pull/987))
-
- Add `acf_plot`, deprecate `sample_acf_plot` and `sample_pacf_plot` ([#1004](https://github.com/tinkoff-ai/etna/pull/1004))
-
- Change returned model in `get_model` of `HoltWintersModel`, `HoltModel`, `SimpleExpSmoothingModel` ([#986](https://github.com/tinkoff-ai/etna/pull/986))
-
Expand Down
1 change: 1 addition & 0 deletions etna/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from etna.analysis.eda_utils import SeasonalPlotAggregation
from etna.analysis.eda_utils import SeasonalPlotAlignment
from etna.analysis.eda_utils import SeasonalPlotCycle
from etna.analysis.eda_utils import acf_plot
from etna.analysis.eda_utils import cross_corr_plot
from etna.analysis.eda_utils import distribution_plot
from etna.analysis.eda_utils import prediction_actual_scatter_plot
Expand Down
114 changes: 82 additions & 32 deletions etna/analysis/eda_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,21 +166,28 @@ def cross_corr_plot(
ax[i].xaxis.set_major_locator(MaxNLocator(integer=True))


def sample_acf_plot(
def acf_plot(
    ts: "TSDataset",
    n_segments: int = 10,
    lags: int = 21,
    partial: bool = False,
    columns_num: int = 2,
    segments: Optional[List[str]] = None,
    figsize: Tuple[int, int] = (10, 5),
):
    """
    Autocorrelation and partial autocorrelation plot for multiple timeseries.

    Notes
    -----
    `Definition of autocorrelation <https://en.wikipedia.org/wiki/Autocorrelation>`_.

    `Definition of partial autocorrelation <https://en.wikipedia.org/wiki/Partial_autocorrelation_function>`_.

    * If ``partial=False`` function works with NaNs at any place of the time-series.

    * If ``partial=True`` function works only with NaNs at the edges of the time-series and fails if there are NaNs inside it.

    Parameters
    ----------
    ts:
        TSDataset with timeseries data
    n_segments:
        number of random segments to plot, used only if ``segments`` is not given
    lags:
        number of timeseries shifts for autocorrelation
    partial:
        if True plot partial autocorrelation, otherwise plot autocorrelation
    columns_num:
        number of columns in subplots
    segments:
        segments to plot
    figsize:
        size of the figure per subplot with one segment in inches

    Raises
    ------
    ValueError:
        If partial=True and there is a NaN in the middle of the time series
    """
    if segments is None:
        # No explicit selection: draw a random sample of at most ``n_segments`` segments.
        exist_segments = sorted(ts.segments)
        chosen_segments = np.random.choice(exist_segments, size=min(len(exist_segments), n_segments), replace=False)
        segments = list(chosen_segments)

    title = "Partial Autocorrelation" if partial else "Autocorrelation"

    fig, ax = prepare_axes(num_plots=len(segments), columns_num=columns_num, figsize=figsize)
    fig.suptitle(title, fontsize=16)

    df = ts.to_pandas()

    for i, name in enumerate(segments):
        # Resetting the index makes the labels returned by first/last_valid_index
        # coincide with positions in ``values``.
        df_slice = df[name].reset_index()["target"]
        values = df_slice.values
        if partial:
            # for partial autocorrelation remove NaN from the beginning and end of the series
            begin = df_slice.first_valid_index()
            end = df_slice.last_valid_index()
            # ``end`` is the position of the last valid value, so the slice must be
            # inclusive; an all-NaN series (both bounds are None) is kept as is and
            # rejected by the check below.
            x = values if begin is None else values[begin : end + 1]
            if np.isnan(x).any():
                raise ValueError("There is a NaN in the middle of the time series!")
            plot_pacf(x=x, ax=ax[i], lags=lags)
        else:
            plot_acf(x=values, ax=ax[i], lags=lags, missing="conservative")

        ax[i].set_title(name)
        ax[i].grid()

    plt.show()


def sample_acf_plot(
    ts: "TSDataset",
    n_segments: int = 10,
    lags: int = 21,
    segments: Optional[List[str]] = None,
    figsize: Tuple[int, int] = (10, 5),
):
    """
    Autocorrelation plot for multiple timeseries.

    Deprecated: delegates to ``acf_plot`` with ``partial=False`` and emits a ``DeprecationWarning``.

    Notes
    -----
    `Definition of autocorrelation <https://en.wikipedia.org/wiki/Autocorrelation>`_.

    Parameters
    ----------
    ts:
        TSDataset with timeseries data
    n_segments:
        number of random segments to plot
    lags:
        number of timeseries shifts for autocorrelation
    segments:
        segments to plot
    figsize:
        size of the figure per subplot with one segment in inches
    """
    # Warn before delegating so the deprecation is reported even if plotting fails;
    # stacklevel=2 attributes the warning to the caller rather than to this module.
    warnings.warn(
        "DeprecationWarning: This function is deprecated and will be removed in etna=2.0; Please use acf_plot instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    acf_plot(ts=ts, n_segments=n_segments, lags=lags, segments=segments, figsize=figsize, partial=False)


def sample_pacf_plot(
ts: "TSDataset",
n_segments: int = 10,
Expand Down Expand Up @@ -240,23 +302,11 @@ def sample_pacf_plot(
figsize:
size of the figure per subplot with one segment in inches
"""
if segments is None:
segments = sorted(ts.segments)

k = min(n_segments, len(segments))
columns_num = min(2, k)
rows_num = math.ceil(k / columns_num)

figsize = (figsize[0] * columns_num, figsize[1] * rows_num)
fig, ax = plt.subplots(rows_num, columns_num, figsize=figsize, constrained_layout=True, squeeze=False)
ax = ax.ravel()
fig.suptitle("Partial Autocorrelation", fontsize=16)
for i, name in enumerate(sorted(np.random.choice(segments, size=k, replace=False))):
df_slice = ts[:, name, :][name]
plot_pacf(x=df_slice["target"].values, ax=ax[i], lags=lags)
ax[i].set_title(name)
ax[i].grid()
plt.show()
acf_plot(ts=ts, n_segments=n_segments, lags=lags, segments=segments, figsize=figsize, partial=True)
warnings.warn(
"DeprecationWarning: This function is deprecated and will be removed in etna=2.0; Please use acf_plot instead.",
DeprecationWarning,
)


def distribution_plot(
Expand Down
Loading

1 comment on commit cfbfb01

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.