Source code for etna.analysis.outliers.median_outliers

import math
import typing

import numpy as np
import pandas as pd

if typing.TYPE_CHECKING:
    from etna.datasets import TSDataset


def get_anomalies_median(
    ts: "TSDataset", in_column: str = "target", window_size: int = 10, alpha: float = 3
) -> typing.Dict[str, typing.List[pd.Timestamp]]:
    """
    Get point outliers in time series using median model (estimation model-based method).

    The values of each segment are split into consecutive, non-overlapping windows of
    ``window_size`` points (the last window may be shorter). A point is reported as an
    outlier if it deviates from the median of its window by strictly more than
    ``alpha * std``, where ``std`` is the standard deviation of the values in that window
    as computed by ``np.std`` with its default arguments.

    Parameters
    ----------
    ts:
        TSDataset with timeseries data
    in_column:
        name of the column in which the anomaly is searching
    window_size:
        number of points in the window, must be positive
    alpha:
        coefficient for determining the threshold

    Returns
    -------
    :
        dict of outliers in format {segment: [outliers_timestamps]}

    Raises
    ------
    ValueError:
        if ``window_size`` is not positive
    """
    # Fail early with a clear message: a zero window would otherwise surface as a
    # ZeroDivisionError and a negative one would silently report no outliers.
    if window_size <= 0:
        raise ValueError(f"window_size must be positive, got {window_size}")

    outliers_per_segment = {}
    for seg in ts.segments:
        segment_df = ts.df[seg].reset_index()
        values = segment_df[in_column].values
        timestamp = segment_df["timestamp"].values

        anomalies: typing.List[int] = []
        # Walk over the series in chunks of ``window_size``; slicing past the end of
        # the array is safe, so the last chunk is simply shorter.
        for left_border in range(0, len(values), window_size):
            window = values[left_border : left_border + window_size]
            med = np.median(window)
            std = np.std(window)
            # NOTE: np.median/np.std propagate NaN, so a window containing NaN
            # produces a NaN threshold and no point in it is flagged.
            diff = np.abs(window - med)
            # np.where yields positions inside the window; shift them to positions
            # in the whole series.
            anomalies.extend(np.where(diff > std * alpha)[0] + left_border)

        outliers_per_segment[seg] = [timestamp[i] for i in anomalies]
    return outliers_per_segment