From 8b4faa99150e16c072990c4997f39b720a06d91e Mon Sep 17 00:00:00 2001 From: Thomas Bury Date: Sat, 18 May 2024 21:13:46 -0400 Subject: [PATCH] add functions to compute entropy --- ewstools/core.py | 116 +++++++++++++++++++++++++++++++++++++---- tests/test_ewstools.py | 28 ++++++++-- 2 files changed, 130 insertions(+), 14 deletions(-) diff --git a/ewstools/core.py b/ewstools/core.py index 47831bb..739f2f6 100644 --- a/ewstools/core.py +++ b/ewstools/core.py @@ -54,6 +54,7 @@ import plotly.graph_objects as go from plotly.subplots import make_subplots +import EntropyHub as EH # For deprecating old functions import deprecation @@ -66,7 +67,8 @@ class TimeSeries: """ - Univariate time series data on which to compute early warning signals. + Class to hold univariate time series data on which to + compute early warning signals. Parameters ---------- @@ -116,7 +118,7 @@ def __init__(self, data, transition=None): def detrend(self, method="Gaussian", bandwidth=0.2, span=0.2): """ Detrend the time series using a chosen method. - Add column to the dataframe for 'smoothing' and 'residuals' + Output is stored in the self.state dataframe. Parameters ---------- @@ -182,8 +184,7 @@ def compute_var(self, rolling_window=0.25): Compute variance over a rolling window. If residuals have not been computed, computation will be performed over state variable. - - Put into 'ews' dataframe + Output is stored in the self.ews dataframe. Parameters ---------- @@ -224,8 +225,8 @@ def compute_std(self, rolling_window=0.25): Compute standard deviation over a rolling window. If residuals have not been computed, computation will be performed over state variable. + Output is stored in the self.ews dataframe. - Put into 'ews' dataframe Parameters ---------- @@ -268,8 +269,7 @@ def compute_cv(self, rolling_window=0.25): mean of the state variable. If residuals have not been computed, computation will be performed over state variable. - - Put into 'ews' dataframe + Output is stored in the self.ews dataframe. Parameters ---------- @@ -313,7 +313,10 @@ def compute_cv(self, rolling_window=0.25): def compute_auto(self, rolling_window=0.25, lag=1): """ - Compute autocorrelation over a rolling window. Add to dataframe. + Compute autocorrelation at a give lag over a rolling window. + If residuals have not been computed, computation will be + performed over state variable. + Output is stored in the self.ews dataframe. Parameters ---------- @@ -364,8 +367,8 @@ def compute_skew(self, rolling_window=0.25): Compute skew over a rolling window. If residuals have not been computed, computation will be performed over state variable. + Output is stored in the self.ews dataframe. - Add to dataframe. Parameters ---------- @@ -406,8 +409,8 @@ def compute_kurt(self, rolling_window=0.25): Compute kurtosis over a rolling window. If residuals have not been computed, computation will be performed over state variable. + Output is stored in the self.ews dataframe. - Add to dataframe. Parameters ---------- @@ -443,6 +446,99 @@ def compute_kurt(self, rolling_window=0.25): self.ews["kurtosis"] = kurt_values + def compute_entropy(self, rolling_window=0.25, method="sample", **kwargs): + """ + Compute entropy using a given method over a rolling window. + Uses EntropyHub https://www.entropyhub.xyz/Home.html. + If residuals have not been computed, computation will be + performed over state variable. + Output is stored in the self.ews dataframe. + + Parameters + ---------- + rolling_window : float + Length of rolling window used to compute variance. Can be specified + as an absolute value or as a proportion of the length of the + data being analysed. Default is 0.25. + method : str + The method by which to compute entropy. Options include 'sample', + 'approximate', and 'kolmogorov' + **kwargs + Keyword arguments for the EntropyHub function + + Returns + ------- + None. + + """ + + # Get time series data prior to transition + if self.transition: + df_pre = self.state[self.state.index <= self.transition] + else: + df_pre = self.state + + # Get absolute size of rollling window if given as a proportion + if 0 < rolling_window <= 1: + rw_absolute = int(rolling_window * len(df_pre)) + else: + rw_absolute = rolling_window + + # Get data on which to compute entropy + if "residuals" in df_pre.columns: + ts_vals = df_pre["residuals"] + else: + ts_vals = df_pre["state"] + + # Compute sample entropy + if method == "sample": + + def entropy_func(series): + Samp, A, B = EH.SampEn(series.values, **kwargs) + return Samp + + # Compute approxiamte entropy + if method == "approximate": + + def entropy_func(series): + Ap, Phi = EH.ApEn(series.values, **kwargs) + return Ap + + # Compute komogorov entropy + if method == "kolmogorov": + + def entropy_func(series): + K2, Ci = EH.K2En(series.values, **kwargs) + return K2 + + list_times = [] + list_entropy = [] + # Set up rolling window (cannot use pandas.rolling as multi-output fn) + for k in np.arange(0, len(ts_vals) - (rw_absolute - 1)): + # Select subset of series contained in window + window_series = ts_vals.iloc[k : k + rw_absolute] + # Assign the time value for the metrics (right end point of window) + t_point = ts_vals.index[k + (rw_absolute - 1)] + # Compute entropy (value for each channel) + entropy = entropy_func(window_series) + + list_times.append(t_point) + list_entropy.append(entropy) + + ar_entropy = np.array(list_entropy) + + df_entropy = pd.DataFrame() + df_entropy["time"] = list_times + num_embedding_dims = ar_entropy.shape[1] + for dim in range(num_embedding_dims): + df_entropy["{}-entropy-{}".format(method, dim)] = ar_entropy[:, dim] + df_entropy.set_index("time", inplace=True) + + # Add info to self.ews dataframe + for col in df_entropy.columns: + self.ews[col] = df_entropy[col] + # self.ews = pd.merge(self.ews, df_entropy, how="left", on="time") + def compute_ktau(self, tmin="earliest", tmax="latest"): """ Compute kendall tau values of CSD-based EWS. diff --git a/tests/test_ewstools.py b/tests/test_ewstools.py index e8d00b1..3cfeec9 100644 --- a/tests/test_ewstools.py +++ b/tests/test_ewstools.py @@ -3,7 +3,6 @@ --------------- """ - import pytest import numpy as np import pandas as pd @@ -145,10 +144,18 @@ def test_TimeSeries_ews(): ts.compute_auto(lag=5, rolling_window=rolling_window) ts.compute_skew(rolling_window=rolling_window) ts.compute_kurt(rolling_window=rolling_window) + ts.compute_entropy(rolling_window=rolling_window, method="sample") + ts.compute_entropy(rolling_window=rolling_window, method="approximate") + ts.compute_entropy(rolling_window=rolling_window, method="kolmogorov") + assert type(ts.ews) == pd.DataFrame assert "variance" in ts.ews.columns assert "ac5" in ts.ews.columns assert "cv" in ts.ews.columns + assert "sample-entropy-0" in ts.ews.columns + assert "sample-entropy-2" in ts.ews.columns + assert "approximate-entropy-1" in ts.ews.columns + assert "kolmogorov-entropy-1" in ts.ews.columns # Compute EWS on time series without transition rolling_window = 0.5 @@ -159,10 +166,17 @@ def test_TimeSeries_ews(): ts2.compute_auto(lag=5, rolling_window=rolling_window) ts2.compute_skew(rolling_window=rolling_window) ts2.compute_kurt(rolling_window=rolling_window) + ts2.compute_entropy(rolling_window=rolling_window, method="sample") + ts2.compute_entropy(rolling_window=rolling_window, method="approximate") + ts2.compute_entropy(rolling_window=rolling_window, method="kolmogorov") assert type(ts2.ews) == pd.DataFrame assert "variance" in ts2.ews.columns assert "ac5" in ts2.ews.columns assert "cv" in ts2.ews.columns + assert "sample-entropy-0" in ts2.ews.columns + assert "sample-entropy-2" in ts2.ews.columns + assert "approximate-entropy-1" in ts2.ews.columns + assert "kolmogorov-entropy-1" in ts2.ews.columns # Detrend data using Gaussian and Lowess filter ts.detrend("Gaussian", bandwidth=0.2) @@ -181,16 +195,24 @@ def test_TimeSeries_ews(): ts.compute_auto(lag=5, rolling_window=rolling_window) ts.compute_skew(rolling_window=rolling_window) ts.compute_kurt(rolling_window=rolling_window) + ts.compute_entropy(rolling_window=rolling_window, method="sample") + ts.compute_entropy(rolling_window=rolling_window, method="approximate") + ts.compute_entropy(rolling_window=rolling_window, method="kolmogorov") assert type(ts.ews) == pd.DataFrame assert "variance" in ts.ews.columns assert "ac5" in ts.ews.columns + assert "sample-entropy-0" in ts.ews.columns + assert "sample-entropy-2" in ts.ews.columns + assert "approximate-entropy-1" in ts.ews.columns + assert "kolmogorov-entropy-1" in ts.ews.columns # Test kendall tau computation ts.compute_ktau() assert type(ts.ktau) == dict assert "variance" in ts.ktau.keys() assert "ac5" in ts.ktau.keys() + assert "sample-entropy-0" in ts.ktau.keys() # Make plotly fig fig = ts.make_plotly() @@ -333,9 +355,7 @@ def test_shopf_init(): [sigma, mu, w0] = helpers.shopf_init(smax, stot, wdom) # Values that smax, stot should attain (+/- 1dp) - smax_assert = (sigma**2 / (4 * np.pi * mu**2)) * ( - 1 + (mu**2 / (mu**2 + 4 * w0**2)) - ) + smax_assert = (sigma**2 / (4 * np.pi * mu**2)) * (1 + (mu**2 / (mu**2 + 4 * w0**2))) stot_assert = -(sigma**2) / (2 * mu) wdom_assert = w0