From 8b4faa99150e16c072990c4997f39b720a06d91e Mon Sep 17 00:00:00 2001
From: Thomas Bury <tombury182@gmail.com>
Date: Sat, 18 May 2024 21:13:46 -0400
Subject: [PATCH] add functions to compute entropy

---
 ewstools/core.py       | 116 +++++++++++++++++++++++++++++++++++++----
 tests/test_ewstools.py |  28 ++++++++--
 2 files changed, 130 insertions(+), 14 deletions(-)

diff --git a/ewstools/core.py b/ewstools/core.py
index 47831bb..739f2f6 100644
--- a/ewstools/core.py
+++ b/ewstools/core.py
@@ -54,6 +54,7 @@
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
 
+import EntropyHub as EH
 
 # For deprecating old functions
 import deprecation
@@ -66,7 +67,8 @@
 
 class TimeSeries:
     """
-    Univariate time series data on which to compute early warning signals.
+    Class to hold univariate time series data on which to
+    compute early warning signals.
 
     Parameters
     ----------
@@ -116,7 +118,7 @@ def __init__(self, data, transition=None):
     def detrend(self, method="Gaussian", bandwidth=0.2, span=0.2):
         """
         Detrend the time series using a chosen method.
-        Add column to the dataframe for 'smoothing' and 'residuals'
+        Output is stored in the self.state dataframe.
 
         Parameters
         ----------
@@ -182,8 +184,7 @@ def compute_var(self, rolling_window=0.25):
         Compute variance over a rolling window.
         If residuals have not been computed, computation will be
         performed over state variable.
-
-        Put into 'ews' dataframe
+        Output is stored in the self.ews dataframe.
 
         Parameters
         ----------
@@ -224,8 +225,8 @@ def compute_std(self, rolling_window=0.25):
         Compute standard deviation over a rolling window.
         If residuals have not been computed, computation will be
         performed over state variable.
+        Output is stored in the self.ews dataframe.
 
-        Put into 'ews' dataframe
 
         Parameters
         ----------
@@ -268,8 +269,7 @@ def compute_cv(self, rolling_window=0.25):
         mean of the state variable.
         If residuals have not been computed, computation will be
         performed over state variable.
-
-        Put into 'ews' dataframe
+        Output is stored in the self.ews dataframe.
 
         Parameters
         ----------
@@ -313,7 +313,10 @@ def compute_cv(self, rolling_window=0.25):
 
     def compute_auto(self, rolling_window=0.25, lag=1):
         """
-        Compute autocorrelation over a rolling window. Add to dataframe.
+        Compute autocorrelation at a give lag over a rolling window.
+        If residuals have not been computed, computation will be
+        performed over state variable.
+        Output is stored in the self.ews dataframe.
 
         Parameters
         ----------
@@ -364,8 +367,8 @@ def compute_skew(self, rolling_window=0.25):
         Compute skew over a rolling window.
         If residuals have not been computed, computation will be
         performed over state variable.
+        Output is stored in the self.ews dataframe.
 
-        Add to dataframe.
 
         Parameters
         ----------
@@ -406,8 +409,8 @@ def compute_kurt(self, rolling_window=0.25):
         Compute kurtosis over a rolling window.
         If residuals have not been computed, computation will be
         performed over state variable.
+        Output is stored in the self.ews dataframe.
 
-        Add to dataframe.
 
         Parameters
         ----------
@@ -443,6 +446,99 @@ def compute_kurt(self, rolling_window=0.25):
 
         self.ews["kurtosis"] = kurt_values
 
+    def compute_entropy(self, rolling_window=0.25, method="sample", **kwargs):
+        """
+        Compute entropy using a given method over a rolling window.
+        Uses EntropyHub https://www.entropyhub.xyz/Home.html.
+        If residuals have not been computed, computation will be
+        performed over state variable.
+        Output is stored in the self.ews dataframe.
+
+        Parameters
+        ----------
+        rolling_window : float
+            Length of rolling window used to compute variance. Can be specified
+            as an absolute value or as a proportion of the length of the
+            data being analysed. Default is 0.25.
+        method : str
+            The method by which to compute entropy. Options include 'sample',
+            'approximate', and 'kolmogorov'
+        **kwargs
+            Keyword arguments for the EntropyHub function
+
+        Returns
+        -------
+        None.
+
+        """
+
+        # Get time series data prior to transition
+        if self.transition:
+            df_pre = self.state[self.state.index <= self.transition]
+        else:
+            df_pre = self.state
+
+        # Get absolute size of rollling window if given as a proportion
+        if 0 < rolling_window <= 1:
+            rw_absolute = int(rolling_window * len(df_pre))
+        else:
+            rw_absolute = rolling_window
+
+        # Get data on which to compute entropy
+        if "residuals" in df_pre.columns:
+            ts_vals = df_pre["residuals"]
+        else:
+            ts_vals = df_pre["state"]
+
+        # Compute sample entropy
+        if method == "sample":
+
+            def entropy_func(series):
+                Samp, A, B = EH.SampEn(series.values, **kwargs)
+                return Samp
+
+        # Compute approxiamte entropy
+        if method == "approximate":
+
+            def entropy_func(series):
+                Ap, Phi = EH.ApEn(series.values, **kwargs)
+                return Ap
+
+        # Compute komogorov entropy
+        if method == "kolmogorov":
+
+            def entropy_func(series):
+                K2, Ci = EH.K2En(series.values, **kwargs)
+                return K2
+
+        list_times = []
+        list_entropy = []
+        # Set up rolling window (cannot use pandas.rolling as multi-output fn)
+        for k in np.arange(0, len(ts_vals) - (rw_absolute - 1)):
+            # Select subset of series contained in window
+            window_series = ts_vals.iloc[k : k + rw_absolute]
+            # Assign the time value for the metrics (right end point of window)
+            t_point = ts_vals.index[k + (rw_absolute - 1)]
+            # Compute entropy (value for each channel)
+            entropy = entropy_func(window_series)
+
+            list_times.append(t_point)
+            list_entropy.append(entropy)
+
+        ar_entropy = np.array(list_entropy)
+
+        df_entropy = pd.DataFrame()
+        df_entropy["time"] = list_times
+        num_embedding_dims = ar_entropy.shape[1]
+        for dim in range(num_embedding_dims):
+            df_entropy["{}-entropy-{}".format(method, dim)] = ar_entropy[:, dim]
+        df_entropy.set_index("time", inplace=True)
+
+        # Add info to self.ews dataframe
+        for col in df_entropy.columns:
+            self.ews[col] = df_entropy[col]
+        # self.ews = pd.merge(self.ews, df_entropy, how="left", on="time")
+
     def compute_ktau(self, tmin="earliest", tmax="latest"):
         """
         Compute kendall tau values of CSD-based EWS.
diff --git a/tests/test_ewstools.py b/tests/test_ewstools.py
index e8d00b1..3cfeec9 100644
--- a/tests/test_ewstools.py
+++ b/tests/test_ewstools.py
@@ -3,7 +3,6 @@
 ---------------
 """
 
-
 import pytest
 import numpy as np
 import pandas as pd
@@ -145,10 +144,18 @@ def test_TimeSeries_ews():
     ts.compute_auto(lag=5, rolling_window=rolling_window)
     ts.compute_skew(rolling_window=rolling_window)
     ts.compute_kurt(rolling_window=rolling_window)
+    ts.compute_entropy(rolling_window=rolling_window, method="sample")
+    ts.compute_entropy(rolling_window=rolling_window, method="approximate")
+    ts.compute_entropy(rolling_window=rolling_window, method="kolmogorov")
+
     assert type(ts.ews) == pd.DataFrame
     assert "variance" in ts.ews.columns
     assert "ac5" in ts.ews.columns
     assert "cv" in ts.ews.columns
+    assert "sample-entropy-0" in ts.ews.columns
+    assert "sample-entropy-2" in ts.ews.columns
+    assert "approximate-entropy-1" in ts.ews.columns
+    assert "kolmogorov-entropy-1" in ts.ews.columns
 
     # Compute EWS on time series without transition
     rolling_window = 0.5
@@ -159,10 +166,17 @@ def test_TimeSeries_ews():
     ts2.compute_auto(lag=5, rolling_window=rolling_window)
     ts2.compute_skew(rolling_window=rolling_window)
     ts2.compute_kurt(rolling_window=rolling_window)
+    ts2.compute_entropy(rolling_window=rolling_window, method="sample")
+    ts2.compute_entropy(rolling_window=rolling_window, method="approximate")
+    ts2.compute_entropy(rolling_window=rolling_window, method="kolmogorov")
     assert type(ts2.ews) == pd.DataFrame
     assert "variance" in ts2.ews.columns
     assert "ac5" in ts2.ews.columns
     assert "cv" in ts2.ews.columns
+    assert "sample-entropy-0" in ts2.ews.columns
+    assert "sample-entropy-2" in ts2.ews.columns
+    assert "approximate-entropy-1" in ts2.ews.columns
+    assert "kolmogorov-entropy-1" in ts2.ews.columns
 
     # Detrend data using Gaussian and Lowess filter
     ts.detrend("Gaussian", bandwidth=0.2)
@@ -181,16 +195,24 @@ def test_TimeSeries_ews():
     ts.compute_auto(lag=5, rolling_window=rolling_window)
     ts.compute_skew(rolling_window=rolling_window)
     ts.compute_kurt(rolling_window=rolling_window)
+    ts.compute_entropy(rolling_window=rolling_window, method="sample")
+    ts.compute_entropy(rolling_window=rolling_window, method="approximate")
+    ts.compute_entropy(rolling_window=rolling_window, method="kolmogorov")
 
     assert type(ts.ews) == pd.DataFrame
     assert "variance" in ts.ews.columns
     assert "ac5" in ts.ews.columns
+    assert "sample-entropy-0" in ts.ews.columns
+    assert "sample-entropy-2" in ts.ews.columns
+    assert "approximate-entropy-1" in ts.ews.columns
+    assert "kolmogorov-entropy-1" in ts.ews.columns
 
     # Test kendall tau computation
     ts.compute_ktau()
     assert type(ts.ktau) == dict
     assert "variance" in ts.ktau.keys()
     assert "ac5" in ts.ktau.keys()
+    assert "sample-entropy-0" in ts.ktau.keys()
 
     # Make plotly fig
     fig = ts.make_plotly()
@@ -333,9 +355,7 @@ def test_shopf_init():
     [sigma, mu, w0] = helpers.shopf_init(smax, stot, wdom)
 
     # Values that smax, stot should attain (+/- 1dp)
-    smax_assert = (sigma**2 / (4 * np.pi * mu**2)) * (
-        1 + (mu**2 / (mu**2 + 4 * w0**2))
-    )
+    smax_assert = (sigma**2 / (4 * np.pi * mu**2)) * (1 + (mu**2 / (mu**2 + 4 * w0**2)))
     stot_assert = -(sigma**2) / (2 * mu)
     wdom_assert = w0