From b39f1283352bfafbe4b590388ea74688d2d4cd4a Mon Sep 17 00:00:00 2001
From: Thomas Bury
Date: Tue, 23 May 2023 17:48:27 -0400
Subject: [PATCH] fix issue caused by groupby over non-numeric values, spotted
 by jjvaldes

---
 ewstools/core.py | 843 +++++++++++++++++++++++------------------------
 1 file changed, 420 insertions(+), 423 deletions(-)

diff --git a/ewstools/core.py b/ewstools/core.py
index 8875e16..6c5a91f 100644
--- a/ewstools/core.py
+++ b/ewstools/core.py
@@ -59,11 +59,11 @@ import deprecation
 
-
 # ---------------
 # Classes
 # --------------
 
+
 class TimeSeries:
     """
     Univariate time series data on which to compute early warning signals.
@@ -83,26 +83,28 @@ def __init__(self, data, transition=None):
 
         # Put data into a pandas DataFrame
         if type(data) in [list, np.ndarray]:
-            df_state = pd.DataFrame({'time': np.arange(len(data)), 'state': data})
-            df_state.set_index('time', inplace=True)
-
+            df_state = pd.DataFrame({"time": np.arange(len(data)), "state": data})
+            df_state.set_index("time", inplace=True)
+
         # If given as pandas series, carry index forward
         elif type(data) == pd.Series:
-            df_state = pd.DataFrame({'state': data.values})
+            df_state = pd.DataFrame({"state": data.values})
             df_state.index = data.index
             # Rename index if no name given
             if not df_state.index.name:
-                df_state.index.name = 'time'
-
+                df_state.index.name = "time"
+
         # If data is not provided as either of these, flag error
         else:
-            print('\nERROR: data has been provided as type {}'.format(type(data)))
-            print('Make sure to provide data as either a list, np.ndarray or pd.Series\n')
-
+            print("\nERROR: data has been provided as type {}".format(type(data)))
+            print(
+                "Make sure to provide data as either a list, np.ndarray or pd.Series\n"
+            )
+
         # Set state and transition attributes
         self.state = df_state
         self.transition = float(transition) if transition else transition
-
+
         # Initialise other attributes
         self.ews = pd.DataFrame(index=df_state.index)
         self.dl_preds = pd.DataFrame()
@@ -110,17 +112,16 @@ def __init__(self, data, transition=None):
         self.pspec = None
         self.pspec_fits = None
         self.ews_spec = pd.DataFrame()
 
-
-    def detrend(self, method='Gaussian', bandwidth=0.2, span=0.2):
-        '''
+    def detrend(self, method="Gaussian", bandwidth=0.2, span=0.2):
+        """
         Detrend the time series using a chosen method.
         Add column to the dataframe for 'smoothing' and 'residuals'
 
         Parameters
         ----------
         method : str, optional
-            Method of detrending to use. 
+            Method of detrending to use.
             Select from ['Gaussian', 'Lowess']
             The default is 'Gaussian'.
         bandwidth : float, optional
@@ -130,15 +131,15 @@ def detrend(self, method='Gaussian', bandwidth=0.2, span=0.2):
             such that the kernel has its quartiles at +/- 0.25*bandwidth.
             The default is 0.2.
         span : float, optional
-            Span of time-series data used for Lowess filtering. Provide as a 
-            proportion of data length or as a number of data points. 
+            Span of time-series data used for Lowess filtering. Provide as a
+            proportion of data length or as a number of data points.
             The default is 0.2.
 
         Returns
         -------
         None.
 
-        '''
+        """
 
         # Get time series data prior to transition
         if self.transition:
@@ -146,8 +147,7 @@ def detrend(self, method='Gaussian', bandwidth=0.2, span=0.2):
         else:
             df_pre = self.state
 
-
-        if method == 'Gaussian':
+        if method == "Gaussian":
             # Get size of bandwidth in terms of num. datapoints if given as a proportion
             if 0 < bandwidth <= 1:
                 bw_num = bandwidth * len(df_pre)
@@ -158,35 +158,31 @@ def detrend(self, method='Gaussian', bandwidth=0.2, span=0.2):
             # Standard deviation of kernel given bandwidth
             # Note that for a Gaussian, quartiles are at +/- 0.675*sigma
             sigma = (0.25 / 0.675) * bw_num
-            smooth_values = gf(df_pre['state'].values, sigma=sigma, mode='reflect')
+            smooth_values = gf(df_pre["state"].values, sigma=sigma, mode="reflect")
             smooth_series = pd.Series(smooth_values, index=df_pre.index)
 
-
-        if method == 'Lowess':
+        if method == "Lowess":
             # Convert span to a proportion of the length of the data
             if not 0 < span <= 1:
                 span_prop = span / len(df_pre)
             else:
                 span_prop = span
 
-            smooth_values = lowess(df_pre['state'].values,
-                                   df_pre.index.values,
-                                   frac=span_prop)[:,1]
+            smooth_values = lowess(
+                df_pre["state"].values, df_pre.index.values, frac=span_prop
+            )[:, 1]
             smooth_series = pd.Series(smooth_values, index=df_pre.index)
 
-
         # Add smoothed data and residuals to the 'state' DataFrame
         self.state["smoothing"] = smooth_series
         self.state["residuals"] = self.state["state"] - self.state["smoothing"]
-
-
     def compute_var(self, rolling_window=0.25):
-        '''
+        """
         Compute variance over a rolling window.
         If residuals have not been computed, computation will be performed
         over state variable.
-        
+
         Put into 'ews' dataframe
 
         Parameters
@@ -195,19 +191,19 @@ def compute_var(self, rolling_window=0.25):
             Length of rolling window used to compute variance. Can be specified
             as an absolute value or as a proportion of the length of the
             data being analysed. Default is 0.25.
-        
+
         Returns
         -------
         None.
 
-        '''
+        """
 
         # Get time series data prior to transition
         if self.transition:
             df_pre = self.state[self.state.index <= self.transition]
         else:
             df_pre = self.state
-        
+
         # Get absolute size of rollling window if given as a proportion
         if 0 < rolling_window <= 1:
             rw_absolute = int(rolling_window * len(df_pre))
@@ -215,22 +211,20 @@ def compute_var(self, rolling_window=0.25):
         else:
             rw_absolute = rolling_window
 
         # If residuals column exists, compute over residuals.
-        if 'residuals' in df_pre.columns:
-            var_values = df_pre['residuals'].rolling(window=rw_absolute).var()
+        if "residuals" in df_pre.columns:
+            var_values = df_pre["residuals"].rolling(window=rw_absolute).var()
         # Else, compute over state variable
         else:
-            var_values = df_pre['state'].rolling(window=rw_absolute).var()
-
-        self.ews['variance'] = var_values
-
-
-
+            var_values = df_pre["state"].rolling(window=rw_absolute).var()
+
+        self.ews["variance"] = var_values
+
     def compute_std(self, rolling_window=0.25):
-        '''
+        """
         Compute standard deviation over a rolling window.
         If residuals have not been computed, computation will be performed
         over state variable.
-        
+
         Put into 'ews' dataframe
 
         Parameters
@@ -239,19 +233,19 @@ def compute_std(self, rolling_window=0.25):
             Length of rolling window used to compute variance. Can be specified
             as an absolute value or as a proportion of the length of the
             data being analysed. Default is 0.25.
-        
+
         Returns
        -------
         None.
 
-        '''
+        """
 
         # Get time series data prior to transition
         if self.transition:
             df_pre = self.state[self.state.index <= self.transition]
         else:
             df_pre = self.state
-        
+
         # Get absolute size of rollling window if given as a proportion
         if 0 < rolling_window <= 1:
             rw_absolute = int(rolling_window * len(df_pre))
@@ -259,23 +253,22 @@ def compute_std(self, rolling_window=0.25):
         else:
             rw_absolute = rolling_window
 
         # If residuals column exists, compute over residuals.
-        if 'residuals' in df_pre.columns:
-            std_values = df_pre['residuals'].rolling(window=rw_absolute).std()
+        if "residuals" in df_pre.columns:
+            std_values = df_pre["residuals"].rolling(window=rw_absolute).std()
         # Else, compute over state variable
         else:
-            std_values = df_pre['state'].rolling(window=rw_absolute).std()
-
-        self.ews['std'] = std_values
+            std_values = df_pre["state"].rolling(window=rw_absolute).std()
 
+        self.ews["std"] = std_values
 
     def compute_cv(self, rolling_window=0.25):
-        '''
+        """
         Compute coefficient of variation over a rolling window.
         This is the standard deviation of the residuals divided by the
         mean of the state variable.
         If residuals have not been computed, computation will be performed
         over state variable.
-        
+
         Put into 'ews' dataframe
 
         Parameters
@@ -284,19 +277,19 @@ def compute_cv(self, rolling_window=0.25):
             Length of rolling window used to compute variance. Can be specified
             as an absolute value or as a proportion of the length of the
             data being analysed. Default is 0.25.
-        
+
         Returns
         -------
         None.
 
-        '''
+        """
 
         # Get time series data prior to transition
         if self.transition:
             df_pre = self.state[self.state.index <= self.transition]
         else:
             df_pre = self.state
-        
+
         # Get absolute size of rollling window if given as a proportion
         if 0 < rolling_window <= 1:
             rw_absolute = int(rolling_window * len(df_pre))
@@ -305,26 +298,21 @@ def compute_cv(self, rolling_window=0.25):
 
         # Get standard deviation values
         # If residuals column exists, compute over residuals.
-        if 'residuals' in df_pre.columns:
-            std_values = df_pre['residuals'].rolling(window=rw_absolute).std()
+        if "residuals" in df_pre.columns:
+            std_values = df_pre["residuals"].rolling(window=rw_absolute).std()
         # Else, compute over state variable
         else:
-            std_values = df_pre['state'].rolling(window=rw_absolute).std()
-
-        # Get mean values from state variable
-        mean_values = df_pre['state'].rolling(window=rw_absolute).mean()
-
-        cv_values = std_values/mean_values
-
-        self.ews['cv'] = cv_values
-
-
+            std_values = df_pre["state"].rolling(window=rw_absolute).std()
+
+        # Get mean values from state variable
+        mean_values = df_pre["state"].rolling(window=rw_absolute).mean()
+
+        cv_values = std_values / mean_values
+
+        self.ews["cv"] = cv_values
+
     def compute_auto(self, rolling_window=0.25, lag=1):
-        '''
+        """
         Compute autocorrelation over a rolling window. Add to dataframe.
 
         Parameters
@@ -335,19 +323,19 @@ def compute_auto(self, rolling_window=0.25, lag=1):
             data being analysed. Default is 0.25.
         lag : int
             Lag of autocorrelation
-        
+
         Returns
         -------
         None.
 
-        '''
+        """
 
         # Get time series data prior to transition
         if self.transition:
             df_pre = self.state[self.state.index <= self.transition]
         else:
             df_pre = self.state
-        
+
         # Get absolute size of rollling window if given as a proportion
         if 0 < rolling_window <= 1:
             rw_absolute = int(rolling_window * len(df_pre))
@@ -355,25 +343,28 @@ def compute_auto(self, rolling_window=0.25, lag=1):
         else:
             rw_absolute = rolling_window
 
         # If residuals column exists, compute over residuals.
-        if 'residuals' in df_pre.columns:
-            ac_values = df_pre['residuals'].rolling(window=rw_absolute).apply(
-                func=lambda x: pd.Series(x).autocorr(lag=lag), raw=True
+        if "residuals" in df_pre.columns:
+            ac_values = (
+                df_pre["residuals"]
+                .rolling(window=rw_absolute)
+                .apply(func=lambda x: pd.Series(x).autocorr(lag=lag), raw=True)
             )
         # Else, compute over state variable
         else:
-            ac_values = df_pre['state'].rolling(window=rw_absolute).apply(
-                func=lambda x: pd.Series(x).autocorr(lag=lag), raw=True
-            )
-
-        self.ews['ac{}'.format(lag)] = ac_values
+            ac_values = (
+                df_pre["state"]
+                .rolling(window=rw_absolute)
+                .apply(func=lambda x: pd.Series(x).autocorr(lag=lag), raw=True)
+            )
 
+        self.ews["ac{}".format(lag)] = ac_values
 
     def compute_skew(self, rolling_window=0.25):
-        '''
+        """
         Compute skew over a rolling window.
         If residuals have not been computed, computation will be performed
         over state variable.
-        
+
         Add to dataframe.
 
         Parameters
@@ -382,19 +373,19 @@ def compute_skew(self, rolling_window=0.25):
             Length of rolling window used to compute variance. Can be specified
             as an absolute value or as a proportion of the length of the
             data being analysed. Default is 0.25.
-        
+
         Returns
         -------
         None.
 
-        '''
-
+        """
+
         # Get time series data prior to transition
         if self.transition:
             df_pre = self.state[self.state.index <= self.transition]
         else:
             df_pre = self.state
-        
+
         # Get absolute size of rollling window if given as a proportion
         if 0 < rolling_window <= 1:
             rw_absolute = int(rolling_window * len(df_pre))
@@ -402,22 +393,20 @@ def compute_skew(self, rolling_window=0.25):
         else:
             rw_absolute = rolling_window
 
         # If residuals column exists, compute over residuals.
-        if 'residuals' in df_pre.columns:
-            skew_values = df_pre['residuals'].rolling(window=rw_absolute).skew()
+        if "residuals" in df_pre.columns:
+            skew_values = df_pre["residuals"].rolling(window=rw_absolute).skew()
         # Else, compute over state variable
         else:
-            skew_values = df_pre['state'].rolling(window=rw_absolute).skew()
-
-        self.ews['skew'] = skew_values
-
+            skew_values = df_pre["state"].rolling(window=rw_absolute).skew()
 
+        self.ews["skew"] = skew_values
 
     def compute_kurt(self, rolling_window=0.25):
-        '''
+        """
         Compute kurtosis over a rolling window.
         If residuals have not been computed, computation will be performed
         over state variable.
-        
+
         Add to dataframe.
 
         Parameters
@@ -426,19 +415,19 @@ def compute_kurt(self, rolling_window=0.25):
             Length of rolling window used to compute variance. Can be specified
             as an absolute value or as a proportion of the length of the
             data being analysed. Default is 0.25.
-        
+
        Returns
         -------
         None.
 
-        '''
+        """
 
         # Get time series data prior to transition
         if self.transition:
             df_pre = self.state[self.state.index <= self.transition]
         else:
             df_pre = self.state
-        
+
         # Get absolute size of rollling window if given as a proportion
         if 0 < rolling_window <= 1:
             rw_absolute = int(rolling_window * len(df_pre))
@@ -446,21 +435,18 @@ def compute_kurt(self, rolling_window=0.25):
         else:
             rw_absolute = rolling_window
 
         # If residuals column exists, compute over residuals.
-        if 'residuals' in df_pre.columns:
-            kurt_values = df_pre['residuals'].rolling(window=rw_absolute).kurt()
+        if "residuals" in df_pre.columns:
+            kurt_values = df_pre["residuals"].rolling(window=rw_absolute).kurt()
         # Else, compute over state variable
         else:
-            kurt_values = df_pre['state'].rolling(window=rw_absolute).kurt()
-
-        self.ews['kurtosis'] = kurt_values
-
+            kurt_values = df_pre["state"].rolling(window=rw_absolute).kurt()
+        self.ews["kurtosis"] = kurt_values
 
-
-    def compute_ktau(self, tmin='earliest', tmax='latest'):
-        '''
+    def compute_ktau(self, tmin="earliest", tmax="latest"):
+        """
         Compute kendall tau values of CSD-based EWS.
-        Output is placed in the attribute *ktau*, which is a Python
+        Output is placed in the attribute *ktau*, which is a Python
         dictionary contianing Kendall tau values for each CSD-based EWS.
 
         Parameters
@@ -473,35 +459,31 @@ def compute_ktau(self, tmin='earliest', tmax='latest'):
             taken as latest time point in EWS time series, which could be
             the end of the state time series, or a defined transtion point.
 
-        '''
-
-
+        """
+
         # Get tmin and tmax values if using extrema
-        if tmin == 'earliest':
+        if tmin == "earliest":
             tmin = self.ews.dropna().index[0]
-
-        if tmax == 'latest':
+
+        if tmax == "latest":
             tmax = self.ews.dropna().index[-1]
-
+
         # Get cropped data
-        df_ews = self.ews[(self.ews.index >= tmin) &\
-                          (self.ews.index <= tmax)].copy()
-
+        df_ews = self.ews[(self.ews.index >= tmin) & (self.ews.index <= tmax)].copy()
+
         # Include smax in Kendall tau computation if it exists
-        if 'smax' in self.ews_spec.columns:
-            df_ews = df_ews.join(self.ews_spec['smax'])
-
+        if "smax" in self.ews_spec.columns:
+            df_ews = df_ews.join(self.ews_spec["smax"])
+
         # Make a series with the time values
         time_values = pd.Series(data=df_ews.index, index=df_ews.index)
         ktau_out = df_ews.corrwith(time_values, method="kendall", axis=0)
 
         self.ktau = dict(ktau_out)
 
-
-    def apply_classifier(self, classifier, tmin, tmax,
-                         name='c1', verbose=1):
-        '''
+    def apply_classifier(self, classifier, tmin, tmax, name="c1", verbose=1):
+        """
         Apply a deep learning classifier to the residual time series from
-        tmin to tmax. 
+        tmin to tmax.
         If time series has not been detrended, apply to the raw data.
         Predictions from the classifier are saved into the attribute dl_preds.
 
         Parameters
@@ -518,55 +500,57 @@ def apply_classifier(self, classifier, tmin, tmax,
         verbose : int, optional
             Verbosity of update messages from TensorFlow.
             0 = silent, 1 = progress bar, 2 = single line. The default is 1.
-        
+
         Returns
         -------
         None.
 
-        '''
-
+        """
+
         # Length of time series required as input to classifier
         input_len = classifier.layers[0].input_shape[1]
-
+
         # Get time series segment. Use residuals if detrending performed.
         # Otherwise use state variable.
-        if 'residuals' in self.state.columns:
-            series = self.state[(self.state.index >= tmin) &\
-                                (self.state.index < tmax)]['residuals']
+        if "residuals" in self.state.columns:
+            series = self.state[(self.state.index >= tmin) & (self.state.index < tmax)][
+                "residuals"
+            ]
         else:
-            series = self.state[(self.state.index >= tmin) &\
-                                (self.state.index < tmax)]['state']
-
+            series = self.state[(self.state.index >= tmin) & (self.state.index < tmax)][
+                "state"
+            ]
+
         # Get values in series
         data = series.values
-
+
         # If time series segment is larger than input dimension of classifier
         if len(data) > input_len:
-            print('ERROR: Length of time series segment is too long for the classifier. You can modify the tmin and tmax parameters to select a smaller segment.')
+            print(
+                "ERROR: Length of time series segment is too long for the classifier. You can modify the tmin and tmax parameters to select a smaller segment."
+            )
             return
-
+
         # Normalise by mean of absolute value
-        data_norm = data/(abs(data).mean())
+        data_norm = data / (abs(data).mean())
 
         # Prepend with zeros to make appropriate length for classifier
         num_zeros = input_len - len(data_norm)
-        input_data = np.concatenate((np.zeros(num_zeros),data_norm)).reshape(1,-1, 1)
-
+        input_data = np.concatenate((np.zeros(num_zeros), data_norm)).reshape(1, -1, 1)
+
         # Get DL prediction
         dl_pred = classifier.predict(input_data, verbose=verbose)[0]
 
         # Put info into dataframe
-        dict_dl_pred = {i:val for (i,val) in zip(np.arange(len(dl_pred)), dl_pred)}
-        dict_dl_pred['time'] = series.index[-1]
-        dict_dl_pred['classifier'] = name
+        dict_dl_pred = {i: val for (i, val) in zip(np.arange(len(dl_pred)), dl_pred)}
+        dict_dl_pred["time"] = series.index[-1]
+        dict_dl_pred["classifier"] = name
         df_dl_pred = pd.DataFrame(dict_dl_pred, index=[len(self.dl_preds)])
 
         # Append to dataframe contiaining DL predictions
         self.dl_preds = pd.concat([self.dl_preds, df_dl_pred], ignore_index=True)
 
-
-    def apply_classifier_inc(self, classifier, inc=10, name='c1', verbose=1):
-        '''
+    def apply_classifier_inc(self, classifier, inc=10, name="c1", verbose=1):
+        """
         Apply a deep learning classifier to incrementally increasing time
         series lengths. First prediction is made on time series segment from
         data point at index 0 to data point at index inc. Second prediction
@@ -583,7 +567,7 @@ def apply_classifier_inc(self, classifier, inc=10, name='c1', verbose=1):
         name : str, optional
             Name assigned to the classifier. The default is 'c1'.
         verbose : int, optional
-            Verbosity of update messages from TensorFlow. 
+            Verbosity of update messages from TensorFlow.
             0 = silent, 1 = progress bar, 2 = single line.
             The default is 1.
 
@@ -591,39 +575,40 @@ def apply_classifier_inc(self, classifier, inc=10, name='c1', verbose=1):
         Returns
         -------
         None.
 
-        '''
-
-        dt = self.state.index[1]-self.state.index[0]
+        """
+
+        dt = self.state.index[1] - self.state.index[0]
         tmin = self.state.index[0]
         tend = self.state.index[-1] if not self.transition else self.transition
-        tend += dt # Make transition point inclusive
-
+        tend += dt  # Make transition point inclusive
+
         # Tmax values for each time series segment
-        tmax_vals = np.arange(tmin+inc, tend+dt, inc)
+        tmax_vals = np.arange(tmin + inc, tend + dt, inc)
         for tmax in tmax_vals:
-            self.apply_classifier(classifier, name=name, tmin=tmin, tmax=tmax,
-                                  verbose=verbose)
-
-
-
+            self.apply_classifier(
+                classifier, name=name, tmin=tmin, tmax=tmax, verbose=verbose
+            )
 
     def clear_dl_preds(self):
-        '''
+        """
         Clear the attribute *dl_preds*
 
         Returns
         -------
         None.
 
-        '''
-
-        self.dl_preds = pd.DataFrame()
-
+        """
+        self.dl_preds = pd.DataFrame()
 
-    def compute_spectrum(self, rolling_window=0.25, ham_length=40,
-                         ham_offset=0.5, pspec_roll_offset=20,
-                         w_cutoff=1):
-        '''
+    def compute_spectrum(
+        self,
+        rolling_window=0.25,
+        ham_length=40,
+        ham_offset=0.5,
+        pspec_roll_offset=20,
+        w_cutoff=1,
+    ):
+        """
         Compute the power spectrum over a rolling window.
         Stores the power spectra as a DataFrame in TimeSeries.pspec
 
         Parameters
         ----------
         rolling_window : float
@@ -634,10 +619,10 @@ def compute_spectrum(self, rolling_window=0.25, ham_length=40,
             as an absolute value or as a proportion of the length of the
             data being analysed. Default is 0.25.
         ham_length : int
-            Length of the Hamming window used to compute the power spectrum. 
+            Length of the Hamming window used to compute the power spectrum.
             The default is 40.
         ham_offset : float
-            Hamming offset as a proportion of the Hamming window size. 
+            Hamming offset as a proportion of the Hamming window size.
             The default is 0.5.
         pspec_roll_offset : int
             Rolling window offset used when computing power spectra. Power spectrum
@@ -651,8 +636,8 @@ def compute_spectrum(self, rolling_window=0.25, ham_length=40,
         Returns
         -------
         None.
 
-        '''
-
+        """
+
         # Get time series data prior to transition
         if self.transition:
             df_pre = self.state[self.state.index <= self.transition]
@@ -666,11 +651,11 @@ def compute_spectrum(self, rolling_window=0.25, ham_length=40,
             rw_absolute = rolling_window
 
         # If residuals column exists, compute over residuals.
-        if 'residuals' in df_pre.columns:
-            series = df_pre['residuals']
+        if "residuals" in df_pre.columns:
+            series = df_pre["residuals"]
         # Else, compute over state variable
         else:
-            series = df_pre['state']
+            series = df_pre["state"]
 
         # Number of components in the residual time-series
         num_comps = len(df_pre)
@@ -708,16 +693,14 @@ def compute_spectrum(self, rolling_window=0.25, ham_length=40,
                 columns={"Power spectrum": "empirical"}, inplace=True
             )
             # Include a column for the time-stamp
-            df_pspec_empirical['time'] = t_point * np.ones(len(pspec))
+            df_pspec_empirical["time"] = t_point * np.ones(len(pspec))
             list_pspec.append(df_pspec_empirical)
-        
+
         # Concatenate power spectra dataframes and store in attribute pspec
         self.pspec = pd.concat(list_pspec)
-
-
     def compute_smax(self):
-        '''
+        """
         Compute Smax (the maximum power in the power spectrum).
         This can only be applied after applying compute_spectrum().
         Stores Smax values in TimeSeries.ews_spec
@@ -726,21 +709,20 @@ def compute_smax(self):
 
         Returns
         -------
         None.
 
-        '''
-
+        """
+
         if self.pspec is None:
-            print('ERROR: The power spectrum must be computed before computing\
+            print(
+                "ERROR: The power spectrum must be computed before computing\
                 spectral EWS such as Smax. The power spectrum can be\
-                computed using compute_pspec()')
-
-        smax_values = self.pspec[['time','power']].groupby(['time']).max()
-        self.ews_spec['smax'] = smax_values
-
-
+                computed using compute_pspec()"
+            )
+
+        smax_values = self.pspec[["time", "power"]].groupby(["time"]).max()
+        self.ews_spec["smax"] = smax_values
 
     def compute_spec_type(self, sweep=False):
-        '''
+        """
         Fit the analytical forms of the Fold, Hopf and Null power spectrum
         to the empirical power spectrum. Get Akaike Information Criterion
         (AIC) weights to determine best fit.
 
         Store AIC weights in TimeSeries.ews_spec
         Store fitted power spectra in TimeSeries.pspec_fits
 
         Parameters
         ----------
         sweep : bool
            If 'True', sweep over a range of intialisation parameters
@@ -759,31 +741,32 @@ def compute_spec_type(self, sweep=False):
 
         Returns
         -------
         None.
 
-        '''
-
-
+        """
+
         if self.pspec is None:
-            print('ERROR: The power spectrum must be computed before computing\
-            the spectrum type. The power spectrum can be\
-            computed using compute_pspec()')
+            print(
+                "ERROR: The power spectrum must be computed before computing\
+                the spectrum type. The power spectrum can be\
+                computed using compute_pspec()"
+            )
+
         list_aic = []
         list_pspec_fits = []
-        
+
         # Loop through time values
-        for time in self.pspec['time'].unique():
-            pspec = self.pspec[self.pspec['time']==time]
-            pspec_series = pspec.set_index('frequency')['power']
-            
+        for time in self.pspec["time"].unique():
+            pspec = self.pspec[self.pspec["time"] == time]
+            pspec_series = pspec.set_index("frequency")["power"]
+
             ## Compute the AIC values
-            metrics = helpers.pspec_metrics(pspec_series, ews=['aic'], sweep=sweep)
+            metrics = helpers.pspec_metrics(pspec_series, ews=["aic"], sweep=sweep)
             dict_aic = {}
-            dict_aic['fold'] = metrics['AIC fold']
-            dict_aic['hopf'] = metrics['AIC hopf']
-            dict_aic['null'] = metrics['AIC null']
-            dict_aic['time'] = time
+            dict_aic["fold"] = metrics["AIC fold"]
+            dict_aic["hopf"] = metrics["AIC hopf"]
+            dict_aic["null"] = metrics["AIC null"]
+            dict_aic["time"] = time
             list_aic.append(dict_aic)
-            
+
             # Generate data to plot the fitted power spectra
 
             # Create fine-scale frequency values
@@ -806,8 +789,8 @@ def compute_spec_type(self, sweep=False):
 
             ## Put spectrum fits into a dataframe
             dict_fits = {
-                'time': time * np.ones(len(wVals)),
-                'frequency': wVals,
+                "time": time * np.ones(len(wVals)),
+                "frequency": wVals,
                 "fit fold": pspec_fold,
                 "fit hopf": pspec_hopf,
                 "fit null": pspec_null,
@@ -816,17 +799,14 @@ def compute_spec_type(self, sweep=False):
             list_pspec_fits.append(df_pspec_fits)
 
         # Concatenate
-        df_aic = pd.DataFrame(list_aic).set_index('time')
+        df_aic = pd.DataFrame(list_aic).set_index("time")
         df_pspec_fits = pd.concat(list_pspec_fits)
 
         self.ews_spec = self.ews_spec.join(df_aic)
         self.pspec_fits = df_pspec_fits
-
-
-
     def make_plotly(self, kendall_tau=True, ens_avg=False):
-        '''
+        """
         Make an interactive Plotly figure to view all EWS computed
 
         Parameters
@@ -836,15 +816,15 @@ def make_plotly(self, kendall_tau=True, ens_avg=False):
         ----------
         kendall_tau : bool, optional
             Set as true to show Kendall tau values (if they have been computed)
             Default is True.
         ens_avg : bool, optional
-            Plot the ensenble average of DL predictions. 
+            Plot the ensemble average of DL predictions.
            Default is False.
 
         Returns
         -------
         Plotly figure
 
-        '''
-
+        """
+
         # Trace colours
         colours = px.colors.qualitative.Plotly
 
         # Count number of panels for Plotly subplots
@@ -852,275 +832,292 @@ def make_plotly(self, kendall_tau=True, ens_avg=False):
         # Always a row for state varialbe
         row_count += 1
         # Row for autocorrelation
-        ac_labels = [s for s in self.ews.columns if s[:2]=='ac']
-        if len(ac_labels)>0:
-            row_count+=1
+        ac_labels = [s for s in self.ews.columns if s[:2] == "ac"]
+        if len(ac_labels) > 0:
+            row_count += 1
         # Row for each other EWS
-        row_count += len(self.ews.columns)-len(ac_labels)
+        row_count += len(self.ews.columns) - len(ac_labels)
         # Row for Smax if included
-        if 'smax' in self.ews_spec.columns:
-            row_count+=1
+        if "smax" in self.ews_spec.columns:
+            row_count += 1
         # Row for AIC weights if computed
-        if 'fold' in self.ews_spec.columns:
-            row_count+=1
+        if "fold" in self.ews_spec.columns:
+            row_count += 1
         # Row for DL predictions if computed
-        if len(self.dl_preds.columns)>0:
-            row_count+=1
-
+        if len(self.dl_preds.columns) > 0:
+            row_count += 1
+
         num_rows = row_count
-        row_count = 1 # reset row counter
-
+        row_count = 1  # reset row counter
+
         # Make Plotly subplots frame
-        fig = make_subplots(rows=num_rows, cols=1,
-                            shared_xaxes=True,
-                            x_title='Time',
-                            vertical_spacing=0.02
-                            )
-
+        fig = make_subplots(
+            rows=num_rows,
+            cols=1,
+            shared_xaxes=True,
+            x_title="Time",
+            vertical_spacing=0.02,
+        )
+
         # Plot state variable
         fig.add_trace(
-            go.Scatter(x=self.state.index.values,
-                       y=self.state['state'].values,
-                       name='state',
-                       ),
-            row=row_count, col=1
-            )
-        fig.update_yaxes(title='State', row=row_count)
+            go.Scatter(
+                x=self.state.index.values,
+                y=self.state["state"].values,
+                name="state",
+            ),
+            row=row_count,
+            col=1,
+        )
+        fig.update_yaxes(title="State", row=row_count)
 
         # Plot smoothing if computed
-        if 'smoothing' in self.state.columns:
+        if "smoothing" in self.state.columns:
             fig.add_trace(
-                go.Scatter(x=self.state.index.values,
-                           y=self.state['smoothing'].values,
-                           name='smoothing',
-                           ),
-                row=row_count, col=1
-                )
-
+                go.Scatter(
+                    x=self.state.index.values,
+                    y=self.state["smoothing"].values,
+                    name="smoothing",
+                ),
+                row=row_count,
+                col=1,
+            )
+
         # Plot variance if computed
-        if 'variance' in self.ews.columns:
+        if "variance" in self.ews.columns:
             row_count += 1
-
+
             # Add kendall tau to name
-            if kendall_tau and ('variance' in self.ktau.keys()):
-                ktau = self.ktau['variance']
-                name = 'variance (ktau={:.2f})'.format(ktau)
+            if kendall_tau and ("variance" in self.ktau.keys()):
+                ktau = self.ktau["variance"]
+                name = "variance (ktau={:.2f})".format(ktau)
             else:
-                name = 'variance'
-
+                name = "variance"
+
             fig.add_trace(
-                go.Scatter(x=self.ews.index.values,
-                           y=self.ews['variance'].values,
-                           name=name,
-                           ),
-                row=row_count, col=1
-                )
-            fig.update_yaxes(title='Variance', row=row_count)
-
+                go.Scatter(
+                    x=self.ews.index.values,
+                    y=self.ews["variance"].values,
+                    name=name,
+                ),
+                row=row_count,
+                col=1,
+            )
+            fig.update_yaxes(title="Variance", row=row_count)
+
         # Plot standard deviation if computed
-        if 'std' in self.ews.columns:
+        if "std" in self.ews.columns:
             row_count += 1
-
+
             # Add kendall tau to name
-            if kendall_tau and ('std' in self.ktau.keys()):
-                ktau = self.ktau['std']
-                name = 'std (ktau={:.2f})'.format(ktau)
+            if kendall_tau and ("std" in self.ktau.keys()):
+                ktau = self.ktau["std"]
+                name = "std (ktau={:.2f})".format(ktau)
             else:
-                name = 'std'
-
+                name = "std"
+
             fig.add_trace(
-                go.Scatter(x=self.ews.index.values,
-                           y=self.ews['std'].values,
-                           name=name,
-                           ),
-                row=row_count, col=1
-                )
-            fig.update_yaxes(title='St. Dev.', row=row_count)
-
+                go.Scatter(
+                    x=self.ews.index.values,
+                    y=self.ews["std"].values,
+                    name=name,
+                ),
+                row=row_count,
+                col=1,
+            )
+            fig.update_yaxes(title="St. Dev.", row=row_count)
+
         # Plot coefficient of variation if computed
-        if 'cv' in self.ews.columns:
+        if "cv" in self.ews.columns:
             row_count += 1
-
+
            # Add kendall tau to name
-            if kendall_tau and ('cv' in self.ktau.keys()):
-                ktau = self.ktau['cv']
-                name = 'cv (ktau={:.2f})'.format(ktau)
+            if kendall_tau and ("cv" in self.ktau.keys()):
+                ktau = self.ktau["cv"]
+                name = "cv (ktau={:.2f})".format(ktau)
             else:
-                name = 'cv'
-
+                name = "cv"
+
             fig.add_trace(
-                go.Scatter(x=self.ews.index.values,
-                           y=self.ews['cv'].values,
-                           name=name,
-                           ),
-                row=row_count, col=1
-                )
-            fig.update_yaxes(title='Coeff. of Var.', row=row_count)
-
-
-
+                go.Scatter(
+                    x=self.ews.index.values,
+                    y=self.ews["cv"].values,
+                    name=name,
+                ),
+                row=row_count,
+                col=1,
+            )
+            fig.update_yaxes(title="Coeff. of Var.", row=row_count)
+
         # Plot autocorrelation metrics if computed
-        if len(ac_labels)!=0:
-            row_count+=1
+        if len(ac_labels) != 0:
+            row_count += 1
             for ac_label in ac_labels:
-
+
                 # Add kendall tau to name
                 if kendall_tau and (ac_label in self.ktau.keys()):
                     ktau = self.ktau[ac_label]
-                    name = '{} (ktau={:.2f})'.format(ac_label, ktau)
+                    name = "{} (ktau={:.2f})".format(ac_label, ktau)
                 else:
-                    name = ac_label
-
+                    name = ac_label
+
                 fig.add_trace(
-                    go.Scatter(x=self.ews.index.values,
-                               y=self.ews[ac_label].values,
-                               name=name,
-                               ),
-                    row=row_count, col=1
-                    )
-            fig.update_yaxes(title='Autocorrelation', row=row_count)
-
-
+                    go.Scatter(
+                        x=self.ews.index.values,
+                        y=self.ews[ac_label].values,
+                        name=name,
+                    ),
+                    row=row_count,
+                    col=1,
+                )
+            fig.update_yaxes(title="Autocorrelation", row=row_count)
+
         # Plot skew if computed
-        if 'skew' in self.ews.columns:
+        if "skew" in self.ews.columns:
             row_count += 1
-
+
             # Add kendall tau to name
-            if kendall_tau and ('skew' in self.ktau.keys()):
-                ktau = self.ktau['skew']
-                name = 'skew (ktau={:.2f})'.format(ktau)
+            if kendall_tau and ("skew" in self.ktau.keys()):
+                ktau = self.ktau["skew"]
                name = "skew (ktau={:.2f})".format(ktau)
             else:
-                name = 'skew'
-
+                name = "skew"
+
             fig.add_trace(
-                go.Scatter(x=self.ews.index.values,
-                           y=self.ews['skew'].values,
-                           name=name,
-                           ),
-                row=row_count, col=1
-                )
-            fig.update_yaxes(title='Skew', row=row_count)
-
+                go.Scatter(
+                    x=self.ews.index.values,
+                    y=self.ews["skew"].values,
+                    name=name,
+                ),
+                row=row_count,
+                col=1,
+            )
+            fig.update_yaxes(title="Skew", row=row_count)
+
         # Plot kurtosis if computed
-        if 'kurtosis' in self.ews.columns:
+        if "kurtosis" in self.ews.columns:
             row_count += 1
-
+
             # Add kendall tau to name
-            if kendall_tau and ('kurtosis' in self.ktau.keys()):
-                ktau = self.ktau['kurtosis']
-                name = 'kurtosis (ktau={:.2f})'.format(ktau)
+            if kendall_tau and ("kurtosis" in self.ktau.keys()):
+                ktau = self.ktau["kurtosis"]
+                name = "kurtosis (ktau={:.2f})".format(ktau)
             else:
-                name = 'kurtosis'
-
+                name = "kurtosis"
+
             fig.add_trace(
-                go.Scatter(x=self.ews.index.values,
-                           y=self.ews['kurtosis'].values,
-                           name=name,
-                           ),
-                row=row_count, col=1
-                )
-            fig.update_yaxes(title='Kurtosis', row=row_count)
-
-
+                go.Scatter(
+                    x=self.ews.index.values,
+                    y=self.ews["kurtosis"].values,
+                    name=name,
+                ),
+                row=row_count,
+                col=1,
+            )
+            fig.update_yaxes(title="Kurtosis", row=row_count)
+
         # Plot Smax if computd
-        if 'smax' in self.ews_spec.columns:
+        if "smax" in self.ews_spec.columns:
             row_count += 1
-
+
             # Add kendall tau to name
-            if kendall_tau and ('smax' in self.ktau.keys()):
-                ktau = self.ktau['smax']
-                name = 'smax (ktau={:.2f})'.format(ktau)
+            if kendall_tau and ("smax" in self.ktau.keys()):
+                ktau = self.ktau["smax"]
+                name = "smax (ktau={:.2f})".format(ktau)
             else:
-                name = 'smax'
-
+                name = "smax"
+
             fig.add_trace(
-                go.Scatter(x=self.ews_spec.index.values,
-                           y=self.ews_spec['smax'].values,
-                           name=name,
-                           ),
-                row=row_count, col=1
-                )
-            fig.update_yaxes(title='Smax', row=row_count)
-
-
+                go.Scatter(
+                    x=self.ews_spec.index.values,
+                    y=self.ews_spec["smax"].values,
+                    name=name,
+                ),
+                row=row_count,
+                col=1,
+            )
+            fig.update_yaxes(title="Smax", row=row_count)
+
         # Plot AIC weights if computd
-        if 'fold' in self.ews_spec.columns:
+        if "fold" in self.ews_spec.columns:
             row_count += 1
-            aic_labels = ['fold', 'hopf', 'null']
+            aic_labels = ["fold", "hopf", "null"]
             for aic_label in aic_labels:
                 fig.add_trace(
-                    go.Scatter(x=self.ews_spec.index.values,
-                               y=self.ews_spec[aic_label].values,
-                               name=aic_label,
-                               ),
-                    row=row_count, col=1
-                    )
-
-            fig.update_yaxes(title='AIC weights', row=row_count)
-
-
-
+                    go.Scatter(
+                        x=self.ews_spec.index.values,
+                        y=self.ews_spec[aic_label].values,
+                        name=aic_label,
+                    ),
+                    row=row_count,
+                    col=1,
+                )
+
+            fig.update_yaxes(title="AIC weights", row=row_count)
+
         # Plot DL predictions if computed
-        if len(self.dl_preds)>0:
+        if len(self.dl_preds) > 0:
             row_count += 1
-            class_labels = [s for s in self.dl_preds.columns if s not in ['time', 'classifier']]
-            classifiers = self.dl_preds['classifier'].unique()
-
+            class_labels = [
+                s for s in self.dl_preds.columns if s not in ["time", "classifier"]
+            ]
+            classifiers = self.dl_preds["classifier"].unique()
+
             # If plotting predictions from every classifier
            if not ens_avg:
                 # Loop through class labels and classifier names
                 for idx, class_label in enumerate(class_labels):
                     for idx2, classifier in enumerate(classifiers):
-                        df_plot = self.dl_preds[self.dl_preds['classifier']==classifier]
+                        df_plot = self.dl_preds[
+                            self.dl_preds["classifier"] == classifier
+                        ]
                         fig.add_trace(
-                            go.Scatter(x=df_plot['time'].values,
-                                       y=df_plot[class_label].values,
-                                       name='DL class {}'.format(class_label),
-                                       legendgroup='DL class {}'.format(class_label),
-                                       showlegend=True if idx2==0 else False,
-                                       line=dict(color=colours[idx])
-                                       ),
-                            row=row_count, col=1
-                            )
-
+                            go.Scatter(
+                                x=df_plot["time"].values,
+                                y=df_plot[class_label].values,
+                                name="DL class {}".format(class_label),
+                                legendgroup="DL class {}".format(class_label),
+                                showlegend=True if idx2 == 0 else False,
+                                line=dict(color=colours[idx]),
+                            ),
+                            row=row_count,
+                            col=1,
+                        )
+
             # If plotting ensemble average over classifiers
             else:
-                df_plot = self.dl_preds.groupby(['time']).mean().reset_index()
+                df = self.dl_preds.drop(["classifier"], axis=1)  # JJV
+                df_plot = df.groupby(["time"]).mean().reset_index()  # JJV
+
                 # Loop through class labels
                 for idx, class_label in enumerate(class_labels):
                     fig.add_trace(
-                        go.Scatter(x=df_plot['time'].values,
-                                   y=df_plot[class_label].values,
-                                   name='DL class {}'.format(class_label),
-                                   line=dict(color=colours[idx])
-                                   ),
-                        row=row_count, col=1
-                        )
-
-            fig.update_yaxes(title='DL predictions', row=row_count)
-
-
-
-
-        # Set figure dimensions
-        fig.update_layout(height=200*num_rows,
-                          width=800)
-
-        # Separation between legend entries
-        fig.layout.legend.tracegroupgap = 0
-
-        return fig
+                        go.Scatter(
+                            x=df_plot["time"].values,
+                            y=df_plot[class_label].values,
+                            name="DL class {}".format(class_label),
+                            line=dict(color=colours[idx]),
+                        ),
+                        row=row_count,
+                        col=1,
+                    )
+            fig.update_yaxes(title="DL predictions", row=row_count)
+        # Set figure dimensions
+        fig.update_layout(height=200 * num_rows, width=800)
+        # Separation between legend entries
+        fig.layout.legend.tracegroupgap = 0
+        return fig
 
 
 # -----------------------------
 # Eigenvalue reconstruction
 # ------------------------------
 
+
 def eval_recon_rolling(
     df_in,
     roll_window=0.4,
@@ -1240,13 +1237,13 @@ def eval_recon_rolling(
         # Do eigenvalue reconstruction on residuals
         dic_eval_recon = helpers.eval_recon(df_window)
         # Add time component
-        dic_eval_recon['time'] = t_point
+        dic_eval_recon["time"] = t_point
        # Add them to list
         list_evaldata.append(dic_eval_recon)
 
     # Create dataframe from list of dicts of eval data
     df_evaldata = pd.DataFrame(list_evaldata)
-    df_evaldata.set_index('time', inplace=True)
+    df_evaldata.set_index("time", inplace=True)
 
     # Create output dataframe that merges all useful info
     df_out = pd.concat(
@@ -1305,7 +1302,7 @@ def block_bootstrap(series, n_samples, bs_type="Stationary", block_size=10):
 
         for data in bs.bootstrap(n_samples):
             df_temp = pd.DataFrame(
-                {"sample": count, 'time': series.index.values, "x": data[0][0]}
+                {"sample": count, "time": series.index.values, "x": data[0][0]}
             )
             list_samples.append(df_temp)
             count += 1
@@ -1318,14 +1315,14 @@ def block_bootstrap(series, n_samples, bs_type="Stationary", block_size=10):
 
         for data in bs.bootstrap(n_samples):
             df_temp = pd.DataFrame(
-                {"sample": count, 'time': series.index.values, "x": data[0][0]}
+                {"sample": count, "time": series.index.values, "x": data[0][0]}
            )
             list_samples.append(df_temp)
             count += 1
 
     # Concatenate list of samples
     df_samples = pd.concat(list_samples)
-    df_samples.set_index(["sample", 'time'], inplace=True)
+    df_samples.set_index(["sample", "time"], inplace=True)
 
     # Output DataFrame of samples
     return df_samples
@@ -1427,15 +1424,15 @@ def roll_bootstrap(
 
         # Compute bootstrap samples of residauls within rolling window
        df_samples_temp = block_bootstrap(window_series, n_samples, bs_type, block_size)
         df_samples_temp.reset_index(inplace=True)
-        df_samples_temp['wintime'] = df_samples_temp['time']
-
+        df_samples_temp["wintime"] = df_samples_temp["time"]
+
         # Add column with real time (end of window)
-        df_samples_temp['time'] = t_point
+        df_samples_temp["time"] = t_point
 
         # Reorganise index
-
+
         df_samples_temp.reset_index(inplace=True)
-        df_samples_temp.set_index(['time', "sample", 'wintime'], inplace=True)
+        df_samples_temp.set_index(["time", "sample", "wintime"], inplace=True)
 
         # Append the list of samples
         list_samples.append(df_samples_temp)
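
For context on the two lines marked `# JJV` in `make_plotly` above, here is a minimal, self-contained sketch (not part of the patch) of the failure mode this commit fixes. The column layout mirrors `TimeSeries.dl_preds` — a `time` column, numeric class-probability columns, and a non-numeric `classifier` label — but the values are invented for illustration:

```python
import pandas as pd

# Toy stand-in for TimeSeries.dl_preds: predictions from two classifiers
# ("c1", "c2") at two time points; the probability values are made up.
dl_preds = pd.DataFrame(
    {
        "time": [100, 100, 200, 200],
        0: [0.9, 0.8, 0.4, 0.3],  # class-0 probability
        1: [0.1, 0.2, 0.6, 0.7],  # class-1 probability
        "classifier": ["c1", "c2", "c1", "c2"],  # non-numeric column
    }
)

# Old behaviour: averaging over a frame that still contains the string
# column 'classifier'. On recent pandas releases this raises a TypeError
# rather than silently dropping the non-numeric column as older versions did.
# dl_preds.groupby(["time"]).mean()  # TypeError: could not convert string to float

# Fixed behaviour, as in make_plotly above: drop the label column first,
# then take the ensemble average over classifiers at each time point.
df = dl_preds.drop(["classifier"], axis=1)
ens_avg = df.groupby(["time"]).mean().reset_index()
print(ens_avg)
```

Passing `numeric_only=True` to `.mean()` would arguably achieve the same result; dropping the column explicitly, as the patch does, keeps the intent visible and behaves consistently across pandas versions.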