jnsebgosselin · jnsebgosselin · Feb 22, 2022 · Feb 21, 2022 · Feb 21, 2022 · Feb 21, 2022
diff --git a/gwhat/HydroCalc2.py b/gwhat/HydroCalc2.py
@@ -638,18 +638,40 @@ def btn_show_mrc_isclicked(self):
     def btn_MRCalc_isClicked(self):
         if self.wldset is None:
             return
-
         QApplication.setOverrideCursor(Qt.WaitCursor)
 
         coeffs, hp, std_err, r_squared, rmse = calculate_mrc(
             self.time, self.water_lvl, self._mrc_period_xdata,
             self.MRC_type.currentIndex())
         A = coeffs.A
         B = coeffs.B
-
         print('MRC Parameters: A={}, B={}'.format(
-            'None' if pd.isnull(A) else '{:0.3f}'.format(A),
-            'None' if pd.isnull(B) else '{:0.3f}'.format(B)))
+            'None' if pd.isnull(A) else '{:0.3f}'.format(coeffs.A),
+            'None' if pd.isnull(B) else '{:0.3f}'.format(coeffs.B)))
+
+        # Store and plot the results.
+        print('Saving MRC interpretation in dataset...')
+        self.wldset.set_mrc(
+            A, B, self._mrc_period_xdata,
+            self.time, hp,
+            std_err, r_squared, rmse)
+
+        self.show_mrc_results()
+        self.btn_save_mrc.setEnabled(True)
+        self.draw_mrc()
+        self.sig_new_mrc.emit()
+
+        QApplication.restoreOverrideCursor()
+
+    def show_mrc_results(self):
+        """Show MRC results if any."""
+        if self.wldset is None:
+            self.MRC_results.setHtml('')
+            return
+
+        mrc_data = self.wldset.get_mrc()
+
+        coeffs = mrc_data['params']
         if pd.isnull(coeffs.A):
             text = ''
         else:
@@ -661,22 +683,20 @@ def btn_MRCalc_isClicked(self):
                 "h is the depth to water table in mbgs, "
                 "and A and B are the coefficients of the MRC.<br><br>"
                 "Goodness-of-fit statistics :<br>"
-                "RMSE = {:0.5f} m<br>"
-                "r² = {:0.5f}<br>"
-                "S = {:0.5f} m"
-                ).format(A, B, rmse, r_squared, std_err)
+                ).format(coeffs.A, coeffs.B)
+
+            fit_stats = {
+                'rmse': "RMSE = {} m<br>",
+                'r_squared': "r² = {}<br>",
+                'std_err': "S = {} m"}
+            for key, label in fit_stats.items():
+                value = mrc_data[key]
+                if value is None:
+                    text += label.format('N/A')
+                else:
+                    text += label.format('{:0.5f}'.format(value))
         self.MRC_results.setHtml(text)
 
-        # Store and plot the results.
-        print('Saving MRC interpretation in dataset...')
-        self.wldset.set_mrc(
-            A, B, self._mrc_period_xdata, self.time, hp)
-        self.btn_save_mrc.setEnabled(True)
-        self.draw_mrc()
-        self.sig_new_mrc.emit()
-
-        QApplication.restoreOverrideCursor()
-
     def load_mrc_from_wldset(self):
         """Load saved MRC results from the project hdf5 file."""
         if self.wldset is not None:
@@ -687,6 +707,7 @@ def load_mrc_from_wldset(self):
             self._mrc_period_xdata = []
             self._mrc_period_memory = [[], ]
             self.btn_save_mrc.setEnabled(False)
+        self.show_mrc_results()
         self.draw_mrc()
 
     def save_mrc_tofile(self, filename=None):

diff --git a/gwhat/projet/reader_projet.py b/gwhat/projet/reader_projet.py
@@ -14,6 +14,7 @@
 import os
 import os.path as osp
 from shutil import copyfile
+from collections import namedtuple
 
 # ---- Third party imports
 import h5py
@@ -519,13 +520,14 @@ def get_wlmeas(self):
         return grp['Time'][...], grp['WL'][...]
 
     # ---- Master Recession Curve
-    def set_mrc(self, A, B, peak_indx, time, recess):
+    def set_mrc(self, A, B, periods, time, recess,
+                std_err, r_squared, rmse):
         """Save the mrc results to the hdf5 project file."""
         self.dset['mrc/params'][:] = (A, B)
 
-        peak_indx = np.array(peak_indx).flatten()
-        self.dset['mrc/peak_indx'].resize(np.shape(peak_indx))
-        self.dset['mrc/peak_indx'][:] = np.array(peak_indx)
+        periods = np.array(periods).flatten()
+        self.dset['mrc/peak_indx'].resize(np.shape(periods))
+        self.dset['mrc/peak_indx'][:] = np.array(periods)
 
         self.dset['mrc/time'].resize(np.shape(time))
         self.dset['mrc/time'][:] = time
@@ -534,6 +536,9 @@ def set_mrc(self, A, B, peak_indx, time, recess):
         self.dset['mrc/recess'][:] = recess
 
         self.dset['mrc'].attrs['exists'] = 1
+        self.dset['mrc'].attrs['std_err'] = std_err
+        self.dset['mrc'].attrs['r_squared'] = r_squared
+        self.dset['mrc'].attrs['rmse'] = rmse
 
         self.dset.file.flush()
 
@@ -545,11 +550,19 @@ def get_mrc(self):
         peak_indx = list(map(
             tuple, peak_indx[:n * m].reshape((n, m))
             ))
-        return {
-            'params': self['mrc/params'].tolist(),
+        coeffs = self['mrc/params'].tolist()
+
+        mrc_data = {
+            'params': namedtuple('Coeffs', ['A', 'B'])(*coeffs),
             'peak_indx': peak_indx,
             'time': self['mrc/time'].copy(),
             'recess': self['mrc/recess'].copy()}
+        for key in ['std_err', 'r_squared', 'rmse']:
+            try:
+                mrc_data[key] = self.dset['mrc'].attrs[key]
+            except KeyError:
+                mrc_data[key] = None
+        return mrc_data
 
     def mrc_exists(self):
         """Return whether a mrc results is saved in the hdf5 project file."""
@@ -560,23 +573,32 @@ def save_mrc_tofile(self, filename):
         if not filename.lower().endswith('.csv'):
             filename += '.csv'
 
+        mrc_data = self.get_mrc()
+
         # Prepare the file header.
         fheader = []
         keys = ['Well', 'Well ID', 'Province', 'Latitude', 'Longitude',
                 'Elevation', 'Municipality']
         for key in keys:
             fheader.append([key, self[key]])
 
-        A, B = self['mrc/params']
         fheader.extend([
-            [''],
+            [],
             ['∂h/∂t = -A * h + B'],
-            ['A (1/day)', A],
-            ['B (m/day)', B],
-            ['RMSE (m)', calcul_rmse(self['WL'], self['mrc/recess'])],
-            [''],
-            ['Observed and Predicted Water Level'],
-            ])
+            ['A (1/day)', mrc_data['params'].A],
+            ['B (m/day)', mrc_data['params'].B],
+            []])
+
+        labels = ['RMSE (m)', 'R-squared', 'S (m)']
+        keys = ['rmse', 'r_squared', 'std_err']
+        for label, key in zip(labels, keys):
+            value = mrc_data[key]
+            if value is None:
+                fheader.append([label, "N/A"])
+            else:
+                fheader.append([label, "{:0.5f} m".format(value)])
+        fheader.append([])
+        fheader.append([['Observed and Predicted Water Level']])
 
         # Save the observed and simulated data to the CSV file.
         df = pd.DataFrame(

diff --git a/gwhat/projet/tests/test_project.py b/gwhat/projet/tests/test_project.py
@@ -10,13 +10,16 @@
 # ---- Standard library imports
 import os
 import os.path as osp
+from shutil import copyfile
 os.environ['GWHAT_PYTEST'] = 'True'
 
 # ---- Third party imports
 import numpy as np
 import pytest
+import h5py
 
 # ---- Local imports
+from gwhat import __rootdir__
 from gwhat.common.utils import save_content_to_file
 from gwhat.projet.reader_projet import ProjetReader
 from gwhat.projet.manager_projet import (
@@ -78,6 +81,17 @@ def projectfile(projectpath):
     return projectpath
 
 
+@pytest.fixture
+def oldprojectfile(projectpath, tmp_path):
+    """A path to a valid existing project file."""
+    fsrc = osp.join(
+        __rootdir__, 'projet', 'tests', 'test_project_gwhat_v0.5.0.gwt')
+    fdst = osp.join(
+        tmp_path, 'test_project_gwhat_v0.5.0.gwt')
+    copyfile(fsrc, fdst)
+    return fdst
+
+
 @pytest.fixture
 def bakfile(projectfile):
     """A path to a valid project backup file."""
@@ -405,15 +419,22 @@ def test_store_mrc(project, testfile):
     periods = [(2, 100), (30000, 33000)]
     recess_time = np.arange(0, 1000, 0.1).tolist()
     recess_wlvl = np.random.rand(len(recess_time)).tolist()
+    std_err = 0.002341
+    r_squared = 0.2314
+    rmse = 0.123445
 
-    wldset.set_mrc(A, B, periods, recess_time, recess_wlvl)
+    wldset.set_mrc(A, B, periods, recess_time, recess_wlvl,
+                   std_err, r_squared, rmse)
     assert wldset.mrc_exists() is True
 
     mrc_data = wldset.get_mrc()
-    assert mrc_data['params'] == [A, B]
+    assert mrc_data['params'] == (A, B)
     assert mrc_data['peak_indx'] == periods
     assert mrc_data['time'].tolist() == recess_time
     assert mrc_data['recess'].tolist() == recess_wlvl
+    assert mrc_data['std_err'] == std_err
+    assert mrc_data['r_squared'] == r_squared
+    assert mrc_data['rmse'] == rmse
 
 
 def test_mrc_backward_compatibility(project, testfile):
@@ -427,7 +448,7 @@ def test_mrc_backward_compatibility(project, testfile):
     See jnsebgosselin/gwhat#370.
     See jnsebgosselin/gwhat#377.
     """
-    # Add the dataset to the test project.
+    # Add the dataset to the project.
     project.add_wldset('dataset_test', WLDataFrame(testfile))
     wldset = project.get_wldset('dataset_test')
 
@@ -472,6 +493,30 @@ def test_mrc_backward_compatibility(project, testfile):
     assert mrc_data['recess'].tolist() == []
 
 
+def test_project_backward_compatibility(oldprojectfile):
+    """
+    Test that old project files are opened as expected in newer versions
+    of GWHAT.
+    """
+    # Make sure old recession periods are converted from indexes to xldates.
+    with h5py.File(oldprojectfile, mode='r') as hdf5file:
+        wldset = hdf5file['wldsets/PO01 - Calixa-Lavallée']
+        assert list(wldset['mrc/peak_indx']) == [6458, 8210, 13710, 15462,
+                                                 20596, 22022, 3036, 3892,
+                                                 5155, 5725]
+
+    project = ProjetReader(oldprojectfile)
+    wldset = project.get_wldset('PO01 - Calixa-Lavallée')
+    mrc_data = wldset.get_mrc()
+    assert mrc_data['peak_indx'] == [(41309.0, 41327.25),
+                                     (41384.541666666664, 41402.791666666664),
+                                     (41456.270833333336, 41471.125),
+                                     (41273.354166666664, 41282.270833333336),
+                                     (41295.427083333336, 41301.364583333336)]
+    assert mrc_data['std_err'] is None
+    assert mrc_data['r_squared'] is None
+    assert mrc_data['rmse'] is None
+
+
 if __name__ == "__main__":
-    pytest.main(['-x', __file__, '-v', '-rw',
-                 '-k', 'test_mrc_backward_compatibility'])
+    pytest.main(['-x', __file__, '-v', '-rw'])
diff --git a/gwhat/projet/tests/test_project_gwhat_v0.5.0.gwt b/gwhat/projet/tests/test_project_gwhat_v0.5.0.gwt
diff --git a/gwhat/recession/recession_calc.py b/gwhat/recession/recession_calc.py
@@ -15,8 +15,11 @@
 
 # ---- Third party imports
 import numpy as np
+import pandas as pd
 from scipy.optimize import curve_fit
-from scipy.stats import linregress
+
+# ---- Local imports
+from gwhat.utils.math import calc_goodness_of_fit
 
 
 def predict_recession(tdeltas: np.ndarray, B: float, A: float,
@@ -64,7 +67,7 @@ def calculate_mrc(t, h, periods: list(tuple), mrctype: int = 1):
     Parameters
     ----------
     t : np.ndarray
-        Time in days.
+        Time in days since epoch.
     h : np.ndarray
         Water levels in meters below the ground surface.
     periods : list(tuple)
@@ -90,39 +93,30 @@ def calculate_mrc(t, h, periods: list(tuple), mrctype: int = 1):
         The root mean square error (RMSE) of the water levels predicted
         with the MRC.
     """
-    # Define the indices corresponding to the beginning and end of each
-    # recession segment.
-    iend = []
-    istart = []
+    index = np.arange(len(t)).astype(int)
+
+    # Extract relevant information and data for each segment identified as
+    # a recession period.
+    tdeltas = np.array([])
+    h_seg = np.array([])
+    t_seg = np.array([])
+    index_seg = np.array([], dtype=int)
+    B0_seg = []
     for period in periods:
-        indx0 = np.argmin(np.abs(t - period[0]))
-        indx1 = np.argmin(np.abs(t - period[1]))
-        if np.abs(indx1 - indx0) < 2:
-            # Periods that are smaller than two time steps are ignored.
+        mask = (t >= min(period)) & (t <= max(period))
+        if len(mask) < 2:
+            # Segments that are smaller than two time steps are ignored.
             continue
-        istart.append(min(indx0, indx1))
-        iend.append(max(indx0, indx1))
 
-    # Define the indexes corresponding to the recession segments.
-    seg_indexes = []
-    seg_tstart = []
-    for i, j in zip(istart, iend):
-        seg_tstart.extend([t[i]] * (j - i + 1))
-        seg_indexes.extend(range(i, j + 1))
-
-    # Sort periods indexes and time start so that both series are
-    # monotically increasing.
-    argsort_idx = np.argsort(seg_indexes)
-    seg_indexes = np.array(seg_indexes)[argsort_idx]
-    seg_tstart = np.array(seg_tstart)[argsort_idx]
-
-    t_seg = t[seg_indexes]
-    h_seg = h[seg_indexes]
-    tdeltas = (t_seg - seg_tstart)
+        h_seg = np.append(h_seg, h[mask])
+        t_seg = np.append(t_seg, t[mask])
+        index_seg = np.append(index_seg, index[mask])
+        tdeltas = np.append(tdeltas, t[mask] - t[mask][0])
+        B0_seg.append((h[mask][-1] - h[mask][0]) / (t[mask][-1] - t[mask][0]))
 
-    # Define initial guess for the parameters .
+    # Define initial guess for the parameters.
     A0 = 0
-    B0 = np.mean((h[istart] - h[iend]) / (t[istart] - t[iend]))
+    B0 = np.mean(B0_seg)
 
     if mrctype == 1:  # exponential (dh/dt = -a*h + b)
         coeffs, coeffs_cov = curve_fit(
@@ -141,16 +135,10 @@ def calculate_mrc(t, h, periods: list(tuple), mrctype: int = 1):
         coeffs = namedtuple('Coeffs', ['B', 'A'])('B', 'A')(coeffs[0], 0)
 
     hp = np.zeros(len(t)) * np.nan
-    hp[seg_indexes] = predict_recession(
+    hp[index_seg] = predict_recession(
         tdeltas, A=coeffs[1], B=coeffs[0], h=h_seg)
 
-    # Calculate metrics of the fit.
-    # https://blog.minitab.com/en/adventures-in-statistics-2/regression-analysis-how-to-interpret-s-the-standard-error-of-the-regression
-    # https://statisticsbyjim.com/glossary/standard-error-regression/
-    slope, intercept, r_value, p_value, std_err = linregress(
-        h[seg_indexes], hp[seg_indexes])
-    r_squared = r_value**2
-    rmse = (np.nanmean((h - hp)**2))**0.5
+    std_err, r_squared, rmse = calc_goodness_of_fit(h, hp)
 
     return coeffs, hp, std_err, r_squared, rmse