Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PR: Add goodness-of-fit stats to MRC exported file and show MRC summary results on startup. #400

Merged
merged 14 commits into from
Feb 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 39 additions & 18 deletions gwhat/HydroCalc2.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,18 +638,40 @@ def btn_show_mrc_isclicked(self):
def btn_MRCalc_isClicked(self):
if self.wldset is None:
return

QApplication.setOverrideCursor(Qt.WaitCursor)

coeffs, hp, std_err, r_squared, rmse = calculate_mrc(
self.time, self.water_lvl, self._mrc_period_xdata,
self.MRC_type.currentIndex())
A = coeffs.A
B = coeffs.B

print('MRC Parameters: A={}, B={}'.format(
'None' if pd.isnull(A) else '{:0.3f}'.format(A),
'None' if pd.isnull(B) else '{:0.3f}'.format(B)))
'None' if pd.isnull(A) else '{:0.3f}'.format(coeffs.A),
'None' if pd.isnull(B) else '{:0.3f}'.format(coeffs.B)))

# Store and plot the results.
print('Saving MRC interpretation in dataset...')
self.wldset.set_mrc(
A, B, self._mrc_period_xdata,
self.time, hp,
std_err, r_squared, rmse)

self.show_mrc_results()
self.btn_save_mrc.setEnabled(True)
self.draw_mrc()
self.sig_new_mrc.emit()

QApplication.restoreOverrideCursor()

def show_mrc_results(self):
"""Show MRC results if any."""
if self.wldset is None:
self.MRC_results.setHtml('')
return

mrc_data = self.wldset.get_mrc()

coeffs = mrc_data['params']
if pd.isnull(coeffs.A):
text = ''
else:
Expand All @@ -661,22 +683,20 @@ def btn_MRCalc_isClicked(self):
"h is the depth to water table in mbgs, "
"and A and B are the coefficients of the MRC.<br><br>"
"Goodness-of-fit statistics :<br>"
"RMSE = {:0.5f} m<br>"
"r² = {:0.5f}<br>"
"S = {:0.5f} m"
).format(A, B, rmse, r_squared, std_err)
).format(coeffs.A, coeffs.B)

fit_stats = {
'rmse': "RMSE = {} m<br>",
'r_squared': "r² = {}<br>",
'std_err': "S = {} m"}
for key, label in fit_stats.items():
value = mrc_data[key]
if value is None:
text += label.format('N/A')
else:
text += label.format('{:0.5f}'.format(value))
self.MRC_results.setHtml(text)

# Store and plot the results.
print('Saving MRC interpretation in dataset...')
self.wldset.set_mrc(
A, B, self._mrc_period_xdata, self.time, hp)
self.btn_save_mrc.setEnabled(True)
self.draw_mrc()
self.sig_new_mrc.emit()

QApplication.restoreOverrideCursor()

def load_mrc_from_wldset(self):
"""Load saved MRC results from the project hdf5 file."""
if self.wldset is not None:
Expand All @@ -687,6 +707,7 @@ def load_mrc_from_wldset(self):
self._mrc_period_xdata = []
self._mrc_period_memory = [[], ]
self.btn_save_mrc.setEnabled(False)
self.show_mrc_results()
self.draw_mrc()

def save_mrc_tofile(self, filename=None):
Expand Down
50 changes: 36 additions & 14 deletions gwhat/projet/reader_projet.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import os
import os.path as osp
from shutil import copyfile
from collections import namedtuple

# ---- Third party imports
import h5py
Expand Down Expand Up @@ -519,13 +520,14 @@ def get_wlmeas(self):
return grp['Time'][...], grp['WL'][...]

# ---- Master Recession Curve
def set_mrc(self, A, B, peak_indx, time, recess):
def set_mrc(self, A, B, periods, time, recess,
std_err, r_squared, rmse):
"""Save the mrc results to the hdf5 project file."""
self.dset['mrc/params'][:] = (A, B)

peak_indx = np.array(peak_indx).flatten()
self.dset['mrc/peak_indx'].resize(np.shape(peak_indx))
self.dset['mrc/peak_indx'][:] = np.array(peak_indx)
periods = np.array(periods).flatten()
self.dset['mrc/peak_indx'].resize(np.shape(periods))
self.dset['mrc/peak_indx'][:] = np.array(periods)

self.dset['mrc/time'].resize(np.shape(time))
self.dset['mrc/time'][:] = time
Expand All @@ -534,6 +536,9 @@ def set_mrc(self, A, B, peak_indx, time, recess):
self.dset['mrc/recess'][:] = recess

self.dset['mrc'].attrs['exists'] = 1
self.dset['mrc'].attrs['std_err'] = std_err
self.dset['mrc'].attrs['r_squared'] = r_squared
self.dset['mrc'].attrs['rmse'] = rmse

self.dset.file.flush()

Expand All @@ -545,11 +550,19 @@ def get_mrc(self):
peak_indx = list(map(
tuple, peak_indx[:n * m].reshape((n, m))
))
return {
'params': self['mrc/params'].tolist(),
coeffs = self['mrc/params'].tolist()

mrc_data = {
'params': namedtuple('Coeffs', ['A', 'B'])(*coeffs),
'peak_indx': peak_indx,
'time': self['mrc/time'].copy(),
'recess': self['mrc/recess'].copy()}
for key in ['std_err', 'r_squared', 'rmse']:
try:
mrc_data[key] = self.dset['mrc'].attrs[key]
except KeyError:
mrc_data[key] = None
return mrc_data

def mrc_exists(self):
"""Return whether a mrc results is saved in the hdf5 project file."""
Expand All @@ -560,23 +573,32 @@ def save_mrc_tofile(self, filename):
if not filename.lower().endswith('.csv'):
filename += '.csv'

mrc_data = self.get_mrc()

# Prepare the file header.
fheader = []
keys = ['Well', 'Well ID', 'Province', 'Latitude', 'Longitude',
'Elevation', 'Municipality']
for key in keys:
fheader.append([key, self[key]])

A, B = self['mrc/params']
fheader.extend([
[''],
[],
['∂h/∂t = -A * h + B'],
['A (1/day)', A],
['B (m/day)', B],
['RMSE (m)', calcul_rmse(self['WL'], self['mrc/recess'])],
[''],
['Observed and Predicted Water Level'],
])
['A (1/day)', mrc_data['params'].A],
['B (m/day)', mrc_data['params'].B],
[]])

labels = ['RMSE (m)', 'R-squared', 'S (m)']
keys = ['rmse', 'r_squared', 'std_err']
for label, key in zip(labels, keys):
value = mrc_data[key]
if value is None:
fheader.append([label, "N/A"])
else:
fheader.append([label, "{:0.5f} m".format(value)])
fheader.append([])
fheader.append([['Observed and Predicted Water Level']])

# Save the observed and simulated data to the CSV file.
df = pd.DataFrame(
Expand Down
55 changes: 50 additions & 5 deletions gwhat/projet/tests/test_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,16 @@
# ---- Standard library imports
import os
import os.path as osp
from shutil import copyfile
os.environ['GWHAT_PYTEST'] = 'True'

# ---- Third party imports
import numpy as np
import pytest
import h5py

# ---- Local imports
from gwhat import __rootdir__
from gwhat.common.utils import save_content_to_file
from gwhat.projet.reader_projet import ProjetReader
from gwhat.projet.manager_projet import (
Expand Down Expand Up @@ -78,6 +81,17 @@ def projectfile(projectpath):
return projectpath


@pytest.fixture
def oldprojectfile(projectpath, tmp_path):
"""A path to a valid existing project file."""
fsrc = osp.join(
__rootdir__, 'projet', 'tests', 'test_project_gwhat_v0.5.0.gwt')
fdst = osp.join(
tmp_path, 'test_project_gwhat_v0.5.0.gwt')
copyfile(fsrc, fdst)
return fdst


@pytest.fixture
def bakfile(projectfile):
"""A path to a valid project backup file."""
Expand Down Expand Up @@ -405,15 +419,22 @@ def test_store_mrc(project, testfile):
periods = [(2, 100), (30000, 33000)]
recess_time = np.arange(0, 1000, 0.1).tolist()
recess_wlvl = np.random.rand(len(recess_time)).tolist()
std_err = 0.002341
r_squared = 0.2314
rmse = 0.123445

wldset.set_mrc(A, B, periods, recess_time, recess_wlvl)
wldset.set_mrc(A, B, periods, recess_time, recess_wlvl,
std_err, r_squared, rmse)
assert wldset.mrc_exists() is True

mrc_data = wldset.get_mrc()
assert mrc_data['params'] == [A, B]
assert mrc_data['params'] == (A, B)
assert mrc_data['peak_indx'] == periods
assert mrc_data['time'].tolist() == recess_time
assert mrc_data['recess'].tolist() == recess_wlvl
assert mrc_data['std_err'] == std_err
assert mrc_data['r_squared'] == r_squared
assert mrc_data['rmse'] == rmse


def test_mrc_backward_compatibility(project, testfile):
Expand All @@ -427,7 +448,7 @@ def test_mrc_backward_compatibility(project, testfile):
See jnsebgosselin/gwhat#370.
See jnsebgosselin/gwhat#377.
"""
# Add the dataset to the test project.
# Add the dataset to the project.
project.add_wldset('dataset_test', WLDataFrame(testfile))
wldset = project.get_wldset('dataset_test')

Expand Down Expand Up @@ -472,6 +493,30 @@ def test_mrc_backward_compatibility(project, testfile):
assert mrc_data['recess'].tolist() == []


def test_project_backward_compatibility(oldprojectfile):
"""
Test that old project files are opened as expected in newer versions
of GWHAT.
"""
# Make sure old recession periods are converted from indexes to xldates.
with h5py.File(oldprojectfile, mode='r') as hdf5file:
wldset = hdf5file['wldsets/PO01 - Calixa-Lavallée']
assert list(wldset['mrc/peak_indx']) == [6458, 8210, 13710, 15462,
20596, 22022, 3036, 3892,
5155, 5725]

project = ProjetReader(oldprojectfile)
wldset = project.get_wldset('PO01 - Calixa-Lavallée')
mrc_data = wldset.get_mrc()
assert mrc_data['peak_indx'] == [(41309.0, 41327.25),
(41384.541666666664, 41402.791666666664),
(41456.270833333336, 41471.125),
(41273.354166666664, 41282.270833333336),
(41295.427083333336, 41301.364583333336)]
assert mrc_data['std_err'] is None
assert mrc_data['r_squared'] is None
assert mrc_data['rmse'] is None


if __name__ == "__main__":
pytest.main(['-x', __file__, '-v', '-rw',
'-k', 'test_mrc_backward_compatibility'])
pytest.main(['-x', __file__, '-v', '-rw'])
Binary file added gwhat/projet/tests/test_project_gwhat_v0.5.0.gwt
Binary file not shown.
64 changes: 26 additions & 38 deletions gwhat/recession/recession_calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@

# ---- Third party imports
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
from scipy.stats import linregress

# ---- Local imports
from gwhat.utils.math import calc_goodness_of_fit


def predict_recession(tdeltas: np.ndarray, B: float, A: float,
Expand Down Expand Up @@ -64,7 +67,7 @@ def calculate_mrc(t, h, periods: list(tuple), mrctype: int = 1):
Parameters
----------
t : np.ndarray
Time in days.
Time in days since epoch.
h : np.ndarray
Water levels in meters below the ground surface.
periods : list(tuple)
Expand All @@ -90,39 +93,30 @@ def calculate_mrc(t, h, periods: list(tuple), mrctype: int = 1):
The root mean square error (RMSE) of the water levels predicted
with the MRC.
"""
# Define the indices corresponding to the beginning and end of each
# recession segment.
iend = []
istart = []
index = np.arange(len(t)).astype(int)

# Extract relevant information and data for each segment identified as
# a recession period.
tdeltas = np.array([])
h_seg = np.array([])
t_seg = np.array([])
index_seg = np.array([], dtype=int)
B0_seg = []
for period in periods:
indx0 = np.argmin(np.abs(t - period[0]))
indx1 = np.argmin(np.abs(t - period[1]))
if np.abs(indx1 - indx0) < 2:
# Periods that are smaller than two time steps are ignored.
mask = (t >= min(period)) & (t <= max(period))
if len(mask) < 2:
# Segments that are smaller than two time steps are ignored.
continue
istart.append(min(indx0, indx1))
iend.append(max(indx0, indx1))

# Define the indexes corresponding to the recession segments.
seg_indexes = []
seg_tstart = []
for i, j in zip(istart, iend):
seg_tstart.extend([t[i]] * (j - i + 1))
seg_indexes.extend(range(i, j + 1))

# Sort periods indexes and time start so that both series are
# monotically increasing.
argsort_idx = np.argsort(seg_indexes)
seg_indexes = np.array(seg_indexes)[argsort_idx]
seg_tstart = np.array(seg_tstart)[argsort_idx]

t_seg = t[seg_indexes]
h_seg = h[seg_indexes]
tdeltas = (t_seg - seg_tstart)
h_seg = np.append(h_seg, h[mask])
t_seg = np.append(t_seg, t[mask])
index_seg = np.append(index_seg, index[mask])
tdeltas = np.append(tdeltas, t[mask] - t[mask][0])
B0_seg.append((h[mask][-1] - h[mask][0]) / (t[mask][-1] - t[mask][0]))

# Define initial guess for the parameters .
# Define initial guess for the parameters.
A0 = 0
B0 = np.mean((h[istart] - h[iend]) / (t[istart] - t[iend]))
B0 = np.mean(B0_seg)

if mrctype == 1: # exponential (dh/dt = -a*h + b)
coeffs, coeffs_cov = curve_fit(
Expand All @@ -141,16 +135,10 @@ def calculate_mrc(t, h, periods: list(tuple), mrctype: int = 1):
coeffs = namedtuple('Coeffs', ['B', 'A'])('B', 'A')(coeffs[0], 0)

hp = np.zeros(len(t)) * np.nan
hp[seg_indexes] = predict_recession(
hp[index_seg] = predict_recession(
tdeltas, A=coeffs[1], B=coeffs[0], h=h_seg)

# Calculate metrics of the fit.
# https://blog.minitab.com/en/adventures-in-statistics-2/regression-analysis-how-to-interpret-s-the-standard-error-of-the-regression
# https://statisticsbyjim.com/glossary/standard-error-regression/
slope, intercept, r_value, p_value, std_err = linregress(
h[seg_indexes], hp[seg_indexes])
r_squared = r_value**2
rmse = (np.nanmean((h - hp)**2))**0.5
std_err, r_squared, rmse = calc_goodness_of_fit(h, hp)

return coeffs, hp, std_err, r_squared, rmse

Expand Down
Loading