From 0846b040d403d2bb31e2328749324aee12fc2f42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: Mon, 8 Apr 2019 10:32:15 -0400 Subject: [PATCH 01/28] Improve print statement to show the file name. --- gwhat/projet/reader_waterlvl.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gwhat/projet/reader_waterlvl.py b/gwhat/projet/reader_waterlvl.py index 602c20982..7fe95f08f 100644 --- a/gwhat/projet/reader_waterlvl.py +++ b/gwhat/projet/reader_waterlvl.py @@ -29,7 +29,8 @@ def open_water_level_datafile(filename): if ext not in FILE_EXTS: raise ValueError("Supported file format are: ", FILE_EXTS) else: - print('Loading waterlvl time-series from %s file...' % ext[1:]) + print('Loading waterlvl time-series from "%s"...' % + osp.basename(filename)) if ext == '.csv': with open(filename, 'r', encoding='utf8') as f: From 0bbbba2aea1834688a103ef1f72a34857add06c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: Mon, 8 Apr 2019 10:33:18 -0400 Subject: [PATCH 02/28] Refactor read_water_level_datafile function --- gwhat/projet/reader_waterlvl.py | 176 +++++++++++++++----------------- 1 file changed, 81 insertions(+), 95 deletions(-) diff --git a/gwhat/projet/reader_waterlvl.py b/gwhat/projet/reader_waterlvl.py index 7fe95f08f..24f4348b0 100644 --- a/gwhat/projet/reader_waterlvl.py +++ b/gwhat/projet/reader_waterlvl.py @@ -8,6 +8,7 @@ # ---- Standard library imports +import re import os import os.path as osp import numpy as np @@ -15,6 +16,8 @@ import csv from collections.abc import Mapping +# ---- Third party imports +import pandas as pd # ---- Local library imports from gwhat.common.utils import save_content_to_csv @@ -45,110 +48,93 @@ def open_water_level_datafile(filename): def read_water_level_datafile(filename): - """Load a water level dataset from a csv or Excel file.""" - data = open_water_level_datafile(filename) - if data is None: - return None - - df = {'filename': filename, - 'Well': '', - 
'Well ID': '', - 'Province': '', - 'Latitude': 0, - 'Longitude': 0, - 'Elevation': 0, - 'Municipality': '', - 'Time': np.array([]), - 'WL': np.array([]), - 'BP': np.array([]), - 'ET': np.array([])} - - # ---- Read the Header - for row, line in enumerate(data): - if not len(line): - continue + """ + Load a water level dataset from a csv or an Excel file and format the + data in a Pandas dataframe with the dates used as index. + """ + reader = open_water_level_datafile(filename) - try: - label = line[0].lower().replace(":", "").replace("=", "").strip() - except AttributeError: + # Fetch the header. + header = {'name': '', 'id': '', 'province': '', 'municipality': '', + 'latitude': 0, 'longitude': 0, 'elevation': 0} + for i, row in enumerate(reader): + if not len(row): continue - - if label == 'well name': - df['Well'] = str(line[1]) - elif label == 'well id': - df['Well ID'] = str(line[1]) - elif label == 'province': - df['Province'] = str(line[1]) - elif label == 'latitude': - try: - df['Latitude'] = float(line[1]) - except ValueError: - print('Wrong format for entry "Latitude".') - df['Latitude'] = 0 - elif label == 'longitude': - try: - df['Longitude'] = float(line[1]) - except ValueError: - print('Wrong format for entry "Longitude".') - df['Longitude'] = 0 - elif label in ['altitude', 'elevation']: - try: - df['Elevation'] = float(line[1]) - except ValueError: - print('Wrong format for entry "Altitude".') - df['Elevation'] = 0 - elif label == 'municipality': - df['Municipality'] = str(line[1]) - elif label == 'date': - column_labels = line - break + label = str(row[0]) + for key in header.keys(): + regex = r'(? 
Date: Mon, 8 Apr 2019 10:55:39 -0400 Subject: [PATCH 03/28] Correctly convert data to numeric with pandas --- gwhat/projet/reader_waterlvl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gwhat/projet/reader_waterlvl.py b/gwhat/projet/reader_waterlvl.py index 24f4348b0..53cf292df 100644 --- a/gwhat/projet/reader_waterlvl.py +++ b/gwhat/projet/reader_waterlvl.py @@ -104,7 +104,7 @@ def read_water_level_datafile(filename): # Format the data to floats. for colname in ['BP(m)', 'WL(mbgs)', 'ET(nm/s2)']: if colname in dataf.columns: - dataf[colname] = dataf[colname].astype('float64', errors='ignore') + dataf[colname] = pd.to_numeric(dataf[colname], errors='coerce') # Format the dates to datetimes. try: From c2edc0dd8c106f5b288eb3670f475c0300e9af40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: Mon, 8 Apr 2019 16:49:46 -0400 Subject: [PATCH 04/28] Define some global var --- gwhat/projet/reader_waterlvl.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/gwhat/projet/reader_waterlvl.py b/gwhat/projet/reader_waterlvl.py index 53cf292df..0ab08abd0 100644 --- a/gwhat/projet/reader_waterlvl.py +++ b/gwhat/projet/reader_waterlvl.py @@ -8,12 +8,16 @@ # ---- Standard library imports +from copy import deepcopy import re import os import os.path as osp import numpy as np import xlrd +from xlrd.xldate import xldate_from_datetime_tuple +from xlrd import xldate_as_tuple import csv +from collections import OrderedDict from collections.abc import Mapping # ---- Third party imports @@ -26,6 +30,30 @@ # ---- Read and Load Water Level Datafiles +INDEX = 'Time' + +COL_REGEX = OrderedDict([ + (INDEX, r'(date|time|datetime)'), + ('BP', r'(bp|baro|patm)'), + ('WL', r'(wl|waterlevels)'), + ('ET', r'(et|earthtides)') + ]) +COLUMNS = list(COL_REGEX.keys()) + +HEADER = {'Well Name': '', 'Well ID': '', + 'Province': '', 'Municipality': '', + 'Latitude': 0, 'Longitude': 0, 'Elevation': 0} +HEADER_REGEX = { + 'Well 
Name': r'(? Date: Mon, 8 Apr 2019 16:50:09 -0400 Subject: [PATCH 05/28] Check if filename is valid --- gwhat/projet/reader_waterlvl.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gwhat/projet/reader_waterlvl.py b/gwhat/projet/reader_waterlvl.py index 0ab08abd0..1357eb008 100644 --- a/gwhat/projet/reader_waterlvl.py +++ b/gwhat/projet/reader_waterlvl.py @@ -80,6 +80,8 @@ def read_water_level_datafile(filename): Load a water level dataset from a csv or an Excel file and format the data in a Pandas dataframe with the dates used as index. """ + if filename is None or not osp.exists(filename): + return None reader = open_water_level_datafile(filename) # Fetch the header. From 93f74db13e7581be514e99436576ad405806f6a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: Mon, 8 Apr 2019 16:50:34 -0400 Subject: [PATCH 06/28] Improve header parsing --- gwhat/projet/reader_waterlvl.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/gwhat/projet/reader_waterlvl.py b/gwhat/projet/reader_waterlvl.py index 1357eb008..f7e1cf706 100644 --- a/gwhat/projet/reader_waterlvl.py +++ b/gwhat/projet/reader_waterlvl.py @@ -84,17 +84,14 @@ def read_water_level_datafile(filename): return None reader = open_water_level_datafile(filename) - # Fetch the header. - header = {'name': '', 'id': '', 'province': '', 'municipality': '', - 'latitude': 0, 'longitude': 0, 'elevation': 0} + # Fetch the metadata from the header. + header = deepcopy(HEADER) for i, row in enumerate(reader): if not len(row): continue - label = str(row[0]) - for key in header.keys(): - regex = r'(? 
Date: Mon, 8 Apr 2019 16:50:58 -0400 Subject: [PATCH 07/28] Improve data parsing --- gwhat/projet/reader_waterlvl.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/gwhat/projet/reader_waterlvl.py b/gwhat/projet/reader_waterlvl.py index f7e1cf706..89ea04fa8 100644 --- a/gwhat/projet/reader_waterlvl.py +++ b/gwhat/projet/reader_waterlvl.py @@ -109,49 +109,47 @@ def read_water_level_datafile(filename): # Cast the data into a Pandas dataframe. dataf = pd.DataFrame(reader[i+1:], columns=row) - colnames = {'Date': 'Date', - 'BP': 'BP(m)', - 'WL': 'WL(mbgs)', - 'ET': 'ET(nm/s2)'} for column in dataf.columns: - for key, name in colnames.items(): - if key in column: + for name, regex in COL_REGEX.items(): + str_ = column.replace(" ", "").replace("_", "") + if re.search(regex, str_, re.IGNORECASE): if name != column: dataf.rename(columns={column: name}, inplace=True) break else: del dataf[column] - # Check that Date and WL(mbgs) date were found in the datafile. - for colname in ['Date', 'WL(mbgs)']: + # Check that Time and WL data were found in the datafile. + for colname in [INDEX, 'WL']: if colname not in dataf.columns: print('ERROR: no "%s" data found in the datafile.' % colname) return None # Format the data to floats. - for colname in ['BP(m)', 'WL(mbgs)', 'ET(nm/s2)']: + for colname in COLUMNS[1:]: if colname in dataf.columns: dataf[colname] = pd.to_numeric(dataf[colname], errors='coerce') # Format the dates to datetimes. try: - # Assume first that the dates are stored in the Excel numeric format. - datetimes = dataf['Date'].astype('float64') + # We assume first that the dates are stored in the + # Excel numeric format. + datetimes = dataf['Time'].astype('float64', errors='raise') datetimes = pd.to_datetime(datetimes.apply( lambda date: xlrd.xldate.xldate_as_datetime(date, 0))) except ValueError: try: # Try converting the strings to datetime objects. 
datetimes = pd.to_datetime( - dataf['Date'], format="%Y-%m-%d %H:%M:%S") + dataf['Time'], format="%Y-%m-%d %H:%M:%S") except ValueError: print('ERROR: the dates are not formatted correctly.') return None finally: - dataf['Date'] = datetimes - dataf.set_index(['Date'], drop=True, inplace=True) + dataf['Time'] = datetimes + dataf.set_index(['Time'], drop=True, inplace=True) - # Check for duplicate dates. + # Check and remove duplicate data. if any(dataf.index.duplicated(keep='first')): print("WARNING: Duplicated values were found in the datafile. " "Only the first entries for each date were kept.") From 0da99cb728d4827d93f8f73cb4c8c385f8fcaa54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: Mon, 8 Apr 2019 16:51:16 -0400 Subject: [PATCH 08/28] Add EmptyWLDataset class --- gwhat/projet/reader_waterlvl.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/gwhat/projet/reader_waterlvl.py b/gwhat/projet/reader_waterlvl.py index 89ea04fa8..5bcca364f 100644 --- a/gwhat/projet/reader_waterlvl.py +++ b/gwhat/projet/reader_waterlvl.py @@ -130,7 +130,7 @@ def read_water_level_datafile(filename): if colname in dataf.columns: dataf[colname] = pd.to_numeric(dataf[colname], errors='coerce') - # Format the dates to datetimes. + # Format the dates to datetimes and set it as index. try: # We assume first that the dates are stored in the # Excel numeric format. @@ -277,6 +277,12 @@ def generate_HTML_table(name, lat, lon, alt, mun): return table +class EmptyWLDataset(pd.DataFrame): + def __init__(self): + super().__init__(np.empty((0, len(COLUMNS))), columns=COLUMNS) + self.set_index([INDEX], drop=True, inplace=True) + + class WLDataFrameBase(Mapping): """ A water level data frame base class. 
From 35eb19b3a8c7eb4cc0377e62e66674c1d8d09d6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: Mon, 8 Apr 2019 16:52:03 -0400 Subject: [PATCH 09/28] Codestyle --- gwhat/projet/reader_waterlvl.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/gwhat/projet/reader_waterlvl.py b/gwhat/projet/reader_waterlvl.py index 5bcca364f..eccdcb06a 100644 --- a/gwhat/projet/reader_waterlvl.py +++ b/gwhat/projet/reader_waterlvl.py @@ -251,9 +251,6 @@ def load_waterlvl_measures(filename, well): return time_mes, wl_mes -# ========================================================================= - - def generate_HTML_table(name, lat, lon, alt, mun): FIELDS = [['Well Name', name], From cd0b4ab252bf6b0b36b767e330152476dafa1486 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: Tue, 9 Apr 2019 12:17:10 -0400 Subject: [PATCH 10/28] Rename attrs Well Name to Well --- gwhat/projet/reader_waterlvl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gwhat/projet/reader_waterlvl.py b/gwhat/projet/reader_waterlvl.py index eccdcb06a..0ea47d3d1 100644 --- a/gwhat/projet/reader_waterlvl.py +++ b/gwhat/projet/reader_waterlvl.py @@ -40,11 +40,11 @@ ]) COLUMNS = list(COL_REGEX.keys()) -HEADER = {'Well Name': '', 'Well ID': '', +HEADER = {'Well': '', 'Well ID': '', 'Province': '', 'Municipality': '', 'Latitude': 0, 'Longitude': 0, 'Elevation': 0} HEADER_REGEX = { - 'Well Name': r'(? 
Date: Tue, 9 Apr 2019 12:17:49 -0400 Subject: [PATCH 11/28] Move EmptyWLDataset up --- gwhat/projet/reader_waterlvl.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/gwhat/projet/reader_waterlvl.py b/gwhat/projet/reader_waterlvl.py index 0ea47d3d1..d23ad06ec 100644 --- a/gwhat/projet/reader_waterlvl.py +++ b/gwhat/projet/reader_waterlvl.py @@ -54,6 +54,11 @@ } +class EmptyWLDataset(pd.DataFrame): + def __init__(self): + super().__init__(np.empty((0, len(COLUMNS))), columns=COLUMNS) + self.set_index([INDEX], drop=True, inplace=True) + def open_water_level_datafile(filename): """Open a water level data file and return the data.""" root, ext = os.path.splitext(filename) @@ -274,12 +279,6 @@ def generate_HTML_table(name, lat, lon, alt, mun): return table -class EmptyWLDataset(pd.DataFrame): - def __init__(self): - super().__init__(np.empty((0, len(COLUMNS))), columns=COLUMNS) - self.set_index([INDEX], drop=True, inplace=True) - - class WLDataFrameBase(Mapping): """ A water level data frame base class. 
From f59d5683fa2024ee10f4a1a3d8b04ef826de62a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: Tue, 9 Apr 2019 12:23:53 -0400 Subject: [PATCH 12/28] Move some logics to a new WLDataset class --- gwhat/projet/reader_waterlvl.py | 102 ++++++++++++++++++-------------- 1 file changed, 56 insertions(+), 46 deletions(-) diff --git a/gwhat/projet/reader_waterlvl.py b/gwhat/projet/reader_waterlvl.py index d23ad06ec..97e26be9b 100644 --- a/gwhat/projet/reader_waterlvl.py +++ b/gwhat/projet/reader_waterlvl.py @@ -59,6 +59,60 @@ def __init__(self): super().__init__(np.empty((0, len(COLUMNS))), columns=COLUMNS) self.set_index([INDEX], drop=True, inplace=True) + +class WLDataset(EmptyWLDataset): + def __init__(self, data, columns): + super().__init__() + df = pd.DataFrame(data, columns=columns) + for column in columns: + for colname, regex in COL_REGEX.items(): + str_ = column.replace(" ", "").replace("_", "") + if re.search(regex, str_, re.IGNORECASE): + self[colname] = df[column].copy() + break + del df + self.format_numeric_data() + self.format_datetime_data() + + def format_numeric_data(self): + """Format the data to floats type.""" + for colname in COLUMNS: + if colname == INDEX: + pass + elif colname in self.columns: + self[colname] = pd.to_numeric(self[colname], errors='coerce') + else: + print('WARNING: no "%s" data found in the datafile.' % colname) + + def format_datetime_data(self): + """Format the dates to datetimes and set it as index.""" + if INDEX in self.columns: + try: + # We assume first that the dates are stored in the + # Excel numeric format. + datetimes = self['Time'].astype('float64', errors='raise') + datetimes = pd.to_datetime(datetimes.apply( + lambda date: xlrd.xldate.xldate_as_datetime(date, 0))) + except ValueError: + try: + # Try converting the strings to datetime objects. 
+ datetimes = pd.to_datetime( + self['Time'], format="%Y-%m-%d %H:%M:%S") + except ValueError: + print('WARNING: the dates are not formatted correctly.') + finally: + self['Time'] = datetimes + self.set_index(['Time'], drop=True, inplace=True) + else: + print('WARNING: no "Time" data found in the datafile.') + + # Check and remove duplicate data. + if any(self.index.duplicated(keep='first')): + print("WARNING: Duplicated values were found in the datafile. " + "Only the first entries for each date were kept.") + self.drop_duplicates(keep='first', inplace=True) + + def open_water_level_datafile(filename): """Open a water level data file and return the data.""" root, ext = os.path.splitext(filename) @@ -113,56 +167,12 @@ def read_water_level_datafile(filename): return None # Cast the data into a Pandas dataframe. - dataf = pd.DataFrame(reader[i+1:], columns=row) - for column in dataf.columns: - for name, regex in COL_REGEX.items(): - str_ = column.replace(" ", "").replace("_", "") - if re.search(regex, str_, re.IGNORECASE): - if name != column: - dataf.rename(columns={column: name}, inplace=True) - break - else: - del dataf[column] - - # Check that Time and WL data were found in the datafile. - for colname in [INDEX, 'WL']: - if colname not in dataf.columns: - print('ERROR: no "%s" data found in the datafile.' % colname) - return None - - # Format the data to floats. - for colname in COLUMNS[1:]: - if colname in dataf.columns: - dataf[colname] = pd.to_numeric(dataf[colname], errors='coerce') - - # Format the dates to datetimes and set it as index. - try: - # We assume first that the dates are stored in the - # Excel numeric format. - datetimes = dataf['Time'].astype('float64', errors='raise') - datetimes = pd.to_datetime(datetimes.apply( - lambda date: xlrd.xldate.xldate_as_datetime(date, 0))) - except ValueError: - try: - # Try converting the strings to datetime objects. 
- datetimes = pd.to_datetime( - dataf['Time'], format="%Y-%m-%d %H:%M:%S") - except ValueError: - print('ERROR: the dates are not formatted correctly.') - return None - finally: - dataf['Time'] = datetimes - dataf.set_index(['Time'], drop=True, inplace=True) - - # Check and remove duplicate data. - if any(dataf.index.duplicated(keep='first')): - print("WARNING: Duplicated values were found in the datafile. " - "Only the first entries for each date were kept.") - dataf = dataf[~dataf.index.duplicated(keep='first')] + dataf = WLDataset(reader[i+1:], columns=row) # Add the metadata to the dataframe. for key in header.keys(): setattr(dataf, key, header[key]) + dataf.filename = filename return dataf From bd317cbe62ffa6687b01d3b2237b14a733dc7c13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: Tue, 9 Apr 2019 12:25:11 -0400 Subject: [PATCH 13/28] implement the new dataset in WLDataFrameBase --- gwhat/projet/reader_waterlvl.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/gwhat/projet/reader_waterlvl.py b/gwhat/projet/reader_waterlvl.py index 97e26be9b..f11234948 100644 --- a/gwhat/projet/reader_waterlvl.py +++ b/gwhat/projet/reader_waterlvl.py @@ -298,8 +298,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.dset = None self._undo_stack = [] - self._waterlevels = np.array([]) - self._datetimes = np.array([]) + self._dataf = EmptyWLDataset() def __load_dataset__(self): """Loads the dataset and save it in a store.""" @@ -314,10 +313,29 @@ def __setitem__(self, key, value): def __iter__(self): return NotImplementedError - # ---- Water levels + # ---- Attributes @property - def datetimes(self): - return self._datetimes + def data(self): + return self._dataf + + @property + def xldates(self): + """ + Return a numpy array containing the Excel numerical dates + corresponding to the dates of the dataset. 
+ """ + return np.array( + [xldate_from_datetime_tuple(date.timetuple()[:6], 0) for + date in self._dataf.index] + ) + + @property + def dates(self): + return self.data.index.values + + @property + def strftime(self): + return self.data.index.strftime("%Y-%m-%dT%H:%M:%S").values.tolist() @property def waterlevels(self): From 9d3bbeee69e78503bb9445b556ff8eaefa346c60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: Tue, 9 Apr 2019 12:25:42 -0400 Subject: [PATCH 14/28] Access WL data fom panda dataframe directly --- gwhat/projet/reader_waterlvl.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/gwhat/projet/reader_waterlvl.py b/gwhat/projet/reader_waterlvl.py index f11234948..1f27fae1a 100644 --- a/gwhat/projet/reader_waterlvl.py +++ b/gwhat/projet/reader_waterlvl.py @@ -339,8 +339,9 @@ def strftime(self): @property def waterlevels(self): - return self._waterlevels + return self.data['WL'].values + # ---- Versionning @property def has_uncommited_changes(self): """" @@ -355,8 +356,8 @@ def commit(self): def undo(self): """Undo the last changes made to the water level data.""" if self.has_uncommited_changes: - change = self._undo_stack.pop(-1) - self._waterlevels[change[0]] = change[1] + changes = self._undo_stack.pop(-1) + self._dataf['WL'][changes.index] = changes def clear_all_changes(self): """ @@ -370,7 +371,7 @@ def delete_waterlevels_at(self, indexes): """Delete the water level data at the specified indexes.""" if len(indexes): self._add_to_undo_stack(indexes) - self._waterlevels[indexes] = np.nan + self._dataf['WL'].iloc[indexes] = np.nan def _add_to_undo_stack(self, indexes): """ @@ -379,7 +380,7 @@ def _add_to_undo_stack(self, indexes): changes made to the water level data before commiting them. 
""" if len(indexes): - self._undo_stack.append((indexes, self.waterlevels[indexes])) + self._undo_stack.append(self._dataf['WL'].iloc[indexes].copy()) class WLDataFrame(WLDataFrameBase): From b5a72b8fc25bb04f8a2f4e17eb84c9fb24cc1f6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: Tue, 9 Apr 2019 12:26:03 -0400 Subject: [PATCH 15/28] Refactor __getitem__ of WLDataFrame --- gwhat/projet/reader_waterlvl.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/gwhat/projet/reader_waterlvl.py b/gwhat/projet/reader_waterlvl.py index 1f27fae1a..db74b690a 100644 --- a/gwhat/projet/reader_waterlvl.py +++ b/gwhat/projet/reader_waterlvl.py @@ -384,7 +384,10 @@ def _add_to_undo_stack(self, indexes): class WLDataFrame(WLDataFrameBase): - """A water level dataset container that loads its data from a file.""" + """ + A water level dataset container that loads its data from a csv + or an Excel file. + """ def __init__(self, filename, *args, **kwargs): super().__init__(*args, **kwargs) @@ -392,6 +395,15 @@ def __init__(self, filename, *args, **kwargs): def __getitem__(self, key): """Returns the value saved in the store at key.""" + if key == INDEX: + return self.strftime + elif key in COLUMNS: + return self.data[key].values + elif key in list(HEADER.keys()): + return getattr(self._dataf, key, HEADER[key]) + elif key == 'filename': + return self._dataf.filename + return self.dset.__getitem__(key) def __load_dataset__(self, filename): From e5657d048b1b9a751bbdd92ab1a5ee26b04e8c67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: Tue, 9 Apr 2019 12:26:17 -0400 Subject: [PATCH 16/28] WLDataFrame: Refactor __load_dataset__ --- gwhat/projet/reader_waterlvl.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/gwhat/projet/reader_waterlvl.py b/gwhat/projet/reader_waterlvl.py index db74b690a..ce3051c8d 100644 --- a/gwhat/projet/reader_waterlvl.py +++ b/gwhat/projet/reader_waterlvl.py 
@@ -408,9 +408,7 @@ def __getitem__(self, key): def __load_dataset__(self, filename): """Loads the dataset from a file and saves it in the store.""" - self.dset = read_water_level_datafile(filename) - self._waterlevels = self.dset['WL'] - self._datetimes = self.dset['Time'] + self._dataf = read_water_level_datafile(filename) if __name__ == "__main__": From b2bf494ec19801bc01a31b90df6e66a17b8deb8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: Tue, 9 Apr 2019 12:27:03 -0400 Subject: [PATCH 17/28] WLCalc: refactor time property --- gwhat/HydroCalc2.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gwhat/HydroCalc2.py b/gwhat/HydroCalc2.py index ba3bb95cf..9fda09237 100644 --- a/gwhat/HydroCalc2.py +++ b/gwhat/HydroCalc2.py @@ -519,7 +519,7 @@ def water_lvl(self): @property def time(self): - return np.array([]) if self.wldset is None else self.wldset.datetimes + return np.array([]) if self.wldset is None else self.wldset.xldates @property def wldset(self): @@ -1228,8 +1228,8 @@ def _draw_obs_wl(self, draw=True): self.clear_selected_wl(draw=False) if self.wldset is not None: self._obs_wl_plt.set_data( - self.wldset.datetimes + (self.dt4xls2mpl * self.dformat), - self.wldset.waterlevels) + self.time + (self.dt4xls2mpl * self.dformat), + self.water_lvl) self._obs_wl_plt.set_visible(self.wldset is not None) if draw: self.draw() From d4f5bf1405d756ccd8bf191574233f01e160b59f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: Tue, 9 Apr 2019 12:28:08 -0400 Subject: [PATCH 18/28] Codestyle --- gwhat/projet/reader_projet.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/gwhat/projet/reader_projet.py b/gwhat/projet/reader_projet.py index 42773cd39..07d565421 100644 --- a/gwhat/projet/reader_projet.py +++ b/gwhat/projet/reader_projet.py @@ -124,7 +124,6 @@ def backup_project_file(self): print('done') return True - # ---- Project Properties @property def name(self): @@ -183,7 +182,6 @@ def 
lon(self, x): self.db.attrs['longitude'] = x # ---- Water Levels Dataset Handlers - @property def wldsets(self): """ @@ -280,7 +278,6 @@ def del_wldset(self, name): self.db.flush() # ---- Weather Dataset Handlers - @property def wxdsets(self): """ From eebaf03b462cab88d03fecae14e942b1e3cc8e0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: Tue, 9 Apr 2019 12:29:11 -0400 Subject: [PATCH 19/28] Refactor ProjetReader and WLDataFrameHDF5 --- gwhat/projet/reader_projet.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/gwhat/projet/reader_projet.py b/gwhat/projet/reader_projet.py index 07d565421..8677f0d29 100644 --- a/gwhat/projet/reader_projet.py +++ b/gwhat/projet/reader_projet.py @@ -10,7 +10,6 @@ # ---- Standard library imports import os -import csv import os.path as osp from shutil import copyfile @@ -20,7 +19,7 @@ # ---- Local library imports from gwhat.meteo.weather_reader import WXDataFrameBase -from gwhat.projet.reader_waterlvl import WLDataFrameBase +from gwhat.projet.reader_waterlvl import WLDataFrameBase, WLDataset from gwhat.gwrecharge.glue import GLUEDataFrameBase from gwhat.common.utils import save_content_to_file from gwhat.utils.math import nan_as_text_tolist, calcul_rmse @@ -222,10 +221,16 @@ def add_wldset(self, name, df): grp = self.db['wldsets'].create_group(name) # Water level data - grp.create_dataset('Time', data=df['Time']) - grp.create_dataset('WL', data=df['WL']) - grp.create_dataset('BP', data=df['BP']) - grp.create_dataset('ET', data=df['ET']) + grp.create_dataset( + 'Time', + data=np.array(df['Time'], dtype=h5py.special_dtype(vlen=str))) + # See http://docs.h5py.org/en/latest/strings.html as to why this + # is necessary to do this in order to save a list of strings in + # a dataset with h5py. 
+ + grp.create_dataset('WL', data=np.copy(df['WL'])) + grp.create_dataset('BP', data=np.copy(df['BP'])) + grp.create_dataset('ET', data=np.copy(df['ET'])) # Piezometric well info grp.attrs['filename'] = df['filename'] @@ -378,8 +383,16 @@ def __init__(self, hdf5group, *args, **kwargs): def __load_dataset__(self, hdf5group): self.dset = hdf5group self._undo_stack = [] - self._waterlevels = self.dset['WL'][...] - self._datetimes = self.dset['Time'][...] + + columns = [] + data = [] + for colname in ['Time', 'WL', 'BP', 'ET']: + if len(self.dset[colname][...]): + data.append(self.dset[colname][...]) + columns.append(colname) + data = np.vstack(tuple(data)).transpose() + columns = tuple(columns) + self._dataf = WLDataset(data, columns) # Make older datasets compatible with newer format. if 'Well ID' not in list(self.dset.attrs.keys()): From a1a47c3ea2cee8c191232eac89af10c23371e904 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: Tue, 9 Apr 2019 12:32:44 -0400 Subject: [PATCH 20/28] Add pandas to the requirements --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 9cde16459..02eeba3dd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,4 @@ matplotlib>=2.0.2 requests h5py>=2.8 qtawesome +pandas From fb1b9afbb7d647b6699d5525e68f1f05f98d619b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: Tue, 9 Apr 2019 13:05:03 -0400 Subject: [PATCH 21/28] Fix BRFManager to use the new xlsdate property --- gwhat/brf_mod/kgs_gui.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/gwhat/brf_mod/kgs_gui.py b/gwhat/brf_mod/kgs_gui.py index ee6ff9b37..eb677602f 100644 --- a/gwhat/brf_mod/kgs_gui.py +++ b/gwhat/brf_mod/kgs_gui.py @@ -306,15 +306,13 @@ def set_wldset(self, wldset): self.btn_seldata.setAutoRaise(True) self.setEnabled(wldset is not None) if wldset is not None: - self.set_daterange((self.wldset['Time'][0], - 
self.wldset['Time'][-1])) + xldates = self.wldset.xldates + self.set_daterange((xldates[0], xldates[-1])) # Set the period over which the BRF would be evaluated. saved_brfperiod = wldset.get_brfperiod() - self.set_brfperiod( - (saved_brfperiod[0] or np.floor(self.wldset['Time'][0]), - saved_brfperiod[1] or np.floor(self.wldset['Time'][-1]) - )) + self.set_brfperiod((saved_brfperiod[0] or np.floor(xldates[0]), + saved_brfperiod[1] or np.floor(xldates[-1]))) def set_daterange(self, daterange): """ @@ -335,11 +333,11 @@ def calc_brf(self): brfperiod = self.get_brfperiod() t1 = min(brfperiod) - i1 = np.where(self.wldset['Time'] >= t1)[0][0] + i1 = np.where(self.wldset.xldates >= t1)[0][0] t2 = max(brfperiod) - i2 = np.where(self.wldset['Time'] <= t2)[0][-1] + i2 = np.where(self.wldset.xldates <= t2)[0][-1] - time = np.copy(self.wldset['Time'][i1:i2+1]) + time = np.copy(self.wldset.xldates[i1:i2+1]) wl = np.copy(self.wldset['WL'][i1:i2+1]) bp = np.copy(self.wldset['BP'][i1:i2+1]) if len(bp) == 0: From 9bfcc29815aecd09705180ca66003f1dbd51d8a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: Tue, 9 Apr 2019 13:17:50 -0400 Subject: [PATCH 22/28] Fix manager_data.py --- gwhat/projet/manager_data.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gwhat/projet/manager_data.py b/gwhat/projet/manager_data.py index 7630536c7..a9e4e1496 100644 --- a/gwhat/projet/manager_data.py +++ b/gwhat/projet/manager_data.py @@ -24,10 +24,10 @@ from gwhat.utils import icons import gwhat.common.widgets as myqt from gwhat.hydrograph4 import LatLong2Dist -import gwhat.projet.reader_waterlvl as wlrd +from gwhat.projet.reader_waterlvl import WLDataFrame from gwhat.projet.reader_projet import (INVALID_CHARS, is_dsetname_valid, make_dsetname_valid) -import gwhat.meteo.weather_reader as wxrd +from gwhat.meteo.weather_reader import WXDataFrame from gwhat.widgets.buttons import ToolBarWidget from gwhat.widgets.spinboxes import StrSpinBox @@ 
-698,9 +698,9 @@ def load_dataset(self, filename): try: if self._datatype == 'water level': - self._dataset = wlrd.read_water_level_datafile(filename) + self._dataset = WLDataFrame(filename) elif self._datatype == 'daily weather': - self._dataset = wxrd.WXDataFrame(filename) + self._dataset = WXDataFrame(filename) except Exception: self._dataset = None From 5d0262300d845c70408e6d82f9471cfa85caaa6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: Tue, 9 Apr 2019 13:41:21 -0400 Subject: [PATCH 23/28] Fix HydroPrint --- gwhat/HydroPrint2.py | 2 +- gwhat/hydrograph4.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gwhat/HydroPrint2.py b/gwhat/HydroPrint2.py index af9507b98..254b2c087 100644 --- a/gwhat/HydroPrint2.py +++ b/gwhat/HydroPrint2.py @@ -480,7 +480,7 @@ def best_fit_waterlvl(self): def best_fit_time(self): wldset = self.dmngr.get_current_wldset() if wldset is not None: - date0, date1 = self.hydrograph.best_fit_time(wldset['Time']) + date0, date1 = self.hydrograph.best_fit_time(wldset.xldates) self.date_start_widget.setDate(QDate(date0[0], date0[1], date0[2])) self.date_end_widget.setDate(QDate(date1[0], date1[1], date1[2])) diff --git a/gwhat/hydrograph4.py b/gwhat/hydrograph4.py index 74754a847..6aec80c08 100644 --- a/gwhat/hydrograph4.py +++ b/gwhat/hydrograph4.py @@ -855,7 +855,7 @@ def draw_waterlvl(self): # ---- Logger Measures - time = self.wldset['Time'] + time = self.wldset.xldates if self.WLdatum == 1: # masl water_lvl = self.wldset['Elevation']-self.wldset['WL'] else: # mbgs -> yaxis is inverted From 5664aa316fed43e975a32150b313dda6fad2db23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: Tue, 9 Apr 2019 13:44:49 -0400 Subject: [PATCH 24/28] Use xldate attr instead of 'Time' --- gwhat/gwrecharge/gwrecharge_calc2.py | 4 ++-- gwhat/hydrograph4.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gwhat/gwrecharge/gwrecharge_calc2.py 
b/gwhat/gwrecharge/gwrecharge_calc2.py index 5e1606113..97c64d646 100644 --- a/gwhat/gwrecharge/gwrecharge_calc2.py +++ b/gwhat/gwrecharge/gwrecharge_calc2.py @@ -98,8 +98,8 @@ def load_data(self, wxdset, wldset): self.wldset = wldset self.A, self.B = wldset['mrc/params'] - self.twlvl, self.wlobs = self.make_data_daily(wldset['Time'], - wldset['WL']) + self.twlvl, self.wlobs = self.make_data_daily( + wldset.xldate, wldset['WL']) if not self.A and not self.B: error = ("Groundwater recharge cannot be computed because a" diff --git a/gwhat/hydrograph4.py b/gwhat/hydrograph4.py index 6aec80c08..a7330bb44 100644 --- a/gwhat/hydrograph4.py +++ b/gwhat/hydrograph4.py @@ -1333,7 +1333,7 @@ def LatLong2Dist(LAT1, LON1, LAT2, LON2): # 0: daily | 1: weekly | 2: monthly | 3: yearly hydrograph.RAINscale = 100 - hydrograph.best_fit_time(wldset['Time']) + hydrograph.best_fit_time(wldset.xldate) hydrograph.best_fit_waterlvl() hydrograph.generate_hydrograph() # From 18b8cb1b2d1b627e747d4eec063d5f7ab35215f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: Tue, 9 Apr 2019 14:08:30 -0400 Subject: [PATCH 25/28] Fix test_reading_waterlvl --- gwhat/tests/test_read_waterlvl.py | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/gwhat/tests/test_read_waterlvl.py b/gwhat/tests/test_read_waterlvl.py index 6e9facb1e..3fb617893 100644 --- a/gwhat/tests/test_read_waterlvl.py +++ b/gwhat/tests/test_read_waterlvl.py @@ -6,28 +6,27 @@ # This file is part of GWHAT (Ground-Water Hydrograph Analysis Toolbox). # Licensed under the terms of the GNU General Public License. 
-# Standard library imports +# ---- Standard library imports import sys import os -import csv +import os.path as osp -# Third party imports +# ---- Third party imports import pytest import numpy as np import xlsxwriter -# Local imports -sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) +# ---- Local library imports from gwhat.common.utils import (save_content_to_excel, save_content_to_csv, delete_file) from gwhat.projet.reader_waterlvl import ( - load_waterlvl_measures, init_waterlvl_measures, - read_water_level_datafile) + load_waterlvl_measures, init_waterlvl_measures, WLDataFrame) # Test reading water level datafiles # ---------------------------------- +DATADIR = osp.join(osp.dirname(osp.realpath(__file__))) DATA = [['Well name = ', "êi!@':i*"], ['well id : ', '1234ABC'], ['Province', 'Qc'], @@ -41,18 +40,14 @@ [41241.70833, 3.665777025, 10.33127437, 387.7404819], [41241.71875, 3.665277031, 10.33097437, 396.9950643] ] -save_content_to_csv("water_level_datafile.csv", DATA) -save_content_to_excel("water_level_datafile.xls", DATA) -save_content_to_excel("water_level_datafile.xlsx", DATA) +save_content_to_csv(osp.join(DATADIR, "water_level_datafile.csv"), DATA) +save_content_to_excel(osp.join(DATADIR, "water_level_datafile.xls"), DATA) +save_content_to_excel(osp.join(DATADIR, "water_level_datafile.xlsx"), DATA) -def test_reading_waterlvl(): - df1 = read_water_level_datafile("water_level_datafile.csv") - df2 = read_water_level_datafile("water_level_datafile.xls") - df3 = read_water_level_datafile("water_level_datafile.xlsx") - - assert list(df1.keys()) == list(df2.keys()) - assert list(df2.keys()) == list(df3.keys()) +@pytest.mark.parametrize("ext", ['.csv', '.xls', '.xlsx']) +def test_reading_waterlvl(ext): + df = WLDataFrame(osp.join(DATADIR, "water_level_datafile" + ext)) expected_results = { 'Well': "êi!@':i*", From bec51b2aadd2daca2092a1b4e1ad6d48dd0d157f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: 
Tue, 9 Apr 2019 14:10:15 -0400 Subject: [PATCH 26/28] Fix test_reading_waterlvl (2) --- gwhat/tests/test_read_waterlvl.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gwhat/tests/test_read_waterlvl.py b/gwhat/tests/test_read_waterlvl.py index 3fb617893..f48e6afb5 100644 --- a/gwhat/tests/test_read_waterlvl.py +++ b/gwhat/tests/test_read_waterlvl.py @@ -10,6 +10,7 @@ import sys import os import os.path as osp +import csv # ---- Third party imports import pytest @@ -65,7 +66,7 @@ def test_reading_waterlvl(ext): keys = ['Well', 'Well ID', 'Province', 'Latitude', 'Longitude', 'Elevation', 'Municipality'] for key in keys: - assert df1[key] == expected_results[key] + assert df[key] == expected_results[key] # Test water_level_measurements.* From 543dd55924828525e737a7c65a74383d34b93d19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: Tue, 9 Apr 2019 14:34:15 -0400 Subject: [PATCH 27/28] Clean test_reading_waterlvl --- gwhat/tests/test_read_waterlvl.py | 34 +++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/gwhat/tests/test_read_waterlvl.py b/gwhat/tests/test_read_waterlvl.py index f48e6afb5..240ed1721 100644 --- a/gwhat/tests/test_read_waterlvl.py +++ b/gwhat/tests/test_read_waterlvl.py @@ -23,11 +23,6 @@ from gwhat.projet.reader_waterlvl import ( load_waterlvl_measures, init_waterlvl_measures, WLDataFrame) - -# Test reading water level datafiles -# ---------------------------------- - -DATADIR = osp.join(osp.dirname(osp.realpath(__file__))) DATA = [['Well name = ', "êi!@':i*"], ['well id : ', '1234ABC'], ['Province', 'Qc'], @@ -41,14 +36,27 @@ [41241.70833, 3.665777025, 10.33127437, 387.7404819], [41241.71875, 3.665277031, 10.33097437, 396.9950643] ] -save_content_to_csv(osp.join(DATADIR, "water_level_datafile.csv"), DATA) -save_content_to_excel(osp.join(DATADIR, "water_level_datafile.xls"), DATA) -save_content_to_excel(osp.join(DATADIR, "water_level_datafile.xlsx"), DATA)
+FILENAME = "water_level_datafile" + +# ---- Pytest Fixtures +@pytest.fixture +def datatmpdir(tmpdir): + """Create a set of water level datafile in various format.""" + save_content_to_csv( + osp.join(str(tmpdir), FILENAME + '.csv'), DATA) + save_content_to_excel( + osp.join(str(tmpdir), FILENAME + '.xls'), DATA) + save_content_to_excel( + osp.join(str(tmpdir), FILENAME + '.xlsx'), DATA) + return str(tmpdir) + + +# ---- Test reading water level datafiles @pytest.mark.parametrize("ext", ['.csv', '.xls', '.xlsx']) -def test_reading_waterlvl(ext): - df = WLDataFrame(osp.join(DATADIR, "water_level_datafile" + ext)) +def test_reading_waterlvl(datatmpdir, ext): + df = WLDataFrame(osp.join(datatmpdir, FILENAME + ext)) expected_results = { 'Well': "êi!@':i*", @@ -68,8 +76,12 @@ def test_reading_waterlvl(ext): for key in keys: assert df[key] == expected_results[key] + for key in ['WL', 'BP', 'ET']: + assert np.abs(np.min(df[key] - expected_results[key])) < 10e-6 + assert np.abs(np.min(df.xldates - expected_results['Time'])) < 10e-6 + -# Test water_level_measurements.* +# Test water_level_measurements. # ------------------------------- delete_file("waterlvl_manual_measurements.csv") From 8b2b9a90b05c9c3be9a67229cb0b006c5cb77809 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20Gosselin?= Date: Wed, 10 Apr 2019 09:40:48 -0400 Subject: [PATCH 28/28] Improve how datetimes are converted to xldates --- gwhat/projet/reader_waterlvl.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/gwhat/projet/reader_waterlvl.py b/gwhat/projet/reader_waterlvl.py index ce3051c8d..775a62372 100644 --- a/gwhat/projet/reader_waterlvl.py +++ b/gwhat/projet/reader_waterlvl.py @@ -324,10 +324,14 @@ def xldates(self): Return a numpy array containing the Excel numerical dates corresponding to the dates of the dataset. 
""" - return np.array( - [xldate_from_datetime_tuple(date.timetuple()[:6], 0) for - date in self._dataf.index] - ) + if 'XLDATES' not in self._dataf.columns: + print('Converting datetimes to xldates...', end=' ') + timedeltas = ( + self._dataf.index - xlrd.xldate.xldate_as_datetime(4000, 0)) + self._dataf['XLDATES'] = ( + timedeltas.total_seconds()/(3600 * 24) + 4000) + print('done') + return self._dataf['XLDATES'].values @property def dates(self):