Merge branch 'NOAA-GFDL:main' into container
aradhakrishnanGFDL authored Aug 15, 2024
2 parents da319e4 + bee4e51 commit cb7068b
Showing 27 changed files with 448 additions and 844 deletions.
17 changes: 9 additions & 8 deletions .github/workflows/mdtf_tests.yml
@@ -92,6 +92,7 @@ jobs:
- name: Generate Model Data
run: |
cd ../
echo "${PWD}"
micromamba activate _MDTF_synthetic_data
pip install mdtf-test-data
mkdir mdtf_test_data ; cd mdtf_test_data
@@ -133,9 +134,9 @@ jobs:
echo "${POD_OUTPUT}"
micromamba activate _MDTF_base
# trivial check that install script worked
./mdtf_framework.py --version
./mdtf_framework.py --help
# run the test PODs
./mdtf -v -f ${{matrix.json-file}}
./mdtf -f ${{matrix.json-file}}
# Debug POD log(s)
# cat ${POD_OUTPUT}/MDTF_NCAR.Synthetic_1975_1981/Wheeler_Kiladis/Wheeler_Kiladis.log
- name: Get observational data for set 2
@@ -161,7 +162,7 @@
run: |
micromamba activate _MDTF_base
# run the test PODs
./mdtf -v -f ${{matrix.json-file-set2}}
./mdtf -f ${{matrix.json-file-set2}}
# Uncomment the following line for debugging
#cat ../wkdir/MDTF_GFDL.Synthetic_1_10/MJO_prop_amp/MJO_prop_amp.log
- name: Get observational data for set 3
@@ -200,8 +201,8 @@
run: |
micromamba activate _MDTF_base
# run the test PODs
./mdtf -v -f ${{matrix.json-file-set3}}
- name: Run unit tests
run: |
micromamba activate _MDTF_base
python -m unittest discover
./mdtf -f ${{matrix.json-file-set3}}
#- name: Run unit tests
# run: |
# micromamba activate _MDTF_base
# python -m unittest discover
16 changes: 15 additions & 1 deletion data/fieldlist_CESM.jsonc
@@ -77,7 +77,8 @@
"ndim": 4
},
"Z500": {
"standard_name": "geopotential_height_500mb",
"standard_name": "geopotential_height",
"long_name": "geopotential height at 500 hPa",
"realm": "atmos",
"units": "m",
// note: 4d name is 'Z3' but Z500 = height at 500 mb, etc.
@@ -271,6 +272,19 @@
"units": "W m-2",
"ndim": 3
},
"tave": {
"standard_name": "vertically_integrated_temperature",
"realm": "atmos",
"units": "K",
"ndim": 3
},
"qsat_int": {
"standard_name": "specific_humidity",
"long_name": "Vertically integrated saturated specific humidity (surface to 200 mb)",
"realm": "atmos",
"units": "kg m-2",
"ndim": 3
},
"zos": {
"standard_name": "sea_surface_height_above_geoid",
"realm": "ocean",
10 changes: 10 additions & 0 deletions doc/sphinx/ref_catalogs.rst
@@ -0,0 +1,10 @@
.. role:: console(code)
:language: console
:class: highlight
.. _ref-catalogs:

ESM-intake catalogs
===================

The MDTF-diagnostics uses `intake-ESM <https://intake-esm.readthedocs.io/en/stable/>`__ catalogs and APIs to access
model datasets and verify POD data requirements. The MDTF-diagnostics package provides a basic
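As a rough illustration of the catalog API described above, here is a minimal intake-ESM sketch; the catalog path and query columns are placeholders, not the framework's actual catalog.

import intake

# open an ESM-intake catalog (hypothetical path) and query it by column values,
# which is how the preprocessor locates the files a POD requests
cat = intake.open_esm_datastore("model_output_catalog.json")
subset = cat.search(variable_id="pr", frequency="day")
datasets = subset.to_dataset_dict()  # dict of xarray Datasets, one per group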
1 change: 1 addition & 0 deletions doc/sphinx/ref_toc.rst
@@ -4,6 +4,7 @@ Framework reference
.. toctree::
:maxdepth: 2

ref_catalogs
ref_cli
ref_conventions
ref_data
3 changes: 3 additions & 0 deletions doc/sphinx/start_config.rst
@@ -60,6 +60,9 @@ a `discussion<https://github.com/NOAA-GFDL/CatalogBuilder/discussions>`__ if you
We encourage MDTF-diagnostics users to try running both catalog builders. Feel free to extend either tool to suit your needs, and consider submitting your additions to the appropriate
repository(ies).

See :doc:`the catalog documentation <ref-catalogs>` for more information on the implementation of
ESM-intake catalogs in the framework and the required column information for preprocessor functionality.

Adding your observational data files
++++++++++++++++++++++++++++++++++++

1 change: 1 addition & 0 deletions mdtf_framework.py
@@ -124,6 +124,7 @@ def backup_config(config):
ctx.config = util.NameSpace()
# parse the runtime config file
ctx.config = cli.parse_config_file(configfile)
ctx.config = cli.verify_conda_envs(ctx.config, configfile)
# Test ctx.config
# print(ctx.config.WORK_DIR)
ctx.config.CODE_ROOT = os.path.dirname(os.path.realpath(__file__))
22 changes: 22 additions & 0 deletions src/cli.py
@@ -161,3 +161,25 @@ def verify_runtime_config_options(config: util.NameSpace):
new_output_dir = verify_dirpath(config.OUTPUT_DIR, config.CODE_ROOT)
update_config(config, 'OUTPUT_DIR', new_output_dir)
verify_case_atts(config.case_list)

def verify_conda_envs(config: util.NameSpace, filename: str):
m_exists = os.path.exists(config['micromamba_exe'])
c_exists = os.path.exists(config['conda_root'])
cenv_exists = os.path.exists(config['conda_env_root'])
if not m_exists and not c_exists:
raise util.exceptions.MDTFBaseException(
f"Could not find conda or micromamba executable; please check the runtime config file: "
f'{filename}'
)
if c_exists and not cenv_exists:
new_env_root = os.path.join(config['conda_root'], "envs")
if os.path.exists(new_env_root):
config.update({'conda_env_root':new_env_root})
else:
raise util.exceptions.MDTFBaseException(
f"Count not find conda enviroment directory; please check the runtime config file: "
f'{filename}'
)

return config
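A minimal sketch of the fallback behavior that verify_conda_envs adds; the paths are hypothetical, not framework defaults.

import os

conda_root = "/home/user/miniconda3"
conda_env_root = "/home/user/mdtf-envs"  # assume this path does not exist
if not os.path.exists(conda_env_root):
    candidate = os.path.join(conda_root, "envs")
    if os.path.exists(candidate):
        conda_env_root = candidate  # fall back to <conda_root>/envs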

2 changes: 1 addition & 1 deletion src/conda/env_base.yml
@@ -21,7 +21,7 @@ dependencies:
- pandas=2.2.2
- pint=0.24.3
- dask=2024.7.1
- ecgtools=2023.7.13
- ecgtools=2024.7.31
- cfunits=3.3.6
- intake=0.7.0
- intake-esm=2024.2.6
2 changes: 1 addition & 1 deletion src/conda/env_base_micromamba.yml
@@ -22,7 +22,7 @@ dependencies:
- pandas=2.2.0
- pint=0.24.3
- dask=2024.1.1
- ecgtools=2023.7.13
- ecgtools=2024.7.31
- cfunits=3.3.6
- intake=0.7.0
- intake-esm=2024.2.6
44 changes: 28 additions & 16 deletions src/preprocessor.py
@@ -798,7 +798,7 @@ def check_group_daterange(self, group_df: pd.DataFrame, case_dr,
log: log file
"""
date_col = "date_range"
delimiters = ",.!?/&-:;@_'"
delimiters = ",.!?/&-:;@_'\\s+"
if not hasattr(group_df, 'start_time') or not hasattr(group_df, 'end_time'):
if hasattr(group_df, 'time_range'):
start_times = []
@@ -821,11 +821,13 @@ def check_group_daterange(self, group_df: pd.DataFrame, case_dr,
new_end_time_vals = []

for s in start_time_vals:
new_start_time_vals.append(int(''.join(w for w in re.split("[" + "\\".join(delimiters) + "]", s)
new_start_time_vals.append(int(''.join(w for w in re.split("[" + "\\".join(delimiters) + "]",
s)
if w)))
for e in end_time_vals:
new_end_time_vals.append(int(''.join(w for w in re.split("[" + "\\".join(delimiters) + "]", e)
if w)))
new_end_time_vals.append(int(''.join(w for w in re.split("[" + "\\".join(delimiters) + "]",
e)
if w)))

start_time_vals = new_start_time_vals
end_time_vals = new_end_time_vals
@@ -854,14 +856,16 @@ def check_group_daterange(self, group_df: pd.DataFrame, case_dr,
# throw out df entries not in date_range
for i in sorted_df.index:
cat_row = sorted_df.iloc[i]
stin = dl.Date(cat_row['start_time']) in case_dr
etin = dl.Date(cat_row['end_time']) in case_dr
if pd.isnull(cat_row['start_time']):
continue
else:
stin = dl.Date(cat_row['start_time']) in case_dr
etin = dl.Date(cat_row['end_time']) in case_dr
if (not stin and not etin) or (stin and not etin):
mask = sorted_df == cat_row['start_time']
sorted_df = sorted_df[~mask]
mask = np.isnat(sorted_df['start_time']) | np.isnat(sorted_df['end_time'])
sorted_df = sorted_df[~mask]

return sorted_df
except ValueError:
log.error("Non-contiguous or malformed date range in files:", group_df["path"].values)
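The delimiter handling above reduces catalog time_range strings to integer date stamps; a simplified sketch of the idea (not the framework's exact regex), using a hypothetical catalog entry:

import re

# split a time_range entry such as "19750101-19801231" on common separators
# and keep only the digits for each endpoint
start_str, end_str = re.split(r"[-_:,.\s]+", "19750101-19801231")
start_time, end_time = int(start_str), int(end_str)  # (19750101, 19801231)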
@@ -898,6 +902,15 @@ def query_catalog(self,
if 'date_range' not in [c.lower() for c in cols]:
cols.append('date_range')

drop_atts = ['average_T2',
'time_bnds',
'lat_bnds',
'lon_bnds',
'average_DT',
'average_T1',
'height',
'date']

for case_name, case_d in case_dict.items():
# path_regex = re.compile(r'(?i)(?<!\\S){}(?!\\S+)'.format(case_name))
path_regex = re.compile(r'({})'.format(case_name))
@@ -913,7 +926,6 @@ def query_catalog(self,
# the variable is translated
case_d.query['frequency'] = freq
case_d.query['path'] = [path_regex]
case_d.query['variable_id'] = var.translation.name
case_d.query['realm'] = realm_regex
case_d.query['standard_name'] = var.translation.standard_name

@@ -934,10 +946,8 @@ def query_catalog(self,
for a in var.alternates:
if hasattr(a, 'translation'):
if a.translation is not None:
case_d.query.update({'variable_id': a.translation.name})
case_d.query.update({'standard_name': a.translation.standard_name})
else:
case_d.query.update({'variable_id': a.name})
case_d.query.update({'standard_name': a.standard_name})
if any(var.translation.scalar_coords):
found_z_entry = False
@@ -979,6 +989,9 @@ def query_catalog(self,
)

range_attr_string = 'intake_esm_attrs:time_range'
if not hasattr(cat_subset_df[list(cat_subset_df)[0]].attrs, range_attr_string):
range_attr_string = 'intake_esm_attrs:date_range'

date_range_dict = {f: cat_subset_df[f].attrs[range_attr_string]
for f in list(cat_subset_df)}
date_range_dict = dict(sorted(date_range_dict.items(), key=lambda item: item[1]))
@@ -990,10 +1003,13 @@ def query_catalog(self,
var_xr = cat_subset_df[k]
else:
var_xr = xr.concat([var_xr, cat_subset_df[k]], "time")
for att in drop_atts:
if var_xr.get(att, None) is not None:
var_xr = var_xr.drop_vars(att)
if case_name not in cat_dict:
cat_dict[case_name] = var_xr
else:
cat_dict[case_name] = xr.merge([cat_dict[case_name], var_xr])
cat_dict[case_name] = xr.merge([cat_dict[case_name], var_xr], compat='no_conflicts')
# check that start and end times include runtime startdate and enddate
try:
self.check_time_bounds(cat_dict[case_name], var.translation, freq)
@@ -1299,13 +1315,9 @@ def process(self,
# get the initial model data subset from the ESM-intake catalog
cat_subset = self.query_catalog(case_list, config.DATA_CATALOG)
for case_name, case_xr_dataset in cat_subset.items():
# delete height attribute because it
# creates issues when merging: xr cannot determine if it is a coordinate
# TODO: implement something less kluge-y to remove problem attributes/variables
if case_xr_dataset.get('height', None) is not None:
del case_xr_dataset['height']
for v in case_list[case_name].varlist.iter_vars():
tv_name = v.translation.name
# todo: maybe skip this if no standard_name attribute for v in case_xr_dataset
var_xr_dataset = self.parse_ds(v, case_xr_dataset)
varlist_ex = [v_l.translation.name for v_l in case_list[case_name].varlist.iter_vars()]
if tv_name in varlist_ex:
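A minimal sketch of the bounds-variable cleanup and merge behavior that the query_catalog changes rely on; the dataset contents are illustrative only.

import xarray as xr

# two per-variable datasets as returned from the catalog, each carrying a
# bounds variable that would conflict on merge
ds_tas = xr.Dataset({"tas": ("time", [280.0]), "time_bnds": ("time", [0.0])})
ds_pr = xr.Dataset({"pr": ("time", [1.0e-5]), "time_bnds": ("time", [99.0])})

bounds_vars = ["time_bnds", "lat_bnds", "lon_bnds"]  # subset of drop_atts above
cleaned = [ds.drop_vars([v for v in bounds_vars if v in ds]) for ds in (ds_tas, ds_pr)]
merged = xr.merge(cleaned, compat="no_conflicts")  # equal overlapping values allowed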
2 changes: 1 addition & 1 deletion src/util/datelabel.py
@@ -71,7 +71,7 @@ def date_fmt(date):
case 12:
fmt = '%Y%m%d%H%M'
case 14:
fmt = '%Y%m%d-%H%M%S'
fmt = '%Y%m%d%H%M%S'
return fmt
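The corrected 14-character case corresponds to an undelimited timestamp; a quick check with a hypothetical date label:

from datetime import datetime

# a 14-character date label has no separators, so the format string must not either
datetime.strptime("19750101120000", "%Y%m%d%H%M%S")  # -> datetime(1975, 1, 1, 12, 0)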


4 changes: 2 additions & 2 deletions src/xr_parser.py
@@ -900,8 +900,8 @@ def reconcile_names(self, our_var, ds, ds_var_name: str, overwrite_ours=None):
overwrite_ours = True
else:
# attempt to match on standard_name attribute if present in data
ds_names = [v.name for v in ds.variables
if v.attrs.get('standard_name', "") == our_var.standard_name]
ds_names = [ds.variables[v].name for v in ds.variables
if ds.variables[v].attrs.get('standard_name', "") == our_var.standard_name]
if len(ds_names) == 1:
# success, narrowed down to one guess
self.log.info(("Selecting '%s' as the intended name for '%s' "
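The fix above matters because iterating an xarray Dataset's variables mapping yields the variable names (strings) rather than the variable objects, so attributes must be looked up through the dataset. A minimal sketch of standard_name matching with illustrative names and attributes:

import xarray as xr

ds = xr.Dataset({"TS": ("time", [288.0]), "tas": ("time", [287.5])})
ds["tas"].attrs["standard_name"] = "air_temperature"
matches = [name for name in ds.data_vars
           if ds[name].attrs.get("standard_name", "") == "air_temperature"]
# matches == ["tas"]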