Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove inline comments #98

Merged
merged 5 commits into from
Oct 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ Internal Changes
to include the README in the docs to avoid duplication
(`#102 <https://github.com/MESMER-group/mesmer/issues/102>`_).
By `Mathias Hauser <https://github.com/mathause>`_.
- Internal refactor: moved a number of inline comments to their own line (especially
where this lets the code fit on one line instead of several) and other minor cleanups
(`#98 <https://github.com/MESMER-group/mesmer/issues/98>`_).
By `Mathias Hauser <https://github.com/mathause>`_.

v0.8.1 - 2021-07-15
-------------------
Expand Down
6 changes: 4 additions & 2 deletions mesmer/calibrate_mesmer/calibrate_mesmer.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,8 @@ def _calibrate_and_draw_realisations(
preds_lv = {"gvtas": gv_novolc_T_s} # predictors_list

# Create local variability due to global variability warming samples
# used for training the local variability module. Samples are cheap to create so not an issue to have here.
# used for training the local variability module. Samples are cheap to create so
# not an issue to have here.
lv_gv_s = create_emus_lv(
params_lv, preds_lv, cfg, save_emus=False, submethod="OLS"
)
Expand All @@ -306,9 +307,10 @@ def _calibrate_and_draw_realisations(

LOGGER.debug("Loading auxiliary files")
aux = {}
# default L values give better results, but these settings are faster + need less space
aux["phi_gc"] = load_phi_gc(
lon, lat, ls, cfg, L_start=1750, L_end=2000, L_interval=250
) # better results with default values L, but like this much faster + less space needed
)

LOGGER.debug("Finalising training of local variability module on derived data")
targs_res_lv = {"tas": res_lv_s}
Expand Down
40 changes: 24 additions & 16 deletions mesmer/calibrate_mesmer/train_gt.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ def train_gt(var, targ, esm, time, cfg, save_params=True):

scenarios_tr = list(var.keys())

# initialize parameters dictionary and fill in the metadata which does not depend on the applied method
# initialize parameters dictionary and fill in the metadata which does not depend on
# the applied method
params_gt = {}
params_gt["targ"] = targ
params_gt["esm"] = esm
Expand All @@ -83,15 +84,17 @@ def train_gt(var, targ, esm, time, cfg, save_params=True):
# apply the chosen method to the type of ensemble
gt = {}
if "LOWESS" in params_gt["method"]:
for scen in scenarios_tr: # ie derive gt for each scen individually
# i.e. derive gt for each scen individually
for scen in scenarios_tr:
gt[scen], frac_lowess_name = train_gt_ic_LOWESS(var[scen])
params_gt["frac_lowess"] = frac_lowess_name
else:
raise ValueError("No alternative method to LOWESS is implemented for now.")

params_gt["time"] = {}

if scenarios_tr[0][:2] == "h-": # ie if hist included
# i.e. if hist included
if scenarios_tr[0][:2] == "h-":

if gen == 5:
start_year_fut = 2005
Expand Down Expand Up @@ -124,13 +127,14 @@ def train_gt(var, targ, esm, time, cfg, save_params=True):
elif params_gt["method"] == "LOWESS":
params_gt["hist"] = gt_lowess_hist

scenarios_tr_f = list(
map(lambda x: x.replace("h-", ""), scenarios_tr)
) # isolte future scen names
# isolate future scen names
scenarios_tr_f = [scen.replace("h-", "") for scen in scenarios_tr]

else:
idx_start_year_fut = 0 # because first year would be already in future
scenarios_tr_f = scenarios_tr # because only future covered anyways
# because first year would be already in future
idx_start_year_fut = 0
# because only future covered anyways
scenarios_tr_f = scenarios_tr

for scen_f, scen in zip(scenarios_tr_f, scenarios_tr):
params_gt["time"][scen_f] = time[scen][idx_start_year_fut:]
Expand Down Expand Up @@ -183,9 +187,9 @@ def train_gt_ic_LOWESS(var):
av_var = np.mean(var, axis=0)

# apply lowess smoother to further smooth the Tglob time series
frac_lowess = (
50 / nr_ts
) # rather arbitrarily chosen value that gives a smooth enough trend,
# rather arbitrarily chosen value that gives a smooth enough trend,
frac_lowess = 50 / nr_ts

# frac_lowess is open to changes but if much smaller, var trend ends up very wiggly
frac_lowess_name = "50/nr_ts"

Expand Down Expand Up @@ -243,19 +247,23 @@ def train_gt_ic_OLSVOLC(var, gt_lowess, time, cfg):

if nr_ts != nr_aod_obs:
raise ValueError(
f"The number of time steps of the variable ({nr_ts}) and the saod ({nr_aod_obs}) do not match."
f"The number of time steps of the variable ({nr_ts}) and the saod "
"({nr_aod_obs}) do not match."
)
# extract global variability (which still includes volc eruptions) by removing smooth trend from Tglob in historic period

# extract global variability (which still includes volc eruptions) by removing
# smooth trend from Tglob in historic period
gv_all_for_aod = np.zeros(nr_runs * nr_aod_obs)
i = 0
for run in np.arange(nr_runs):
gv_all_for_aod[i : i + nr_aod_obs] = var[run] - gt_lowess
i += nr_aod_obs
# fit linear regression of gv to aod (because some ESMs react very strongly to volcanoes)
# fit linear regression of gv to aod (because some ESMs react very strongly to
# volcanoes)
# no intercept to not artificially move the ts
linreg_gv_volc = LinearRegression(fit_intercept=False).fit(
aod_obs_all, gv_all_for_aod
) # no intercept to not artifically
# move the ts
)

# extract the saod coefficient
coef_saod = linreg_gv_volc.coef_[0]
Expand Down
5 changes: 2 additions & 3 deletions mesmer/calibrate_mesmer/train_gv.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,8 @@ def train_gv(gv, targ, esm, cfg, save_params=True, **kwargs):
kwargs["sel_crit"] = "bic"
params_gv = train_gv_AR(params_gv, gv, kwargs["max_lag"], kwargs["sel_crit"])
else:
raise ValueError(
"The chosen method and / or weighting approach is currently not implemented."
)
msg = "The chosen method and / or weighting approach is currently not implemented."
raise ValueError(msg)

# save the global variability parameters if requested
if save_params:
Expand Down
3 changes: 2 additions & 1 deletion mesmer/calibrate_mesmer/train_lt.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,8 @@ def train_lt(preds, targs, esm, cfg, save_params=True):
params_lt["coef_" + pred][targ][gp] = reg.coef_[targ_idx, coef_idx]
coef_idx += 1

if len(preds_lv) > 0: # assumption: coefs of lv are behind coefs of lt
# assumption: coefs of lv are behind coefs of lt
if len(preds_lv) > 0:
for pred in params_lv["preds"]:
params_lv["coef_" + pred][targ][gp] = reg.coef_[
targ_idx, coef_idx
Expand Down
31 changes: 14 additions & 17 deletions mesmer/calibrate_mesmer/train_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ def train_l_prepare_X_y_wgteq(preds, targs):
preds : dict
empty dictionary if none, else nested dictionary of predictors with keys

- [pred][scen] (1d/ 2d arrays (time)/(run, time) of predictor for specific scenario)
- [pred][scen] (1d/ 2d arrays (time)/(run, time) of predictor for specific
scenario)
targs : dict
nested dictionary of targets with keys

Expand All @@ -42,9 +43,8 @@ def train_l_prepare_X_y_wgteq(preds, targs):
pred_names = list(preds.keys())

# identify characteristics of the predictors and the targets
targ = targs[
targ_name
] # predictors are not influenced by whether there is a single or there are multiple targets
# predictors are not influenced by whether there is a single or multiple targets
targ = targs[targ_name]
scens = list(targ.keys())

# assumption: nr_runs per scen and nr_ts for these runs can vary
Expand All @@ -70,33 +70,30 @@ def train_l_prepare_X_y_wgteq(preds, targs):
s = 0 # index for samples
pred_raw = preds[pred_name] # values of predictor p
for scen in scens:
if (
len(pred_raw[scen].shape) == 2
): # if 1 time series per run for predictor (e.g., gv)
k = (
pred_raw[scen].shape[0] * pred_raw[scen].shape[1]
) # nr_runs*nr_ts for this specific scenario
# if 1 time series per run for predictor (e.g., gv)
if len(pred_raw[scen].shape) == 2:
# nr_runs*nr_ts for this specific scenario
k = pred_raw[scen].shape[0] * pred_raw[scen].shape[1]
X[s : s + k, p] = pred_raw[scen].flatten()
s += k
elif (
len(pred_raw[scen].shape) == 1
): # if single time series as predictor (e.g. gt): repeat ts as many times as runs available
# if single time series as predictor (e.g. gt): repeat ts as many times
# as runs available
elif len(pred_raw[scen].shape) == 1:
nr_runs, nr_ts, nr_gps = targ[scen].shape
nr_samples_scen = nr_runs * nr_ts
X[s : s + nr_samples_scen, p] = np.tile(pred_raw[scen], nr_runs)
s += nr_samples_scen
else:
raise ValueError("Predictors of this shape cannot be processed.")

# derive y (ie array of targets)
# derive y (i.e. array of targets)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very important question: Do we follow the American or the British style in MESMER comments? Because since you turned this into a proper "i.e.", I noticed the missing comma after the expression (& learned after a quick google search, that I'm apparently used to the American style whereas you apply the British style here ^^)

y = np.zeros([nr_samples, nr_gps, nr_targs])
for t, targ_name in enumerate(targ_names):
targ = targs[targ_name]
s = 0
for scen in scens:
k = (
targ[scen].shape[0] * targ[scen].shape[1]
) # nr_runs*nr_ts for this scenario
# nr_runs * nr_ts for this scenario
k = targ[scen].shape[0] * targ[scen].shape[1]
y[s : s + k, :, t] = targ[scen].reshape(k, -1)
s += k

Expand Down
3 changes: 2 additions & 1 deletion mesmer/create_emulations/create_emus_gt.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ def create_emus_gt(params_gt, preds_gt, cfg, concat_h_f=False, save_emus=True):
preds_gt : dict
nested dictionary of predictors for global trend with keys

- [pred][scen] (1d/2d arrays (time)/(run, time) of predictor for specific scenario)
- [pred][scen] (1d/2d arrays (time)/(run, time) of predictor for specific
scenario)
cfg : module
config file containing metadata
concat_h_f : bool, optional
Expand Down
6 changes: 2 additions & 4 deletions mesmer/create_emulations/create_emus_gv.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,8 @@ def create_emus_gv_AR(params_gv, nr_emus_v, nr_ts_emus_v, seed):
emus_gv[i, t] = (
ar_int
+ sum(
[
ar_coefs[k] * emus_gv[i, t - ar_lags[k]]
for k in np.arange(len(ar_lags))
]
ar_coefs[k] * emus_gv[i, t - ar_lags[k]]
for k in np.arange(len(ar_lags))
)
+ innovs_emus_gv[i, t]
)
Expand Down
15 changes: 8 additions & 7 deletions mesmer/create_emulations/create_emus_lt.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ def create_emus_lt(params_lt, preds_lt, cfg, concat_h_f=False, save_emus=True):
preds_lt : dict
nested dictionary of predictors for local trends with keys

- [pred][scen] (1d/2d arrays (time)/(run, time) of predictor for specific scenario)
- [pred][scen] (1d/2d arrays (time)/(run, time) of predictor for specific
scenario)
cfg : module
config file containing metadata
concat_h_f : bool, optional
Expand Down Expand Up @@ -87,8 +88,8 @@ def create_emus_lt(params_lt, preds_lt, cfg, concat_h_f=False, save_emus=True):
else:
scens_out = scens_out_f = scenarios_emus

# check predictors
if pred_names != params_lt["preds"]: # check if correct predictors
# check if correct predictors
if pred_names != params_lt["preds"]:
raise ValueError(
"Wrong predictors were passed. The emulations cannot be created."
)
Expand All @@ -102,9 +103,8 @@ def create_emus_lt(params_lt, preds_lt, cfg, concat_h_f=False, save_emus=True):
if params_lt["method_each_gp_sep"]:
method_lt = method_lt + "_each_gp_sep"
else:
raise ValueError(
f"No such method ({params_lt['method_each_gp_sep']}) is currently implemented."
)
meth = params_lt["method_each_gp_sep"]
raise ValueError(f"No such method ({meth}) is currently implemented.")

create_emus_method_lt = create_emus_method_func_mapping[method_lt]

Expand Down Expand Up @@ -165,7 +165,8 @@ def create_emus_OLS_each_gp_sep(params_lt, preds_lt, scen):
preds_lt : dict
nested dictionary of predictors for local trends with keys

- [pred][scen] (1d/ 2d arrays (time)/(run, time) of predictor for specific scenario)
- [pred][scen] (1d/ 2d arrays (time)/(run, time) of predictor for specific
scenario)
scen : str
emulated scenario

Expand Down
24 changes: 13 additions & 11 deletions mesmer/create_emulations/create_emus_lv.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def create_emus_lv(params_lv, preds_lv, cfg, save_emus=True, submethod=""):
preds_lv : dict
nested dictionary of predictors for local variability with keys

- [pred][scen] (1d/ 2d arrays (time)/(run, time) of predictor for specific scenario)
- [pred][scen] (1d/ 2d arrays (time)/(run, time) of predictor for specific
scenario)
cfg : module
config file containing metadata
save_emus : bool, optional
Expand Down Expand Up @@ -118,7 +119,7 @@ def create_emus_lv_AR1_sci(emus_lv, params_lv, preds_lv, cfg):
emus_lv : dict
local variability emulations dictionary with keys

- [scen] (3d array (emu, time, gp) of local variability from previous submethods)
- [scen] 3d array (emu, time, gp) of local variability from previous submethods
- empty dict if no previous submethod
params_lv : dict
dictionary with the trained local variability parameters
Expand All @@ -132,7 +133,8 @@ def create_emus_lv_AR1_sci(emus_lv, params_lv, preds_lv, cfg):
preds_lv : dict
nested dictionary of predictors for local variability with keys

- [pred][scen] (1d/ 2d arrays (time)/(run, time) of predictor for specific scenario)
- [pred][scen] 1d/ 2d arrays (time)/(run, time) of predictor for specific
scenario
cfg : module
config file containing metadata

Expand Down Expand Up @@ -175,7 +177,8 @@ def create_emus_lv_AR1_sci(emus_lv, params_lv, preds_lv, cfg):
seed = seed_all_scens[scen]["lv"]
nr_gps = len(params_lv["AR1_int"][targ])

# in case no emus_lv[scen] exist yet, initialize it. Otherwise build up on existing one
# in case no emus_lv[scen] exists yet, initialize it. Otherwise build up on the
# existing one
if len(emus_lv[scen]) == 0:

emus_lv[scen][targ] = np.zeros(nr_emus_v, nr_ts_emus_stoch_v, nr_gps)
Expand All @@ -195,7 +198,8 @@ def create_emus_lv_AR1_sci(emus_lv, params_lv, preds_lv, cfg):
)

print(
"Compute the contribution to emus_lv by the AR(1) process with the spatially correlated innovations"
"Compute the contribution to emus_lv by the AR(1) process with the "
"spatially correlated innovations"
)
emus_lv_tmp = np.zeros([nr_emus_v, nr_ts_emus_stoch_v + buffer, nr_gps])
for t in np.arange(1, nr_ts_emus_stoch_v + buffer):
Expand Down Expand Up @@ -229,7 +233,8 @@ def create_emus_lv_OLS(params_lv, preds_lv):
preds_lv : dict
nested dictionary of predictors for local variability with keys

- [pred][scen] (1d/ 2d arrays (time)/(run, time) of predictor for specific scenario)
- [pred][scen] (1d/ 2d arrays (time)/(run, time) of predictor for specific
scenario)

Returns
-------
Expand Down Expand Up @@ -266,10 +271,7 @@ def create_emus_lv_OLS(params_lv, preds_lv):
for run in np.arange(nr_emus_v):
for gp in np.arange(nr_gps):
emus_lv[scen][targ][run, :, gp] = sum(
[
params_lv["coef_" + pred][targ][gp]
* preds_lv[pred][scen][run]
for pred in params_lv["preds"]
]
params_lv["coef_" + pred][targ][gp] * preds_lv[pred][scen][run]
for pred in params_lv["preds"]
)
return emus_lv
28 changes: 12 additions & 16 deletions mesmer/io/load_constant_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,29 +284,25 @@ def load_regs_ls_wgt_lon_lat(reg_type, lon, lat):
reg_dict["type"] = reg_type
reg_dict["abbrevs"] = reg.abbrevs
reg_dict["names"] = reg.names
reg_dict["grids"] = mask_percentage(
reg, lon["c"], lat["c"]
).values # have fraction of grid cells
reg_dict["grid_b"] = reg.mask(
lon["c"], lat["c"]
).values # not sure yet if needed: "binary" grid with each grid point assigned to single country
reg_dict[
"full"
] = reg # to be used for plotting outlines (mainly useful for srex regs)
# have fraction of grid cells
reg_dict["grids"] = mask_percentage(reg, lon["c"], lat["c"]).values
# not sure if needed: "binary" grid with each grid point assigned to single country
reg_dict["grid_b"] = reg.mask(lon["c"], lat["c"]).values
# to be used for plotting outlines (mainly useful for srex regs)
reg_dict["full"] = reg

# obtain a (subsampled) land-sea mask
ls = {}
ls["grid_raw"] = np.squeeze(
mask_percentage(
regionmask.defined_regions.natural_earth.land_110, lon["c"], lat["c"]
).values
)
# gives fraction of land -> in extract_land() script decide above which land fraction threshold to consider a grid point as a land grid point
land_110 = regionmask.defined_regions.natural_earth.land_110

# gives fraction of land -> in extract_land() script decide above which land
# fraction threshold to consider a grid point as a land grid point
ls["grid_raw"] = np.squeeze(mask_percentage(land_110, lon["c"], lat["c"]).values)

# remove Antarctica
idx_ANT = np.where(lat["c"] < -60)[0]
ls["grid_no_ANT"] = copy.deepcopy(ls["grid_raw"])
ls["grid_no_ANT"][idx_ANT] = 0 #
ls["grid_no_ANT"][idx_ANT] = 0

# derive the weights
lon["grid"], lat["grid"] = np.meshgrid(lon["c"], lat["c"])
Expand Down
Loading