MESMER-group · mathause · Oct 19, 2021 · Oct 4, 2021 · Oct 5, 2021 · Oct 19, 2021
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -56,6 +56,10 @@ Internal Changes
   to include the README in the docs to avoid duplication
   (`#102 <https://github.com/MESMER-group/mesmer/issues/102>`_).
   By `Mathias Hauser <https://github.com/mathause>`_.
+- Internal refactor: moved a number of inline comments to their own line (especially if
+  this allows the code to fit on one line instead of several) and other minor cleanups
+  (`#98 <https://github.com/MESMER-group/mesmer/issues/98>`_).
+  By `Mathias Hauser <https://github.com/mathause>`_.
 
 v0.8.1 - 2021-07-15
 -------------------

diff --git a/mesmer/calibrate_mesmer/calibrate_mesmer.py b/mesmer/calibrate_mesmer/calibrate_mesmer.py
@@ -290,7 +290,8 @@ def _calibrate_and_draw_realisations(
         preds_lv = {"gvtas": gv_novolc_T_s}  # predictors_list
 
         # Create local variability due to global variability warming samples
-        # used for training the local variability module. Samples are cheap to create so not an issue to have here.
+        # used for training the local variability module. Samples are cheap to create so
+        # not an issue to have here.
         lv_gv_s = create_emus_lv(
             params_lv, preds_lv, cfg, save_emus=False, submethod="OLS"
         )
@@ -306,9 +307,10 @@ def _calibrate_and_draw_realisations(
 
         LOGGER.debug("Loading auxiliary files")
         aux = {}
+        # default L values give better results, but this is faster + needs less space
         aux["phi_gc"] = load_phi_gc(
             lon, lat, ls, cfg, L_start=1750, L_end=2000, L_interval=250
-        )  # better results with default values L, but like this much faster + less space needed
+        )
 
         LOGGER.debug("Finalising training of local variability module on derived data")
         targs_res_lv = {"tas": res_lv_s}

diff --git a/mesmer/calibrate_mesmer/train_gt.py b/mesmer/calibrate_mesmer/train_gt.py
@@ -72,7 +72,8 @@ def train_gt(var, targ, esm, time, cfg, save_params=True):
 
     scenarios_tr = list(var.keys())
 
-    # initialize parameters dictionary and fill in the metadata which does not depend on the applied method
+    # initialize parameters dictionary and fill in the metadata which does not depend on
+    # the applied method
     params_gt = {}
     params_gt["targ"] = targ
     params_gt["esm"] = esm
@@ -83,15 +84,17 @@ def train_gt(var, targ, esm, time, cfg, save_params=True):
     # apply the chosen method to the type of ensenble
     gt = {}
     if "LOWESS" in params_gt["method"]:
-        for scen in scenarios_tr:  # ie derive gt for each scen individually
+        # i.e. derive gt for each scen individually
+        for scen in scenarios_tr:
             gt[scen], frac_lowess_name = train_gt_ic_LOWESS(var[scen])
         params_gt["frac_lowess"] = frac_lowess_name
     else:
         raise ValueError("No alternative method to LOWESS is implemented for now.")
 
     params_gt["time"] = {}
 
-    if scenarios_tr[0][:2] == "h-":  # ie if hist included
+    # i.e. if hist included
+    if scenarios_tr[0][:2] == "h-":
 
         if gen == 5:
             start_year_fut = 2005
@@ -124,13 +127,14 @@ def train_gt(var, targ, esm, time, cfg, save_params=True):
         elif params_gt["method"] == "LOWESS":
             params_gt["hist"] = gt_lowess_hist
 
-        scenarios_tr_f = list(
-            map(lambda x: x.replace("h-", ""), scenarios_tr)
-        )  # isolte future scen names
+        # isolate future scen names
+        scenarios_tr_f = [scen.replace("h-", "") for scen in scenarios_tr]
 
     else:
-        idx_start_year_fut = 0  # because first year would be already in future
-        scenarios_tr_f = scenarios_tr  # because only future covered anyways
+        # because first year would be already in future
+        idx_start_year_fut = 0
+        # because only future covered anyways
+        scenarios_tr_f = scenarios_tr
 
     for scen_f, scen in zip(scenarios_tr_f, scenarios_tr):
         params_gt["time"][scen_f] = time[scen][idx_start_year_fut:]
@@ -183,9 +187,9 @@ def train_gt_ic_LOWESS(var):
     av_var = np.mean(var, axis=0)
 
     # apply lowess smoother to further smooth the Tglob time series
-    frac_lowess = (
-        50 / nr_ts
-    )  # rather arbitrarily chosen value that gives a smooth enough trend,
+    # rather arbitrarily chosen value that gives a smooth enough trend,
+    frac_lowess = 50 / nr_ts
+
     # open to changes but if much smaller, var trend ends up very wiggly
     frac_lowess_name = "50/nr_ts"
 
@@ -243,19 +247,23 @@ def train_gt_ic_OLSVOLC(var, gt_lowess, time, cfg):
 
     if nr_ts != nr_aod_obs:
         raise ValueError(
-            f"The number of time steps of the variable ({nr_ts}) and the saod ({nr_aod_obs}) do not match."
+            f"The number of time steps of the variable ({nr_ts}) and the saod "
+            f"({nr_aod_obs}) do not match."
         )
-    # extract global variability (which still includes volc eruptions) by removing smooth trend from Tglob in historic period
+
+    # extract global variability (which still includes volc eruptions) by removing
+    # smooth trend from Tglob in historic period
     gv_all_for_aod = np.zeros(nr_runs * nr_aod_obs)
     i = 0
     for run in np.arange(nr_runs):
         gv_all_for_aod[i : i + nr_aod_obs] = var[run] - gt_lowess
         i += nr_aod_obs
-    # fit linear regression of gv to aod (because some ESMs react very strongly to volcanoes)
+    # fit linear regression of gv to aod (because some ESMs react very strongly to
+    # volcanoes)
+    # no intercept to not artificially move the ts
     linreg_gv_volc = LinearRegression(fit_intercept=False).fit(
         aod_obs_all, gv_all_for_aod
-    )  # no intercept to not artifically
-    # move the ts
+    )
 
     # extract the saod coefficient
     coef_saod = linreg_gv_volc.coef_[0]

diff --git a/mesmer/calibrate_mesmer/train_gv.py b/mesmer/calibrate_mesmer/train_gv.py
@@ -85,9 +85,8 @@ def train_gv(gv, targ, esm, cfg, save_params=True, **kwargs):
             kwargs["sel_crit"] = "bic"
         params_gv = train_gv_AR(params_gv, gv, kwargs["max_lag"], kwargs["sel_crit"])
     else:
-        raise ValueError(
-            "The chosen method and / or weighting approach is currently not implemented."
-        )
+        msg = "The chosen method and / or weighting approach is currently not implemented."
+        raise ValueError(msg)
 
     # save the global variability paramters if requested
     if save_params:

diff --git a/mesmer/calibrate_mesmer/train_lt.py b/mesmer/calibrate_mesmer/train_lt.py
@@ -174,7 +174,8 @@ def train_lt(preds, targs, esm, cfg, save_params=True):
                     params_lt["coef_" + pred][targ][gp] = reg.coef_[targ_idx, coef_idx]
                     coef_idx += 1
 
-                if len(preds_lv) > 0:  # assumption: coefs of lv are behind coefs of lt
+                # assumption: coefs of lv are behind coefs of lt
+                if len(preds_lv) > 0:
                     for pred in params_lv["preds"]:
                         params_lv["coef_" + pred][targ][gp] = reg.coef_[
                             targ_idx, coef_idx

diff --git a/mesmer/calibrate_mesmer/train_utils.py b/mesmer/calibrate_mesmer/train_utils.py
@@ -20,7 +20,8 @@ def train_l_prepare_X_y_wgteq(preds, targs):
     preds : dict
         empty dictionary if none, else nested dictionary of predictors with keys
 
-        - [pred][scen]  (1d/ 2d arrays (time)/(run, time) of predictor for specific scenario)
+        - [pred][scen]  (1d/ 2d arrays (time)/(run, time) of predictor for specific
+          scenario)
     targs : dict
         nested dictionary of targets with keys
 
@@ -42,9 +43,8 @@ def train_l_prepare_X_y_wgteq(preds, targs):
     pred_names = list(preds.keys())
 
     # identify characteristics of the predictors and the targets
-    targ = targs[
-        targ_name
-    ]  # predictors are not influenced by whether there is a single or there are multiple targets
+    # predictors are not influenced by whether there is a single or multiple targets
+    targ = targs[targ_name]
     scens = list(targ.keys())
 
     # assumption: nr_runs per scen and nr_ts for these runs can vary
@@ -70,33 +70,30 @@ def train_l_prepare_X_y_wgteq(preds, targs):
             s = 0  # index for samples
             pred_raw = preds[pred_name]  # values of predictor p
             for scen in scens:
-                if (
-                    len(pred_raw[scen].shape) == 2
-                ):  # if 1 time series per run for predictor (e.g., gv)
-                    k = (
-                        pred_raw[scen].shape[0] * pred_raw[scen].shape[1]
-                    )  # nr_runs*nr_ts for this specific scenario
+                # if 1 time series per run for predictor (e.g., gv)
+                if len(pred_raw[scen].shape) == 2:
+                    # nr_runs*nr_ts for this specific scenario
+                    k = pred_raw[scen].shape[0] * pred_raw[scen].shape[1]
                     X[s : s + k, p] = pred_raw[scen].flatten()
                     s += k
-                elif (
-                    len(pred_raw[scen].shape) == 1
-                ):  # if single time series as predictor (e.g. gt): repeat ts as many times as runs available
+                # if single time series as predictor (e.g. gt): repeat ts as many times
+                # as runs available
+                elif len(pred_raw[scen].shape) == 1:
                     nr_runs, nr_ts, nr_gps = targ[scen].shape
                     nr_samples_scen = nr_runs * nr_ts
                     X[s : s + nr_samples_scen, p] = np.tile(pred_raw[scen], nr_runs)
                     s += nr_samples_scen
                 else:
                     raise ValueError("Predictors of this shape cannot be processed.")
 
-    # derive y (ie array of targets)
+    # derive y (i.e. array of targets)
     y = np.zeros([nr_samples, nr_gps, nr_targs])
     for t, targ_name in enumerate(targ_names):
         targ = targs[targ_name]
         s = 0
         for scen in scens:
-            k = (
-                targ[scen].shape[0] * targ[scen].shape[1]
-            )  # nr_runs*nr_ts for this scenario
+            # nr_runs * nr_ts for this scenario
+            k = targ[scen].shape[0] * targ[scen].shape[1]
             y[s : s + k, :, t] = targ[scen].reshape(k, -1)
             s += k
 

diff --git a/mesmer/create_emulations/create_emus_gt.py b/mesmer/create_emulations/create_emus_gt.py
@@ -35,7 +35,8 @@ def create_emus_gt(params_gt, preds_gt, cfg, concat_h_f=False, save_emus=True):
     preds_gt : dict
         nested dictionary of predictors for global trend with keys
 
-        - [pred][scen]  (1d/2d arrays (time)/(run, time) of predictor for specific scenario)
+        - [pred][scen]  (1d/2d arrays (time)/(run, time) of predictor for specific
+          scenario)
     cfg : module
         config file containing metadata
     concat_h_f : bool, optional

diff --git a/mesmer/create_emulations/create_emus_gv.py b/mesmer/create_emulations/create_emus_gv.py
@@ -176,10 +176,8 @@ def create_emus_gv_AR(params_gv, nr_emus_v, nr_ts_emus_v, seed):
             emus_gv[i, t] = (
                 ar_int
                 + sum(
-                    [
-                        ar_coefs[k] * emus_gv[i, t - ar_lags[k]]
-                        for k in np.arange(len(ar_lags))
-                    ]
+                    ar_coefs[k] * emus_gv[i, t - ar_lags[k]]
+                    for k in np.arange(len(ar_lags))
                 )
                 + innovs_emus_gv[i, t]
             )

diff --git a/mesmer/create_emulations/create_emus_lt.py b/mesmer/create_emulations/create_emus_lt.py
@@ -36,7 +36,8 @@ def create_emus_lt(params_lt, preds_lt, cfg, concat_h_f=False, save_emus=True):
     preds_lt : dict
         nested dictionary of predictors for local trends with keys
 
-        - [pred][scen] (1d/2d arrays (time)/(run, time) of predictor for specific scenario)
+        - [pred][scen] (1d/2d arrays (time)/(run, time) of predictor for specific
+          scenario)
     cfg : module
         config file containing metadata
     concat_h_f : bool, optional
@@ -87,8 +88,8 @@ def create_emus_lt(params_lt, preds_lt, cfg, concat_h_f=False, save_emus=True):
         else:
             scens_out = scens_out_f = scenarios_emus
 
-    # check predictors
-    if pred_names != params_lt["preds"]:  # check if correct predictors
+    # check if correct predictors
+    if pred_names != params_lt["preds"]:
         raise ValueError(
             "Wrong predictors were passed. The emulations cannot be created."
         )
@@ -102,9 +103,8 @@ def create_emus_lt(params_lt, preds_lt, cfg, concat_h_f=False, save_emus=True):
     if params_lt["method_each_gp_sep"]:
         method_lt = method_lt + "_each_gp_sep"
     else:
-        raise ValueError(
-            f"No such method ({params_lt['method_each_gp_sep']}) is currently implemented."
-        )
+        meth = params_lt["method_each_gp_sep"]
+        raise ValueError(f"No such method ({meth}) is currently implemented.")
 
     create_emus_method_lt = create_emus_method_func_mapping[method_lt]
 
@@ -165,7 +165,8 @@ def create_emus_OLS_each_gp_sep(params_lt, preds_lt, scen):
     preds_lt : dict
         nested dictionary of predictors for local trends with keys
 
-        - [pred][scen] (1d/ 2d arrays (time)/(run, time) of predictor for specific scenario)
+        - [pred][scen] (1d/ 2d arrays (time)/(run, time) of predictor for specific
+          scenario)
     scen : str
         emulated scenario
 

diff --git a/mesmer/create_emulations/create_emus_lv.py b/mesmer/create_emulations/create_emus_lv.py
@@ -30,7 +30,8 @@ def create_emus_lv(params_lv, preds_lv, cfg, save_emus=True, submethod=""):
     preds_lv : dict
         nested dictionary of predictors for local variability with keys
 
-        - [pred][scen] (1d/ 2d arrays (time)/(run, time) of predictor for specific scenario)
+        - [pred][scen] (1d/ 2d arrays (time)/(run, time) of predictor for specific
+          scenario)
     cfg : module
         config file containing metadata
     save_emus : bool, optional
@@ -118,7 +119,7 @@ def create_emus_lv_AR1_sci(emus_lv, params_lv, preds_lv, cfg):
     emus_lv : dict
         local variability emulations dictionary with keys
 
-        - [scen] (3d array (emu, time, gp) of local variability from previous submethods)
+        - [scen] 3d array (emu, time, gp) of local variability from previous submethods
         - empty dict if no previous submethod
     params_lv : dict
         dictionary with the trained local variability parameters
@@ -132,7 +133,8 @@ def create_emus_lv_AR1_sci(emus_lv, params_lv, preds_lv, cfg):
     preds_lv : dict
         nested dictionary of predictors for local variability with keys
 
-        - [pred][scen] (1d/ 2d arrays (time)/(run, time) of predictor for specific scenario)
+        - [pred][scen] 1d/ 2d arrays (time)/(run, time) of predictor for specific
+          scenario
     cfg : module
         config file containing metadata
 
@@ -175,7 +177,8 @@ def create_emus_lv_AR1_sci(emus_lv, params_lv, preds_lv, cfg):
             seed = seed_all_scens[scen]["lv"]
             nr_gps = len(params_lv["AR1_int"][targ])
 
-            # in case no emus_lv[scen] exist yet, initialize it. Otherwise build up on existing one
+            # in case no emus_lv[scen] exist yet, initialize it. Otherwise build up on
+            # existing one
             if len(emus_lv[scen]) == 0:
 
                 emus_lv[scen][targ] = np.zeros(nr_emus_v, nr_ts_emus_stoch_v, nr_gps)
@@ -195,7 +198,8 @@ def create_emus_lv_AR1_sci(emus_lv, params_lv, preds_lv, cfg):
             )
 
             print(
-                "Compute the contribution to emus_lv by the AR(1) process with the spatially correlated innovations"
+                "Compute the contribution to emus_lv by the AR(1) process with the "
+                "spatially correlated innovations"
             )
             emus_lv_tmp = np.zeros([nr_emus_v, nr_ts_emus_stoch_v + buffer, nr_gps])
             for t in np.arange(1, nr_ts_emus_stoch_v + buffer):
@@ -229,7 +233,8 @@ def create_emus_lv_OLS(params_lv, preds_lv):
     preds_lv : dict
         nested dictionary of predictors for local variability with keys
 
-        - [pred][scen] (1d/ 2d arrays (time)/(run, time) of predictor for specific scenario)
+        - [pred][scen] (1d/ 2d arrays (time)/(run, time) of predictor for specific
+          scenario)
 
     Returns
     -------
@@ -266,10 +271,7 @@ def create_emus_lv_OLS(params_lv, preds_lv):
             for run in np.arange(nr_emus_v):
                 for gp in np.arange(nr_gps):
                     emus_lv[scen][targ][run, :, gp] = sum(
-                        [
-                            params_lv["coef_" + pred][targ][gp]
-                            * preds_lv[pred][scen][run]
-                            for pred in params_lv["preds"]
-                        ]
+                        params_lv["coef_" + pred][targ][gp] * preds_lv[pred][scen][run]
+                        for pred in params_lv["preds"]
                     )
     return emus_lv
diff --git a/mesmer/io/load_constant_files.py b/mesmer/io/load_constant_files.py
@@ -284,29 +284,25 @@ def load_regs_ls_wgt_lon_lat(reg_type, lon, lat):
     reg_dict["type"] = reg_type
     reg_dict["abbrevs"] = reg.abbrevs
     reg_dict["names"] = reg.names
-    reg_dict["grids"] = mask_percentage(
-        reg, lon["c"], lat["c"]
-    ).values  # have fraction of grid cells
-    reg_dict["grid_b"] = reg.mask(
-        lon["c"], lat["c"]
-    ).values  # not sure yet if needed: "binary" grid with each grid point assigned to single country
-    reg_dict[
-        "full"
-    ] = reg  # to be used for plotting outlines (mainly useful for srex regs)
+    # have fraction of grid cells
+    reg_dict["grids"] = mask_percentage(reg, lon["c"], lat["c"]).values
+    # not sure if needed: "binary" grid with each grid point assigned to single country
+    reg_dict["grid_b"] = reg.mask(lon["c"], lat["c"]).values
+    # to be used for plotting outlines (mainly useful for srex regs)
+    reg_dict["full"] = reg
 
     # obtain a (subsampled) land-sea mask
     ls = {}
-    ls["grid_raw"] = np.squeeze(
-        mask_percentage(
-            regionmask.defined_regions.natural_earth.land_110, lon["c"], lat["c"]
-        ).values
-    )
-    # gives fraction of land -> in extract_land() script decide above which land fraction threshold to consider a grid point as a land grid point
+    land_110 = regionmask.defined_regions.natural_earth.land_110
+
+    # gives fraction of land -> in extract_land() script decide above which land
+    # fraction threshold to consider a grid point as a land grid point
+    ls["grid_raw"] = np.squeeze(mask_percentage(land_110, lon["c"], lat["c"]).values)
 
     # remove Antarctica
     idx_ANT = np.where(lat["c"] < -60)[0]
     ls["grid_no_ANT"] = copy.deepcopy(ls["grid_raw"])
-    ls["grid_no_ANT"][idx_ANT] = 0  #
+    ls["grid_no_ANT"][idx_ANT] = 0
 
     # derive the weights
     lon["grid"], lat["grid"] = np.meshgrid(lon["c"], lat["c"])