diff --git a/hep_tfds b/hep_tfds index 41156e296..31baf14de 160000 --- a/hep_tfds +++ b/hep_tfds @@ -1 +1 @@ -Subproject commit 41156e296113b23f4bb21359129ba3ac4578b590 +Subproject commit 31baf14defc53dcd1d7555e4a3945083e45e9304 diff --git a/mlpf/pipeline.py b/mlpf/pipeline.py index 5c202b4d4..4a55dd24b 100644 --- a/mlpf/pipeline.py +++ b/mlpf/pipeline.py @@ -1,3 +1,5 @@ +from comet_ml import OfflineExperiment, Experiment # isort:skip + try: import horovod.tensorflow.keras as hvd except ModuleNotFoundError: @@ -114,7 +116,6 @@ def train(config, weights, ntrain, ntest, nepochs, recreate, prefix, plot_freq, try: if comet_offline: print("Using comet-ml OfflineExperiment, saving logs locally.") - from comet_ml import OfflineExperiment experiment = OfflineExperiment( project_name="particleflow-tf", @@ -127,7 +128,6 @@ def train(config, weights, ntrain, ntest, nepochs, recreate, prefix, plot_freq, ) else: print("Using comet-ml Experiment, streaming logs to www.comet.ml.") - from comet_ml import Experiment experiment = Experiment( project_name="particleflow-tf", diff --git a/mlpf/tallinn/cms-mlpf-test.sh b/mlpf/tallinn/cms-mlpf-test.sh index b286d4ae7..112015ac3 100755 --- a/mlpf/tallinn/cms-mlpf-test.sh +++ b/mlpf/tallinn/cms-mlpf-test.sh @@ -1,13 +1,15 @@ #!/bin/bash #SBATCH -p gpu -#SBATCH --gpus 2 +#SBATCH --gpus 1 #SBATCH --mem-per-gpu=8G IMG=/home/software/singularity/tf-2.9.0.simg cd ~/particleflow +env + #TF training singularity exec -B /scratch-persistent --nv \ --env PYTHONPATH=hep_tfds \ --env TFDS_DATA_DIR=/scratch-persistent/joosep/tensorflow_datasets \ - $IMG python mlpf/pipeline.py train -c $1 --plot-freq 1 --ntrain 1000 --ntest 1000 + $IMG python mlpf/pipeline.py train -c $1 --plot-freq 1 --ntrain 5000 --ntest 1000 diff --git a/mlpf/tallinn/submit-test-eventloss.sh b/mlpf/tallinn/submit-test-eventloss.sh index bf6c553ac..86470f9a0 100644 --- a/mlpf/tallinn/submit-test-eventloss.sh +++ b/mlpf/tallinn/submit-test-eventloss.sh @@ -1,3 +1,8 @@ sbatch mlpf/tallinn/cms-mlpf-test.sh parameters/test-eventloss/baseline.yaml +sbatch mlpf/tallinn/cms-mlpf-test.sh parameters/test-eventloss/baseline-mask_reg_cls0.yaml +sbatch mlpf/tallinn/cms-mlpf-test.sh parameters/test-eventloss/genjet_logcosh_mask_reg_cls0.yaml +sbatch mlpf/tallinn/cms-mlpf-test.sh parameters/test-eventloss/baseline-clspt.yaml sbatch mlpf/tallinn/cms-mlpf-test.sh parameters/test-eventloss/swd.yaml sbatch mlpf/tallinn/cms-mlpf-test.sh parameters/test-eventloss/h2d.yaml +sbatch mlpf/tallinn/cms-mlpf-test.sh parameters/test-eventloss/genjet_mse.yaml +sbatch mlpf/tallinn/cms-mlpf-test.sh parameters/test-eventloss/genjet_logcosh.yaml diff --git a/mlpf/tfmodel/datasets/BaseDatasetFactory.py b/mlpf/tfmodel/datasets/BaseDatasetFactory.py index 7175d7e96..cda86130c 100644 --- a/mlpf/tfmodel/datasets/BaseDatasetFactory.py +++ b/mlpf/tfmodel/datasets/BaseDatasetFactory.py @@ -51,6 +51,10 @@ def func(data_item): target = unpack_target(y, num_output_classes, self.cfg) + cls_weights = msk_elems + if self.cfg["dataset"]["cls_weight_by_pt"]: + cls_weights *= target["pt"] + # inputs: X # targets: dict by classification (cls) and regression feature columns # weights: dict of weights for each target @@ -58,7 +62,7 @@ def func(data_item): X, target, { - "cls": msk_elems, + "cls": cls_weights, "charge": msk_elems * msk_signal, "pt": msk_elems * msk_signal, "eta": msk_elems * msk_signal, diff --git a/mlpf/tfmodel/model.py b/mlpf/tfmodel/model.py index d4036f288..4bdb04732 100644 --- a/mlpf/tfmodel/model.py +++ b/mlpf/tfmodel/model.py @@ -526,7 
+526,6 @@ def __init__( energy_num_layers=3, layernorm=False, mask_reg_cls0=True, - energy_multimodal=True, event_set_output=False, **kwargs ): @@ -539,8 +538,6 @@ def __init__( self.mask_reg_cls0 = mask_reg_cls0 - self.energy_multimodal = energy_multimodal - self.do_layernorm = layernorm if self.do_layernorm: self.layernorm = tf.keras.layers.LayerNormalization(axis=-1, name="output_layernorm") @@ -598,7 +595,7 @@ def __init__( ) self.ffn_energy = point_wise_feed_forward_network( - num_output_classes if self.energy_multimodal else 1, + 1, energy_hidden_dim, "ffn_energy", num_layers=energy_num_layers, @@ -625,7 +622,6 @@ def call(self, args, training=False): msk_input_outtype = tf.cast(msk_input, out_id_logits.dtype) out_id_softmax = tf.nn.softmax(out_id_logits, axis=-1) - out_id_hard_softmax = tf.stop_gradient(tf.nn.softmax(100 * out_id_logits, axis=-1)) out_charge = self.ffn_charge(X_encoded, training=training) out_charge = out_charge * msk_input_outtype @@ -665,10 +661,7 @@ def call(self, args, training=False): pred_energy_corr = pred_energy_corr * msk_input_outtype # In case of a multimodal prediction, weight the per-class energy predictions by the approximately one-hot vector - if self.energy_multimodal: - pred_energy = orig_energy + tf.reduce_sum(out_id_hard_softmax * pred_energy_corr, axis=-1, keepdims=True) - else: - pred_energy = orig_energy + pred_energy_corr + pred_energy = orig_energy + pred_energy_corr pred_energy = tf.abs(pred_energy) # compute pt=E/cosh(eta) @@ -682,15 +675,15 @@ def call(self, args, training=False): pred_pt = tf.abs(pred_pt) # mask the regression outputs for the nodes with a class prediction 0 - msk_output = tf.expand_dims(tf.cast(tf.argmax(out_id_hard_softmax, axis=-1) != 0, tf.float32), axis=-1) + sigmoid_turnon = tf.sigmoid(-out_id_logits[..., 0:1]) if self.mask_reg_cls0: - out_charge = out_charge * msk_output - pred_pt = pred_pt * msk_output - pred_eta = pred_eta * msk_output - pred_sin_phi = pred_sin_phi * msk_output - pred_cos_phi = pred_cos_phi * msk_output - pred_energy = pred_energy * msk_output + out_charge = out_charge * sigmoid_turnon + pred_pt = pred_pt * sigmoid_turnon + pred_eta = pred_eta * sigmoid_turnon + pred_sin_phi = pred_sin_phi * sigmoid_turnon + pred_cos_phi = pred_cos_phi * sigmoid_turnon + pred_energy = pred_energy * sigmoid_turnon ret = { "cls": out_id_softmax, diff --git a/mlpf/tfmodel/model_setup.py b/mlpf/tfmodel/model_setup.py index 1e5861e30..c606c9820 100644 --- a/mlpf/tfmodel/model_setup.py +++ b/mlpf/tfmodel/model_setup.py @@ -13,6 +13,7 @@ import fastjet import matplotlib.pyplot as plt import numpy as np +import scipy import tensorflow as tf import tensorflow_addons as tfa import tf2onnx @@ -53,22 +54,23 @@ def on_epoch_end(self, epoch, logs=None): class CustomCallback(tf.keras.callbacks.Callback): - def __init__(self, outpath, dataset, config, plot_freq=1, horovod_enabled=False): + def __init__(self, outpath, dataset, config, plot_freq=1, horovod_enabled=False, comet_experiment=None): super(CustomCallback, self).__init__() self.plot_freq = plot_freq self.dataset = dataset self.outpath = outpath self.config = config self.horovod_enabled = horovod_enabled + self.comet_experiment = comet_experiment self.writer = tf.summary.create_file_writer(outpath) def on_epoch_end(self, epoch, logs=None): if not self.horovod_enabled or hvd.rank() == 0: - epoch_end(self, epoch, logs) + epoch_end(self, epoch, logs, comet_experiment=self.comet_experiment) -def epoch_end(self, epoch, logs): +def epoch_end(self, epoch, logs, 
comet_experiment=None): # first epoch is 1, not 0 epoch = epoch + 1 @@ -92,50 +94,106 @@ def epoch_end(self, epoch, logs): yvals = {} for fi in glob.glob(str(cp_dir / "*.npz")): dd = np.load(fi) + os.remove(fi) keys_in_file = list(dd.keys()) for k in keys_in_file: - if k == "X": - continue if not (k in yvals): yvals[k] = [] yvals[k].append(dd[k]) yvals = {k: np.concatenate(v) for k, v in yvals.items()} - gen_px = yvals["gen_pt"] * yvals["gen_cos_phi"] - gen_py = yvals["gen_pt"] * yvals["gen_sin_phi"] - pred_px = yvals["pred_pt"] * yvals["pred_cos_phi"] - pred_py = yvals["pred_pt"] * yvals["pred_sin_phi"] - cand_px = yvals["cand_pt"] * yvals["cand_cos_phi"] - cand_py = yvals["cand_pt"] * yvals["cand_sin_phi"] + # compute the mask of badly-predicted particles and save to a file bad.npz + denom = np.maximum(yvals["gen_pt"], yvals["pred_pt"]) + ratio = np.abs(yvals["gen_pt"] - yvals["pred_pt"]) / denom + ratio[np.isnan(ratio)] = 0 + msk_bad = (ratio > 0.8)[:, :, 0] + yvals_bad = { + k: yvals[k][msk_bad] + for k in yvals.keys() + if (k.startswith("gen_") or k.startswith("pred_") or k.startswith("cand_")) + } + print("Number of bad particles: {}".format(len(yvals_bad["gen_cls"]))) + with open("{}/bad.npz".format(str(cp_dir)), "wb") as fi: + np.savez(fi, **yvals_bad) + + msk_gen = (np.argmax(yvals["gen_cls"], axis=-1, keepdims=True) != 0).astype(np.float32) + gen_px = yvals["gen_pt"] * yvals["gen_cos_phi"] * msk_gen + gen_py = yvals["gen_pt"] * yvals["gen_sin_phi"] * msk_gen + + msk_pred = (np.argmax(yvals["pred_cls"], axis=-1, keepdims=True) != 0).astype(np.float32) + pred_px = yvals["pred_pt"] * yvals["pred_cos_phi"] * msk_pred + pred_py = yvals["pred_pt"] * yvals["pred_sin_phi"] * msk_pred + + msk_cand = (np.argmax(yvals["cand_cls"], axis=-1, keepdims=True) != 0).astype(np.float32) + cand_px = yvals["cand_pt"] * yvals["cand_cos_phi"] * msk_cand + cand_py = yvals["cand_pt"] * yvals["cand_sin_phi"] * msk_cand gen_met = np.sqrt(np.sum(gen_px**2 + gen_py**2, axis=1)) pred_met = np.sqrt(np.sum(pred_px**2 + pred_py**2, axis=1)) cand_met = np.sqrt(np.sum(cand_px**2 + cand_py**2, axis=1)) with self.writer.as_default(): - jet_ratio = yvals["jets_pt_gen_to_pred"][:, 1] / yvals["jets_pt_gen_to_pred"][:, 0] + jet_ratio_pred = (yvals["jets_pt_gen_to_pred"][:, 1] - yvals["jets_pt_gen_to_pred"][:, 0]) / yvals[ + "jets_pt_gen_to_pred" + ][:, 0] + jet_ratio_cand = (yvals["jets_pt_gen_to_cand"][:, 1] - yvals["jets_pt_gen_to_cand"][:, 0]) / yvals[ + "jets_pt_gen_to_cand" + ][:, 0] + met_ratio_pred = (pred_met[:, 0] - gen_met[:, 0]) / gen_met[:, 0] + met_ratio_cand = (cand_met[:, 0] - gen_met[:, 0]) / gen_met[:, 0] plt.figure() - b = np.linspace(0, 5, 100) - plt.hist(yvals["jets_pt_gen_to_cand"][:, 1] / yvals["jets_pt_gen_to_cand"][:, 0], bins=b, histtype="step", lw=2) - plt.hist(yvals["jets_pt_gen_to_pred"][:, 1] / yvals["jets_pt_gen_to_pred"][:, 0], bins=b, histtype="step", lw=2) - plt.savefig(str(cp_dir / "jet_res.png"), bbox_inches="tight", dpi=100) + b = np.linspace(-2, 5, 100) + plt.hist(jet_ratio_cand, bins=b, histtype="step", lw=2, label="PF") + plt.hist(jet_ratio_pred, bins=b, histtype="step", lw=2, label="MLPF") + plt.xlabel("jet pT (reco-gen)/gen") + plt.ylabel("number of matched jets") + plt.legend(loc="best") + image_path = str(cp_dir / "jet_res.png") + plt.savefig(image_path, bbox_inches="tight", dpi=100) plt.clf() + if comet_experiment: + comet_experiment.log_image(image_path, step=epoch - 1) plt.figure() - b = np.linspace(0, 5, 100) - plt.hist(cand_met / gen_met, bins=b, histtype="step", lw=2) - 
plt.hist(pred_met / gen_met, bins=b, histtype="step", lw=2) - plt.savefig(str(cp_dir / "met_res.png"), bbox_inches="tight", dpi=100) + b = np.linspace(-1, 1, 100) + plt.hist(met_ratio_cand, bins=b, histtype="step", lw=2, label="PF") + plt.hist(met_ratio_pred, bins=b, histtype="step", lw=2, label="MLPF") + plt.xlabel("MET (reco-gen)/gen") + plt.ylabel("number of events") + plt.legend(loc="best") + image_path = str(cp_dir / "met_res.png") + plt.savefig(image_path, bbox_inches="tight", dpi=100) plt.clf() - - tf.summary.histogram("jet_pt_pred_over_gen", jet_ratio, step=epoch - 1, buckets=None, description=None) - tf.summary.scalar("jet_pt_pred_over_gen_mean", np.mean(jet_ratio), step=epoch - 1, description=None) - tf.summary.scalar("jet_pt_pred_over_gen_std", np.std(jet_ratio), step=epoch - 1, description=None) - - tf.summary.histogram("met_pred_over_gen", pred_met / gen_met, step=epoch - 1, buckets=None, description=None) - tf.summary.scalar("met_pred_over_gen_mean", np.mean(pred_met / gen_met), step=epoch - 1, description=None) - tf.summary.scalar("met_pred_over_gen_std", np.std(pred_met / gen_met), step=epoch - 1, description=None) + if comet_experiment: + comet_experiment.log_image(image_path, step=epoch - 1) + + jet_pred_wd = scipy.stats.wasserstein_distance( + yvals["jets_pt_gen_to_pred"][:, 0], yvals["jets_pt_gen_to_pred"][:, 1] + ) + jet_pred_p25 = np.percentile(jet_ratio_pred, 25) + jet_pred_p50 = np.percentile(jet_ratio_pred, 50) + jet_pred_p75 = np.percentile(jet_ratio_pred, 75) + jet_pred_iqr = jet_pred_p75 - jet_pred_p25 + + met_pred_wd = scipy.stats.wasserstein_distance(gen_met[:, 0], pred_met[:, 0]) + met_pred_p25 = np.percentile(met_ratio_pred, 25) + met_pred_p50 = np.percentile(met_ratio_pred, 50) + met_pred_p75 = np.percentile(met_ratio_pred, 75) + met_pred_iqr = met_pred_p75 - met_pred_p25 + + for name, val in [ + ("jet_wd", jet_pred_wd), + ("jet_iqr", jet_pred_iqr), + ("jet_med", jet_pred_p50), + ("met_wd", met_pred_wd), + ("met_iqr", met_pred_iqr), + ("met_med", met_pred_p50), + ]: + tf.summary.scalar(name, val, step=epoch - 1, description=None) + + if comet_experiment: + comet_experiment.log_metric(name, val, step=epoch - 1) def prepare_callbacks( @@ -179,6 +237,7 @@ def get_checkpoint_history_callback(outdir, config, dataset, comet_experiment, h config, plot_freq=config["callbacks"]["plot_freq"], horovod_enabled=horovod_enabled, + comet_experiment=comet_experiment, ) callbacks += [cb] @@ -282,7 +341,7 @@ def deltar(a, b): # Given a model, evaluates it on each batch of the validation dataset # For each batch, save the inputs, the generator-level target, the candidate-level target, and the prediction -def eval_model(model, dataset, config, outdir): +def eval_model(model, dataset, config, outdir, jet_ptcut=5.0, jet_match_dr=0.1): ibatch = 0 @@ -291,6 +350,12 @@ def eval_model(model, dataset, config, outdir): for elem in tqdm(dataset, desc="Evaluating model"): y_pred = model.predict(elem["X"], verbose=False) + # mask the predictions where there was no predicted particles + msk = (np.argmax(y_pred["cls"], axis=-1, keepdims=True) != 0).astype(np.float32) + for k in y_pred.keys(): + if k != "cls": + y_pred[k] = y_pred[k] * msk + np_outfile = "{}/pred_batch{}.npz".format(outdir, ibatch) ygen = unpack_target(elem["ygen"], config["dataset"]["num_output_classes"], config) @@ -321,8 +386,8 @@ def eval_model(model, dataset, config, outdir): jets = cluster.inclusive_jets() jet_constituents = cluster.constituent_index() - jets_coll[typ] = jets[jets.pt > 5.0] - jets_const[typ] = 
jet_constituents[jets.pt > 5.0] + jets_coll[typ] = jets[jets.pt > jet_ptcut] + jets_const[typ] = jet_constituents[jets.pt > jet_ptcut] for key in ["pt", "eta", "phi", "energy"]: outs["jets_gen_{}".format(key)] = awkward.to_numpy(awkward.flatten(getattr(jets_coll["gen"], key))) @@ -333,7 +398,7 @@ def eval_model(model, dataset, config, outdir): cart = awkward.cartesian([jets_coll["gen"], jets_coll["pred"]], nested=True) jets_a, jets_b = awkward.unzip(cart) drs = deltar(jets_a, jets_b) - match_gen_to_pred = [awkward.where(d < 0.1) for d in drs] + match_gen_to_pred = [awkward.where(d < jet_match_dr) for d in drs] m0 = awkward.from_iter([m[0] for m in match_gen_to_pred]) m1 = awkward.from_iter([m[1] for m in match_gen_to_pred]) j1s = jets_coll["gen"][m0] @@ -346,7 +411,7 @@ def eval_model(model, dataset, config, outdir): cart = awkward.cartesian([jets_coll["gen"], jets_coll["cand"]], nested=True) jets_a, jets_b = awkward.unzip(cart) drs = deltar(jets_a, jets_b) - match_gen_to_pred = [awkward.where(d < 0.1) for d in drs] + match_gen_to_pred = [awkward.where(d < jet_match_dr) for d in drs] m0 = awkward.from_iter([m[0] for m in match_gen_to_pred]) m1 = awkward.from_iter([m[1] for m in match_gen_to_pred]) j1s = jets_coll["gen"][m0] diff --git a/mlpf/tfmodel/utils.py b/mlpf/tfmodel/utils.py index ca401f47c..98107ce3a 100644 --- a/mlpf/tfmodel/utils.py +++ b/mlpf/tfmodel/utils.py @@ -14,24 +14,34 @@ @tf.function -def histogram_2d(eta, phi, weights_px, weights_py, eta_range, phi_range, nbins, bin_dtype=tf.float32): +def histogram_2d(mask, eta, phi, weights_px, weights_py, eta_range, phi_range, nbins, bin_dtype=tf.float32): eta_bins = tf.histogram_fixed_width_bins(eta, eta_range, nbins=nbins, dtype=bin_dtype) phi_bins = tf.histogram_fixed_width_bins(phi, phi_range, nbins=nbins, dtype=bin_dtype) + # create empty histograms hist_px = tf.zeros((nbins, nbins), dtype=weights_px.dtype) hist_py = tf.zeros((nbins, nbins), dtype=weights_py.dtype) - indices = tf.transpose(tf.stack([phi_bins, eta_bins])) + indices = tf.transpose(tf.stack([eta_bins, phi_bins])) - hist_px = tf.tensor_scatter_nd_add(hist_px, indices, weights_px) - hist_py = tf.tensor_scatter_nd_add(hist_py, indices, weights_py) + indices_masked = tf.boolean_mask(indices, mask) + weights_px_masked = tf.boolean_mask(weights_px, mask) + weights_py_masked = tf.boolean_mask(weights_py, mask) + + hist_px = tf.tensor_scatter_nd_add(hist_px, indices=indices_masked, updates=weights_px_masked) + hist_py = tf.tensor_scatter_nd_add(hist_py, indices=indices_masked, updates=weights_py_masked) hist_pt = tf.sqrt(hist_px**2 + hist_py**2) return hist_pt @tf.function -def batched_histogram_2d(eta, phi, w_px, w_py, x_range, y_range, nbins, bin_dtype=tf.float32): - return tf.vectorized_map( - lambda a: histogram_2d(a[0], a[1], a[2], a[3], x_range, y_range, nbins, bin_dtype), (eta, phi, w_px, w_py) +def batched_histogram_2d(mask, eta, phi, w_px, w_py, x_range, y_range, nbins, bin_dtype=tf.float32): + return tf.map_fn( + lambda a: histogram_2d(a[0], a[1], a[2], a[3], a[4], x_range, y_range, nbins, bin_dtype), + (mask, eta, phi, w_px, w_py), + fn_output_signature=tf.TensorSpec( + [nbins, nbins], + dtype=tf.float32, + ), ) @@ -244,50 +254,6 @@ def get_tuner(cfg_hypertune, model_builder, outdir, recreate, strategy): ) -def compute_weights_invsqrt(X, y, w): - wn = tf.cast(tf.shape(w)[-1], tf.float32) / tf.sqrt(w) - wn *= tf.cast(X[:, 0] != 0, tf.float32) - # wn /= tf.reduce_sum(wn) - return X, y, wn - - -def compute_weights_none(X, y, w): - wn = tf.ones_like(w) - wn *= 
tf.cast(X[:, 0] != 0, tf.float32) - return X, y, wn - - -def make_weight_function(config): - def weight_func(X, y, w): - - w_signal_only = tf.where(y[:, 0] == 0, 0.0, 1.0) - w_signal_only *= tf.cast(X[:, 0] != 0, tf.float32) - - w_none = tf.ones_like(w) - w_none *= tf.cast(X[:, 0] != 0, tf.float32) - - w_invsqrt = tf.cast(tf.shape(w)[-1], tf.float32) / tf.sqrt(w) - w_invsqrt *= tf.cast(X[:, 0] != 0, tf.float32) - - w_signal_only_invsqrt = tf.where(y[:, 0] == 0, 0.0, tf.cast(tf.shape(w)[-1], tf.float32) / tf.sqrt(w)) - w_signal_only_invsqrt *= tf.cast(X[:, 0] != 0, tf.float32) - - weight_d = { - "none": w_none, - "signal_only": w_signal_only, - "signal_only_inverse_sqrt": w_signal_only_invsqrt, - "inverse_sqrt": w_invsqrt, - } - - ret_w = {} - for loss_component, weight_type in config["sample_weights"].items(): - ret_w[loss_component] = weight_d[weight_type] - - return X, y, ret_w - - return weight_func - - def targets_multi_output(num_output_classes): def func(X, y, w): @@ -469,12 +435,13 @@ def sliced_wasserstein_loss(y_true, y_pred, num_projections=1000): @tf.function -def hist_loss_2d(y_true, y_pred): +def hist_2d_loss(y_true, y_pred): eta_true = y_true[..., 2] eta_pred = y_pred[..., 2] - phi_true = tf.math.atan2(y_true[..., 3], y_true[..., 4]) - phi_pred = tf.math.atan2(y_pred[..., 3], y_pred[..., 4]) + + sin_phi_true = y_true[..., 3] + sin_phi_pred = y_pred[..., 3] pt_true = y_true[..., 0] pt_pred = y_pred[..., 0] @@ -484,12 +451,30 @@ def hist_loss_2d(y_true, y_pred): px_pred = pt_pred * y_pred[..., 4] py_pred = pt_pred * y_pred[..., 3] + mask = eta_true != 0.0 + + # bin in (eta, sin_phi), as calculating phi=atan2(sin_phi, cos_phi) + # introduces a numerical instability which can lead to NaN. pt_hist_true = batched_histogram_2d( - eta_true, phi_true, px_true, py_true, tf.cast([-6.0, 6.0], tf.float32), tf.cast([-4.0, 4.0], tf.float32), 20 + mask, + eta_true, + sin_phi_true, + px_true, + py_true, + tf.cast([-6.0, 6.0], tf.float32), + tf.cast([-1.0, 1.0], tf.float32), + 20, ) pt_hist_pred = batched_histogram_2d( - eta_pred, phi_pred, px_pred, py_pred, tf.cast([-6.0, 6.0], tf.float32), tf.cast([-4.0, 4.0], tf.float32), 20 + mask, + eta_pred, + sin_phi_pred, + px_pred, + py_pred, + tf.cast([-6.0, 6.0], tf.float32), + tf.cast([-1.0, 1.0], tf.float32), + 20, ) mse = tf.math.sqrt(tf.reduce_mean((pt_hist_true - pt_hist_pred) ** 2, axis=[-1, -2])) @@ -552,24 +537,42 @@ def batched_jet_reco(px, py, jet_idx, max_jets): ) +# y_true: [nbatch, nptcl, 5] array of true particle properties. +# y_pred: [nbatch, nptcl, 5] array of predicted particle properties +# last dim corresponds to [pt, energy, eta, sin_phi, cos_phi, gen_jet_idx] +# max_jets: integer of the max number of jets to consider +# returns: dict of true and predicted jet pts. 
@tf.function -def gen_jet_loss(y_true, y_pred): +def compute_jet_pt(y_true, y_pred, max_jets=201): y = {} y["true"] = y_true y["pred"] = y_pred jet_pt = {} - max_jets = 201 jet_idx = tf.cast(y["true"][..., 5], dtype=tf.int32) for typ in ["true", "pred"]: px = y[typ][..., 0] * y[typ][..., 4] py = y[typ][..., 0] * y[typ][..., 3] jet_pt[typ] = batched_jet_reco(px, py, jet_idx, max_jets) + return jet_pt + +@tf.function +def gen_jet_mse_loss(y_true, y_pred): + + jet_pt = compute_jet_pt(y_true, y_pred) mse = tf.math.sqrt(tf.reduce_mean((jet_pt["true"] - jet_pt["pred"]) ** 2, axis=[-1, -2])) return mse +@tf.function +def gen_jet_logcosh_loss(y_true, y_pred): + + jet_pt = compute_jet_pt(y_true, y_pred) + loss = tf.keras.losses.log_cosh(jet_pt["true"], jet_pt["pred"]) + return loss + + def get_loss_dict(config): cls_loss = get_class_loss(config) @@ -593,16 +596,19 @@ def get_loss_dict(config): "energy": config["loss"]["energy_loss_coef"], } + if config["loss"]["event_loss"] != "none": + loss_weights["pt_e_eta_phi"] = config["loss"]["event_loss_coef"] + if config["loss"]["event_loss"] == "sliced_wasserstein": loss_dict["pt_e_eta_phi"] = sliced_wasserstein_loss - loss_weights["pt_e_eta_phi"] = config["loss"]["event_loss_coef"] if config["loss"]["event_loss"] == "hist_2d": - loss_dict["pt_e_eta_phi"] = hist_loss_2d - loss_weights["pt_e_eta_phi"] = config["loss"]["event_loss_coef"] + loss_dict["pt_e_eta_phi"] = hist_2d_loss - if config["loss"]["event_loss"] == "gen_jet": - loss_dict["pt_e_eta_phi"] = gen_jet_loss - loss_weights["pt_e_eta_phi"] = config["loss"]["event_loss_coef"] + if config["loss"]["event_loss"] == "gen_jet_mse": + loss_dict["pt_e_eta_phi"] = gen_jet_mse_loss + + if config["loss"]["event_loss"] == "gen_jet_logcosh": + loss_dict["pt_e_eta_phi"] = gen_jet_logcosh_loss return loss_dict, loss_weights diff --git a/notebooks/cms-mlpf.ipynb b/notebooks/cms-mlpf.ipynb index 4d8ca90fc..a7ef8ac87 100644 --- a/notebooks/cms-mlpf.ipynb +++ b/notebooks/cms-mlpf.ipynb @@ -42,6 +42,16 @@ "mplhep.set_style(mplhep.styles.CMS)" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "fab4be8d", + "metadata": {}, + "outputs": [], + "source": [ + "!ls -lrt ../experiments/" + ] + }, { "cell_type": "code", "execution_count": null, @@ -56,7 +66,7 @@ "# These can be overriden from the command line using `papermill cms-mlpf.ipynb -p path new/path/...`\n", "backend = \"tf\"\n", "sample = \"ttbar\"\n", - "path = \"../experiments/cms-gen_20220818_115500_239359.joosep-desktop-work/evaluation/epoch_10/cms_pf_ttbar/\"\n", + "path = \"../experiments/genjet_logcosh_20220828_091350_455904.gpu0.local/evaluation/epoch_49/cms_pf_ttbar/\"\n", "PAPERMILL_OUTPUT_PATH = \"./\"" ] }, @@ -214,6 +224,16 @@ " yvals_f[k] = yvals_f[k][..., -1]" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "f61f3b32", + "metadata": {}, + "outputs": [], + "source": [ + "yvals.keys()" + ] + }, { "cell_type": "code", "execution_count": null, @@ -314,6 +334,20 @@ "plt.savefig(\"{}/jets_eta.pdf\".format(outpath), bbox_inches=\"tight\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "43053dc0", + "metadata": {}, + "outputs": [], + "source": [ + "def med_iqr(arr):\n", + " p25 = np.percentile(arr, 25)\n", + " p50 = np.percentile(arr, 50)\n", + " p75 = np.percentile(arr, 75)\n", + " return p50, p75 - p25" + ] + }, { "cell_type": "code", "execution_count": null, @@ -326,16 +360,12 @@ "fig = plt.figure()\n", "ax = plt.axes()\n", "vals = (yvals[\"jets_pt_gen_to_cand\"][:, 1] - 
yvals[\"jets_pt_gen_to_cand\"][:, 0]) / yvals[\"jets_pt_gen_to_cand\"][:, 0]\n", - "m = np.mean(vals)\n", - "s = np.std(vals)\n", - "plt.hist(vals, bins=b, histtype=\"step\", lw=2, label=r\"PF, $\\mu={:.2f}, \\sigma={:.2f}$\".format(m, s))\n", - "print(m, s)\n", + "p = med_iqr(vals)\n", + "plt.hist(vals, bins=b, histtype=\"step\", lw=2, label=r\"PF (M={:.2f}, IQR={:.2f})\".format(p[0], p[1]))\n", "\n", "vals = (yvals[\"jets_pt_gen_to_pred\"][:, 1] - yvals[\"jets_pt_gen_to_pred\"][:, 0]) / yvals[\"jets_pt_gen_to_pred\"][:, 0]\n", - "m = np.mean(vals)\n", - "s = np.std(vals)\n", - "plt.hist(vals, bins=b, histtype=\"step\", lw=2, label=r\"MLPF, $\\mu={:.2f}, \\sigma={:.2f}$\".format(m, s))\n", - "print(m, s)\n", + "p = med_iqr(vals)\n", + "plt.hist(vals, bins=b, histtype=\"step\", lw=2, label=r\"MLPF (M={:.2f}, IQR={:.2f})\".format(p[0], p[1]))\n", "\n", "plt.yscale(\"log\")\n", "plt.ylim(1, 1e7)\n", @@ -378,8 +408,9 @@ "sum_py = np.sum(yvals[\"cand_py\"], axis=1)\n", "cand_met = np.sqrt(sum_px**2 + sum_py**2)[:, 0]\n", "\n", - "sum_px = np.sum(yvals[\"pred_px\"], axis=1)\n", - "sum_py = np.sum(yvals[\"pred_py\"], axis=1)\n", + "msk_pred = (yvals[\"pred_cls_id\"] != 0).astype(np.float32)\n", + "sum_px = np.sum(yvals[\"pred_px\"] * msk_pred, axis=1)\n", + "sum_py = np.sum(yvals[\"pred_py\"] * msk_pred, axis=1)\n", "pred_met = np.sqrt(sum_px**2 + sum_py**2)[:, 0]" ] }, @@ -392,29 +423,106 @@ "source": [ "fig = plt.figure()\n", "ax = plt.axes()\n", - "b = np.linspace(-2, 100, 101)\n", + "b = np.linspace(-2, 20, 101)\n", "vals_a = (cand_met - gen_met) / gen_met\n", "vals_b = (pred_met - gen_met) / gen_met\n", - "plt.hist(\n", - " vals_a, bins=b, histtype=\"step\", lw=2, label=\"PF, $\\mu={:.2f}$, $\\sigma={:.2f}$\".format(np.mean(vals_a), np.std(vals_a))\n", - ")\n", + "\n", + "p = med_iqr(vals_a)\n", + "plt.hist(vals_a, bins=b, histtype=\"step\", lw=2, label=\"PF, $(M={:.2f}, IQR={:.2f})$\".format(p[0], p[1]))\n", + "\n", + "p = med_iqr(vals_b)\n", "plt.hist(\n", " vals_b,\n", " bins=b,\n", " histtype=\"step\",\n", " lw=2,\n", - " label=\"MLPF, $\\mu={:.2f}$, $\\sigma={:.2f}$\".format(np.mean(vals_b), np.std(vals_b)),\n", + " label=\"MLPF, $(M={:.2f}, IQR={:.2f})$\".format(p[0], p[1]),\n", ")\n", "plt.yscale(\"log\")\n", "cms_label(ax)\n", "sample_label(ax)\n", "plt.ylim(1, 1e5)\n", - "plt.legend(loc=(0.4, 0.7))\n", + "plt.legend(loc=(0.35, 0.7))\n", "plt.xlabel(r\"$\\frac{\\mathrm{MET}_{\\mathrm{reco}} - \\mathrm{MET}_{\\mathrm{gen}}}{\\mathrm{MET}_{\\mathrm{gen}}}$\")\n", "plt.ylabel(\"Number of events / bin\")\n", "plt.savefig(\"{}/met.pdf\".format(outpath), bbox_inches=\"tight\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "606b754b", + "metadata": {}, + "outputs": [], + "source": [ + "cols = [\n", + " \"typ\",\n", + " \"pt\",\n", + " \"eta\",\n", + " \"phi\",\n", + " \"e\",\n", + " \"layer\",\n", + " \"depth\",\n", + " \"charge\",\n", + " \"trajpoint\",\n", + " \"eta_ecal\",\n", + " \"phi_ecal\",\n", + " \"eta_hcal\",\n", + " \"phi_hcal\",\n", + " \"muon_dt_hits\",\n", + " \"muon_csc_hits\",\n", + " \"muon_type\",\n", + " \"px\",\n", + " \"py\",\n", + " \"pz\",\n", + " \"deltap\",\n", + " \"sigmadeltap\",\n", + " \"gsf_electronseed_trkorecal\",\n", + " \"gsf_electronseed_dnn1\",\n", + " \"gsf_electronseed_dnn2\",\n", + " \"gsf_electronseed_dnn3\",\n", + " \"gsf_electronseed_dnn4\",\n", + " \"gsf_electronseed_dnn5\",\n", + " \"num_hits\",\n", + " \"cluster_flags\",\n", + " \"corr_energy\",\n", + " \"corr_energy_err\",\n", + " \"vx\",\n", + " \"vy\",\n", + " 
\"vz\",\n", + " \"pterror\",\n", + " \"etaerror\",\n", + " \"phierror\",\n", + " \"lambd\",\n", + " \"lambdaerror\",\n", + " \"theta\",\n", + " \"thetaerror\",\n", + " \"gen_cls_id\",\n", + " \"cand_cls_id\",\n", + " \"pred_cls_id\",\n", + " \"gen_pt\",\n", + " \"cand_pt\",\n", + " \"pred_pt\",\n", + "]\n", + "pt_diff = np.abs(yvals_f[\"gen_pt\"] - yvals_f[\"pred_pt\"])\n", + "msk = pt_diff > 10.0\n", + "df = pandas.DataFrame(\n", + " np.concatenate(\n", + " [\n", + " X_f[msk],\n", + " np.expand_dims(yvals_f[\"gen_cls_id\"][msk], axis=-1),\n", + " np.expand_dims(yvals_f[\"cand_cls_id\"][msk], axis=-1),\n", + " np.expand_dims(yvals_f[\"pred_cls_id\"][msk], axis=-1),\n", + " np.expand_dims(yvals_f[\"gen_pt\"][msk], axis=-1),\n", + " np.expand_dims(yvals_f[\"cand_pt\"][msk], axis=-1),\n", + " np.expand_dims(yvals_f[\"pred_pt\"][msk], axis=-1),\n", + " ],\n", + " axis=-1,\n", + " ),\n", + " columns=cols,\n", + ")" + ] + }, { "cell_type": "code", "execution_count": null, @@ -1346,7 +1454,7 @@ "val_reg_loss = sum(\n", " [history[\"val_{}_loss\".format(l)].values for l in [\"energy\", \"pt\", \"eta\", \"sin_phi\", \"cos_phi\", \"charge\"]]\n", ")\n", - "p0 = loss_plot(reg_loss, val_reg_loss, margin=0.5)\n", + "p0 = loss_plot(reg_loss, val_reg_loss, margin=0.1)\n", "plt.ylabel(\"Regression loss\")\n", "plt.savefig(\"{}/reg_loss.pdf\".format(outpath), bbox_inches=\"tight\")" ] diff --git a/parameters/cms-gen.yaml b/parameters/cms-gen.yaml index ea66f094f..a6589ba55 100644 --- a/parameters/cms-gen.yaml +++ b/parameters/cms-gen.yaml @@ -23,6 +23,7 @@ dataset: padded_num_elem_size: 6400 #(pt, eta, sin phi, cos phi, E) num_momentum_outputs: 5 + cls_weight_by_pt: no loss: classification_loss_coef: 1.0 @@ -163,7 +164,6 @@ parameters: energy_num_layers: 2 layernorm: yes mask_reg_cls0: no - energy_multimodal: yes skip_connection: yes debug: no diff --git a/parameters/cms.yaml b/parameters/cms.yaml index a343a367c..8dff67ab7 100644 --- a/parameters/cms.yaml +++ b/parameters/cms.yaml @@ -23,6 +23,7 @@ dataset: padded_num_elem_size: 12000 #(pt, eta, sin phi, cos phi, E) num_momentum_outputs: 5 + cls_weight_by_pt: no loss: classification_loss_coef: 100.0 @@ -147,8 +148,6 @@ parameters: layernorm: yes mask_reg_cls0: no - energy_multimodal: no - skip_connection: yes debug: no diff --git a/parameters/delphes.yaml b/parameters/delphes.yaml index a15b5ab2f..82348a1bc 100644 --- a/parameters/delphes.yaml +++ b/parameters/delphes.yaml @@ -11,6 +11,7 @@ dataset: num_output_classes: 6 num_momentum_outputs: 5 padded_num_elem_size: 6400 + cls_weight_by_pt: no loss: classification_loss_coef: 1.0 diff --git a/parameters/test-eventloss/baseline-clspt.yaml b/parameters/test-eventloss/baseline-clspt.yaml new file mode 100644 index 000000000..a6730445a --- /dev/null +++ b/parameters/test-eventloss/baseline-clspt.yaml @@ -0,0 +1,232 @@ +backend: tensorflow + +dataset: + schema: cms + target_particles: gen + num_input_features: 41 + num_output_features: 7 +# NONE = 0, +# TRACK = 1, +# PS1 = 2, +# PS2 = 3, +# ECAL = 4, +# HCAL = 5, +# GSF = 6, +# BREM = 7, +# HFEM = 8, +# HFHAD = 9, +# SC = 10, +# HO = 11, + num_input_classes: 12 + #(none=0, ch.had=1, n.had=2, hfem=3, hfhad=4, gamma=5, e=6, mu=7, tau=8) + num_output_classes: 9 + padded_num_elem_size: 6400 + #(pt, eta, sin phi, cos phi, E) + num_momentum_outputs: 5 + cls_weight_by_pt: yes + +loss: + classification_loss_coef: 1.0 + charge_loss_coef: 1.0 + pt_loss_coef: 1.0 + eta_loss_coef: 1.0 + sin_phi_loss_coef: 1.0 + cos_phi_loss_coef: 1.0 + energy_loss_coef: 1.0 + energy_loss: 
+ type: Huber + pt_loss: + type: Huber + sin_phi_loss: + type: Huber + delta: 0.1 + cos_phi_loss: + type: Huber + delta: 0.1 + eta_loss: + type: Huber + delta: 0.1 + event_loss: none + event_loss_coef: 0.0 + +tensorflow: + eager: no + +setup: + train: yes + weights: + weights_config: + lr: 0.0005 + num_events_validation: 100 + num_epochs: 50 + dtype: float32 + trainable: + classification_loss_type: sigmoid_focal_crossentropy + lr_schedule: none # exponentialdecay, onecycle, none + optimizer: adam # adam, adamw, sgd + horovod_enabled: False + +optimizer: + adam: + amsgrad: no + #pcgrad does not work with LossScaleOptimizer, so it must be disabled for float16 + pcgrad: yes + adamw: + amsgrad: yes + weight_decay: 0.001 + sgd: + nesterov: no + momentum: 0.9 + +# LR Schedules +exponentialdecay: + decay_steps: 2000 + decay_rate: 0.99 + staircase: yes +onecycle: + mom_min: 0.85 + mom_max: 0.95 + warmup_ratio: 0.3 + div_factor: 25.0 + final_div: 100000.0 + +parameters: + model: gnn_dense + input_encoding: cms + node_update_mode: concat + do_node_encoding: no + node_encoding_hidden_dim: 128 + + combined_graph_layer: + bin_size: 100 + max_num_bins: 200 + distance_dim: 64 + layernorm: yes + dropout: 0.0 + dist_activation: elu + ffn_dist_num_layers: 2 + ffn_dist_hidden_dim: 128 + + # MPNN + #kernel: + # type: NodePairTrainableKernel + # activation: elu + #num_node_messages: 1 + #node_message: + # type: NodeMessageLearnable + # output_dim: 64 + # hidden_dim: 128 + # num_layers: 2 + # activation: elu + #activation: elu + + # GCN + kernel: + type: NodePairGaussianKernel + dist_mult: 0.1 + clip_value_low: 0.0 + dist_norm: l2 + num_node_messages: 2 + node_message: + type: GHConvDense + output_dim: 128 + activation: elu + #if this is enabled, it will break float16 training + normalize_degrees: yes + activation: elu + + num_graph_layers_id: 2 + num_graph_layers_reg: 2 + output_decoding: + activation: elu + regression_use_classification: yes + dropout: 0.0 + + id_dim_decrease: yes + charge_dim_decrease: yes + pt_dim_decrease: yes + eta_dim_decrease: yes + phi_dim_decrease: yes + energy_dim_decrease: yes + + id_hidden_dim: 256 + charge_hidden_dim: 256 + pt_hidden_dim: 256 + eta_hidden_dim: 256 + phi_hidden_dim: 256 + energy_hidden_dim: 256 + + id_num_layers: 2 + charge_num_layers: 2 + pt_num_layers: 2 + eta_num_layers: 2 + phi_num_layers: 2 + energy_num_layers: 2 + layernorm: yes + mask_reg_cls0: no + + skip_connection: yes + debug: no + +timing: + num_ev: 100 + num_iter: 3 + +callbacks: + checkpoint: + monitor: "val_loss" + plot_freq: 1 + tensorboard: + dump_history: yes + hist_freq: 1 + +hypertune: + algorithm: hyperband # random, bayesian, hyperband + random: + objective: val_loss + max_trials: 100 + bayesian: + objective: val_loss + max_trials: 100 + num_initial_points: 2 + hyperband: + objective: val_loss + max_epochs: 10 + factor: 3 + iterations: 1 + executions_per_trial: 1 + +raytune: + local_dir: # Note: please specify an absolute path + sched: asha # asha, hyperband + search_alg: # bayes, bohb, hyperopt, nevergrad, scikit + default_metric: "val_loss" + default_mode: "min" + # Tune schedule specific parameters + asha: + max_t: 200 + reduction_factor: 4 + brackets: 1 + grace_period: 10 + hyperband: + max_t: 200 + reduction_factor: 4 + hyperopt: + n_random_steps: 10 + nevergrad: + n_random_steps: 10 + +train_test_datasets: + physical: + batch_per_gpu: 5 + datasets: + - cms_pf_ttbar + +validation_datasets: + - cms_pf_ttbar + +datasets: + cms_pf_ttbar: + version: 1.4.0 + data_dir: + manual_dir: diff --git 
a/parameters/test-eventloss/baseline-mask_reg_cls0.yaml b/parameters/test-eventloss/baseline-mask_reg_cls0.yaml new file mode 100644 index 000000000..3a59e47c3 --- /dev/null +++ b/parameters/test-eventloss/baseline-mask_reg_cls0.yaml @@ -0,0 +1,232 @@ +backend: tensorflow + +dataset: + schema: cms + target_particles: gen + num_input_features: 41 + num_output_features: 7 +# NONE = 0, +# TRACK = 1, +# PS1 = 2, +# PS2 = 3, +# ECAL = 4, +# HCAL = 5, +# GSF = 6, +# BREM = 7, +# HFEM = 8, +# HFHAD = 9, +# SC = 10, +# HO = 11, + num_input_classes: 12 + #(none=0, ch.had=1, n.had=2, hfem=3, hfhad=4, gamma=5, e=6, mu=7, tau=8) + num_output_classes: 9 + padded_num_elem_size: 6400 + #(pt, eta, sin phi, cos phi, E) + num_momentum_outputs: 5 + cls_weight_by_pt: no + +loss: + classification_loss_coef: 1.0 + charge_loss_coef: 1.0 + pt_loss_coef: 1.0 + eta_loss_coef: 1.0 + sin_phi_loss_coef: 1.0 + cos_phi_loss_coef: 1.0 + energy_loss_coef: 1.0 + energy_loss: + type: Huber + pt_loss: + type: Huber + sin_phi_loss: + type: Huber + delta: 0.1 + cos_phi_loss: + type: Huber + delta: 0.1 + eta_loss: + type: Huber + delta: 0.1 + event_loss: none + event_loss_coef: 0.0 + +tensorflow: + eager: no + +setup: + train: yes + weights: + weights_config: + lr: 0.0005 + num_events_validation: 100 + num_epochs: 50 + dtype: float32 + trainable: + classification_loss_type: sigmoid_focal_crossentropy + lr_schedule: none # exponentialdecay, onecycle, none + optimizer: adam # adam, adamw, sgd + horovod_enabled: False + +optimizer: + adam: + amsgrad: no + #pcgrad does not work with LossScaleOptimizer, so it must be disabled for float16 + pcgrad: yes + adamw: + amsgrad: yes + weight_decay: 0.001 + sgd: + nesterov: no + momentum: 0.9 + +# LR Schedules +exponentialdecay: + decay_steps: 2000 + decay_rate: 0.99 + staircase: yes +onecycle: + mom_min: 0.85 + mom_max: 0.95 + warmup_ratio: 0.3 + div_factor: 25.0 + final_div: 100000.0 + +parameters: + model: gnn_dense + input_encoding: cms + node_update_mode: concat + do_node_encoding: no + node_encoding_hidden_dim: 128 + + combined_graph_layer: + bin_size: 100 + max_num_bins: 200 + distance_dim: 64 + layernorm: yes + dropout: 0.0 + dist_activation: elu + ffn_dist_num_layers: 2 + ffn_dist_hidden_dim: 128 + + # MPNN + #kernel: + # type: NodePairTrainableKernel + # activation: elu + #num_node_messages: 1 + #node_message: + # type: NodeMessageLearnable + # output_dim: 64 + # hidden_dim: 128 + # num_layers: 2 + # activation: elu + #activation: elu + + # GCN + kernel: + type: NodePairGaussianKernel + dist_mult: 0.1 + clip_value_low: 0.0 + dist_norm: l2 + num_node_messages: 2 + node_message: + type: GHConvDense + output_dim: 128 + activation: elu + #if this is enabled, it will break float16 training + normalize_degrees: yes + activation: elu + + num_graph_layers_id: 2 + num_graph_layers_reg: 2 + output_decoding: + activation: elu + regression_use_classification: yes + dropout: 0.0 + + id_dim_decrease: yes + charge_dim_decrease: yes + pt_dim_decrease: yes + eta_dim_decrease: yes + phi_dim_decrease: yes + energy_dim_decrease: yes + + id_hidden_dim: 256 + charge_hidden_dim: 256 + pt_hidden_dim: 256 + eta_hidden_dim: 256 + phi_hidden_dim: 256 + energy_hidden_dim: 256 + + id_num_layers: 2 + charge_num_layers: 2 + pt_num_layers: 2 + eta_num_layers: 2 + phi_num_layers: 2 + energy_num_layers: 2 + layernorm: yes + mask_reg_cls0: yes + + skip_connection: yes + debug: no + +timing: + num_ev: 100 + num_iter: 3 + +callbacks: + checkpoint: + monitor: "val_loss" + plot_freq: 1 + tensorboard: + dump_history: yes 
+ hist_freq: 1 + +hypertune: + algorithm: hyperband # random, bayesian, hyperband + random: + objective: val_loss + max_trials: 100 + bayesian: + objective: val_loss + max_trials: 100 + num_initial_points: 2 + hyperband: + objective: val_loss + max_epochs: 10 + factor: 3 + iterations: 1 + executions_per_trial: 1 + +raytune: + local_dir: # Note: please specify an absolute path + sched: asha # asha, hyperband + search_alg: # bayes, bohb, hyperopt, nevergrad, scikit + default_metric: "val_loss" + default_mode: "min" + # Tune schedule specific parameters + asha: + max_t: 200 + reduction_factor: 4 + brackets: 1 + grace_period: 10 + hyperband: + max_t: 200 + reduction_factor: 4 + hyperopt: + n_random_steps: 10 + nevergrad: + n_random_steps: 10 + +train_test_datasets: + physical: + batch_per_gpu: 5 + datasets: + - cms_pf_ttbar + +validation_datasets: + - cms_pf_ttbar + +datasets: + cms_pf_ttbar: + version: 1.4.0 + data_dir: + manual_dir: diff --git a/parameters/test-eventloss/baseline.yaml b/parameters/test-eventloss/baseline.yaml index a06aaac63..52dc9acef 100644 --- a/parameters/test-eventloss/baseline.yaml +++ b/parameters/test-eventloss/baseline.yaml @@ -23,6 +23,7 @@ dataset: padded_num_elem_size: 6400 #(pt, eta, sin phi, cos phi, E) num_momentum_outputs: 5 + cls_weight_by_pt: no loss: classification_loss_coef: 1.0 @@ -55,9 +56,9 @@ setup: train: yes weights: weights_config: - lr: 0.00005 + lr: 0.0005 num_events_validation: 100 - num_epochs: 20 + num_epochs: 50 dtype: float32 trainable: classification_loss_type: sigmoid_focal_crossentropy @@ -134,17 +135,13 @@ parameters: normalize_degrees: yes activation: elu - num_graph_layers_id: 3 - num_graph_layers_reg: 3 + num_graph_layers_id: 2 + num_graph_layers_reg: 2 output_decoding: activation: elu regression_use_classification: yes dropout: 0.0 - pt_skip_gate: no - eta_skip_gate: yes - phi_skip_gate: yes - id_dim_decrease: yes charge_dim_decrease: yes pt_dim_decrease: yes @@ -166,8 +163,7 @@ parameters: phi_num_layers: 2 energy_num_layers: 2 layernorm: yes - mask_reg_cls0: yes - energy_multimodal: yes + mask_reg_cls0: no skip_connection: yes debug: no @@ -231,18 +227,6 @@ validation_datasets: datasets: cms_pf_ttbar: - version: 1.3.1 - data_dir: - manual_dir: - cms_pf_ztt: - version: 1.3.1 - data_dir: - manual_dir: - cms_pf_qcd: - version: 1.3.1 - data_dir: - manual_dir: - cms_pf_qcd_high_pt: - version: 1.3.1 + version: 1.4.0 data_dir: manual_dir: diff --git a/parameters/test-eventloss/genjet_logcosh.yaml b/parameters/test-eventloss/genjet_logcosh.yaml new file mode 100644 index 000000000..5f9120dcd --- /dev/null +++ b/parameters/test-eventloss/genjet_logcosh.yaml @@ -0,0 +1,232 @@ +backend: tensorflow + +dataset: + schema: cms + target_particles: gen + num_input_features: 41 + num_output_features: 7 +# NONE = 0, +# TRACK = 1, +# PS1 = 2, +# PS2 = 3, +# ECAL = 4, +# HCAL = 5, +# GSF = 6, +# BREM = 7, +# HFEM = 8, +# HFHAD = 9, +# SC = 10, +# HO = 11, + num_input_classes: 12 + #(none=0, ch.had=1, n.had=2, hfem=3, hfhad=4, gamma=5, e=6, mu=7, tau=8) + num_output_classes: 9 + padded_num_elem_size: 6400 + #(pt, eta, sin phi, cos phi, E) + num_momentum_outputs: 5 + cls_weight_by_pt: no + +loss: + classification_loss_coef: 1.0 + charge_loss_coef: 1.0 + pt_loss_coef: 1.0 + eta_loss_coef: 1.0 + sin_phi_loss_coef: 1.0 + cos_phi_loss_coef: 1.0 + energy_loss_coef: 1.0 + energy_loss: + type: Huber + pt_loss: + type: Huber + sin_phi_loss: + type: Huber + delta: 0.1 + cos_phi_loss: + type: Huber + delta: 0.1 + eta_loss: + type: Huber + delta: 0.1 + event_loss: 
gen_jet_logcosh + event_loss_coef: 1.0 + +tensorflow: + eager: no + +setup: + train: yes + weights: + weights_config: + lr: 0.0005 + num_events_validation: 100 + num_epochs: 50 + dtype: float32 + trainable: + classification_loss_type: sigmoid_focal_crossentropy + lr_schedule: none # exponentialdecay, onecycle, none + optimizer: adam # adam, adamw, sgd + horovod_enabled: False + +optimizer: + adam: + amsgrad: no + #pcgrad does not work with LossScaleOptimizer, so it must be disabled for float16 + pcgrad: yes + adamw: + amsgrad: yes + weight_decay: 0.001 + sgd: + nesterov: no + momentum: 0.9 + +# LR Schedules +exponentialdecay: + decay_steps: 2000 + decay_rate: 0.99 + staircase: yes +onecycle: + mom_min: 0.85 + mom_max: 0.95 + warmup_ratio: 0.3 + div_factor: 25.0 + final_div: 100000.0 + +parameters: + model: gnn_dense + input_encoding: cms + node_update_mode: concat + do_node_encoding: no + node_encoding_hidden_dim: 128 + + combined_graph_layer: + bin_size: 100 + max_num_bins: 200 + distance_dim: 64 + layernorm: yes + dropout: 0.0 + dist_activation: elu + ffn_dist_num_layers: 2 + ffn_dist_hidden_dim: 128 + + # MPNN + #kernel: + # type: NodePairTrainableKernel + # activation: elu + #num_node_messages: 1 + #node_message: + # type: NodeMessageLearnable + # output_dim: 64 + # hidden_dim: 128 + # num_layers: 2 + # activation: elu + #activation: elu + + # GCN + kernel: + type: NodePairGaussianKernel + dist_mult: 0.1 + clip_value_low: 0.0 + dist_norm: l2 + num_node_messages: 2 + node_message: + type: GHConvDense + output_dim: 128 + activation: elu + #if this is enabled, it will break float16 training + normalize_degrees: yes + activation: elu + + num_graph_layers_id: 2 + num_graph_layers_reg: 2 + output_decoding: + activation: elu + regression_use_classification: yes + dropout: 0.0 + + id_dim_decrease: yes + charge_dim_decrease: yes + pt_dim_decrease: yes + eta_dim_decrease: yes + phi_dim_decrease: yes + energy_dim_decrease: yes + + id_hidden_dim: 256 + charge_hidden_dim: 256 + pt_hidden_dim: 256 + eta_hidden_dim: 256 + phi_hidden_dim: 256 + energy_hidden_dim: 256 + + id_num_layers: 2 + charge_num_layers: 2 + pt_num_layers: 2 + eta_num_layers: 2 + phi_num_layers: 2 + energy_num_layers: 2 + layernorm: yes + mask_reg_cls0: no + + skip_connection: yes + debug: no + +timing: + num_ev: 100 + num_iter: 3 + +callbacks: + checkpoint: + monitor: "val_loss" + plot_freq: 1 + tensorboard: + dump_history: yes + hist_freq: 1 + +hypertune: + algorithm: hyperband # random, bayesian, hyperband + random: + objective: val_loss + max_trials: 100 + bayesian: + objective: val_loss + max_trials: 100 + num_initial_points: 2 + hyperband: + objective: val_loss + max_epochs: 10 + factor: 3 + iterations: 1 + executions_per_trial: 1 + +raytune: + local_dir: # Note: please specify an absolute path + sched: asha # asha, hyperband + search_alg: # bayes, bohb, hyperopt, nevergrad, scikit + default_metric: "val_loss" + default_mode: "min" + # Tune schedule specific parameters + asha: + max_t: 200 + reduction_factor: 4 + brackets: 1 + grace_period: 10 + hyperband: + max_t: 200 + reduction_factor: 4 + hyperopt: + n_random_steps: 10 + nevergrad: + n_random_steps: 10 + +train_test_datasets: + physical: + batch_per_gpu: 5 + datasets: + - cms_pf_ttbar + +validation_datasets: + - cms_pf_ttbar + +datasets: + cms_pf_ttbar: + version: 1.4.0 + data_dir: + manual_dir: diff --git a/parameters/test-eventloss/genjet_logcosh_mask_reg_cls0.yaml b/parameters/test-eventloss/genjet_logcosh_mask_reg_cls0.yaml new file mode 100644 index 
000000000..c326d1763 --- /dev/null +++ b/parameters/test-eventloss/genjet_logcosh_mask_reg_cls0.yaml @@ -0,0 +1,232 @@ +backend: tensorflow + +dataset: + schema: cms + target_particles: gen + num_input_features: 41 + num_output_features: 7 +# NONE = 0, +# TRACK = 1, +# PS1 = 2, +# PS2 = 3, +# ECAL = 4, +# HCAL = 5, +# GSF = 6, +# BREM = 7, +# HFEM = 8, +# HFHAD = 9, +# SC = 10, +# HO = 11, + num_input_classes: 12 + #(none=0, ch.had=1, n.had=2, hfem=3, hfhad=4, gamma=5, e=6, mu=7, tau=8) + num_output_classes: 9 + padded_num_elem_size: 6400 + #(pt, eta, sin phi, cos phi, E) + num_momentum_outputs: 5 + cls_weight_by_pt: no + +loss: + classification_loss_coef: 1.0 + charge_loss_coef: 1.0 + pt_loss_coef: 1.0 + eta_loss_coef: 1.0 + sin_phi_loss_coef: 1.0 + cos_phi_loss_coef: 1.0 + energy_loss_coef: 1.0 + energy_loss: + type: Huber + pt_loss: + type: Huber + sin_phi_loss: + type: Huber + delta: 0.1 + cos_phi_loss: + type: Huber + delta: 0.1 + eta_loss: + type: Huber + delta: 0.1 + event_loss: gen_jet_logcosh + event_loss_coef: 1.0 + +tensorflow: + eager: no + +setup: + train: yes + weights: + weights_config: + lr: 0.0005 + num_events_validation: 100 + num_epochs: 50 + dtype: float32 + trainable: + classification_loss_type: sigmoid_focal_crossentropy + lr_schedule: none # exponentialdecay, onecycle, none + optimizer: adam # adam, adamw, sgd + horovod_enabled: False + +optimizer: + adam: + amsgrad: no + #pcgrad does not work with LossScaleOptimizer, so it must be disabled for float16 + pcgrad: yes + adamw: + amsgrad: yes + weight_decay: 0.001 + sgd: + nesterov: no + momentum: 0.9 + +# LR Schedules +exponentialdecay: + decay_steps: 2000 + decay_rate: 0.99 + staircase: yes +onecycle: + mom_min: 0.85 + mom_max: 0.95 + warmup_ratio: 0.3 + div_factor: 25.0 + final_div: 100000.0 + +parameters: + model: gnn_dense + input_encoding: cms + node_update_mode: concat + do_node_encoding: no + node_encoding_hidden_dim: 128 + + combined_graph_layer: + bin_size: 100 + max_num_bins: 200 + distance_dim: 64 + layernorm: yes + dropout: 0.0 + dist_activation: elu + ffn_dist_num_layers: 2 + ffn_dist_hidden_dim: 128 + + # MPNN + #kernel: + # type: NodePairTrainableKernel + # activation: elu + #num_node_messages: 1 + #node_message: + # type: NodeMessageLearnable + # output_dim: 64 + # hidden_dim: 128 + # num_layers: 2 + # activation: elu + #activation: elu + + # GCN + kernel: + type: NodePairGaussianKernel + dist_mult: 0.1 + clip_value_low: 0.0 + dist_norm: l2 + num_node_messages: 2 + node_message: + type: GHConvDense + output_dim: 128 + activation: elu + #if this is enabled, it will break float16 training + normalize_degrees: yes + activation: elu + + num_graph_layers_id: 2 + num_graph_layers_reg: 2 + output_decoding: + activation: elu + regression_use_classification: yes + dropout: 0.0 + + id_dim_decrease: yes + charge_dim_decrease: yes + pt_dim_decrease: yes + eta_dim_decrease: yes + phi_dim_decrease: yes + energy_dim_decrease: yes + + id_hidden_dim: 256 + charge_hidden_dim: 256 + pt_hidden_dim: 256 + eta_hidden_dim: 256 + phi_hidden_dim: 256 + energy_hidden_dim: 256 + + id_num_layers: 2 + charge_num_layers: 2 + pt_num_layers: 2 + eta_num_layers: 2 + phi_num_layers: 2 + energy_num_layers: 2 + layernorm: yes + mask_reg_cls0: yes + + skip_connection: yes + debug: no + +timing: + num_ev: 100 + num_iter: 3 + +callbacks: + checkpoint: + monitor: "val_loss" + plot_freq: 1 + tensorboard: + dump_history: yes + hist_freq: 1 + +hypertune: + algorithm: hyperband # random, bayesian, hyperband + random: + objective: val_loss + 
max_trials: 100 + bayesian: + objective: val_loss + max_trials: 100 + num_initial_points: 2 + hyperband: + objective: val_loss + max_epochs: 10 + factor: 3 + iterations: 1 + executions_per_trial: 1 + +raytune: + local_dir: # Note: please specify an absolute path + sched: asha # asha, hyperband + search_alg: # bayes, bohb, hyperopt, nevergrad, scikit + default_metric: "val_loss" + default_mode: "min" + # Tune schedule specific parameters + asha: + max_t: 200 + reduction_factor: 4 + brackets: 1 + grace_period: 10 + hyperband: + max_t: 200 + reduction_factor: 4 + hyperopt: + n_random_steps: 10 + nevergrad: + n_random_steps: 10 + +train_test_datasets: + physical: + batch_per_gpu: 5 + datasets: + - cms_pf_ttbar + +validation_datasets: + - cms_pf_ttbar + +datasets: + cms_pf_ttbar: + version: 1.4.0 + data_dir: + manual_dir: diff --git a/parameters/test-eventloss/genjet_mse.yaml b/parameters/test-eventloss/genjet_mse.yaml new file mode 100644 index 000000000..9c5c7e70d --- /dev/null +++ b/parameters/test-eventloss/genjet_mse.yaml @@ -0,0 +1,232 @@ +backend: tensorflow + +dataset: + schema: cms + target_particles: gen + num_input_features: 41 + num_output_features: 7 +# NONE = 0, +# TRACK = 1, +# PS1 = 2, +# PS2 = 3, +# ECAL = 4, +# HCAL = 5, +# GSF = 6, +# BREM = 7, +# HFEM = 8, +# HFHAD = 9, +# SC = 10, +# HO = 11, + num_input_classes: 12 + #(none=0, ch.had=1, n.had=2, hfem=3, hfhad=4, gamma=5, e=6, mu=7, tau=8) + num_output_classes: 9 + padded_num_elem_size: 6400 + #(pt, eta, sin phi, cos phi, E) + num_momentum_outputs: 5 + cls_weight_by_pt: no + +loss: + classification_loss_coef: 1.0 + charge_loss_coef: 1.0 + pt_loss_coef: 1.0 + eta_loss_coef: 1.0 + sin_phi_loss_coef: 1.0 + cos_phi_loss_coef: 1.0 + energy_loss_coef: 1.0 + energy_loss: + type: Huber + pt_loss: + type: Huber + sin_phi_loss: + type: Huber + delta: 0.1 + cos_phi_loss: + type: Huber + delta: 0.1 + eta_loss: + type: Huber + delta: 0.1 + event_loss: gen_jet_mse + event_loss_coef: 1.0 + +tensorflow: + eager: no + +setup: + train: yes + weights: + weights_config: + lr: 0.0005 + num_events_validation: 100 + num_epochs: 50 + dtype: float32 + trainable: + classification_loss_type: sigmoid_focal_crossentropy + lr_schedule: none # exponentialdecay, onecycle, none + optimizer: adam # adam, adamw, sgd + horovod_enabled: False + +optimizer: + adam: + amsgrad: no + #pcgrad does not work with LossScaleOptimizer, so it must be disabled for float16 + pcgrad: yes + adamw: + amsgrad: yes + weight_decay: 0.001 + sgd: + nesterov: no + momentum: 0.9 + +# LR Schedules +exponentialdecay: + decay_steps: 2000 + decay_rate: 0.99 + staircase: yes +onecycle: + mom_min: 0.85 + mom_max: 0.95 + warmup_ratio: 0.3 + div_factor: 25.0 + final_div: 100000.0 + +parameters: + model: gnn_dense + input_encoding: cms + node_update_mode: concat + do_node_encoding: no + node_encoding_hidden_dim: 128 + + combined_graph_layer: + bin_size: 100 + max_num_bins: 200 + distance_dim: 64 + layernorm: yes + dropout: 0.0 + dist_activation: elu + ffn_dist_num_layers: 2 + ffn_dist_hidden_dim: 128 + + # MPNN + #kernel: + # type: NodePairTrainableKernel + # activation: elu + #num_node_messages: 1 + #node_message: + # type: NodeMessageLearnable + # output_dim: 64 + # hidden_dim: 128 + # num_layers: 2 + # activation: elu + #activation: elu + + # GCN + kernel: + type: NodePairGaussianKernel + dist_mult: 0.1 + clip_value_low: 0.0 + dist_norm: l2 + num_node_messages: 2 + node_message: + type: GHConvDense + output_dim: 128 + activation: elu + #if this is enabled, it will break float16 training + 
normalize_degrees: yes + activation: elu + + num_graph_layers_id: 2 + num_graph_layers_reg: 2 + output_decoding: + activation: elu + regression_use_classification: yes + dropout: 0.0 + + id_dim_decrease: yes + charge_dim_decrease: yes + pt_dim_decrease: yes + eta_dim_decrease: yes + phi_dim_decrease: yes + energy_dim_decrease: yes + + id_hidden_dim: 256 + charge_hidden_dim: 256 + pt_hidden_dim: 256 + eta_hidden_dim: 256 + phi_hidden_dim: 256 + energy_hidden_dim: 256 + + id_num_layers: 2 + charge_num_layers: 2 + pt_num_layers: 2 + eta_num_layers: 2 + phi_num_layers: 2 + energy_num_layers: 2 + layernorm: yes + mask_reg_cls0: no + + skip_connection: yes + debug: no + +timing: + num_ev: 100 + num_iter: 3 + +callbacks: + checkpoint: + monitor: "val_loss" + plot_freq: 1 + tensorboard: + dump_history: yes + hist_freq: 1 + +hypertune: + algorithm: hyperband # random, bayesian, hyperband + random: + objective: val_loss + max_trials: 100 + bayesian: + objective: val_loss + max_trials: 100 + num_initial_points: 2 + hyperband: + objective: val_loss + max_epochs: 10 + factor: 3 + iterations: 1 + executions_per_trial: 1 + +raytune: + local_dir: # Note: please specify an absolute path + sched: asha # asha, hyperband + search_alg: # bayes, bohb, hyperopt, nevergrad, scikit + default_metric: "val_loss" + default_mode: "min" + # Tune schedule specific parameters + asha: + max_t: 200 + reduction_factor: 4 + brackets: 1 + grace_period: 10 + hyperband: + max_t: 200 + reduction_factor: 4 + hyperopt: + n_random_steps: 10 + nevergrad: + n_random_steps: 10 + +train_test_datasets: + physical: + batch_per_gpu: 5 + datasets: + - cms_pf_ttbar + +validation_datasets: + - cms_pf_ttbar + +datasets: + cms_pf_ttbar: + version: 1.4.0 + data_dir: + manual_dir: diff --git a/parameters/test-eventloss/h2d.yaml b/parameters/test-eventloss/h2d.yaml index 085d2959f..71281af1e 100644 --- a/parameters/test-eventloss/h2d.yaml +++ b/parameters/test-eventloss/h2d.yaml @@ -23,6 +23,7 @@ dataset: padded_num_elem_size: 6400 #(pt, eta, sin phi, cos phi, E) num_momentum_outputs: 5 + cls_weight_by_pt: no loss: classification_loss_coef: 1.0 @@ -46,7 +47,7 @@ loss: type: Huber delta: 0.1 event_loss: hist_2d - event_loss_coef: 0.0001 + event_loss_coef: 1.0 tensorflow: eager: no @@ -55,9 +56,9 @@ setup: train: yes weights: weights_config: - lr: 0.00005 + lr: 0.0005 num_events_validation: 100 - num_epochs: 20 + num_epochs: 50 dtype: float32 trainable: classification_loss_type: sigmoid_focal_crossentropy @@ -134,17 +135,13 @@ parameters: normalize_degrees: yes activation: elu - num_graph_layers_id: 3 - num_graph_layers_reg: 3 + num_graph_layers_id: 2 + num_graph_layers_reg: 2 output_decoding: activation: elu regression_use_classification: yes dropout: 0.0 - pt_skip_gate: no - eta_skip_gate: yes - phi_skip_gate: yes - id_dim_decrease: yes charge_dim_decrease: yes pt_dim_decrease: yes @@ -166,8 +163,7 @@ parameters: phi_num_layers: 2 energy_num_layers: 2 layernorm: yes - mask_reg_cls0: yes - energy_multimodal: yes + mask_reg_cls0: no skip_connection: yes debug: no @@ -231,18 +227,6 @@ validation_datasets: datasets: cms_pf_ttbar: - version: 1.3.1 - data_dir: - manual_dir: - cms_pf_ztt: - version: 1.3.1 - data_dir: - manual_dir: - cms_pf_qcd: - version: 1.3.1 - data_dir: - manual_dir: - cms_pf_qcd_high_pt: - version: 1.3.1 + version: 1.4.0 data_dir: manual_dir: diff --git a/parameters/test-eventloss/swd.yaml b/parameters/test-eventloss/swd.yaml index fa3a66f4e..c0cf78a2f 100644 --- a/parameters/test-eventloss/swd.yaml +++ 
b/parameters/test-eventloss/swd.yaml @@ -23,6 +23,7 @@ dataset: padded_num_elem_size: 6400 #(pt, eta, sin phi, cos phi, E) num_momentum_outputs: 5 + cls_weight_by_pt: no loss: classification_loss_coef: 1.0 @@ -46,7 +47,7 @@ loss: type: Huber delta: 0.1 event_loss: sliced_wasserstein - event_loss_coef: 0.0001 + event_loss_coef: 1.0 tensorflow: eager: no @@ -55,9 +56,9 @@ setup: train: yes weights: weights_config: - lr: 0.00005 + lr: 0.0005 num_events_validation: 100 - num_epochs: 20 + num_epochs: 50 dtype: float32 trainable: classification_loss_type: sigmoid_focal_crossentropy @@ -134,17 +135,13 @@ parameters: normalize_degrees: yes activation: elu - num_graph_layers_id: 3 - num_graph_layers_reg: 3 + num_graph_layers_id: 2 + num_graph_layers_reg: 2 output_decoding: activation: elu regression_use_classification: yes dropout: 0.0 - pt_skip_gate: no - eta_skip_gate: yes - phi_skip_gate: yes - id_dim_decrease: yes charge_dim_decrease: yes pt_dim_decrease: yes @@ -166,8 +163,7 @@ parameters: phi_num_layers: 2 energy_num_layers: 2 layernorm: yes - mask_reg_cls0: yes - energy_multimodal: yes + mask_reg_cls0: no skip_connection: yes debug: no @@ -231,18 +227,6 @@ validation_datasets: datasets: cms_pf_ttbar: - version: 1.3.1 - data_dir: - manual_dir: - cms_pf_ztt: - version: 1.3.1 - data_dir: - manual_dir: - cms_pf_qcd: - version: 1.3.1 - data_dir: - manual_dir: - cms_pf_qcd_high_pt: - version: 1.3.1 + version: 1.4.0 data_dir: manual_dir: diff --git a/requirements.txt b/requirements.txt index 8e1cc106e..926c39655 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ awkward boost_histogram click +comet-ml fastjet keras keras-tuner diff --git a/scripts/generate_tfds.sh b/scripts/generate_tfds.sh index 3372c44be..933672f72 100755 --- a/scripts/generate_tfds.sh +++ b/scripts/generate_tfds.sh @@ -8,5 +8,5 @@ IMG=/home/software/singularity/tf-2.9.0.simg singularity exec --env PYTHONPATH=$PYTHONPATH -B /hdfs -B /scratch-persistent $IMG tfds build hep_tfds/heptfds/cms_pf/ttbar --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/ttbar.log & singularity exec --env PYTHONPATH=$PYTHONPATH -B /hdfs -B /scratch-persistent $IMG tfds build hep_tfds/heptfds/cms_pf/qcd --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/qcd.log & singularity exec --env PYTHONPATH=$PYTHONPATH -B /hdfs -B /scratch-persistent $IMG tfds build hep_tfds/heptfds/cms_pf/ztt --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/ztt.log & -singularity exec --env PYTHONPATH=$PYTHONPATH -B /hdfs -B /scratch-persistent $IMG tfds build hep_tfds/heptfds/cms_pf/qcd_highpt --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/qcd_highpt.log & +singularity exec --env PYTHONPATH=$PYTHONPATH -B /hdfs -B /scratch-persistent $IMG tfds build hep_tfds/heptfds/cms_pf/qcd_high_pt --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/qcd_high_pt.log & wait diff --git a/scripts/validate_cms.sh b/scripts/validate_cms.sh new file mode 100644 index 000000000..ec5e2c88e --- /dev/null +++ b/scripts/validate_cms.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +source /cvmfs/cms.cern.ch/cmsset_default.sh + +env +df -h + +WORKDIR=/scratch/$USER/${SLURM_JOB_ID} +SAMPLE=$1 +SEED=$2 + +mkdir -p $WORKDIR +cd $WORKDIR
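
Notes on the main changes above, each with a small self-contained sketch; none of this code is part of the diff, and any flags or paths not shown in the diff are hypothetical stand-ins. First, the Comet wiring: the comet_ml import is hoisted to the top of pipeline.py with "# isort:skip" because comet_ml asks to be imported before the ML framework so it can hook training, and the experiment handle is now threaded through CustomCallback into epoch_end. A minimal sketch using only documented comet_ml calls (the comet_offline flag mirrors the CLI option):

from comet_ml import OfflineExperiment, Experiment  # isort:skip (must precede tensorflow)

comet_offline = True  # hypothetical stand-in for the --comet-offline CLI option

if comet_offline:
    # Logs are written locally and can be uploaded to comet.ml later.
    experiment = OfflineExperiment(project_name="particleflow-tf", offline_directory="./comet_logs")
else:
    # Streams logs to www.comet.ml as training runs.
    experiment = Experiment(project_name="particleflow-tf")

# Inside the epoch-end hook the handle is optional, so training still works
# when no experiment was created:
epoch = 1
if experiment:
    experiment.log_metric("jet_med", 0.02, step=epoch - 1)
    experiment.log_image("jet_res.png", step=epoch - 1)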
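
The new cls_weight_by_pt option in BaseDatasetFactory scales each element's classification sample weight by its target pt, so high-pt particles count more in the classification loss while padded elements keep zero weight. A toy sketch with made-up shapes:

import tensorflow as tf

msk_elems = tf.constant([[1.0, 1.0, 0.0]])   # last element is padding
target_pt = tf.constant([[50.0, 0.5, 7.0]])  # per-element target pt

cls_weight_by_pt = True  # mirrors config["dataset"]["cls_weight_by_pt"]
cls_weights = msk_elems
if cls_weight_by_pt:
    # High-pt particles dominate the classification loss;
    # padded elements keep zero weight.
    cls_weights = cls_weights * target_pt

print(cls_weights.numpy())  # [[50.   0.5  0. ]]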
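
In model.py, the hard regression mask (built from out_id_hard_softmax, a stop-gradient softmax of 100x-sharpened logits) is replaced by a smooth turn-on, tf.sigmoid(-out_id_logits[..., 0:1]), which is differentiable, so the classification head now receives gradient through the regression masking. One caveat worth noting: sigmoid(-logit_0) gates on the class-0 logit crossing zero rather than on argmax != 0, so it is a smooth proxy, not an exact replacement. A toy comparison:

import tensorflow as tf

out_id_logits = tf.constant([[[3.0, -1.0, 0.5],     # class 0 dominant -> "no particle"
                              [-2.0, 4.0, 1.0]]])   # class 1 dominant -> particle

# Old behavior: hard, non-differentiable mask from the argmax.
hard_mask = tf.cast(tf.argmax(out_id_logits, axis=-1) != 0, tf.float32)[..., None]

# New behavior: smooth turn-on from the class-0 logit only.
soft_mask = tf.sigmoid(-out_id_logits[..., 0:1])

print(hard_mask.numpy().ravel())  # [0. 1.]
print(soft_mask.numpy().ravel())  # [~0.05 ~0.88]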
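
The bad.npz dump added to epoch_end flags badly-reconstructed particles with a symmetric relative pt difference bounded in [0, 1]. A standalone sketch of the masking logic (np.errstate is added here only to silence the expected 0/0 warning on padded slots):

import numpy as np

gen_pt = np.array([[[10.0], [5.0], [0.0]]])    # third slot is padding
pred_pt = np.array([[[9.0], [30.0], [0.0]]])

with np.errstate(invalid="ignore"):
    denom = np.maximum(gen_pt, pred_pt)
    ratio = np.abs(gen_pt - pred_pt) / denom   # 0/0 on padding -> NaN
ratio[np.isnan(ratio)] = 0
msk_bad = (ratio > 0.8)[:, :, 0]

print(msk_bad)  # [[False  True False]]: only the 5 -> 30 particle is "bad"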
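
epoch_end, eval_model, and the notebook MET cell now all zero out kinematics for elements whose predicted class is 0 before building event-level sums. A sketch of the masking, following the notebook convention of summing the components first and then taking the quadrature (np.argmax with keepdims needs NumPy >= 1.22, same as in the diff):

import numpy as np

# One event, three elements; class id 0 means "no particle".
pred_cls = np.array([[[0.1, 0.9], [0.8, 0.2], [0.3, 0.7]]])
pt = np.array([[[20.0], [15.0], [5.0]]])
sin_phi = np.array([[[0.0], [1.0], [0.6]]])
cos_phi = np.array([[[1.0], [0.0], [0.8]]])

msk = (np.argmax(pred_cls, axis=-1, keepdims=True) != 0).astype(np.float32)
px = pt * cos_phi * msk                      # the class-0 element drops out
py = pt * sin_phi * msk
met = np.sqrt(np.sum(px, axis=1) ** 2 + np.sum(py, axis=1) ** 2)
print(met)  # [[~24.19]]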
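
The per-epoch summaries switch from mean/std to median and interquartile range plus a 1D Wasserstein distance (jet_med/jet_iqr/jet_wd and the MET analogues), all of which stay stable under the heavy tails typical of jet and MET response. A sketch of the three quantities on toy data; the numeric expectations in the comments are rough:

import numpy as np
import scipy.stats

def med_iqr(arr):
    # Median and IQR: robust location/width, insensitive to outliers.
    p25, p50, p75 = np.percentile(arr, [25, 50, 75])
    return p50, p75 - p25

rng = np.random.default_rng(0)
gen = rng.exponential(30.0, size=10_000)
pred = gen * rng.normal(1.05, 0.10, size=10_000)   # 5% scale bias, 10% smearing

ratio = (pred - gen) / gen
print(med_iqr(ratio))                                # roughly (0.05, 0.13)
print(scipy.stats.wasserstein_distance(gen, pred))   # roughly the 1.5 average pt shift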
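
The histogram_2d rewrite in utils.py takes an explicit mask, drops masked (padded) particles with tf.boolean_mask before scattering, and fixes the index order to (eta, phi); hist_2d_loss correspondingly bins in (eta, sin_phi) to avoid the NaN-prone atan2. batched_histogram_2d moves from tf.vectorized_map to tf.map_fn with an explicit fn_output_signature, plausibly because boolean_mask yields a data-dependent shape that vectorized_map cannot trace. A condensed single-event sketch:

import tensorflow as tf

def histogram_2d_sketch(mask, eta, phi, w_px, w_py, eta_range, phi_range, nbins):
    # Per-particle bin indices along each axis.
    eta_bins = tf.histogram_fixed_width_bins(eta, eta_range, nbins=nbins)
    phi_bins = tf.histogram_fixed_width_bins(phi, phi_range, nbins=nbins)
    indices = tf.stack([eta_bins, phi_bins], axis=-1)

    # Drop masked-out (padded) particles before scattering.
    indices = tf.boolean_mask(indices, mask)
    w_px = tf.boolean_mask(w_px, mask)
    w_py = tf.boolean_mask(w_py, mask)

    hist_px = tf.tensor_scatter_nd_add(tf.zeros((nbins, nbins)), indices, w_px)
    hist_py = tf.tensor_scatter_nd_add(tf.zeros((nbins, nbins)), indices, w_py)
    return tf.sqrt(hist_px**2 + hist_py**2)

eta = tf.constant([0.1, -1.2, 3.0])
sin_phi = tf.constant([0.5, -0.5, 0.0])
mask = tf.constant([True, True, False])   # third particle is padding
h = histogram_2d_sketch(mask, eta, sin_phi, tf.ones(3), 2.0 * tf.ones(3),
                        tf.constant([-6.0, 6.0]), tf.constant([-1.0, 1.0]), 20)
print(h.shape)  # (20, 20)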
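
gen_jet_loss is split into compute_jet_pt plus two flavors, gen_jet_mse_loss and gen_jet_logcosh_loss. log(cosh(x)) behaves like x^2/2 for small residuals and like |x| - log 2 for large ones, so a few badly-matched jets do not dominate the event loss. A per-event toy comparison on zero-padded jet-pt arrays (the real losses reduce over both batch and jet axes):

import tensorflow as tf

jet_pt_true = tf.constant([[30.0, 50.0, 0.0]])   # zero-padded jet slots
jet_pt_pred = tf.constant([[28.0, 80.0, 0.0]])

# gen_jet_mse-style: the 30-unit outlier enters quadratically.
mse = tf.math.sqrt(tf.reduce_mean((jet_pt_true - jet_pt_pred) ** 2, axis=-1))

# gen_jet_logcosh-style: quadratic near zero, linear for large residuals.
logcosh = tf.keras.losses.log_cosh(jet_pt_true, jet_pt_pred)

print(float(mse[0]), float(logcosh[0]))  # ~17.4 vs ~10.2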
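
Finally, the new test-eventloss configs exercise event_loss values of none, sliced_wasserstein, hist_2d, gen_jet_mse, and gen_jet_logcosh, all with event_loss_coef: 1.0. A condensed, standalone sketch of the dispatch in get_loss_dict (the loss functions are stubbed here so the sketch runs; the real ones live in mlpf/tfmodel/utils.py):

# Stubs standing in for the real losses in mlpf/tfmodel/utils.py.
def sliced_wasserstein_loss(yt, yp): ...
def hist_2d_loss(yt, yp): ...
def gen_jet_mse_loss(yt, yp): ...
def gen_jet_logcosh_loss(yt, yp): ...

EVENT_LOSSES = {
    "sliced_wasserstein": sliced_wasserstein_loss,
    "hist_2d": hist_2d_loss,
    "gen_jet_mse": gen_jet_mse_loss,
    "gen_jet_logcosh": gen_jet_logcosh_loss,
}

def add_event_loss(config, loss_dict, loss_weights):
    name = config["loss"]["event_loss"]
    if name != "none":
        # The coefficient is set once for any enabled event loss.
        loss_weights["pt_e_eta_phi"] = config["loss"]["event_loss_coef"]
        loss_dict["pt_e_eta_phi"] = EVENT_LOSSES[name]
    return loss_dict, loss_weights

cfg = {"loss": {"event_loss": "gen_jet_logcosh", "event_loss_coef": 1.0}}
print(add_event_loss(cfg, {}, {}))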