Gen jet loss (#126)

* first attempt gen jet loss
* update
* try python 3.9
* try again
* pip install ./hep_tfds
* WIP debugging
* update hep_tfds for bugfix
* disable mask_reg_cls0, revert debugging
* compute correct sum_pt in histogram bins
* Update cms-gen.yaml
* Update delphes.yaml

Co-authored-by: Joosep Pata <joosep.pata@gmail.com>
Former-commit-id: ac15a9d
jpata · Aug 24, 2022 · ef9ea15 · ef9ea15
1 parent 91688f4
commit ef9ea15
Show file tree

Hide file tree

Showing 10 changed files with 140 additions and 61 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -12,47 +12,47 @@ jobs:
   delphes-pipeline:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
+        with:
+          submodules: recursive
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.9'
       - name: Install python deps
         run: |
-          sudo apt install python3 python3-pip wget
-          sudo python3 -m pip install --upgrade pip
-          sudo python3 -m pip install --upgrade setuptools
-          sudo python3 -m pip install tensorflow==2.9 setGPU \
+          pip install tensorflow==2.9 setGPU \
             sklearn matplotlib boost_histogram mplhep pandas scipy uproot \
             awkward vector pyarrow fastjet keras-tuner networkx \
             tensorflow-probability tensorflow-addons \
             tqdm click tensorflow-datasets 'ray[default]'==1.6.0 'ray[tune]==1.6.0' \
             tf-models-official tensorflow-text \
             tf2onnx onnxruntime zenodo_get seaborn scikit-optimize nevergrad \
             tensorflow-estimator keras \
-            notebook papermill
-          HOROVOD_WITH_TENSORFLOW=1 sudo python3 -m pip install horovod[tensorflow,keras]
-          git submodule init
-          git submodule update
+            notebook papermill ./hep_tfds
+          HOROVOD_WITH_TENSORFLOW=1 pip install horovod[tensorflow,keras]
       - name: Run delphes TF model
         run: ./scripts/local_test_delphes_pipeline.sh
 
   cms-pipeline:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
+        with:
+          submodules: recursive
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.9'
       - name: Install python deps
         run: |
-          sudo apt install python3 python3-pip wget
-          sudo python3 -m pip install --upgrade pip
-          sudo python3 -m pip install --upgrade setuptools
-          sudo python3 -m pip install tensorflow==2.9 setGPU \
+          pip install tensorflow==2.9 setGPU \
             sklearn matplotlib boost_histogram mplhep pandas scipy uproot \
             awkward vector pyarrow fastjet keras-tuner networkx \
             tensorflow-probability tensorflow-addons \
             tqdm click tensorflow-datasets 'ray[default]'==1.6.0 'ray[tune]==1.6.0' \
             tf-models-official tensorflow-text \
             tf2onnx onnxruntime zenodo_get seaborn scikit-optimize nevergrad \
             tensorflow-estimator keras \
-            notebook papermill
-          HOROVOD_WITH_TENSORFLOW=1 sudo python3 -m pip install horovod[tensorflow,keras]
-          git submodule init
-          git submodule update
+            notebook papermill ./hep_tfds
+          HOROVOD_WITH_TENSORFLOW=1 pip install horovod[tensorflow,keras]
       - name: Run CMS TF model using the pipeline
         run: ./scripts/local_test_cms_pipeline.sh
diff --git a/hep_tfds b/hep_tfds
diff --git a/mlpf/tfmodel/datasets/BaseDatasetFactory.py b/mlpf/tfmodel/datasets/BaseDatasetFactory.py
@@ -5,15 +5,14 @@
 #Unpacks a flat target array along the feature axis to a feature dict
 #the feature order is defined in the data prep stage (postprocessing2.py)
 def unpack_target(y, num_output_classes, config):
-    from tfmodel.utils import batched_histogram_2d, histogram_2d
     msk_pid = tf.cast(y[..., 0:1]!=0, tf.float32)
 
     pt = y[..., 2:3]*msk_pid
     energy = y[..., 6:7]*msk_pid
     eta = y[..., 3:4]*msk_pid
     sin_phi = y[..., 4:5]*msk_pid
     cos_phi = y[..., 5:6]*msk_pid
-    phi = tf.math.atan2(sin_phi, cos_phi)*msk_pid
+    jet_idx = y[..., 7:8]*msk_pid
 
     ret = {
         "cls": tf.one_hot(tf.cast(y[..., 0], tf.int32), num_output_classes),
@@ -26,7 +25,7 @@ def unpack_target(y, num_output_classes, config):
     }
 
     if config["loss"]["event_loss"] != "none":
-        pt_e_eta_phi = tf.concat([pt, energy, eta, sin_phi, cos_phi], axis=-1)
+        pt_e_eta_phi = tf.concat([pt, energy, eta, sin_phi, cos_phi, jet_idx], axis=-1)
         ret["pt_e_eta_phi"] = pt_e_eta_phi
 
     return ret

diff --git a/mlpf/tfmodel/model.py b/mlpf/tfmodel/model.py
@@ -664,11 +664,11 @@ def call(self, args, training=False):
 
         if self.event_set_output:
             pt_e_eta_phi = tf.concat([
-                pred_pt*msk_input_outtype*msk_output,
-                pred_energy*msk_input_outtype*msk_output,
-                pred_eta*msk_input_outtype*msk_output,
-                pred_sin_phi*msk_input_outtype*msk_output,
-                pred_cos_phi*msk_input_outtype*msk_output
+                pred_pt*msk_input_outtype,
+                pred_energy*msk_input_outtype,
+                pred_eta*msk_input_outtype,
+                pred_sin_phi*msk_input_outtype,
+                pred_cos_phi*msk_input_outtype
                 ], axis=-1)
             ret["pt_e_eta_phi"] = pt_e_eta_phi
 

diff --git a/mlpf/tfmodel/utils.py b/mlpf/tfmodel/utils.py
@@ -19,17 +19,22 @@
 
 
 @tf.function
-def histogram_2d(x, y, weights, x_range, y_range, nbins, bin_dtype=tf.float32):
-    x_bins = tf.histogram_fixed_width_bins(x, x_range, nbins=nbins, dtype=bin_dtype)
-    y_bins = tf.histogram_fixed_width_bins(y, y_range, nbins=nbins, dtype=bin_dtype)
-    hist = tf.zeros((nbins, nbins), dtype=weights.dtype)
-    indices = tf.transpose(tf.stack([y_bins, x_bins]))
-    hist = tf.tensor_scatter_nd_add(hist, indices, weights)
-    return hist
+def histogram_2d(eta, phi, weights_px, weights_py, eta_range, phi_range, nbins, bin_dtype=tf.float32):
+    eta_bins = tf.histogram_fixed_width_bins(eta, eta_range, nbins=nbins, dtype=bin_dtype)
+    phi_bins = tf.histogram_fixed_width_bins(phi, phi_range, nbins=nbins, dtype=bin_dtype)
+
+    hist_px = tf.zeros((nbins, nbins), dtype=weights_px.dtype)
+    hist_py = tf.zeros((nbins, nbins), dtype=weights_py.dtype)
+    indices = tf.transpose(tf.stack([phi_bins, eta_bins]))
+
+    hist_px = tf.tensor_scatter_nd_add(hist_px, indices, weights_px)
+    hist_py = tf.tensor_scatter_nd_add(hist_py, indices, weights_py)
+    hist_pt = tf.sqrt(hist_px**2 + hist_py**2)
+    return hist_pt
 
 @tf.function
-def batched_histogram_2d(x, y, w, x_range, y_range, nbins, bin_dtype=tf.float32):
-    return tf.vectorized_map(lambda a: histogram_2d(a[0], a[1], a[2], x_range, y_range, nbins, bin_dtype), (x,y,w))
+def batched_histogram_2d(eta, phi, w_px, w_py, x_range, y_range, nbins, bin_dtype=tf.float32):
+    return tf.vectorized_map(lambda a: histogram_2d(a[0], a[1], a[2], a[3], x_range, y_range, nbins, bin_dtype), (eta, phi, w_px, w_py))
 
 def load_config(config_file_path):
     with open(config_file_path, "r") as ymlfile:
@@ -511,11 +516,18 @@ def get_loss_from_params(input_dict):
     return loss_cls(**input_dict)
 
 #batched version of https://github.com/VinAIResearch/DSW/blob/master/gsw.py#L19
+@tf.function
 def sliced_wasserstein_loss(y_true, y_pred, num_projections=1000):
 
+    #take everything but the jet_idx
+    y_true = y_true[..., :5]
+    y_pred = y_pred[..., :5]
+
+    #create normalized random basis vectors
     theta = tf.random.normal((num_projections, y_true.shape[-1]))
     theta = theta / tf.sqrt(tf.reduce_sum(theta**2, axis=1, keepdims=True))
 
+    #project the features with the random basis
     A = tf.linalg.matmul(y_true, theta, False, True)
     B = tf.linalg.matmul(y_pred, theta, False, True)
 
@@ -525,29 +537,95 @@ def sliced_wasserstein_loss(y_true, y_pred, num_projections=1000):
     ret = tf.math.sqrt(tf.reduce_sum(tf.math.pow(A_sorted - B_sorted, 2), axis=[-1,-2]))
     return ret
 
-
+@tf.function
 def hist_loss_2d(y_true, y_pred):
 
+    eta_true = y_true[..., 2]
+    eta_pred = y_pred[..., 2]
     phi_true = tf.math.atan2(y_true[..., 3], y_true[..., 4])
     phi_pred = tf.math.atan2(y_pred[..., 3], y_pred[..., 4])
 
+    pt_true = y_true[..., 0]
+    pt_pred = y_pred[..., 0]
+
+    px_true = pt_true*y_true[..., 4]
+    py_true = pt_true*y_true[..., 3]
+    px_pred = pt_pred*y_pred[..., 4]
+    py_pred = pt_pred*y_pred[..., 3]
+
     pt_hist_true = batched_histogram_2d(
-        y_true[..., 2],
+        eta_true,
         phi_true,
-        y_true[..., 0],
+        px_true,
+        py_true,
         tf.cast([-6.0,6.0], tf.float32), tf.cast([-4.0,4.0], tf.float32), 20
     )
 
     pt_hist_pred = batched_histogram_2d(
-        y_pred[..., 2],
+        eta_pred,
         phi_pred,
-        y_pred[..., 0],
+        px_pred,
+        py_pred,
         tf.cast([-6.0,6.0], tf.float32), tf.cast([-4.0,4.0], tf.float32), 20
     )
 
     mse = tf.math.sqrt(tf.reduce_mean((pt_hist_true-pt_hist_pred)**2, axis=[-1,-2]))
     return mse
 
+
+@tf.function
+def jet_reco(px, py, jet_idx, max_jets):
+    """Sum per-entry momentum components into per-jet slots and return the pt of each slot.
+
+    Args:
+        px, py: rank-1 tensors of length N holding the momentum components to accumulate.
+        jet_idx: rank-1 integer tensor of length N; entry i is added to slot jet_idx[i].
+        max_jets: number of output slots (valid slot indices are 0..max_jets-1).
+
+    Returns:
+        Rank-1 tensor of length max_jets with sqrt(sum_px**2 + sum_py**2) for every slot.
+        Slot 0 collects both the entries whose index is 0 and the entries whose index
+        is out of range.
+    """
+
+    tf.debugging.assert_shapes([
+        (px, ('N',)),
+        (py, ('N',)),
+        (jet_idx, ('N',)),
+    ])
+
+    #the accumulators below have max_jets slots, so an index equal to max_jets (or a
+    #negative one) would be out of bounds for the scatter; redirect those to slot 0
+    in_range = tf.logical_and(jet_idx >= 0, jet_idx < max_jets)
+    jet_idx_capped = tf.where(in_range, jet_idx, tf.zeros_like(jet_idx))
+
+    jet_px = tf.zeros([max_jets, ], dtype=px.dtype)
+    jet_py = tf.zeros([max_jets, ], dtype=py.dtype)
+
+    #scatter-add expects indices of shape (N, 1) to address a rank-1 accumulator
+    scatter_idx = tf.expand_dims(jet_idx_capped, axis=-1)
+    jet_px_new = tf.tensor_scatter_nd_add(jet_px, indices=scatter_idx, updates=px)
+    jet_py_new = tf.tensor_scatter_nd_add(jet_py, indices=scatter_idx, updates=py)
+
+    jet_pt = tf.math.sqrt(jet_px_new**2 + jet_py_new**2)
+
+    return jet_pt
+
+
+@tf.function
+def batched_jet_reco(px, py, jet_idx, max_jets):
+    """Run jet_reco independently on every row of a batch.
+
+    Args:
+        px, py: rank-2 tensors of shape (B, N) with the momentum components.
+        jet_idx: rank-2 integer tensor of shape (B, N) with the slot index of each entry.
+        max_jets: number of jet slots produced per row.
+
+    Returns:
+        Tensor of shape (B, max_jets) with the summed pt per slot, in the dtype of px.
+    """
+    tf.debugging.assert_shapes([
+        (px, ('B', 'N')),
+        (py, ('B', 'N')),
+        (jet_idx, ('B', 'N')),
+    ])
+
+    #jet_reco allocates its accumulators with the dtype of its inputs, so the declared
+    #output signature follows px instead of being pinned to float32
+    return tf.map_fn(
+        lambda a: jet_reco(a[0], a[1], a[2], max_jets), (px, py, jet_idx),
+        fn_output_signature=tf.TensorSpec([max_jets, ], dtype=px.dtype)
+    )
+
+@tf.function
+def gen_jet_loss(y_true, y_pred):
+    """Root-mean-square difference between true and predicted per-jet pt sums.
+
+    Both inputs are indexed on the last axis as 0: pt, 3: sin_phi, 4: cos_phi.
+    Column 5 of y_true is cast to int32 and used as the jet assignment for the
+    true AND the predicted entries; column 5 of y_pred is never read.
+
+    Returns:
+        sqrt of the mean squared difference, with the mean taken over the last two
+        axes of the (batch, max_jets) jet-pt tensors.
+    """
+    max_jets = 201
+    jet_idx = tf.cast(y_true[..., 5], dtype=tf.int32)
+
+    def _summed_jet_pt(feats):
+        #px = pt*cos_phi, py = pt*sin_phi
+        px = feats[..., 0]*feats[..., 4]
+        py = feats[..., 0]*feats[..., 3]
+        return batched_jet_reco(px, py, jet_idx, max_jets)
+
+    jet_pt_true = _summed_jet_pt(y_true)
+    jet_pt_pred = _summed_jet_pt(y_pred)
+
+    squared_diff = (jet_pt_true-jet_pt_pred)**2
+    mse = tf.math.sqrt(tf.reduce_mean(squared_diff, axis=[-1,-2]))
+    return mse
+
+
 def get_loss_dict(config):
     cls_loss = get_class_loss(config)
 
@@ -579,4 +657,8 @@ def get_loss_dict(config):
         loss_dict["pt_e_eta_phi"] = hist_loss_2d
         loss_weights["pt_e_eta_phi"] = config["loss"]["event_loss_coef"]
 
+    if config["loss"]["event_loss"] == "gen_jet":
+        loss_dict["pt_e_eta_phi"] = gen_jet_loss
+        loss_weights["pt_e_eta_phi"] = config["loss"]["event_loss_coef"]
+
     return loss_dict, loss_weights
diff --git a/parameters/cms-gen.yaml b/parameters/cms-gen.yaml
@@ -45,7 +45,7 @@ loss:
   eta_loss:
     type: Huber
     delta: 0.1
-  event_loss: none
+  event_loss: none  # one of: none, sliced_wasserstein, gen_jet, hist_2d
   event_loss_coef: 0.0
 
 tensorflow:
@@ -56,7 +56,7 @@ setup:
   weights:
   weights_config:
   lr: 0.00002
-  num_events_validation: 1000
+  num_events_validation: 100
   num_epochs: 100
   dtype: float32
   trainable:
@@ -166,7 +166,7 @@ parameters:
     phi_num_layers: 2
     energy_num_layers: 2
     layernorm: yes
-    mask_reg_cls0: yes
+    mask_reg_cls0: no
     energy_multimodal: yes
 
   skip_connection: yes
@@ -228,24 +228,24 @@ train_test_datasets:
       - cms_pf_ztt
       - cms_pf_qcd
       - cms_pf_qcd_high_pt
-      
+
 validation_datasets:
   - cms_pf_ttbar
 
 datasets: 
   cms_pf_ttbar:
-    version: 1.3.1
+    version: 1.4.0
     data_dir:
     manual_dir:
   cms_pf_ztt:
-    version: 1.3.1
+    version: 1.4.0
     data_dir:
     manual_dir:
   cms_pf_qcd:
-    version: 1.3.1
+    version: 1.4.0
     data_dir:
     manual_dir:
   cms_pf_qcd_high_pt:
-    version: 1.3.1
+    version: 1.4.0
     data_dir:
     manual_dir:
diff --git a/parameters/cms.yaml b/parameters/cms.yaml
@@ -218,15 +218,15 @@ validation_datasets:
 
 datasets: 
   cms_pf_ttbar:
-    version: 1.3.1
+    version: 1.4.0
     data_dir:
     manual_dir:
   cms_pf_ztt:
-    version: 1.3.1
+    version: 1.4.0
     data_dir:
     manual_dir:
   cms_pf_qcd:
-    version: 1.3.1
+    version: 1.4.0
     data_dir:
     manual_dir:
   cms_pf_qcd_high_pt:

diff --git a/parameters/delphes.yaml b/parameters/delphes.yaml
@@ -50,7 +50,7 @@ setup:
   lr: 1e-4
   num_events_train: 45000
   num_events_test: 5000
-  num_events_validation: 100
+  num_events_validation: 10
   num_epochs: 10
   num_val_files: 5
   dtype: float32

diff --git a/scripts/local_test_cms_pipeline.sh b/scripts/local_test_cms_pipeline.sh
@@ -1,6 +1,5 @@
 #!/bin/bash
 set -e
-export PYTHONPATH=`pwd`/hep_tfds
 export TFDS_DATA_DIR=`pwd`/tensorflow_datasets
 
 rm -Rf local_test_data/TTbar_14TeV_TuneCUETP8M1_cfi
@@ -18,7 +17,7 @@ cd ../../..
 rm -Rf local_test_data/TTbar_14TeV_TuneCUETP8M1_cfi/raw
 mkdir -p local_test_data/TTbar_14TeV_TuneCUETP8M1_cfi/raw
 for file in `\ls -1 local_test_data/TTbar_14TeV_TuneCUETP8M1_cfi/root/*.root`; do
-	python3 mlpf/data/postprocessing2.py \
+	python mlpf/data/postprocessing2.py \
 	  --input $file \
 	  --outpath local_test_data/TTbar_14TeV_TuneCUETP8M1_cfi/raw \
 	  --save-normalized-table --num-events 10
@@ -29,15 +28,15 @@ mkdir -p experiments
 tfds build hep_tfds/heptfds/cms_pf/ttbar --manual_dir ./local_test_data
 
 #Run a simple training on a few events
-python3 mlpf/pipeline.py train -c parameters/cms-gen.yaml --nepochs 1 --customize pipeline_test
+python mlpf/pipeline.py train -c parameters/cms-gen.yaml --nepochs 1 --customize pipeline_test
 
 ls ./experiments/cms*/weights/
 
 #Generate the pred.npz file of predictions
-python3 mlpf/pipeline.py evaluate --customize pipeline_test -t ./experiments/cms* -w ./experiments/cms*/weights/weights-01-*.hdf5
+python mlpf/pipeline.py evaluate --customize pipeline_test -t ./experiments/cms* -w ./experiments/cms*/weights/weights-01-*.hdf5
 
 #Evaluate the notebook
 papermill --inject-output-path --log-output -p path ./experiments/cms*/evaluation/epoch_1/cms_pf_ttbar/ notebooks/cms-mlpf.ipynb ./out.ipynb
 
 #Retrain from existing weights
-python3 mlpf/pipeline.py train -c parameters/cms-gen.yaml --nepochs 1 --customize pipeline_test -w ./experiments/cms*/weights/weights-01-*.hdf5
+python mlpf/pipeline.py train -c parameters/cms-gen.yaml --nepochs 1 --customize pipeline_test -w ./experiments/cms*/weights/weights-01-*.hdf5