diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 79b31baaa..ccea816fa 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -43,6 +43,18 @@ jobs:
       - run: pip install -r requirements.txt
       - run: ./scripts/local_test_clic_pipeline.sh

+  tf-clic-hits-pipeline:
+    runs-on: ubuntu-20.04
+    needs: [deps]
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.8.10'
+          cache: 'pip'
+      - run: pip install -r requirements.txt
+      - run: ./scripts/local_test_clic_hits_pipeline.sh
+
   tf-delphes-pipeline:
     runs-on: ubuntu-20.04
     needs: [deps]
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2cd994fce..420e9b7de 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 default_language_version:
   python: python3
-exclude: ^(delphes/tev14_pythia8_*.)|^(images/)|^(clic/dumper_hepsim.py)|^(mlpf/pyg/__init__.py)|^(fcc/.*)
+exclude: ^(delphes/tev14_pythia8_*.)|^(images/)|^(clic/dumper_hepsim.py)|^(mlpf/pyg/__init__.py)|^(fcc/clicRec_e4h_input.py)|^(fcc/clic_steer.py)|^(fcc/pythia.py)|^(fcc/PandoraSettings)

 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
diff --git a/fcc/check_files.py b/fcc/check_files.py
index 51fa96d94..91f4e4893 100644
--- a/fcc/check_files.py
+++ b/fcc/check_files.py
@@ -1,28 +1,51 @@
 import os

-#check for file presence in this path
-outpath = "/local/joosep/clic_edm4hep_2023_02_27/"
+# check for file presence in this path
+outpath = "/local/joosep/clic_edm4hep_2023_05_05/"

-#pythia card, start seed, end seed
+# pythia card, start seed, end seed
 samples = [
-    ("p8_ee_tt_ecm380", 1, 10011),
-    ("p8_ee_qq_ecm380", 100001, 120011),
+    ("p8_ee_tt_ecm380", 1, 10011),
+    ("p8_ee_qq_ecm380", 100001, 120011),
     ("p8_ee_ZH_Htautau_ecm380", 200001, 210011),
     ("p8_ee_WW_fullhad_ecm380", 300001, 310011),
 ]

 samples_pu = [
-    ("p8_ee_tt_ecm380", 1, 10001),
+    ("p8_ee_tt_ecm380", 1, 10001),
+]
+
+samples_gun = [
+    ("neutron", 1, 101),
+    ("kaon0L", 1, 101),
+    ("pi-", 1, 101),
+    ("pi+", 1, 101),
+    ("pi0", 1, 101),
+    ("mu-", 1, 101),
+    ("mu+", 1, 101),
+    ("e-", 1, 101),
+    ("e+", 1, 101),
+    ("gamma", 1, 101),
 ]

 if __name__ == "__main__":
-    #for sname, seed0, seed1 in samples:
+    # basic samples
+    # for sname, seed0, seed1 in samples:
     #    for seed in range(seed0, seed1):
     #        #check if output file exists, and print out batch submission if it doesn't
     #        if not os.path.isfile("{}/{}/reco_{}_{}.root".format(outpath, sname, sname, seed)):
-    #            print("sbatch run_sim.sh {} {}".format(seed, sname))
-    for sname, seed0, seed1 in samples_pu:
+    #            print("sbatch run_sim.sh {} {}".format(seed, sname))
+
+    # PU
+    # for sname, seed0, seed1 in samples_pu:
+    #     for seed in range(seed0, seed1):
+    #         # check if output file exists, and print out batch submission if it doesn't
+    #         if not os.path.isfile("{}/{}_PU10/reco_{}_{}.root".format(outpath, sname, sname, seed)):
+    #             print("sbatch run_sim_pu.sh {} {} p8_ee_gg_ecm380".format(seed, sname))
+
+    # gun
+    for sname, seed0, seed1 in samples_gun:
         for seed in range(seed0, seed1):
-            #check if output file exists, and print out batch submission if it doesn't
-            if not os.path.isfile("{}/{}/reco_{}_{}.root".format(outpath, sname, sname, seed)):
-                print("sbatch run_sim_pu.sh {} {} p8_ee_gg_ecm380".format(seed, sname))
+            # check if output file exists, and print out batch submission if it doesn't
+            if not os.path.isfile("{}/{}/reco_{}_{}.root".format(outpath, sname, sname, seed)):
+                print("sbatch run_sim_gun.sh {} {}".format(seed, sname))
diff --git
a/fcc/clicRec_e4h_input.py b/fcc/clicRec_e4h_input.py index 7e35ee274..d4e094cf4 100644 --- a/fcc/clicRec_e4h_input.py +++ b/fcc/clicRec_e4h_input.py @@ -2360,4 +2360,23 @@ from Configurables import ApplicationMgr -ApplicationMgr(TopAlg=algList, EvtSel="NONE", EvtMax=3, ExtSvc=[evtsvc], OutputLevel=WARNING) +SequencerTimerTool().OutputLevel = INFO +TIMER = TimingAuditor("TIMER") +TIMER.addTool(SequencerTimerTool, name="TIMER") +TIMER.TIMER.HistoProduce = True +TIMER.TIMER.OutputLevel = INFO + +toolsvc = ToolSvc() +auditorsvc = AuditorSvc() +auditorsvc.Auditors += [TIMER] +RootHistSvc().OutputFile = "timing_histos.root" + +ApplicationMgr( + TopAlg=algList, + EvtSel="NONE", + EvtMax=3, + ExtSvc=[evtsvc, toolsvc, auditorsvc], + OutputLevel=WARNING, + AuditAlgorithms=True, + HistogramPersistency = "ROOT") + diff --git a/fcc/clic_steer.py b/fcc/clic_steer.py index c2b584ef3..1e98b2ccd 100644 --- a/fcc/clic_steer.py +++ b/fcc/clic_steer.py @@ -139,7 +139,7 @@ ## Setting a distribution will set isotrop = True ## SIM.gun.distribution = None -SIM.gun.energy = 10000.0 +SIM.gun.energy = None ## isotropic distribution for the particle gun ## diff --git a/fcc/main19.cc b/fcc/main19.cc index ae792b5f3..9b3dd2f87 100644 --- a/fcc/main19.cc +++ b/fcc/main19.cc @@ -54,15 +54,15 @@ int main(int argc, char *argv[]) { std::cerr << "./main SEED NPU" << std::endl; return 1; } - + std::string seedStr = std::string("Random:seed = ").append(std::string(argv[1])); // Average number of pileup events per signal event. double nPileupAvg = atoi(argv[2]); - + // Shift each PU event by this time delta in time to mimic ee overlay double timeDelta = 0.5; - + Pythia8ToHepMC ToHepMC; ToHepMC.setNewFile("pythia.hepmc"); @@ -84,18 +84,18 @@ int main(int argc, char *argv[]) { // Select the number of pileup events to generate. int nPileup = poisson(nPileupAvg, pythiaPileup.rndm); - + // create a random index permutation from [0, nPileup) std::vector puVectorInds; for (int npu=0; npu0)) - eta[tt<=0] = 0.0 + eta = awkward.to_numpy(-np.log(tt, where=tt > 0)) + eta[tt <= 0] = 0.0 ret["eta"] = eta costheta = np.cos(ret["iTheta"]) - ez = ret["energy"]*costheta - ret["et"] = np.sqrt(ret["energy"]**2 - ez**2) + ez = ret["energy"] * costheta + ret["et"] = np.sqrt(ret["energy"] ** 2 - ez**2) + + # cluster is always type 2 + ret["elemtype"] = 2 * np.ones(n_cl, dtype=np.float32) - #override cluster type with 1 - ret["type"] = 2*np.ones(n_cl, dtype=np.float32) - ret["sin_phi"] = np.sin(ret["phi"]) ret["cos_phi"] = np.cos(ret["phi"]) return awkward.Record(ret) + def track_to_features(prop_data, iev): track_arr = prop_data[track_coll][iev] feats_from_track = ["type", "chi2", "ndf", "dEdx", "dEdxError", "radiusOfInnermostHit"] ret = {feat: track_arr[track_coll + "." + feat] for feat in feats_from_track} n_tr = len(ret["type"]) - #FIXME: add additional track features from track state - - #get the index of the first track state + # get the index of the first track state trackstate_idx = prop_data[track_coll][track_coll + ".trackStates_begin"][iev] - #get the properties of the track at the first track state (at the origin) + # get the properties of the track at the first track state (at the origin) for k in ["tanLambda", "D0", "phi", "omega", "Z0", "time"]: ret[k] = prop_data["SiTracks_1"]["SiTracks_1." 
+ k][iev][trackstate_idx] @@ -320,22 +387,23 @@ def track_to_features(prop_data, iev): ret["px"] = np.cos(ret["phi"]) * ret["pt"] ret["py"] = np.sin(ret["phi"]) * ret["pt"] ret["pz"] = ret["tanLambda"] * ret["pt"] - ret["p"] = np.sqrt(ret["px"]**2 + ret["py"]**2 + ret["pz"]**2) - cos_theta = np.divide(ret["pz"], ret["p"], where=ret["p"]>0) + ret["p"] = np.sqrt(ret["px"] ** 2 + ret["py"] ** 2 + ret["pz"] ** 2) + cos_theta = np.divide(ret["pz"], ret["p"], where=ret["p"] > 0) theta = np.arccos(cos_theta) tt = np.tan(theta / 2.0) - eta = awkward.to_numpy(-np.log(tt, where=tt>0)) - eta[tt<=0] = 0.0 + eta = awkward.to_numpy(-np.log(tt, where=tt > 0)) + eta[tt <= 0] = 0.0 ret["eta"] = eta ret["sin_phi"] = np.sin(ret["phi"]) ret["cos_phi"] = np.cos(ret["phi"]) - #override track type with 1 - ret["type"] = 1*np.ones(n_tr, dtype=np.float32) + # track is always type 1 + ret["elemtype"] = 1 * np.ones(n_tr, dtype=np.float32) return awkward.Record(ret) + def filter_adj(adj, all_to_filtered): i0s_new = [] i1s_new = [] @@ -348,9 +416,12 @@ def filter_adj(adj, all_to_filtered): ws_new.append(w) return np.array(i0s_new), np.array(i1s_new), np.array(ws_new) + def get_genparticles_and_adjacencies(prop_data, hit_data, calohit_links, sitrack_links, iev, collectionIDs): gen_features = gen_to_features(prop_data, iev) - hit_features, genparticle_to_hit, hit_idx_local_to_global = get_calohit_matrix_and_genadj(hit_data, calohit_links, iev, collectionIDs) + hit_features, genparticle_to_hit, hit_idx_local_to_global = get_calohit_matrix_and_genadj( + hit_data, calohit_links, iev, collectionIDs + ) hit_to_cluster = hit_cluster_adj(prop_data, hit_idx_local_to_global, iev) cluster_features = cluster_to_features(prop_data, hit_features, hit_to_cluster, iev) track_features = track_to_features(prop_data, iev) @@ -361,49 +432,33 @@ def get_genparticles_and_adjacencies(prop_data, hit_data, calohit_links, sitrack n_hit = awkward.count(hit_features["type"]) n_cluster = awkward.count(cluster_features["type"]) - if len(genparticle_to_track[0])>0: - gp_to_track = coo_matrix( - (genparticle_to_track[2], - (genparticle_to_track[0], genparticle_to_track[1])), - shape=(n_gp, n_track) - ).max(axis=1).todense() + if len(genparticle_to_track[0]) > 0: + gp_to_track = ( + coo_matrix((genparticle_to_track[2], (genparticle_to_track[0], genparticle_to_track[1])), shape=(n_gp, n_track)) + .max(axis=1) + .todense() + ) else: gp_to_track = np.zeros((n_gp, 1)) - gp_to_calohit = coo_matrix( - (genparticle_to_hit[2], - (genparticle_to_hit[0], genparticle_to_hit[1])), - shape=(n_gp, n_hit) - ) - calohit_to_cluster = coo_matrix( - (hit_to_cluster[2], - (hit_to_cluster[0], hit_to_cluster[1])), - shape=(n_hit, n_cluster) - ) - gp_to_cluster = (gp_to_calohit*calohit_to_cluster).sum(axis=1) + gp_to_calohit = coo_matrix((genparticle_to_hit[2], (genparticle_to_hit[0], genparticle_to_hit[1])), shape=(n_gp, n_hit)) + calohit_to_cluster = coo_matrix((hit_to_cluster[2], (hit_to_cluster[0], hit_to_cluster[1])), shape=(n_hit, n_cluster)) + gp_to_cluster = (gp_to_calohit * calohit_to_cluster).sum(axis=1) - #60% of the hits of a track must come from the genparticle - gp_in_tracker = np.array(gp_to_track>=0.6)[:, 0] + # 60% of the hits of a track must come from the genparticle + gp_in_tracker = np.array(gp_to_track >= 0.6)[:, 0] - #at least 10% of the energy of the genparticle should be matched to a calorimeter cluster - gp_in_calo = (np.array(gp_to_cluster)[:, 0]/gen_features["energy"])>0.1 + # at least 10% of the energy of the genparticle should be matched to a 
calorimeter cluster + gp_in_calo = (np.array(gp_to_cluster)[:, 0] / gen_features["energy"]) > 0.1 gp_interacted_with_detector = gp_in_tracker | gp_in_calo - mask_visible = ( - (gen_features["generatorStatus"]==1) & - (gen_features["PDG"]!=12) & - (gen_features["PDG"]!=14) & - (gen_features["PDG"]!=16) & - (gen_features["energy"]>0.01) & - gp_interacted_with_detector - ) + mask_visible = (gen_features["energy"] > 0.01) & gp_interacted_with_detector + print("gps total={} visible={}".format(n_gp, np.sum(mask_visible))) idx_all_masked = np.where(mask_visible)[0] genpart_idx_all_to_filtered = {idx_all: idx_filtered for idx_filtered, idx_all in enumerate(idx_all_masked)} - gen_features = awkward.Record({ - feat: gen_features[feat][mask_visible] for feat in gen_features.fields - }) + gen_features = awkward.Record({feat: gen_features[feat][mask_visible] for feat in gen_features.fields}) genparticle_to_hit = filter_adj(genparticle_to_hit, genpart_idx_all_to_filtered) genparticle_to_track = filter_adj(genparticle_to_track, genpart_idx_all_to_filtered) @@ -415,9 +470,11 @@ def get_genparticles_and_adjacencies(prop_data, hit_data, calohit_links, sitrack track_features, genparticle_to_hit, genparticle_to_track, - hit_to_cluster + hit_to_cluster, + ([], []), ) + def assign_genparticles_to_obj_and_merge(gpdata): n_gp = awkward.count(gpdata.gen_features["PDG"]) @@ -425,45 +482,42 @@ def assign_genparticles_to_obj_and_merge(gpdata): n_hit = awkward.count(gpdata.hit_features["type"]) n_cluster = awkward.count(gpdata.cluster_features["type"]) - gp_to_track = np.array(coo_matrix( - (gpdata.genparticle_to_track[2], - (gpdata.genparticle_to_track[0], gpdata.genparticle_to_track[1])), - shape=(n_gp, n_track) - ).todense()) + gp_to_track = np.array( + coo_matrix( + (gpdata.genparticle_to_track[2], (gpdata.genparticle_to_track[0], gpdata.genparticle_to_track[1])), + shape=(n_gp, n_track), + ).todense() + ) gp_to_calohit = coo_matrix( - (gpdata.genparticle_to_hit[2], - (gpdata.genparticle_to_hit[0], gpdata.genparticle_to_hit[1])), - shape=(n_gp, n_hit) + (gpdata.genparticle_to_hit[2], (gpdata.genparticle_to_hit[0], gpdata.genparticle_to_hit[1])), shape=(n_gp, n_hit) ) calohit_to_cluster = coo_matrix( - (gpdata.hit_to_cluster[2], - (gpdata.hit_to_cluster[0], gpdata.hit_to_cluster[1])), - shape=(n_hit, n_cluster) + (gpdata.hit_to_cluster[2], (gpdata.hit_to_cluster[0], gpdata.hit_to_cluster[1])), shape=(n_hit, n_cluster) ) - gp_to_cluster = np.array((gp_to_calohit*calohit_to_cluster).todense()) + gp_to_cluster = np.array((gp_to_calohit * calohit_to_cluster).todense()) - #map each genparticle to a track or a cluster - gp_to_obj = -1*np.ones((n_gp, 2), dtype=np.int32) + # map each genparticle to a track or a cluster + gp_to_obj = -1 * np.ones((n_gp, 2), dtype=np.int32) set_used_tracks = set([]) set_used_clusters = set([]) gps_sorted_energy = sorted(range(n_gp), key=lambda x: gpdata.gen_features["energy"][x], reverse=True) for igp in gps_sorted_energy: - #first check if we can match the genparticle to a track + # first check if we can match the genparticle to a track matched_tracks = gp_to_track[igp] trks = np.where(matched_tracks)[0] trks = sorted(trks, key=lambda x: matched_tracks[x], reverse=True) for trk in trks: - #if the track was not already used for something else + # if the track was not already used for something else if trk not in set_used_tracks: gp_to_obj[igp, 0] = trk set_used_tracks.add(trk) break - #if there was no matched track, try a cluster + # if there was no matched track, try a cluster if 
gp_to_obj[igp, 0] == -1: matched_clusters = gp_to_cluster[igp] clusters = np.where(matched_clusters)[0] @@ -474,8 +528,8 @@ def assign_genparticles_to_obj_and_merge(gpdata): set_used_clusters.add(cl) break - #the genparticles that could not be matched to a track or cluster are merged to the closest genparticle - unmatched = np.where((gp_to_obj[:, 0]==-1) & (gp_to_obj[:, 1]==-1))[0] + # the genparticles that could not be matched to a track or cluster are merged to the closest genparticle + unmatched = np.where((gp_to_obj[:, 0] == -1) & (gp_to_obj[:, 1] == -1))[0] mask_gp_unmatched = np.ones(n_gp, dtype=bool) pt_arr = np.array(awkward.to_numpy(gpdata.gen_features["pt"])) @@ -483,20 +537,25 @@ def assign_genparticles_to_obj_and_merge(gpdata): phi_arr = np.array(awkward.to_numpy(gpdata.gen_features["phi"])) energy_arr = np.array(awkward.to_numpy(gpdata.gen_features["energy"])) - #now merge unmatched genparticles to their closest genparticle + # now merge unmatched genparticles to their closest genparticle + gp_merges_gp0 = [] + gp_merges_gp1 = [] for igp_unmatched in unmatched: mask_gp_unmatched[igp_unmatched] = False idx_best_cluster = np.argmax(gp_to_cluster[igp_unmatched]) - idx_gp_bestcluster = np.where(gp_to_obj[:, 1]==idx_best_cluster)[0] + idx_gp_bestcluster = np.where(gp_to_obj[:, 1] == idx_best_cluster)[0] - #if the genparticle is not matched to any cluster, then it left a few hits to some other track - #this is rare, happens only for low-pT particles and we don"t want to try to reconstruct it - if (len(idx_gp_bestcluster)!=1): + # if the genparticle is not matched to any cluster, then it left a few hits to some other track + # this is rare, happens only for low-pT particles and we don't want to try to reconstruct it + if len(idx_gp_bestcluster) != 1: print("unmatched pt=", pt_arr[igp_unmatched]) continue idx_gp_bestcluster = idx_gp_bestcluster[0] + gp_merges_gp0.append(idx_gp_bestcluster) + gp_merges_gp1.append(igp_unmatched) + vec0 = vector.obj( pt=gpdata.gen_features["pt"][igp_unmatched], eta=gpdata.gen_features["eta"][igp_unmatched], @@ -509,7 +568,7 @@ def assign_genparticles_to_obj_and_merge(gpdata): phi=gpdata.gen_features["phi"][idx_gp_bestcluster], e=gpdata.gen_features["energy"][idx_gp_bestcluster], ) - vec = vec0+vec1 + vec = vec0 + vec1 pt_arr[idx_gp_bestcluster] = vec.pt eta_arr[idx_gp_bestcluster] = vec.eta phi_arr[idx_gp_bestcluster] = vec.phi @@ -524,7 +583,7 @@ def assign_genparticles_to_obj_and_merge(gpdata): "cos_phi": np.cos(phi_arr[mask_gp_unmatched]), "energy": energy_arr[mask_gp_unmatched], } - assert((np.sum(gen_features_new["energy"])-np.sum(gpdata.gen_features["energy"])) < 1e-2) + assert (np.sum(gen_features_new["energy"]) - np.sum(gpdata.gen_features["energy"])) < 1e-2 idx_all_masked = np.where(mask_gp_unmatched)[0] genpart_idx_all_to_filtered = {idx_all: idx_filtered for idx_filtered, idx_all in enumerate(idx_all_masked)} @@ -532,26 +591,30 @@ def assign_genparticles_to_obj_and_merge(gpdata): genparticle_to_track = filter_adj(gpdata.genparticle_to_track, genpart_idx_all_to_filtered) gp_to_obj = gp_to_obj[mask_gp_unmatched] - return EventData( - gen_features_new, - gpdata.hit_features, - gpdata.cluster_features, - gpdata.track_features, - genparticle_to_hit, - genparticle_to_track, - gpdata.hit_to_cluster - ), gp_to_obj + return ( + EventData( + gen_features_new, + gpdata.hit_features, + gpdata.cluster_features, + gpdata.track_features, + genparticle_to_hit, + genparticle_to_track, + gpdata.hit_to_cluster, + (gp_merges_gp0, gp_merges_gp1), + ), + gp_to_obj, 
+ ) -#for each PF element (track, cluster), get the index of the best-matched particle (gen or reco) -#if the PF element has no best-matched particle, returns -1 +# for each PF element (track, cluster), get the index of the best-matched particle (gen or reco) +# if the PF element has no best-matched particle, returns -1 def assign_to_recoobj(n_obj, obj_to_ptcl, used_particles): obj_to_ptcl_all = -1 * np.ones(n_obj, dtype=np.int64) for iobj in range(n_obj): if iobj in obj_to_ptcl: iptcl = obj_to_ptcl[iobj] obj_to_ptcl_all[iobj] = iptcl - assert(used_particles[iptcl] == 0) + assert used_particles[iptcl] == 0 used_particles[iptcl] = 1 return obj_to_ptcl_all @@ -560,66 +623,69 @@ def get_recoptcl_to_obj(n_rps, reco_arr, idx_rp_to_track, idx_rp_to_cluster): track_to_rp = {} cluster_to_rp = {} - #loop over the reco particles + # loop over the reco particles for irp in range(n_rps): assigned = False - #find and loop over tracks associated to the reco particle + # find and loop over tracks associated to the reco particle trks_begin = reco_arr["tracks_begin"][irp] trks_end = reco_arr["tracks_end"][irp] for itrk in range(trks_begin, trks_end): - #get the index of the track in the track collection + # get the index of the track in the track collection itrk_real = idx_rp_to_track[itrk] - assert(itrk_real not in track_to_rp) + assert itrk_real not in track_to_rp track_to_rp[itrk_real] = irp assigned = True - #only look for clusters if tracks were not found + # only look for clusters if tracks were not found if not assigned: - #find and loop over clusters associated to the reco particle + # find and loop over clusters associated to the reco particle cls_begin = reco_arr["clusters_begin"][irp] cls_end = reco_arr["clusters_end"][irp] for icls in range(cls_begin, cls_end): - #get the index of the cluster in the cluster collection + # get the index of the cluster in the cluster collection icls_real = idx_rp_to_cluster[icls] - assert(icls_real not in cluster_to_rp) + assert icls_real not in cluster_to_rp cluster_to_rp[icls_real] = irp return track_to_rp, cluster_to_rp + def get_reco_properties(prop_data, iev): reco_arr = prop_data["MergedRecoParticles"][iev] reco_arr = {k.replace("MergedRecoParticles.", ""): reco_arr[k] for k in reco_arr.fields} - reco_p4 = vector.awk(awkward.zip({ - "mass": reco_arr["mass"], - "x": reco_arr["momentum.x"], - "y": reco_arr["momentum.y"], - "z": reco_arr["momentum.z"]})) + reco_p4 = vector.awk( + awkward.zip( + {"mass": reco_arr["mass"], "x": reco_arr["momentum.x"], "y": reco_arr["momentum.y"], "z": reco_arr["momentum.z"]} + ) + ) reco_arr["pt"] = reco_p4.pt reco_arr["eta"] = reco_p4.eta reco_arr["phi"] = reco_p4.phi reco_arr["energy"] = reco_p4.energy - msk = reco_arr["type"]!=0 + msk = reco_arr["type"] != 0 reco_arr = awkward.Record({k: reco_arr[k][msk] for k in reco_arr.keys()}) return reco_arr + def get_particle_feature_matrix(pfelem_to_particle, feature_dict, features): feats = [] for feat in features: feat_arr = feature_dict[feat] - if len(feat_arr)==0: + if len(feat_arr) == 0: feat_arr_reordered = feat_arr else: feat_arr_reordered = awkward.to_numpy(feat_arr[pfelem_to_particle]) - feat_arr_reordered[pfelem_to_particle==-1] = 0.0 + feat_arr_reordered[pfelem_to_particle == -1] = 0.0 feats.append(feat_arr_reordered) feats = np.array(feats) return feats.T + def get_feature_matrix(feature_dict, features): feats = [] for feat in features: @@ -628,26 +694,42 @@ def get_feature_matrix(feature_dict, features): feats = np.array(feats) return feats.T + def process_one_file(fn, 
ofn): - #output exists, do not recreate + # output exists, do not recreate if os.path.isfile(ofn): + print("{} exists".format(ofn)) return fi = uproot.open(fn) - + arrs = fi["events"] - - collectionIDs = {k: v for k, v in - zip(fi.get("metadata").arrays("CollectionIDs")["CollectionIDs"]["m_names"][0], - fi.get("metadata").arrays("CollectionIDs")["CollectionIDs"]["m_collectionIDs"][0])} - collectionIDs_reverse = {v: k for k, v in collectionIDs.items()} - - prop_data = arrs.arrays([mc_coll, track_coll, "SiTracks_1", "PandoraClusters", "PandoraClusters#1", "PandoraClusters#0", "MergedRecoParticles"]) + + collectionIDs = { + k: v + for k, v in zip( + fi.get("metadata").arrays("CollectionIDs")["CollectionIDs"]["m_names"][0], + fi.get("metadata").arrays("CollectionIDs")["CollectionIDs"]["m_collectionIDs"][0], + ) + } + + prop_data = arrs.arrays( + [ + mc_coll, + track_coll, + "SiTracks_1", + "PandoraClusters", + "PandoraClusters#1", + "PandoraClusters#0", + "MergedRecoParticles", + ] + ) calohit_links = arrs.arrays(["CalohitMCTruthLink", "CalohitMCTruthLink#0", "CalohitMCTruthLink#1"]) sitrack_links = arrs.arrays(["SiTracksMCTruthLink", "SiTracksMCTruthLink#0", "SiTracksMCTruthLink#1"]) - #maps the recoparticle track/cluster index (in tracks_begin,end and clusters_begin,end) to the index in the track/cluster collection + # maps the recoparticle track/cluster index (in tracks_begin,end and clusters_begin,end) + # to the index in the track/cluster collection idx_rp_to_cluster = arrs["MergedRecoParticles#0/MergedRecoParticles#0.index"].array() idx_rp_to_track = arrs["MergedRecoParticles#1/MergedRecoParticles#1.index"].array() @@ -662,104 +744,87 @@ def process_one_file(fn, ofn): } ret = [] - for iev in tqdm.tqdm(range(arrs.num_entries)): + for iev in range(arrs.num_entries): - #get the reco particles + # get the reco particles reco_arr = get_reco_properties(prop_data, iev) reco_type = np.abs(reco_arr["type"]) n_rps = len(reco_type) - reco_features = awkward.Record({ - "PDG": np.abs(reco_type), - "charge": reco_arr["charge"], - "pt": reco_arr["pt"], - "eta": reco_arr["eta"], - "sin_phi": np.sin(reco_arr["phi"]), - "cos_phi": np.cos(reco_arr["phi"]), - "energy": reco_arr["energy"] - }) - - #get the genparticles and the links between genparticles and tracks/clusters + reco_features = awkward.Record( + { + "PDG": np.abs(reco_type), + "charge": reco_arr["charge"], + "pt": reco_arr["pt"], + "eta": reco_arr["eta"], + "sin_phi": np.sin(reco_arr["phi"]), + "cos_phi": np.cos(reco_arr["phi"]), + "energy": reco_arr["energy"], + } + ) + + # get the genparticles and the links between genparticles and tracks/clusters gpdata = get_genparticles_and_adjacencies(prop_data, hit_data, calohit_links, sitrack_links, iev, collectionIDs) - #find the reconstructable genparticles and associate them to the best track/cluster + # find the reconstructable genparticles and associate them to the best track/cluster gpdata_cleaned, gp_to_obj = assign_genparticles_to_obj_and_merge(gpdata) n_tracks = len(gpdata_cleaned.track_features["type"]) n_clusters = len(gpdata_cleaned.cluster_features["type"]) n_gps = len(gpdata_cleaned.gen_features["PDG"]) - assert(len(gp_to_obj) == len(gpdata_cleaned.gen_features["PDG"])) - assert(gp_to_obj.shape[1] == 2) - - #for each reco particle, find the tracks and clusters associated with it - #construct track/cluster -> recoparticle maps + assert len(gp_to_obj) == len(gpdata_cleaned.gen_features["PDG"]) + assert gp_to_obj.shape[1] == 2 + + # for each reco particle, find the tracks and clusters 
associated with it + # construct track/cluster -> recoparticle maps track_to_rp, cluster_to_rp = get_recoptcl_to_obj(n_rps, reco_arr, idx_rp_to_track[iev], idx_rp_to_cluster[iev]) - #get the track/cluster -> genparticle map + # get the track/cluster -> genparticle map track_to_gp = {itrk: igp for igp, itrk in enumerate(gp_to_obj[:, 0]) if itrk != -1} cluster_to_gp = {icl: igp for igp, icl in enumerate(gp_to_obj[:, 1]) if icl != -1} used_gps = np.zeros(n_gps, dtype=np.int64) track_to_gp_all = assign_to_recoobj(n_tracks, track_to_gp, used_gps) cluster_to_gp_all = assign_to_recoobj(n_clusters, cluster_to_gp, used_gps) - #all genparticles must be assigned to some PFElement - assert(np.all(used_gps == 1)) + # all genparticles must be assigned to some PFElement + assert np.all(used_gps == 1) used_rps = np.zeros(n_rps, dtype=np.int64) track_to_rp_all = assign_to_recoobj(n_tracks, track_to_rp, used_rps) cluster_to_rp_all = assign_to_recoobj(n_clusters, cluster_to_rp, used_rps) - #all reco particles must be assigned to some PFElement - assert(np.all(used_rps == 1)) + # all reco particles must be assigned to some PFElement + assert np.all(used_rps == 1) - gps_track = get_particle_feature_matrix( - track_to_gp_all, - gpdata_cleaned.gen_features, - particle_feature_order - ) - gps_track[:, 0] = np.array([ - map_neutral_to_charged(map_pdgid_to_candid(p, c)) for p, c in zip(gps_track[:, 0], gps_track[:, 1])] + gps_track = get_particle_feature_matrix(track_to_gp_all, gpdata_cleaned.gen_features, particle_feature_order) + gps_track[:, 0] = np.array( + [map_neutral_to_charged(map_pdgid_to_candid(p, c)) for p, c in zip(gps_track[:, 0], gps_track[:, 1])] ) - gps_cluster = get_particle_feature_matrix( - cluster_to_gp_all, - gpdata_cleaned.gen_features, - particle_feature_order - ) - gps_cluster[:, 0] = np.array([ - map_charged_to_neutral(map_pdgid_to_candid(p, c)) for p, c in zip(gps_cluster[:, 0], gps_cluster[:, 1])] + gps_cluster = get_particle_feature_matrix(cluster_to_gp_all, gpdata_cleaned.gen_features, particle_feature_order) + gps_cluster[:, 0] = np.array( + [map_charged_to_neutral(map_pdgid_to_candid(p, c)) for p, c in zip(gps_cluster[:, 0], gps_cluster[:, 1])] ) gps_cluster[:, 1] = 0 - rps_track = get_particle_feature_matrix( - track_to_rp_all, - reco_features, - particle_feature_order - ) - rps_track[:, 0] = np.array([ - map_neutral_to_charged(map_pdgid_to_candid(p, c)) for p, c in zip(rps_track[:, 0], rps_track[:, 1])] + rps_track = get_particle_feature_matrix(track_to_rp_all, reco_features, particle_feature_order) + rps_track[:, 0] = np.array( + [map_neutral_to_charged(map_pdgid_to_candid(p, c)) for p, c in zip(rps_track[:, 0], rps_track[:, 1])] ) - rps_cluster = get_particle_feature_matrix( - cluster_to_rp_all, - reco_features, - particle_feature_order - ) - rps_cluster[:, 0] = np.array([ - map_charged_to_neutral(map_pdgid_to_candid(p, c)) for p, c in zip(rps_cluster[:, 0], rps_cluster[:, 1])] + rps_cluster = get_particle_feature_matrix(cluster_to_rp_all, reco_features, particle_feature_order) + rps_cluster[:, 0] = np.array( + [map_charged_to_neutral(map_pdgid_to_candid(p, c)) for p, c in zip(rps_cluster[:, 0], rps_cluster[:, 1])] ) rps_cluster[:, 1] = 0 - #all initial gen/reco particle energy must be reconstructable - assert(abs( - np.sum(gps_track[:, 6]) + np.sum(gps_cluster[:, 6]) - np.sum(gpdata_cleaned.gen_features["energy"]) - ) < 1e-2) - - assert(abs( - np.sum(rps_track[:, 6]) + np.sum(rps_cluster[:, 6]) - np.sum(reco_features["energy"]) - ) < 1e-2) + # all initial gen/reco particle energy 
must be reconstructable + assert ( + abs(np.sum(gps_track[:, 6]) + np.sum(gps_cluster[:, 6]) - np.sum(gpdata_cleaned.gen_features["energy"])) < 1e-2 + ) + assert abs(np.sum(rps_track[:, 6]) + np.sum(rps_cluster[:, 6]) - np.sum(reco_features["energy"])) < 1e-2 - #we don"t want to try to reconstruct charged particles from primary clusters, make sure the charge is 0 - assert(np.all(gps_cluster[:, 1] == 0)) - assert(np.all(rps_cluster[:, 1] == 0)) + # we don"t want to try to reconstruct charged particles from primary clusters, make sure the charge is 0 + assert np.all(gps_cluster[:, 1] == 0) + assert np.all(rps_cluster[:, 1] == 0) X_track = get_feature_matrix(gpdata_cleaned.track_features, track_feature_order) X_cluster = get_feature_matrix(gpdata_cleaned.cluster_features, cluster_feature_order) @@ -775,27 +840,30 @@ def process_one_file(fn, ofn): sanitize(ycand_track) sanitize(ycand_cluster) - this_ev = awkward.Record({ - "X_track": X_track, - "X_cluster": X_cluster, - "ygen_track": ygen_track, - "ygen_cluster": ygen_cluster, - "ycand_track": ycand_track, - "ycand_cluster": ycand_cluster - }) + this_ev = awkward.Record( + { + "X_track": X_track, + "X_cluster": X_cluster, + "ygen_track": ygen_track, + "ygen_cluster": ygen_cluster, + "ycand_track": ycand_track, + "ycand_cluster": ycand_cluster, + } + ) ret.append(this_ev) ret = awkward.Record({k: awkward.from_iter([r[k] for r in ret]) for k in ret[0].fields}) awkward.to_parquet(ret, ofn) + def process_sample(sample): inp = "/local/joosep/clic_edm4hep_2023_02_27/" - outp = "/local/joosep/mlpf/clic_edm4hep_2023_04_27/" + outp = "/local/joosep/mlpf/clic_edm4hep_2023_05_09/" - pool = multiprocessing.Pool(30) + pool = multiprocessing.Pool(16) - inpath_samp = inp + samp - outpath_samp = outp + samp + inpath_samp = inp + sample + outpath_samp = outp + sample infiles = list(glob.glob(inpath_samp + "/*.root")) if not os.path.isdir(outpath_samp): os.makedirs(outpath_samp) @@ -806,6 +874,7 @@ def process_sample(sample): args.append((inf, of)) pool.starmap(process_one_file, args) + if __name__ == "__main__": if len(sys.argv) == 2: process_sample(sys.argv[1]) diff --git a/fcc/postprocessing_hits.py b/fcc/postprocessing_hits.py index f804f5fbe..3009fa90e 100644 --- a/fcc/postprocessing_hits.py +++ b/fcc/postprocessing_hits.py @@ -1,38 +1,57 @@ -import bz2 import numpy as np import awkward -import matplotlib.pyplot as plt import uproot -import vector import glob -import networkx as nx -import tqdm -import numba import os import sys import multiprocessing from scipy.sparse import coo_matrix -track_coll = "SiTracks_Refitted" -mc_coll = "MCParticles" +from postprocessing import map_pdgid_to_candid, map_charged_to_neutral, map_neutral_to_charged, sanitize -#the feature matrices will be saved in this order -particle_feature_order = ["PDG", "charge", "pt", "eta", "sin_phi", "cos_phi", "energy"] +from postprocessing import track_coll, mc_coll, particle_feature_order -#arrange track and cluster features such that pt (et), eta, phi, p (energy) are in the same spot -#so we can easily use them in skip connections track_feature_order = [ - "elemtype", "pt", "eta", "sin_phi", "cos_phi", "p", - "chi2", "ndf", - "radiusOfInnermostHit", "tanLambda", "D0", "omega", - "referencePoint.x", "referencePoint.y", "referencePoint.z", - "Z0", "time", "type" + "elemtype", + "pt", + "eta", + "sin_phi", + "cos_phi", + "p", + "chi2", + "ndf", + "radiusOfInnermostHit", + "tanLambda", + "D0", + "omega", + "Z0", + "time", + "type", ] hit_feature_order = [ - "elemtype", "et", "eta", 
"sin_phi", "cos_phi", "energy", - "position.x", "position.y", "position.z", "time", "subdetector", "type" + "elemtype", + "et", + "eta", + "sin_phi", + "cos_phi", + "energy", + "position.x", + "position.y", + "position.z", + "time", + "subdetector", + "type", ] +from postprocessing import ( + get_genparticles_and_adjacencies, + assign_to_recoobj, + get_reco_properties, + get_particle_feature_matrix, + get_feature_matrix, +) + + def build_dummy_array(num, dtype=np.int64): return awkward.Array( awkward.contents.ListOffsetArray( @@ -41,373 +60,46 @@ def build_dummy_array(num, dtype=np.int64): ) ) -def track_pt(omega): - a = 3 * 10**-4 - b = 4 # B-field in tesla, from clicRec_e4h_input - - return a * np.abs(b / omega) - -def map_pdgid_to_candid(pdgid, charge): - if pdgid == 0: - return 0 - - #photon, electron, muon - if pdgid in [22, 11, 13]: - return pdgid - - # charged hadron - if abs(charge) > 0: - return 211 - - # neutral hadron - return 130 - -def map_charged_to_neutral(pdg): - if pdg == 0: - return 0 - if pdg == 11 or pdg == 22: - return 22 - return 130 - -def map_neutral_to_charged(pdg): - if pdg == 130 or pdg == 22: - return 211 - return pdg - -def sanitize(arr): - arr[np.isnan(arr)] = 0.0 - arr[np.isinf(arr)] = 0.0 - -class EventData: - def __init__(self, - gen_features, - hit_features, - track_features, - genparticle_to_hit, - genparticle_to_track, - hit_to_cluster - ): - self.gen_features = gen_features - self.hit_features = hit_features - self.track_features = track_features - self.genparticle_to_hit = genparticle_to_hit - self.genparticle_to_track = genparticle_to_track - self.hit_to_cluster = hit_to_cluster - -def get_cluster_subdet_energies(hit_list, hit_data, collectionIDs_reverse, iev): - """ - This function calculates the energy contribution from each of four subdetectors in a particle physics experiment, based on a list of hits and their corresponding data. - - Args: - hit_list: a list of tuples, where each tuple contains a collection ID and a hit index - hit_data: a dictionary containing data for each hit in the experiment, organized by collection - collectionIDs_reverse: a dictionary mapping collection IDs to collection names - iev: the event number for the current event - - Returns: - A tuple containing the energy contributions from each of the four subdetectors: - (ecal_energy, hcal_energy, muon_energy, other_energy) - """ - - ecal_energy = 0.0 - hcal_energy = 0.0 - muon_energy = 0.0 - other_energy = 0.0 - - for coll_id, hit_idx in hit_list: - coll = collectionIDs_reverse[coll_id] - hit_energy = hit_data[coll][iev][coll+".energy"][hit_idx] - - if coll.startswith("ECAL"): - ecal_energy += hit_energy - elif coll.startswith("HCAL"): - hcal_energy += hit_energy - elif coll == "MUON": - muon_energy += hit_energy - else: - other_energy += hit_energy - - return ecal_energy, hcal_energy, muon_energy, other_energy - -def hits_to_features(hit_data, iev, coll, feats): - feat_arr = {f: hit_data[coll + "." 
+ f][iev] for f in feats} - - #set the subdetector type - sdcoll = "subdetector" - feat_arr[sdcoll] = np.zeros(len(feat_arr["type"]), dtype=np.int32) - if coll.startswith("ECAL"): - feat_arr[sdcoll][:] = 0 - elif coll.startswith("HCAL"): - feat_arr[sdcoll][:] = 1 - else: - feat_arr[sdcoll][:] = 2 - - #hit elemtype is always 2 - feat_arr["elemtype"] = 2*np.ones(len(feat_arr["type"]), dtype=np.int32) - - #precompute some approximate et, eta, phi - pos_mag = np.sqrt(feat_arr["position.x"]**2 + feat_arr["position.y"]**2 + feat_arr["position.z"]**2) - px = (feat_arr["position.x"] / pos_mag) * feat_arr["energy"] - py = (feat_arr["position.y"] / pos_mag) * feat_arr["energy"] - pz = (feat_arr["position.z"] / pos_mag) * feat_arr["energy"] - feat_arr["et"] = np.sqrt(px**2+py**2) - feat_arr["eta"] = 0.5*np.log((feat_arr["energy"] + pz)/(feat_arr["energy"] - pz)) - feat_arr["sin_phi"] = py/feat_arr["energy"] - feat_arr["cos_phi"] = px/feat_arr["energy"] - - return awkward.Record(feat_arr) - -def get_calohit_matrix_and_genadj(hit_data, calohit_links, iev, collectionIDs): - feats = ["type", "cellID", "energy", "energyError", "time", "position.x", "position.y", "position.z"] - - hit_idx_global = 0 - hit_idx_global_to_local = {} - hit_feature_matrix = [] - for col in sorted(hit_data.keys()): - icol = collectionIDs[col] - hit_features = hits_to_features(hit_data[col], iev, col, feats) - hit_feature_matrix.append(hit_features) - for ihit in range(len(hit_data[col][col+".energy"][iev])): - hit_idx_global_to_local[hit_idx_global] = (icol, ihit) - hit_idx_global += 1 - hit_idx_local_to_global = {v: k for k, v in hit_idx_global_to_local.items()} - hit_feature_matrix = awkward.Record({ - k: awkward.concatenate([hit_feature_matrix[i][k] for i in range(len(hit_feature_matrix))]) for k in hit_feature_matrix[0].fields}) - - #add all edges from genparticle to calohit - calohit_to_gen_weight = calohit_links["CalohitMCTruthLink"]["CalohitMCTruthLink.weight"][iev] - calohit_to_gen_calo_colid = calohit_links["CalohitMCTruthLink#0"]["CalohitMCTruthLink#0.collectionID"][iev] - calohit_to_gen_gen_colid = calohit_links["CalohitMCTruthLink#1"]["CalohitMCTruthLink#1.collectionID"][iev] - calohit_to_gen_calo_idx = calohit_links["CalohitMCTruthLink#0"]["CalohitMCTruthLink#0.index"][iev] - calohit_to_gen_gen_idx = calohit_links["CalohitMCTruthLink#1"]["CalohitMCTruthLink#1.index"][iev] - genparticle_to_hit_matrix_coo0 = [] - genparticle_to_hit_matrix_coo1 = [] - genparticle_to_hit_matrix_w = [] - for calo_colid, calo_idx, gen_colid, gen_idx, w in zip(calohit_to_gen_calo_colid, calohit_to_gen_calo_idx, calohit_to_gen_gen_colid, calohit_to_gen_gen_idx, calohit_to_gen_weight): - genparticle_to_hit_matrix_coo0.append(gen_idx) - genparticle_to_hit_matrix_coo1.append(hit_idx_local_to_global[(calo_colid, calo_idx)]) - genparticle_to_hit_matrix_w.append(w) - - return hit_feature_matrix, (genparticle_to_hit_matrix_coo0, genparticle_to_hit_matrix_coo1, genparticle_to_hit_matrix_w), hit_idx_local_to_global - -def hit_cluster_adj(prop_data, hit_idx_local_to_global, iev): - coll_arr = prop_data["PandoraClusters#1"]["PandoraClusters#1.collectionID"][iev] - idx_arr = prop_data["PandoraClusters#1"]["PandoraClusters#1.index"][iev] - hits_begin = prop_data["PandoraClusters"]["PandoraClusters.hits_begin"][iev] - hits_end = prop_data["PandoraClusters"]["PandoraClusters.hits_end"][iev] - - #index in the array of all hits - hit_to_cluster_matrix_coo0 = [] - #index in the cluster array - hit_to_cluster_matrix_coo1 = [] - - #weight - 
hit_to_cluster_matrix_w = [] - - #loop over all clusters - for icluster in range(len(hits_begin)): - - #get the slice in the hit array corresponding to this cluster - hbeg = hits_begin[icluster] - hend = hits_end[icluster] - idx_range = idx_arr[hbeg:hend] - coll_range = coll_arr[hbeg:hend] - - #add edges from hit to cluster - for icol, idx in zip(coll_range, idx_range): - hit_to_cluster_matrix_coo0.append(hit_idx_local_to_global[(icol, idx)]) - hit_to_cluster_matrix_coo1.append(icluster) - hit_to_cluster_matrix_w.append(1.0) - return np.array(hit_to_cluster_matrix_coo0), np.array(hit_to_cluster_matrix_coo1), np.array(hit_to_cluster_matrix_w) - -def gen_to_features(prop_data, iev): - gen_arr = prop_data[mc_coll][iev] - gen_arr = {k.replace(mc_coll+".", ""): gen_arr[k] for k in gen_arr.fields} - - MCParticles_p4 = vector.awk(awkward.zip({ - "mass": gen_arr["mass"], - "x": gen_arr["momentum.x"], - "y": gen_arr["momentum.y"], - "z": gen_arr["momentum.z"]})) - gen_arr["pt"] = MCParticles_p4.pt - gen_arr["eta"] = MCParticles_p4.eta - gen_arr["phi"] = MCParticles_p4.phi - gen_arr["energy"] = MCParticles_p4.energy - - return awkward.Record({ - "PDG": gen_arr["PDG"], - "generatorStatus": gen_arr["generatorStatus"], - "charge": gen_arr["charge"], - "pt": gen_arr["pt"], - "eta": gen_arr["eta"], - "phi": gen_arr["phi"], - "sin_phi": np.sin(gen_arr["phi"]), - "cos_phi": np.cos(gen_arr["phi"]), - "energy": gen_arr["energy"], - }) - -def genparticle_track_adj(sitrack_links, iev): - trk_to_gen_trkidx = sitrack_links["SiTracksMCTruthLink#0"]["SiTracksMCTruthLink#0.index"][iev] - trk_to_gen_genidx = sitrack_links["SiTracksMCTruthLink#1"]["SiTracksMCTruthLink#1.index"][iev] - trk_to_gen_w = sitrack_links["SiTracksMCTruthLink"]["SiTracksMCTruthLink.weight"][iev] - - genparticle_to_track_matrix_coo0 = awkward.to_numpy(trk_to_gen_genidx) - genparticle_to_track_matrix_coo1 = awkward.to_numpy(trk_to_gen_trkidx) - genparticle_to_track_matrix_w = awkward.to_numpy(trk_to_gen_w) - - return genparticle_to_track_matrix_coo0, genparticle_to_track_matrix_coo1, genparticle_to_track_matrix_w - - -def track_to_features(prop_data, iev): - track_arr = prop_data[track_coll][iev] - feats_from_track = ["type", "chi2", "ndf", "dEdx", "dEdxError", "radiusOfInnermostHit"] - ret = {feat: track_arr[track_coll + "." + feat] for feat in feats_from_track} - n_tr = len(ret["type"]) - - #FIXME: add additional track features from track state - - #get the index of the first track state - trackstate_idx = prop_data[track_coll][track_coll + ".trackStates_begin"][iev] - #get the properties of the track at the first track state (at the origin) - for k in ["tanLambda", "D0", "phi", "omega", "Z0", "time", "referencePoint.x", "referencePoint.y", "referencePoint.z"]: - ret[k] = prop_data["SiTracks_1"]["SiTracks_1." 
+ k][iev][trackstate_idx] - - ret["pt"] = track_pt(ret["omega"]) - ret["px"] = np.cos(ret["phi"]) * ret["pt"] - ret["py"] = np.sin(ret["phi"]) * ret["pt"] - ret["pz"] = ret["tanLambda"] * ret["pt"] - ret["p"] = np.sqrt(ret["px"]**2 + ret["py"]**2 + ret["pz"]**2) - cos_theta = np.divide(ret["pz"], ret["p"], where=ret["p"]>0) - theta = np.arccos(cos_theta) - tt = np.tan(theta / 2.0) - eta = awkward.to_numpy(-np.log(tt, where=tt>0)) - eta[tt<=0] = 0.0 - ret["eta"] = eta - - ret["sin_phi"] = np.sin(ret["phi"]) - ret["cos_phi"] = np.cos(ret["phi"]) - - #override track type with 1 - ret["elemtype"] = 1*np.ones(n_tr, dtype=np.int32) - - return awkward.Record(ret) - -def filter_adj(adj, all_to_filtered): - i0s_new = [] - i1s_new = [] - ws_new = [] - for i0, i1, w in zip(*adj): - if i0 in all_to_filtered: - i0_new = all_to_filtered[i0] - i0s_new.append(i0_new) - i1s_new.append(i1) - ws_new.append(w) - return np.array(i0s_new), np.array(i1s_new), np.array(ws_new) - -def get_genparticles_and_adjacencies(prop_data, hit_data, calohit_links, sitrack_links, iev, collectionIDs): - gen_features = gen_to_features(prop_data, iev) - hit_features, genparticle_to_hit, hit_idx_local_to_global = get_calohit_matrix_and_genadj(hit_data, calohit_links, iev, collectionIDs) - hit_to_cluster = hit_cluster_adj(prop_data, hit_idx_local_to_global, iev) - track_features = track_to_features(prop_data, iev) - genparticle_to_track = genparticle_track_adj(sitrack_links, iev) - - n_gp = awkward.count(gen_features["PDG"]) - n_track = awkward.count(track_features["type"]) - n_hit = awkward.count(hit_features["type"]) - - if len(genparticle_to_track[0])>0: - gp_to_track = np.array(coo_matrix( - (genparticle_to_track[2], - (genparticle_to_track[0], genparticle_to_track[1])), - shape=(n_gp, n_track) - ).max(axis=1).todense())[:, 0] - else: - gp_to_track = np.zeros(n_gp) - - if len(genparticle_to_hit[0])>0: - gp_to_calohit = np.array(coo_matrix( - (genparticle_to_hit[2], - (genparticle_to_hit[0], genparticle_to_hit[1])), - shape=(n_gp, n_hit) - ).max(axis=1).todense())[:, 0] - else: - gp_to_calohit = np.zeros(n_gp) - - #60% of the hits of a track must come from the genparticle - gp_in_tracker = gp_to_track>=0.6 - - #the particle should deposit energy to some calo hit - gp_in_calo = gp_to_calohit>0.0 - - gp_interacted_with_detector = gp_in_tracker | gp_in_calo - - #get status 1 particles that are not neutrinos - #and have energy > 100 MeV - mask_visible = ( - (gen_features["generatorStatus"]==1) & - (gen_features["PDG"]!=12) & - (gen_features["PDG"]!=14) & - (gen_features["PDG"]!=16) & - (gen_features["energy"]>0.1) & - gp_interacted_with_detector - ) - idx_all_masked = np.where(mask_visible)[0] - genpart_idx_all_to_filtered = {idx_all: idx_filtered for idx_filtered, idx_all in enumerate(idx_all_masked)} - - gen_features = awkward.Record({ - feat: gen_features[feat][mask_visible] for feat in gen_features.fields - }) - - genparticle_to_hit = filter_adj(genparticle_to_hit, genpart_idx_all_to_filtered) - genparticle_to_track = filter_adj(genparticle_to_track, genpart_idx_all_to_filtered) - - return EventData( - gen_features, - hit_features, - track_features, - genparticle_to_hit, - genparticle_to_track, - hit_to_cluster - ) -def assign_genparticles_to_obj_and_merge(gpdata): +def assign_genparticles_to_obj(gpdata): n_gp = awkward.count(gpdata.gen_features["PDG"]) n_track = awkward.count(gpdata.track_features["type"]) n_hit = awkward.count(gpdata.hit_features["type"]) - gp_to_track = np.array(coo_matrix( - (gpdata.genparticle_to_track[2], 
- (gpdata.genparticle_to_track[0], gpdata.genparticle_to_track[1])), - shape=(n_gp, n_track) - ).todense()) + gp_to_track = np.array( + coo_matrix( + (gpdata.genparticle_to_track[2], (gpdata.genparticle_to_track[0], gpdata.genparticle_to_track[1])), + shape=(n_gp, n_track), + ).todense() + ) - gp_to_calohit = np.array(coo_matrix( - (gpdata.genparticle_to_hit[2], - (gpdata.genparticle_to_hit[0], gpdata.genparticle_to_hit[1])), - shape=(n_gp, n_hit) - ).todense()) + gp_to_calohit = np.array( + coo_matrix( + (gpdata.genparticle_to_hit[2], (gpdata.genparticle_to_hit[0], gpdata.genparticle_to_hit[1])), shape=(n_gp, n_hit) + ).todense() + ) - #map each genparticle to a track or calohit - gp_to_obj = -1*np.ones((n_gp, 2), dtype=np.int32) + # map each genparticle to a track or calohit + gp_to_obj = -1 * np.ones((n_gp, 2), dtype=np.int32) set_used_tracks = set([]) set_used_calohits = set([]) gps_sorted_energy = sorted(range(n_gp), key=lambda x: gpdata.gen_features["energy"][x], reverse=True) for igp in gps_sorted_energy: - #first check if we can match the genparticle to a track + # first check if we can match the genparticle to a track matched_tracks = gp_to_track[igp] trks = np.where(matched_tracks)[0] trks = sorted(trks, key=lambda x: matched_tracks[x], reverse=True) for trk in trks: - #if the track was not already used for something else + # if the track was not already used for something else if trk not in set_used_tracks: gp_to_obj[igp, 0] = trk set_used_tracks.add(trk) break - #if there was no matched track, try a calohit + # if there was no matched track, try a calohit if gp_to_obj[igp, 0] == -1: matched_calohits = np.where(gp_to_calohit[igp])[0] calohits = sorted(matched_calohits, key=lambda x: gp_to_calohit[igp, x], reverse=True) @@ -417,114 +109,86 @@ def assign_genparticles_to_obj_and_merge(gpdata): set_used_calohits.add(calohit) break - unmatched = (gp_to_obj[:, 0]!=-1) & (gp_to_obj[:, 1]!=-1) + # unmatched = (gp_to_obj[:, 0] != -1) & (gp_to_obj[:, 1] != -1) return gp_to_obj -#for each PF element (track, cluster), get the index of the best-matched particle (gen or reco) -#if the PF element has no best-matched particle, returns -1 -def assign_to_recoobj(n_obj, obj_to_ptcl, used_particles): - obj_to_ptcl_all = -1 * np.ones(n_obj, dtype=np.int64) - for iobj in range(n_obj): - if iobj in obj_to_ptcl: - iptcl = obj_to_ptcl[iobj] - obj_to_ptcl_all[iobj] = iptcl - assert(used_particles[iptcl] == 0) - used_particles[iptcl] = 1 - return obj_to_ptcl_all - def get_recoptcl_to_obj(n_rps, reco_arr, gpdata, idx_rp_to_track, idx_rp_to_cluster): track_to_rp = {} calohit_to_rp = {} for irp in range(n_rps): assigned = False + + # get the tracks of the reco particle trks_begin = reco_arr["tracks_begin"][irp] trks_end = reco_arr["tracks_end"][irp] for itrk in range(trks_begin, trks_end): + + # get the index of the track itrk_real = idx_rp_to_track[itrk] - assert(itrk_real not in track_to_rp) + assert itrk_real not in track_to_rp track_to_rp[itrk_real] = irp assigned = True - #only look for calohits if tracks were not found + # only look for calohits if tracks were not found if not assigned: + + # loop over clusters of the reco particle cls_begin = reco_arr["clusters_begin"][irp] cls_end = reco_arr["clusters_end"][irp] for icls in range(cls_begin, cls_end): + + # get the index of the cluster icls_real = idx_rp_to_cluster[icls] - #find hits of the cluster - calohit_inds = gpdata.hit_to_cluster[0][gpdata.hit_to_cluster[1]==icls_real] + # find hits of the cluster + calohit_inds = 
gpdata.hit_to_cluster[0][gpdata.hit_to_cluster[1] == icls_real] - #get the highest-energy hit + # get the highest-energy hit calohits_e_ascending = np.argsort(gpdata.hit_features["energy"][calohit_inds]) highest_e_hit = calohit_inds[calohits_e_ascending[-1]] - assert(highest_e_hit not in calohit_to_rp) + assert highest_e_hit not in calohit_to_rp calohit_to_rp[highest_e_hit] = irp assigned = True break return track_to_rp, calohit_to_rp -def get_reco_properties(prop_data, iev): - reco_arr = prop_data["MergedRecoParticles"][iev] - reco_arr = {k.replace("MergedRecoParticles.", ""): reco_arr[k] for k in reco_arr.fields} - - reco_p4 = vector.awk(awkward.zip({ - "mass": reco_arr["mass"], - "x": reco_arr["momentum.x"], - "y": reco_arr["momentum.y"], - "z": reco_arr["momentum.z"]})) - reco_arr["pt"] = reco_p4.pt - reco_arr["eta"] = reco_p4.eta - reco_arr["phi"] = reco_p4.phi - reco_arr["energy"] = reco_p4.energy - - msk = reco_arr["type"]!=0 - reco_arr = awkward.Record({k: reco_arr[k][msk] for k in reco_arr.keys()}) - return reco_arr - -def get_particle_feature_matrix(pfelem_to_particle, feature_dict, features): - feats = [] - for feat in features: - feat_arr = feature_dict[feat] - if len(feat_arr)==0: - feat_arr_reordered = feat_arr - else: - feat_arr_reordered = awkward.to_numpy(feat_arr[pfelem_to_particle]) - feat_arr_reordered[pfelem_to_particle==-1] = 0.0 - feats.append(feat_arr_reordered) - feats = np.array(feats) - return feats.T - -def get_feature_matrix(feature_dict, features): - feats = [] - for feat in features: - feat_arr = awkward.to_numpy(feature_dict[feat]) - feats.append(feat_arr) - feats = np.array(feats) - return feats.T def process_one_file(fn, ofn): - #output exists, do not recreate + # output exists, do not recreate if os.path.isfile(ofn): return print(fn) fi = uproot.open(fn) - + arrs = fi["events"] - - collectionIDs = {k: v for k, v in - zip(fi.get("metadata").arrays("CollectionIDs")["CollectionIDs"]["m_names"][0], - fi.get("metadata").arrays("CollectionIDs")["CollectionIDs"]["m_collectionIDs"][0])} - collectionIDs_reverse = {v: k for k, v in collectionIDs.items()} - - prop_data = arrs.arrays([mc_coll, track_coll, "SiTracks_1", "PandoraClusters", "PandoraClusters#1", "PandoraClusters#0", "MergedRecoParticles"]) + + collectionIDs = { + k: v + for k, v in zip( + fi.get("metadata").arrays("CollectionIDs")["CollectionIDs"]["m_names"][0], + fi.get("metadata").arrays("CollectionIDs")["CollectionIDs"]["m_collectionIDs"][0], + ) + } + + prop_data = arrs.arrays( + [ + mc_coll, + track_coll, + "SiTracks_1", + "PandoraClusters", + "PandoraClusters#1", + "PandoraClusters#0", + "MergedRecoParticles", + ] + ) calohit_links = arrs.arrays(["CalohitMCTruthLink", "CalohitMCTruthLink#0", "CalohitMCTruthLink#1"]) sitrack_links = arrs.arrays(["SiTracksMCTruthLink", "SiTracksMCTruthLink#0", "SiTracksMCTruthLink#1"]) - #maps the recoparticle track/cluster index (in tracks_begin,end and clusters_begin,end) to the index in the track/cluster collection + # maps the recoparticle track/cluster index (in tracks_begin,end and clusters_begin,end) + # to the index in the track/cluster collection idx_rp_to_cluster = arrs["MergedRecoParticles#0/MergedRecoParticles#0.index"].array() idx_rp_to_track = arrs["MergedRecoParticles#1/MergedRecoParticles#1.index"].array() @@ -539,107 +203,86 @@ def process_one_file(fn, ofn): } ret = [] - ret_unused_pt = [] for iev in range(arrs.num_entries): - #get the reco particles + # get the reco particles reco_arr = get_reco_properties(prop_data, iev) reco_type = 
np.abs(reco_arr["type"]) n_rps = len(reco_type) - reco_features = awkward.Record({ - "PDG": np.abs(reco_type), - "charge": reco_arr["charge"], - "pt": reco_arr["pt"], - "eta": reco_arr["eta"], - "sin_phi": np.sin(reco_arr["phi"]), - "cos_phi": np.cos(reco_arr["phi"]), - "energy": reco_arr["energy"] - }) - - #get the genparticles and the links between genparticles and tracks/clusters + reco_features = awkward.Record( + { + "PDG": np.abs(reco_type), + "charge": reco_arr["charge"], + "pt": reco_arr["pt"], + "eta": reco_arr["eta"], + "sin_phi": np.sin(reco_arr["phi"]), + "cos_phi": np.cos(reco_arr["phi"]), + "energy": reco_arr["energy"], + } + ) + + # get the genparticles and the links between genparticles and tracks/clusters gpdata = get_genparticles_and_adjacencies(prop_data, hit_data, calohit_links, sitrack_links, iev, collectionIDs) - #find the reconstructable genparticles and associate them to the best track/cluster - gp_to_obj = assign_genparticles_to_obj_and_merge(gpdata) + # find the reconstructable genparticles and associate them to the best track/cluster + gp_to_obj = assign_genparticles_to_obj(gpdata) n_tracks = len(gpdata.track_features["type"]) n_hits = len(gpdata.hit_features["type"]) n_gps = len(gpdata.gen_features["PDG"]) + print("hits={} tracks={} gps={}".format(n_hits, n_tracks, n_gps)) + + assert len(gp_to_obj) == len(gpdata.gen_features["PDG"]) + assert gp_to_obj.shape[1] == 2 - assert(len(gp_to_obj) == len(gpdata.gen_features["PDG"])) - assert(gp_to_obj.shape[1] == 2) - - #for each reco particle, find the tracks and clusters associated with it - #construct track/cluster -> recoparticle maps + # for each reco particle, find the tracks and clusters associated with it + # construct track/cluster -> recoparticle maps track_to_rp, hit_to_rp = get_recoptcl_to_obj(n_rps, reco_arr, gpdata, idx_rp_to_track[iev], idx_rp_to_cluster[iev]) - #get the track/cluster -> genparticle map + # get the track/cluster -> genparticle map track_to_gp = {itrk: igp for igp, itrk in enumerate(gp_to_obj[:, 0]) if itrk != -1} hit_to_gp = {ihit: igp for igp, ihit in enumerate(gp_to_obj[:, 1]) if ihit != -1} + # keep track if all genparticles were used used_gps = np.zeros(n_gps, dtype=np.int64) + + # assign all track-associated genparticles to a track track_to_gp_all = assign_to_recoobj(n_tracks, track_to_gp, used_gps) + + # assign all calohit-associated genparticles to a calohit hit_to_gp_all = assign_to_recoobj(n_hits, hit_to_gp, used_gps) - if not np.all(used_gps==1): - print("unmatched gen", gpdata.gen_features["energy"][used_gps==0]) - #assert(np.all(used_gps == 1)) + if not np.all(used_gps == 1): + print("unmatched gen", gpdata.gen_features["energy"][used_gps == 0]) used_rps = np.zeros(n_rps, dtype=np.int64) track_to_rp_all = assign_to_recoobj(n_tracks, track_to_rp, used_rps) hit_to_rp_all = assign_to_recoobj(n_hits, hit_to_rp, used_rps) - if not np.all(used_rps==1): - print("unmatched reco", reco_features["energy"][used_rps==0]) - #assert(np.all(used_rps == 1)) - - gps_track = get_particle_feature_matrix( - track_to_gp_all, - gpdata.gen_features, - particle_feature_order - ) - gps_track[:, 0] = np.array([ - map_neutral_to_charged(map_pdgid_to_candid(p, c)) for p, c in zip(gps_track[:, 0], gps_track[:, 1])] - ) - gps_hit = get_particle_feature_matrix( - hit_to_gp_all, - gpdata.gen_features, - particle_feature_order + if not np.all(used_rps == 1): + print("unmatched reco", reco_features["energy"][used_rps == 0]) + + gps_track = get_particle_feature_matrix(track_to_gp_all, gpdata.gen_features, 
particle_feature_order) + gps_track[:, 0] = np.array( + [map_neutral_to_charged(map_pdgid_to_candid(p, c)) for p, c in zip(gps_track[:, 0], gps_track[:, 1])] ) - gps_hit[:, 0] = np.array([ - map_charged_to_neutral(map_pdgid_to_candid(p, c)) for p, c in zip(gps_hit[:, 0], gps_hit[:, 1])] + gps_hit = get_particle_feature_matrix(hit_to_gp_all, gpdata.gen_features, particle_feature_order) + gps_hit[:, 0] = np.array( + [map_charged_to_neutral(map_pdgid_to_candid(p, c)) for p, c in zip(gps_hit[:, 0], gps_hit[:, 1])] ) gps_hit[:, 1] = 0 - rps_track = get_particle_feature_matrix( - track_to_rp_all, - reco_features, - particle_feature_order - ) - rps_track[:, 0] = np.array([ - map_neutral_to_charged(map_pdgid_to_candid(p, c)) for p, c in zip(rps_track[:, 0], rps_track[:, 1])] - ) - rps_hit = get_particle_feature_matrix( - hit_to_rp_all, - reco_features, - particle_feature_order + rps_track = get_particle_feature_matrix(track_to_rp_all, reco_features, particle_feature_order) + rps_track[:, 0] = np.array( + [map_neutral_to_charged(map_pdgid_to_candid(p, c)) for p, c in zip(rps_track[:, 0], rps_track[:, 1])] ) - rps_hit[:, 0] = np.array([ - map_charged_to_neutral(map_pdgid_to_candid(p, c)) for p, c in zip(rps_hit[:, 0], rps_hit[:, 1])] + rps_hit = get_particle_feature_matrix(hit_to_rp_all, reco_features, particle_feature_order) + rps_hit[:, 0] = np.array( + [map_charged_to_neutral(map_pdgid_to_candid(p, c)) for p, c in zip(rps_hit[:, 0], rps_hit[:, 1])] ) rps_hit[:, 1] = 0 - #all initial gen/reco particle energy must be reconstructable - #assert(abs( - # np.sum(gps_track[:, 6]) + np.sum(gps_hit[:, 6]) - np.sum(gpdata.gen_features["energy"]) - # ) < 1e-2) - - #assert(abs( - # np.sum(rps_track[:, 6]) + np.sum(rps_hit[:, 6]) - np.sum(reco_features["energy"]) - # ) < 1e-2) - - - #we don't want to try to reconstruct charged particles from primary clusters, make sure the charge is 0 - assert(np.all(gps_hit[:, 1] == 0)) - assert(np.all(rps_hit[:, 1] == 0)) + # we don't want to try to reconstruct charged particles from primary clusters, make sure the charge is 0 + assert np.all(gps_hit[:, 1] == 0) + assert np.all(rps_hit[:, 1] == 0) X_track = get_feature_matrix(gpdata.track_features, track_feature_order) X_hit = get_feature_matrix(gpdata.hit_features, hit_feature_order) @@ -663,34 +306,26 @@ def process_one_file(fn, ofn): "ycand_track": ycand_track, "ycand_hit": ycand_hit, } - if np.sum(used_gps==0)>0: - ret_unused_pt.append(awkward.to_numpy(gpdata.gen_features["pt"][used_gps==0])) - else: - ret_unused_pt.append(np.array([], dtype=np.float32)) this_ev = awkward.Record(this_ev) - ret.append(this_ev) ret = {k: awkward.from_iter([r[k] for r in ret]) for k in ret[0].fields} - - ntot = sum([len(x) for x in ret_unused_pt]) - if ntot>0: - ret["ygen_unused_pt"] = awkward.from_iter(ret_unused_pt) - else: - ret["ygen_unused_pt"] = build_dummy_array(len(ret_unused_pt), dtype=np.float32) + for k in ret.keys(): + if len(awkward.flatten(ret[k])) == 0: + ret[k] = build_dummy_array(len(ret[k]), np.float32) ret = awkward.Record(ret) - awkward.to_parquet(ret, ofn) + def process_sample(samp): - inp = "/media/joosep/data/clic_edm4hep_2023_02_27/" - outp = "/media/joosep/data/mlpf_hits/clic_edm4hep_2023_02_27/" + inp = "/local/joosep/clic_edm4hep/" + outp = "/local/joosep/mlpf_hits/clic_edm4hep/" - pool = multiprocessing.Pool(15) + pool = multiprocessing.Pool(8) inpath_samp = inp + samp outpath_samp = outp + samp - infiles = list(glob.glob(inpath_samp + "/*.root"))[:10000] + infiles = list(glob.glob(inpath_samp + "/*.root")) if 
not os.path.isdir(outpath_samp): os.makedirs(outpath_samp) @@ -700,6 +335,7 @@ def process_sample(samp): args.append((inf, of)) pool.starmap(process_one_file, args) + if __name__ == "__main__": if len(sys.argv) == 2: process_sample(sys.argv[1]) diff --git a/fcc/run_pandora_timing.sh b/fcc/run_pandora_timing.sh new file mode 100755 index 000000000..670ce52cc --- /dev/null +++ b/fcc/run_pandora_timing.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +SLURM_JOB_ID=1 ./run_sim_gun_np.sh 1 pi- 100 &> gun_np_100_1.txt +for iseed in 6 7; do + for nptcl in 25 50 100 200; do + SLURM_JOB_ID=$iseed ./run_sim_gun_np.sh $iseed pi- $nptcl &> gun_np_${nptcl}_${iseed}.txt + done +done diff --git a/fcc/run_sim.sh b/fcc/run_sim.sh index 321c3374c..913f2866b 100755 --- a/fcc/run_sim.sh +++ b/fcc/run_sim.sh @@ -41,8 +41,8 @@ source /cvmfs/sw.hsf.org/spackages6/key4hep-stack/2023-01-15/x86_64-centos7-gcc1 k4run $PFDIR/fcc/pythia.py -n $NEV --Dumper.Filename out.hepmc --Pythia8.PythiaInterface.pythiacard card.cmd ddsim --compactFile $LCGEO/CLIC/compact/CLIC_o3_v14/CLIC_o3_v14.xml \ - --outputFile out_sim_edm4hep.root \ --steeringFile clic_steer.py \ + --outputFile out_sim_edm4hep.root \ --inputFiles out.hepmc \ --numberOfEvents $NEV \ --random.seed $NUM diff --git a/fcc/run_sim_gun.sh b/fcc/run_sim_gun.sh new file mode 100755 index 000000000..986a39721 --- /dev/null +++ b/fcc/run_sim_gun.sh @@ -0,0 +1,54 @@ +#!/bin/bash +#SBATCH -p main +#SBATCH --mem-per-cpu=35G +#SBATCH --cpus-per-task=1 +#SBATCH -o logs/slurm-%x-%j-%N.out +#SBATCH --no-requeue +set -e +set -x + +env +df -h + +OUTDIR=/local/joosep/clic_edm4hep_gun/ +PFDIR=/home/joosep/particleflow +NEV=100 + +NUM=$1 #random seed +SAMPLE=$2 #main card + + +WORKDIR=/scratch/local/$USER/${SAMPLE}_${SLURM_JOB_ID} +FULLOUTDIR=${OUTDIR}/${SAMPLE} + +mkdir -p $FULLOUTDIR + +mkdir -p $WORKDIR +cd $WORKDIR + +#cp $PFDIR/fcc/main ./ +cp $PFDIR/fcc/pythia.py ./ +cp $PFDIR/fcc/clic_steer.py ./ +cp -R $PFDIR/fcc/PandoraSettings ./ +cp -R $PFDIR/fcc/clicRec_e4h_input.py ./ + +#without PU +source /cvmfs/sw.hsf.org/spackages6/key4hep-stack/2023-01-15/x86_64-centos7-gcc11.2.0-opt/csapx/setup.sh + +ddsim --compactFile $LCGEO/CLIC/compact/CLIC_o3_v14/CLIC_o3_v14.xml \ + --steeringFile clic_steer.py \ + --enableGun \ + --gun.distribution uniform \ + --gun.particle $SAMPLE \ + --gun.momentumMin 1*GeV \ + --gun.momentumMax 100*GeV \ + --outputFile out_sim_edm4hep.root \ + --numberOfEvents $NEV \ + --random.seed $NUM +cp out_sim_edm4hep.root $FULLOUTDIR/sim_${SAMPLE}_${NUM}.root + +k4run clicRec_e4h_input.py -n $NEV --EventDataSvc.input out_sim_edm4hep.root --PodioOutput.filename out_reco_edm4hep.root +cp out_reco_edm4hep.root $FULLOUTDIR/reco_${SAMPLE}_${NUM}.root +cp timing_histos.root $FULLOUTDIR/timing_${SAMPLE}_${NUM}.root + +rm -Rf $WORKDIR diff --git a/fcc/run_sim_gun_np.sh b/fcc/run_sim_gun_np.sh new file mode 100755 index 000000000..f26aaf750 --- /dev/null +++ b/fcc/run_sim_gun_np.sh @@ -0,0 +1,56 @@ +#!/bin/bash +#SBATCH -p main +#SBATCH --mem-per-cpu=35G +#SBATCH --cpus-per-task=1 +#SBATCH -o logs/slurm-%x-%j-%N.out +#SBATCH --no-requeue +set -e +set -x + +env +df -h + +PFDIR=/home/joosep/particleflow +NEV=10 + +NUM=$1 #random seed +SAMPLE=$2 #main card +NUMPART=$3 #number of particles + +OUTDIR=/home/joosep/clic_edm4hep_gun_np$NUMPART/ + +WORKDIR=/scratch/$USER/${SAMPLE}_${SLURM_JOB_ID} +FULLOUTDIR=${OUTDIR}/${SAMPLE} + +mkdir -p $FULLOUTDIR + +mkdir -p $WORKDIR +cd $WORKDIR + +#cp $PFDIR/fcc/main ./ +cp $PFDIR/fcc/pythia.py ./ +cp $PFDIR/fcc/clic_steer.py ./ +cp -R 
$PFDIR/fcc/PandoraSettings ./ +cp -R $PFDIR/fcc/clicRec_e4h_input.py ./ + +#without PU +source /cvmfs/sw.hsf.org/spackages6/key4hep-stack/2023-01-15/x86_64-centos7-gcc11.2.0-opt/csapx/setup.sh + +ddsim --compactFile $LCGEO/CLIC/compact/CLIC_o3_v14/CLIC_o3_v14.xml \ + --steeringFile clic_steer.py \ + --enableGun \ + --gun.distribution uniform \ + --gun.multiplicity $NUMPART \ + --gun.particle $SAMPLE \ + --gun.momentumMin 1*GeV \ + --gun.momentumMax 100*GeV \ + --outputFile out_sim_edm4hep.root \ + --numberOfEvents $NEV \ + --random.seed $NUM +cp out_sim_edm4hep.root $FULLOUTDIR/sim_${SAMPLE}_${NUM}.root + +k4run clicRec_e4h_input.py -n $NEV --EventDataSvc.input out_sim_edm4hep.root --PodioOutput.filename out_reco_edm4hep.root +cp out_reco_edm4hep.root $FULLOUTDIR/reco_${SAMPLE}_${NUM}.root +cp timing_histos.root $FULLOUTDIR/timing_${SAMPLE}_${NUM}.root + +rm -Rf $WORKDIR diff --git a/mlpf/customizations.py b/mlpf/customizations.py index 9e9188af1..b0234ea95 100644 --- a/mlpf/customizations.py +++ b/mlpf/customizations.py @@ -19,10 +19,18 @@ def customize_pipeline_test(config): if "clic_edm_ttbar_pf" in config["datasets"]: config["train_test_datasets"]["physical"]["datasets"] = ["clic_edm_ttbar_pf"] config["train_test_datasets"] = {"physical": config["train_test_datasets"]["physical"]} - config["train_test_datasets"]["physical"]["batch_per_gpu"] = 50 + config["train_test_datasets"]["physical"]["batch_per_gpu"] = 5 config["validation_dataset"] = "clic_edm_ttbar_pf" - config["validation_batch_size"] = 50 - config["evaluation_datasets"] = {"clic_edm_ttbar_pf": {"batch_size": 50, "num_events": -1}} + config["validation_batch_size"] = 5 + config["evaluation_datasets"] = {"clic_edm_ttbar_pf": {"batch_size": 5, "num_events": -1}} + + if "clic_edm_ttbar_hits_pf" in config["datasets"]: + config["train_test_datasets"]["physical"]["datasets"] = ["clic_edm_ttbar_hits_pf"] + config["train_test_datasets"] = {"physical": config["train_test_datasets"]["physical"]} + config["train_test_datasets"]["physical"]["batch_per_gpu"] = 1 + config["validation_dataset"] = "clic_edm_ttbar_hits_pf" + config["validation_batch_size"] = 1 + config["evaluation_datasets"] = {"clic_edm_ttbar_hits_pf": {"batch_size": 1, "num_events": -1}} # validate only on a small number of events config["validation_num_events"] = config["validation_batch_size"] * 2 diff --git a/mlpf/heptfds/clic_pf_edm4hep/qq.py b/mlpf/heptfds/clic_pf_edm4hep/qq.py index c074df8f9..fbe2aa345 100644 --- a/mlpf/heptfds/clic_pf_edm4hep/qq.py +++ b/mlpf/heptfds/clic_pf_edm4hep/qq.py @@ -20,13 +20,14 @@ class ClicEdmQqPf(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("1.3.1") + VERSION = tfds.core.Version("1.4.0") RELEASE_NOTES = { "1.0.0": "Initial release.", "1.1.0": "update stats, move to 380 GeV", "1.2.0": "sin cos as separate features", "1.3.0": "Update stats to ~1M events", "1.3.1": "Update stats to ~2M events", + "1.4.0": "Fix ycand matching", } MANUAL_DOWNLOAD_INSTRUCTIONS = """ rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/clic_edm4hep_2023_02_27/ ./ diff --git a/mlpf/heptfds/clic_pf_edm4hep/ttbar.py b/mlpf/heptfds/clic_pf_edm4hep/ttbar.py index 0e0fad827..09fb0c9fb 100644 --- a/mlpf/heptfds/clic_pf_edm4hep/ttbar.py +++ b/mlpf/heptfds/clic_pf_edm4hep/ttbar.py @@ -20,12 +20,13 @@ class ClicEdmTtbarPf(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("1.3.0") + VERSION = tfds.core.Version("1.4.0") RELEASE_NOTES = { "1.0.0": "Initial release.", "1.1.0": "update stats, move to 380 GeV", "1.2.0": "sin/cos phi 
separately", "1.3.0": "Update stats to ~1M events", + "1.4.0": "Fix ycand matching", } MANUAL_DOWNLOAD_INSTRUCTIONS = """ rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/clic_edm4hep_2023_02_27/ ./ diff --git a/mlpf/heptfds/clic_pf_edm4hep/ttbar_pu10.py b/mlpf/heptfds/clic_pf_edm4hep/ttbar_pu10.py index 215873e58..b5993434a 100644 --- a/mlpf/heptfds/clic_pf_edm4hep/ttbar_pu10.py +++ b/mlpf/heptfds/clic_pf_edm4hep/ttbar_pu10.py @@ -20,9 +20,10 @@ class ClicEdmTtbarPu10Pf(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("1.3.0") + VERSION = tfds.core.Version("1.4.0") RELEASE_NOTES = { "1.3.0": "Update stats to ~1M events", + "1.4.0": "Fix ycand matching", } MANUAL_DOWNLOAD_INSTRUCTIONS = """ rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/clic_edm4hep_2023_02_27/ ./ diff --git a/mlpf/heptfds/clic_pf_edm4hep/utils_edm.py b/mlpf/heptfds/clic_pf_edm4hep/utils_edm.py index 39f625f1c..2d07aace0 100644 --- a/mlpf/heptfds/clic_pf_edm4hep/utils_edm.py +++ b/mlpf/heptfds/clic_pf_edm4hep/utils_edm.py @@ -8,7 +8,7 @@ # from fcc/postprocessing.py X_FEATURES_TRK = [ - "type", + "elemtype", "pt", "eta", "sin_phi", @@ -26,7 +26,7 @@ "time", ] X_FEATURES_CL = [ - "type", + "elemtype", "et", "eta", "sin_phi", @@ -64,6 +64,27 @@ def split_sample(path, test_frac=0.8): } +def split_sample_several(paths, test_frac=0.8): + files_train_tot = [] + files_test_tot = [] + for path in paths: + files = sorted(list(path.glob("*.parquet"))) + print("Found {} files in {}".format(files, path)) + assert len(files) > 0 + idx_split = int(test_frac * len(files)) + files_train = files[:idx_split] + files_test = files[idx_split:] + assert len(files_train) > 0 + assert len(files_test) > 0 + files_train_tot.append(files_train) + files_test_tot.append(files_test) + + return { + "train": generate_examples(files_train_tot), + "test": generate_examples(files_test_tot), + } + + def prepare_data_clic(fn, with_jet_idx=True): ret = ak.from_parquet(fn) diff --git a/mlpf/heptfds/clic_pf_edm4hep/ww_fullhad.py b/mlpf/heptfds/clic_pf_edm4hep/ww_fullhad.py index 2e16d860d..b4db98f30 100644 --- a/mlpf/heptfds/clic_pf_edm4hep/ww_fullhad.py +++ b/mlpf/heptfds/clic_pf_edm4hep/ww_fullhad.py @@ -20,9 +20,10 @@ class ClicEdmWwFullhadPf(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("1.3.0") + VERSION = tfds.core.Version("1.4.0") RELEASE_NOTES = { "1.3.0": "Update stats to ~1M events", + "1.4.0": "Fix ycand matching", } MANUAL_DOWNLOAD_INSTRUCTIONS = """ rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/clic_edm4hep_2023_02_27/ ./ diff --git a/mlpf/heptfds/clic_pf_edm4hep/zh.py b/mlpf/heptfds/clic_pf_edm4hep/zh.py index 436e1e1cc..a97ec64ca 100644 --- a/mlpf/heptfds/clic_pf_edm4hep/zh.py +++ b/mlpf/heptfds/clic_pf_edm4hep/zh.py @@ -20,9 +20,10 @@ class ClicEdmZhTautauPf(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("1.3.0") + VERSION = tfds.core.Version("1.4.0") RELEASE_NOTES = { "1.3.0": "First version", + "1.4.0": "Fix ycand matching", } MANUAL_DOWNLOAD_INSTRUCTIONS = """ rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/clic_edm4hep_2023_02_27/ ./ diff --git a/mlpf/heptfds/clic_pf_edm4hep_hits/qq.py b/mlpf/heptfds/clic_pf_edm4hep_hits/qq.py index c1215996e..126da7cff 100644 --- a/mlpf/heptfds/clic_pf_edm4hep_hits/qq.py +++ b/mlpf/heptfds/clic_pf_edm4hep_hits/qq.py @@ -20,10 +20,12 @@ class ClicEdmQqHitsPf(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("0.9.0") + VERSION = tfds.core.Version("1.2.0") RELEASE_NOTES = { "0.9.0": "Small stats", "1.0.0": 
"Initial release", + "1.1.0": "Remove track referencepoint feature", + "1.2.0": "Keep all interacting genparticles", } MANUAL_DOWNLOAD_INSTRUCTIONS = """ FIXME diff --git a/mlpf/heptfds/clic_pf_edm4hep_hits/ttbar.py b/mlpf/heptfds/clic_pf_edm4hep_hits/ttbar.py index 0ba4fd564..21f9be93e 100644 --- a/mlpf/heptfds/clic_pf_edm4hep_hits/ttbar.py +++ b/mlpf/heptfds/clic_pf_edm4hep_hits/ttbar.py @@ -20,10 +20,12 @@ class ClicEdmTtbarHitsPf(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("0.9.0") + VERSION = tfds.core.Version("1.2.0") RELEASE_NOTES = { "0.9.0": "Small stats", "1.0.0": "Initial release", + "1.1.0": "Remove track referencepoint feature", + "1.2.0": "Keep all interacting genparticles", } MANUAL_DOWNLOAD_INSTRUCTIONS = """ FIXME diff --git a/mlpf/heptfds/clic_pf_edm4hep_hits/utils_edm.py b/mlpf/heptfds/clic_pf_edm4hep_hits/utils_edm.py index 3ecad14b4..93e1acc73 100644 --- a/mlpf/heptfds/clic_pf_edm4hep_hits/utils_edm.py +++ b/mlpf/heptfds/clic_pf_edm4hep_hits/utils_edm.py @@ -1,6 +1,7 @@ import awkward as ak import numpy as np import tqdm +import random # from fcc/postprocessing_hits.py X_FEATURES_TRK = [ @@ -16,9 +17,6 @@ "tanLambda", "D0", "omega", - "referencePoint.x", - "referencePoint.y", - "referencePoint.z", "Z0", "time", "type", @@ -37,13 +35,14 @@ "subdetector", "type", ] +X_FEAT_NUM = max(len(X_FEATURES_TRK), len(X_FEATURES_CH)) Y_FEATURES = ["PDG", "charge", "pt", "eta", "sin_phi", "cos_phi", "energy"] labels = [0, 211, 130, 22, 11, 13] def split_sample(path, test_frac=0.8): - files = sorted(list(path.glob("*.parquet")))[:1000] + files = sorted(list(path.glob("*.parquet"))) print("Found {} files in {}".format(len(files), path)) assert len(files) > 0 idx_split = int(test_frac * len(files)) @@ -57,8 +56,23 @@ def split_sample(path, test_frac=0.8): } +def split_sample_several(paths, test_frac=0.8): + files = sum([list(path.glob("*.parquet")) for path in paths], []) + random.shuffle(files) + print("Found {} files".format(len(files))) + assert len(files) > 0 + idx_split = int(test_frac * len(files)) + files_train = files[:idx_split] + files_test = files[idx_split:] + assert len(files_train) > 0 + assert len(files_test) > 0 + return { + "train": generate_examples(files_train), + "test": generate_examples(files_test), + } + + def prepare_data_clic(fn): - print(fn) ret = ak.from_parquet(fn) X_track = ret["X_track"] @@ -75,24 +89,30 @@ def prepare_data_clic(fn): X1 = ak.to_numpy(X_track[iev]) X2 = ak.to_numpy(X_hit[iev]) - if len(X1) == 0 or len(X2) == 0: + if len(X1) == 0 and len(X2) == 0: continue ygen_track = ak.to_numpy(ret["ygen_track"][iev]) ygen_hit = ak.to_numpy(ret["ygen_hit"][iev]) ycand_track = ak.to_numpy(ret["ycand_track"][iev]) ycand_hit = ak.to_numpy(ret["ycand_hit"][iev]) - - if len(ygen_track) == 0 or len(ygen_hit) == 0: + if ygen_track.shape[0] == 0: + ygen_track = np.zeros((0, 7), dtype=np.float32) + if ycand_track.shape[0] == 0: + ycand_track = np.zeros((0, 7), dtype=np.float32) + if ygen_hit.shape[0] == 0: + ygen_hit = np.zeros((0, 7), dtype=np.float32) + if ycand_hit.shape[0] == 0: + ycand_hit = np.zeros((0, 7), dtype=np.float32) + + if len(ygen_track) == 0 and len(ygen_hit) == 0: continue - if len(ycand_track) == 0 or len(ycand_hit) == 0: + if len(ycand_track) == 0 and len(ycand_hit) == 0: continue # pad feature dim between tracks and hits to the same size - if X1.shape[1] < X2.shape[1]: - X1 = np.pad(X1, [[0, 0], [0, X2.shape[1] - X1.shape[1]]]) - if X2.shape[1] < X1.shape[1]: - X2 = np.pad(X2, [[0, 0], [0, X1.shape[1] - X2.shape[1]]]) + X1 
= np.pad(X1, [[0, 0], [0, X_FEAT_NUM - X1.shape[1]]]) + X2 = np.pad(X2, [[0, 0], [0, X_FEAT_NUM - X2.shape[1]]]) # concatenate tracks and hits in features and targets X = np.concatenate([X1, X2]) @@ -106,7 +126,6 @@ def prepare_data_clic(fn): ygen[:, 0][:] = arr[:] arr = np.array([labels.index(p) for p in ycand[:, 0]]) ycand[:, 0][:] = arr[:] - Xs.append(X) ygens.append(ygen) ycands.append(ycand) @@ -115,16 +134,13 @@ def prepare_data_clic(fn): def generate_examples(files): for fi in tqdm.tqdm(files): - try: - Xs, ygens, ycands = prepare_data_clic(fi) - for iev in range(len(Xs)): - yield str(fi) + "_" + str(iev), { - "X": Xs[iev].astype(np.float32), - "ygen": ygens[iev].astype(np.float32), - "ycand": ycands[iev].astype(np.float32), - } - except Exception as e: - print("could not process {}: {}".format(fi, e)) + Xs, ygens, ycands = prepare_data_clic(fi) + for iev in range(len(Xs)): + yield str(fi) + "_" + str(iev), { + "X": Xs[iev].astype(np.float32), + "ygen": ygens[iev].astype(np.float32), + "ycand": ycands[iev].astype(np.float32), + } if __name__ == "__main__": diff --git a/mlpf/lumi/train-gpu-1.sh b/mlpf/lumi/train-gpu-1.sh deleted file mode 100755 index b9bae9c4b..000000000 --- a/mlpf/lumi/train-gpu-1.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=mlpf-train-cms-gen -#SBATCH --account=project_465000301 -#SBATCH --time=24:00:00 -#SBATCH --nodes=1 -#SBATCH --ntasks=1 -#SBATCH --cpus-per-task=16 -#SBATCH --mem=120G -#SBATCH --gres=gpu:mi250:1 -#SBATCH --partition=eap -#SBATCH --no-requeue -#SBATCH -o logs/slurm-%x-%j-%N.out - -IMG=/users/patajoos/tf-rocm.simg -cd ~/particleflow - -#TF training -singularity exec \ - --rocm \ - -B /scratch/project_465000301 \ - --env PYTHONPATH=hep_tfds \ - --env TFDS_DATA_DIR=/scratch/project_465000301/tensorflow_datasets \ - $IMG python3 mlpf/pipeline.py train \ - --config parameters/cms-gen.yaml --plot-freq 1 --num-cpus 16 \ - --batch-multiplier 10 diff --git a/mlpf/lumi/train-gpu-2.sh b/mlpf/lumi/train-gpu-2.sh deleted file mode 100755 index 65fa4c67a..000000000 --- a/mlpf/lumi/train-gpu-2.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=mlpf-train-cms-gen -#SBATCH --account=project_465000301 -#SBATCH --time=24:00:00 -#SBATCH --nodes=1 -#SBATCH --ntasks=1 -#SBATCH --cpus-per-task=16 -#SBATCH --mem=120G -#SBATCH --gres=gpu:mi250:2 -#SBATCH --partition=eap -#SBATCH --no-requeue -#SBATCH -o logs/slurm-%x-%j-%N.out - -IMG=/users/patajoos/tf-rocm.simg -cd ~/particleflow - -#TF training -singularity exec \ - --rocm \ - -B /scratch/project_465000301 \ - --env PYTHONPATH=hep_tfds \ - --env TFDS_DATA_DIR=/scratch/project_465000301/tensorflow_datasets \ - $IMG python3 mlpf/pipeline.py train \ - --config parameters/cms-gen.yaml --plot-freq 1 --num-cpus 16 \ - --batch-multiplier 10 diff --git a/mlpf/lumi/train-gpu-4.sh b/mlpf/lumi/train-gpu-4.sh deleted file mode 100755 index 6f0c08edc..000000000 --- a/mlpf/lumi/train-gpu-4.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=mlpf-train-cms-gen -#SBATCH --account=project_465000301 -#SBATCH --time=24:00:00 -#SBATCH --nodes=1 -#SBATCH --ntasks=1 -#SBATCH --cpus-per-task=16 -#SBATCH --mem=120G -#SBATCH --gres=gpu:mi250:4 -#SBATCH --partition=eap -#SBATCH --no-requeue -#SBATCH -o logs/slurm-%x-%j-%N.out - -IMG=/users/patajoos/tf-rocm.simg -cd ~/particleflow - -#TF training -singularity exec \ - --rocm \ - -B /scratch/project_465000301 \ - --env PYTHONPATH=hep_tfds \ - --env TFDS_DATA_DIR=/scratch/project_465000301/tensorflow_datasets \ - $IMG python3 mlpf/pipeline.py 
train \ - --config parameters/cms-gen.yaml --plot-freq 1 --num-cpus 16 \ - --batch-multiplier 10 diff --git a/mlpf/lumi/train-gpu-clic.sh b/mlpf/lumi/train-gpu-clic.sh deleted file mode 100755 index 6966fd313..000000000 --- a/mlpf/lumi/train-gpu-clic.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=mlpf-train-clic -#SBATCH --account=project_465000301 -#SBATCH --time=24:00:00 -#SBATCH --nodes=1 -#SBATCH --ntasks=1 -#SBATCH --cpus-per-task=16 -#SBATCH --mem=120G -#SBATCH --gres=gpu:mi250:1 -#SBATCH --partition=eap -#SBATCH --no-requeue -#SBATCH -o logs/slurm-%x-%j-%N.out - -IMG=/users/patajoos/tf-rocm.simg -cd ~/particleflow - -#TF training -singularity exec \ - --rocm \ - -B /scratch/project_465000301 \ - --env PYTHONPATH=hep_tfds \ - --env TFDS_DATA_DIR=/scratch/project_465000301/tensorflow_datasets \ - $IMG python3 mlpf/pipeline.py train \ - --config parameters/clic.yaml --plot-freq 1 --num-cpus 16 \ - --batch-multiplier 10 diff --git a/mlpf/lumi/train-gpu-ln-full.sh b/mlpf/lumi/train-gpu-ln-full.sh new file mode 100755 index 000000000..e94eb0fcc --- /dev/null +++ b/mlpf/lumi/train-gpu-ln-full.sh @@ -0,0 +1,47 @@ +#!/bin/bash +#SBATCH --job-name=mlpf-train-clic-hits-ln-full +#SBATCH --account=project_465000301 +#SBATCH --time=3-00:00:00 +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=8 +#SBATCH --mem=130G +#SBATCH --gpus-per-task=8 +#SBATCH --partition=small-g +#SBATCH --no-requeue +#SBATCH -o logs/slurm-%x-%j-%N.out + +cd /scratch/project_465000301/particleflow + +module load LUMI/22.08 partition/G + +export IMG=/scratch/project_465000301/tf-rocm.simg +export PYTHONPATH=hep_tfds +export TFDS_DATA_DIR=/scratch/project_465000301/tensorflow_datasets +#export MIOPEN_DISABLE_CACHE=true +export MIOPEN_USER_DB_PATH=/tmp/${USER}-${SLURM_JOB_ID}-miopen-cache +export MIOPEN_CUSTOM_CACHE_DIR=${MIOPEN_USER_DB_PATH} +export TF_CPP_MAX_VLOG_LEVEL=-1 #to suppress ROCm fusion is enabled messages +#export MIOPEN_ENABLE_LOGGING=1 +#export MIOPEN_ENABLE_LOGGING_CMD=1 +#export MIOPEN_LOG_LEVEL=4 + +#TF training +singularity exec \ + --rocm \ + -B /scratch/project_465000301 \ + -B /tmp \ + --env LD_LIBRARY_PATH=/opt/rocm-5.4.0/lib/ \ + $IMG python3 mlpf/pipeline.py train \ + --config parameters/clic-hits-ln.yaml --plot-freq 1 --num-cpus 8 \ + --batch-multiplier 2 \ + --weights experiments/clic-hits-ln_20230623_090308_368360.nid007329/weights/weights-10-0.163285.hdf5 + +# --env MIOPEN_USER_DB_PATH=$MIPEN_USER_DB_PATH \ +# --env MIOPEN_CUSTOM_CACHE_DIR=$MIOPEN_CUSTOM_CACHE_DIR \ +# --env MIOPEN_ENABLE_LOGGING=1 \ +# --env MIOPEN_ENABLE_LOGGING_CMD=1 \ +# --env MIOPEN_LOG_LEVEL=7 \ +# --env MIOPEN_ENABLE_LOGGING=1 \ +# --env MIOPEN_ENABLE_LOGGING_CMD=1 \ +# --env MIOPEN_LOG_LEVEL=5 \ diff --git a/mlpf/pipeline.py b/mlpf/pipeline.py index 52fcf0c07..3ab0e2e32 100644 --- a/mlpf/pipeline.py +++ b/mlpf/pipeline.py @@ -241,6 +241,9 @@ def train( ds_train, ds_test, ds_val = get_train_test_val_datasets(config, num_batches_multiplier, ntrain, ntest) + ds_train.tensorflow_dataset = ds_train.tensorflow_dataset.prefetch(tf.data.AUTOTUNE) + ds_test.tensorflow_dataset = ds_test.tensorflow_dataset.prefetch(tf.data.AUTOTUNE) + epochs = config["setup"]["num_epochs"] total_steps = ds_train.num_steps() * epochs logging.info("num_train_steps: {}".format(ds_train.num_steps())) @@ -283,9 +286,22 @@ def train( callbacks.append(optim_callbacks) - model.normalizer.adapt(ds_train.tensorflow_dataset.map(lambda X, y, w: X[:, :, 1:])) - print(model.normalizer.mean) - 
print(model.normalizer.variance) + if not os.path.isfile(config["setup"]["normalizer_cache"] + ".npz"): + logging.info( + "Could not find normalizer cache in {}, recreating".format(config["setup"]["normalizer_cache"] + ".npz") + ) + model.normalizer.adapt(ds_train.tensorflow_dataset.map(lambda X, y, w: X[:, :, 1:])) + print(model.normalizer.mean) + print(model.normalizer.variance) + np.savez( + config["setup"]["normalizer_cache"], + mean=model.normalizer.mean.numpy(), + variance=model.normalizer.variance.numpy(), + ) + + cache = np.load(config["setup"]["normalizer_cache"] + ".npz") + model.normalizer.mean = tf.convert_to_tensor(cache["mean"]) + model.normalizer.variance = tf.convert_to_tensor(cache["variance"]) model.fit( ds_train.tensorflow_dataset.repeat(), @@ -336,6 +352,17 @@ def evaluate(config, train_dir, weights, customize, nevents): model, _, initial_epoch = model_scope(config, 1, weights=weights) + print("before loading") + print(model.normalizer.mean) + print(model.normalizer.variance) + + cache = np.load(config["setup"]["normalizer_cache"] + ".npz") + model.normalizer.mean = tf.convert_to_tensor(cache["mean"]) + model.normalizer.variance = tf.convert_to_tensor(cache["variance"]) + print("after loading") + print(model.normalizer.mean) + print(model.normalizer.variance) + for dsname in config["evaluation_datasets"]: val_ds = config["evaluation_datasets"][dsname] ds_test = mlpf_dataset_from_config( diff --git a/mlpf/plotting/plot_utils.py b/mlpf/plotting/plot_utils.py index d0896e1fb..caaa28235 100644 --- a/mlpf/plotting/plot_utils.py +++ b/mlpf/plotting/plot_utils.py @@ -621,7 +621,7 @@ def plot_sum_energy(yvals, class_names, epoch=None, cp_dir=None, comet_experimen plt.xlabel("total energy / event [GeV]") plt.ylabel("events / bin") if title: - plt.title(title + " " + clname) + plt.title(title + ", " + clname) save_img( "sum_energy_cls{}.png".format(cls_id), epoch, @@ -636,7 +636,7 @@ def plot_sum_energy(yvals, class_names, epoch=None, cp_dir=None, comet_experimen plt.xlabel("total true energy / event [GeV]") plt.ylabel("total PF energy / event [GeV]") if title: - plt.title(title + " " + clname) + plt.title(title + ", " + clname) save_img( "sum_gen_cand_energy_cls{}.png".format(cls_id), epoch, @@ -651,7 +651,7 @@ def plot_sum_energy(yvals, class_names, epoch=None, cp_dir=None, comet_experimen plt.xlabel("total true energy / event [GeV]") plt.ylabel("total MLPF energy / event [GeV]") if title: - plt.title(title + " " + clname) + plt.title(title + ", " + clname) save_img( "sum_gen_pred_energy_cls{}.png".format(cls_id), epoch, @@ -676,7 +676,7 @@ def plot_sum_energy(yvals, class_names, epoch=None, cp_dir=None, comet_experimen plt.xlabel("total true energy / event [GeV]") plt.ylabel("total reconstructed energy / event [GeV]") if title: - plt.title(title + ", PF") + plt.title(title + ", " + clname + ", PF") save_img( "sum_gen_cand_energy_log_cls{}.png".format(cls_id), epoch, @@ -698,7 +698,7 @@ def plot_sum_energy(yvals, class_names, epoch=None, cp_dir=None, comet_experimen plt.xlabel("total true energy / event [GeV]") plt.ylabel("total reconstructed energy / event [GeV]") if title: - plt.title(title + ", MLPF") + plt.title(title + ", " + clname + ", MLPF") save_img( "sum_gen_pred_energy_log_cls{}.png".format(cls_id), epoch, @@ -729,7 +729,7 @@ def plot_particle_multiplicity(X, yvals, class_names, epoch=None, cp_dir=None, c plt.xlim(0, max_val) plt.ylim(0, max_val) if title: - plt.title(title + " " + clname) + plt.title(title + ", " + clname) save_img( 
"particle_multiplicity_{}.png".format(cls_id), @@ -957,6 +957,8 @@ def plot_jet_response_binned(yvals, epoch=None, cp_dir=None, comet_experiment=No plt.ylim(0.75, 1.25) plt.axhline(1.0, color="black", ls="--") plt.ylabel("Response median") + if title: + plt.title(title) plt.legend() plt.sca(axs[1]) @@ -964,6 +966,8 @@ def plot_jet_response_binned(yvals, epoch=None, cp_dir=None, comet_experiment=No plt.plot(x_vals, mlpf_vals[:, 2] - mlpf_vals[:, 0], marker="o", label="MLPF") plt.ylabel("Response IQR") plt.legend() + if title: + plt.title(title) plt.xlabel("gen-jet $p_T$ [GeV]") plt.tight_layout() @@ -1051,6 +1055,8 @@ def plot_met_response_binned(yvals, epoch=None, cp_dir=None, comet_experiment=No plt.ylim(0.75, 1.25) plt.axhline(1.0, color="black", ls="--") plt.ylabel("Response median") + if title: + plt.title(title) plt.legend() plt.sca(axs[1]) @@ -1058,6 +1064,8 @@ def plot_met_response_binned(yvals, epoch=None, cp_dir=None, comet_experiment=No plt.plot(x_vals, mlpf_vals[:, 2] - mlpf_vals[:, 0], marker="o", label="MLPF") plt.ylabel("Response IQR") plt.legend() + if title: + plt.title(title) plt.xlabel("gen MET [GeV]") plt.tight_layout() diff --git a/mlpf/tallinn/eval.sh b/mlpf/tallinn/eval.sh index c9f98205b..c4811fa60 100755 --- a/mlpf/tallinn/eval.sh +++ b/mlpf/tallinn/eval.sh @@ -7,12 +7,13 @@ IMG=/home/software/singularity/tf-2.11.0.simg cd ~/particleflow -EXPDIR=experiments/clic-hits_20230421_213012_921390.gpu1.local +EXPDIR=experiments/clic-hits_20230512_161010_875811.gpu1.local +WEIGHTS=experiments/clic-hits_20230512_161010_875811.gpu1.local/weights/weights-06-0.076698.hdf5 singularity exec -B /scratch/persistent --nv \ --env PYTHONPATH=hep_tfds \ --env TFDS_DATA_DIR=/scratch/persistent/joosep/tensorflow_datasets \ $IMG python mlpf/pipeline.py evaluate \ - --train-dir $EXPDIR + --train-dir $EXPDIR --weights $WEIGHTS singularity exec -B /scratch/persistent --nv \ --env PYTHONPATH=hep_tfds \ diff --git a/mlpf/tallinn/mlpf-train-a100.sh b/mlpf/tallinn/mlpf-train-a100.sh index 71aa5305c..b5128c13c 100755 --- a/mlpf/tallinn/mlpf-train-a100.sh +++ b/mlpf/tallinn/mlpf-train-a100.sh @@ -4,11 +4,17 @@ #SBATCH --mem-per-gpu 40G #SBATCH -o logs/slurm-%x-%j-%N.out -IMG=/home/software/singularity/tf-2.11.0.simg +IMG=/home/software/singularity/tf-2.12.0-nvidia.simg cd ~/particleflow #TF training singularity exec -B /scratch/persistent --nv \ --env PYTHONPATH=hep_tfds \ --env TFDS_DATA_DIR=/scratch/persistent/joosep/tensorflow_datasets \ - $IMG python mlpf/pipeline.py train -c $1 --plot-freq 1 --num-cpus 16 --batch-multiplier $2 + $IMG python mlpf/pipeline.py train -c parameters/clic-hits-ln.yaml \ + --plot-freq 1 --num-cpus 32 --batch-multiplier 2 \ + --weights experiments/clic-hits-ln_20230626_123309_931116.gpu1.local/weights/weights-12-0.172574.hdf5 + +# --env TF_GPU_THREAD_MODE=gpu_private \ +# --env TF_GPU_THREAD_COUNT=8 \ +# --env TF_XLA_FLAGS="--tf_xla_auto_jit=2" \ diff --git a/mlpf/tallinn/mlpf-train.sh b/mlpf/tallinn/mlpf-train.sh index a27aff070..4d890a83e 100755 --- a/mlpf/tallinn/mlpf-train.sh +++ b/mlpf/tallinn/mlpf-train.sh @@ -4,11 +4,12 @@ #SBATCH --mem-per-gpu=8G #SBATCH -o logs/slurm-%x-%j-%N.out -IMG=/home/software/singularity/tf-2.11.0.simg +IMG=docker://nvcr.io/nvidia/tensorflow:23.05-tf2-py3 cd ~/particleflow #TF training -singularity exec -B /scratch/persistent --nv \ +singularity exec -B /scratch/persistent -B /local --nv \ --env PYTHONPATH=hep_tfds \ - --env TFDS_DATA_DIR=/scratch/persistent/joosep/tensorflow_datasets \ - $IMG python mlpf/pipeline.py train -c $1 
--plot-freq 1 --num-cpus 16 --batch-multiplier $2 --weights experiments/clic_20230412_155159_717751.gpu1.local/weights/weights-100-9.948204.hdf5 + --env TFDS_DATA_DIR=/local/joosep/mlpf/tensorflow_datasets \ + --env TF_XLA_FLAGS="--tf_xla_auto_jit=2 --tf_xla_cpu_global_jit" \ + $IMG python mlpf/pipeline.py train -c parameters/clic-hits.yaml --plot-freq 1 --num-cpus 16 --batch-multiplier 1 --ntrain 100000 --ntest 100000 diff --git a/mlpf/tallinn/postprocessing.sh b/mlpf/tallinn/postprocessing.sh index f3e614b14..730f3e7b4 100755 --- a/mlpf/tallinn/postprocessing.sh +++ b/mlpf/tallinn/postprocessing.sh @@ -1,6 +1,6 @@ #!/bin/bash #SBATCH -p main -#SBATCH --cpus-per-task 30 +#SBATCH --cpus-per-task 16 #SBATCH --mem-per-cpu=1G #SBATCH -o logs/slurm-%x-%j-%N.out diff --git a/mlpf/tallinn/submit_postprocessing.sh b/mlpf/tallinn/submit_postprocessing.sh new file mode 100755 index 000000000..c1686562e --- /dev/null +++ b/mlpf/tallinn/submit_postprocessing.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +#sbatch mlpf/tallinn/postprocessing.sh p8_ee_tt_ecm380 +#sbatch mlpf/tallinn/postprocessing.sh p8_ee_qq_ecm380 +#sbatch mlpf/tallinn/postprocessing.sh p8_ee_tt_ecm380_PU10 +#sbatch mlpf/tallinn/postprocessing.sh p8_ee_WW_fullhad_ecm380 +#sbatch mlpf/tallinn/postprocessing.sh p8_ee_ZH_Htautau_ecm380 + +#sbatch mlpf/tallinn/postprocessing_hits.sh p8_ee_tt_ecm380 +#sbatch mlpf/tallinn/postprocessing_hits.sh p8_ee_qq_ecm380 +#sbatch mlpf/tallinn/postprocessing_hits.sh kaon0L +#sbatch mlpf/tallinn/postprocessing_hits.sh pi- +#sbatch mlpf/tallinn/postprocessing_hits.sh pi+ +sbatch mlpf/tallinn/postprocessing_hits.sh pi0 +sbatch mlpf/tallinn/postprocessing_hits.sh e- +sbatch mlpf/tallinn/postprocessing_hits.sh e+ +#sbatch mlpf/tallinn/postprocessing_hits.sh mu- +#sbatch mlpf/tallinn/postprocessing_hits.sh mu+ +sbatch mlpf/tallinn/postprocessing_hits.sh gamma +sbatch mlpf/tallinn/postprocessing_hits.sh neutron diff --git a/mlpf/tfmodel/callbacks.py b/mlpf/tfmodel/callbacks.py index f290460b0..03b4c4b16 100644 --- a/mlpf/tfmodel/callbacks.py +++ b/mlpf/tfmodel/callbacks.py @@ -2,6 +2,7 @@ import pickle from datetime import datetime from pathlib import Path +import time import matplotlib.pyplot as plt import numpy as np @@ -58,6 +59,7 @@ def _collect_learning_rate(self, logs): def on_epoch_end(self, epoch, logs): logs = logs or {} logs.update(self._collect_learning_rate(logs)) + logs["time"] = time.time() if self.dump_history: history_path = Path(self.log_dir) / "history" history_path.mkdir(parents=True, exist_ok=True) diff --git a/mlpf/tfmodel/model.py b/mlpf/tfmodel/model.py index 05970b800..7fd7cbdb5 100644 --- a/mlpf/tfmodel/model.py +++ b/mlpf/tfmodel/model.py @@ -16,16 +16,30 @@ def debugging_train_step(self, data): print("data", data[0].shape, [(k, v.shape) for (k, v) in data[1].items()]) with tf.GradientTape() as tape: - y_pred = self(x, training=True) # Forward pass - loss = self.compiled_loss(y, y_pred, sample_weights, regularization_losses=self.losses) + y_pred = self(x, training=True) # Forward pass + + tf.print("predictions and targets") + for k in y_pred.keys(): + tf.print(k, y_pred[k].shape, y[k].shape) + + tf.print("loss shapes") + for k in self.compiled_loss._user_losses.keys(): + tf.print(k, self.compiled_loss._user_losses[k]) + tf.print(self.compiled_loss._user_losses[k](y[k], y_pred[k]).shape) + + tf.print("sample weights") + for k in sample_weights.keys(): + tf.print(k, sample_weights[k].shape) + + loss = self.compiled_loss(y, y_pred, sample_weights) # , regularization_losses=self.losses)
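# Aside, not part of the patch: with the loss-construction changes further below in mlpf/tfmodel/utils.py and
# mlpf/tfmodel/tfa.py, the per-output classification term entering compiled_loss above can be the vendored
# sigmoid focal cross-entropy. A minimal, self-contained sketch of how that loss down-weights well-classified
# examples relative to plain binary cross-entropy; the function name and the example probabilities are
# illustrative only, not code from this repository.
import tensorflow as tf

def demo_focal_vs_bce(y_true, p_pred, alpha=0.25, gamma=2.0):
    y_true = tf.constant(y_true, dtype=tf.float32)
    p_pred = tf.constant(p_pred, dtype=tf.float32)
    ce = tf.keras.backend.binary_crossentropy(y_true, p_pred)   # plain per-entry cross-entropy
    p_t = y_true * p_pred + (1.0 - y_true) * (1.0 - p_pred)     # probability assigned to the true class
    alpha_factor = y_true * alpha + (1.0 - y_true) * (1.0 - alpha)
    focal = alpha_factor * tf.pow(1.0 - p_t, gamma) * ce        # (1 - p_t)^gamma suppresses easy examples
    return ce.numpy(), focal.numpy()

# demo_focal_vs_bce([1.0, 1.0], [0.97, 0.03]): the easy positive (p=0.97) is suppressed by more than three
# orders of magnitude compared to its cross-entropy value, while the hard positive (p=0.03) is reduced
# essentially only by the alpha=0.25 balancing factor, so hard examples dominate the training signal.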
trainable_vars = self.trainable_variables gradients = tape.gradient(loss, trainable_vars) - print("Max of Gradients[0]: %.4f" % tf.reduce_max(gradients[0])) - print("Min of Gradients[0]: %.4f" % tf.reduce_min(gradients[0])) - print("Mean of Gradients[0]: %.4f" % tf.reduce_mean(gradients[0])) - print("Loss: %.4f" % loss) + print("Max of Gradients[0]: {:.4f}".format(tf.reduce_max(gradients[0]))) + print("Min of Gradients[0]: {:.4f}".format(tf.reduce_min(gradients[0]))) + print("Mean of Gradients[0]: {:.4f}".format(tf.reduce_mean(gradients[0]))) + print("Loss: {}".format(loss)) self.optimizer.apply_gradients(zip(gradients, trainable_vars)) self.compiled_metrics.update_state(y, y_pred) @@ -51,12 +65,16 @@ def debugging_test_step(self, data): return {m.name: m.result() for m in self.metrics} +# @tf.function(jit_compile=True) +@tf.function def split_indices_to_bins_batch(cmul, nbins, bin_size, msk): bin_idx = tf.argmax(cmul, axis=-1) + tf.cast(tf.where(~msk, nbins - 1, 0), tf.int64) bins_split = tf.reshape(tf.argsort(bin_idx), (tf.shape(cmul)[0], nbins, bin_size)) return bins_split +# @tf.function(jit_compile=True) +@tf.function def pairwise_l2_dist(A, B): na = tf.reduce_sum(tf.square(A), -1) nb = tf.reduce_sum(tf.square(B), -1) @@ -115,17 +133,18 @@ def pairwise_sigmoid_dist(A, B): """ +# @tf.function(jit_compile=True) @tf.function def reverse_lsh(bins_split, points_binned_enc, small_graph_opt=False): - tf.debugging.assert_shapes( - [ - (bins_split, ("n_batch", "n_bins", "n_points_bin")), - ( - points_binned_enc, - ("n_batch", "n_bins", "n_points_bin", "n_features"), - ), - ] - ) + # tf.debugging.assert_shapes( + # [ + # (bins_split, ("n_batch", "n_bins", "n_points_bin")), + # ( + # points_binned_enc, + # ("n_batch", "n_bins", "n_points_bin", "n_features"), + # ), + # ] + # ) shp = tf.shape(points_binned_enc) n_bins = shp[1] @@ -158,11 +177,11 @@ def single_bin(): else: ret = multiple_bins() - tf.debugging.assert_shapes( - [ - (ret, ("n_batch", "n_elems", "n_features")), - ] - ) + # tf.debugging.assert_shapes( + # [ + # (ret, ("n_batch", "n_elems", "n_features")), + # ] + # ) return ret @@ -175,7 +194,6 @@ def __init__(self, num_input_classes): X: [Nbatch, Nelem, Nfeat] array of all the input detector element feature data """ - @tf.function def call(self, X): # X[:, :, 0] - categorical index of the element type @@ -198,7 +216,6 @@ def __init__(self, num_input_classes): X: [Nbatch, Nelem, Nfeat] array of all the input detector element feature data """ - @tf.function def call(self, X): # X[:, :, 0] - categorical index of the element type @@ -290,6 +307,7 @@ def __init__(self, *args, **kwargs): self.activation = getattr(tf.keras.activations, kwargs.pop("activation")) self.output_dim = kwargs.pop("output_dim") self.normalize_degrees = kwargs.pop("normalize_degrees", True) + self.initializer = kwargs.pop("initializer", "random_normal") super(GHConvDense, self).__init__(*args, **kwargs) @@ -299,28 +317,28 @@ def build(self, input_shape): self.W_t = self.add_weight( shape=(self.hidden_dim, self.output_dim), name="w_t", - initializer="random_normal", + initializer=self.initializer, trainable=True, regularizer=tf.keras.regularizers.L1(regularizer_weight), ) self.b_t = self.add_weight( shape=(self.output_dim,), name="b_t", - initializer="random_normal", + initializer=self.initializer, trainable=True, regularizer=tf.keras.regularizers.L1(regularizer_weight), ) self.W_h = self.add_weight( shape=(self.hidden_dim, self.output_dim), name="w_h", - initializer="random_normal", + initializer=self.initializer, 
trainable=True, regularizer=tf.keras.regularizers.L1(regularizer_weight), ) self.theta = self.add_weight( shape=(self.hidden_dim, self.output_dim), name="theta", - initializer="random_normal", + initializer=self.initializer, trainable=True, regularizer=tf.keras.regularizers.L1(regularizer_weight), ) @@ -332,7 +350,7 @@ def call(self, inputs): # tf.print("GHConvDense.call:msk", msk.shape) # remove last dim from distance/adjacency matrix - tf.debugging.assert_equal(tf.shape(adj)[-1], 1) + # tf.debugging.assert_equal(tf.shape(adj)[-1], 1) adj = tf.squeeze(adj, axis=-1) # compute the normalization of the adjacency matrix @@ -352,20 +370,20 @@ def call(self, inputs): gate = tf.nn.sigmoid(tf.linalg.matmul(x, self.W_t) + self.b_t) out = gate * f_hom + (1.0 - gate) * f_het - tf.debugging.assert_shapes( - [ - (x, ("n_batch", "n_bins", "n_points_bin", "num_features")), - ( - adj, - ("n_batch", "n_bins", "n_points_bin", "n_points_bin"), - ), - (msk, ("n_batch", "n_bins", "n_points_bin", 1)), - ( - out, - ("n_batch", "n_bins", "n_points_bin", self.output_dim), - ), - ] - ) + # tf.debugging.assert_shapes( + # [ + # (x, ("n_batch", "n_bins", "n_points_bin", "num_features")), + # ( + # adj, + # ("n_batch", "n_bins", "n_points_bin", "n_points_bin"), + # ), + # (msk, ("n_batch", "n_bins", "n_points_bin", 1)), + # ( + # out, + # ("n_batch", "n_bins", "n_points_bin", self.output_dim), + # ), + # ] + # ) # tf.print("GHConvDense.call:out", out.shape) return self.activation(out) * msk @@ -558,6 +576,7 @@ def __init__( self.bin_size = bin_size self.kernel = kernel self.small_graph_opt = small_graph_opt + self.initializer = kwargs.pop("initializer", "random_normal") super(MessageBuildingLayerLSH, self).__init__(**kwargs) @@ -567,7 +586,7 @@ def build(self, input_shape): # generate the LSH codebook for random rotations (num_features, max_num_bins/2) self.codebook_random_rotations = self.add_weight( shape=(self.distance_dim, self.max_num_bins // 2), - initializer="random_normal", + initializer=self.initializer, trainable=False, name="lsh_projections", ) @@ -580,13 +599,13 @@ def build(self, input_shape): def call(self, x_msg, x_node, msk, training=False): msk_f = tf.expand_dims(tf.cast(msk, x_msg.dtype), -1) - tf.debugging.assert_shapes( - [ - (x_msg, ("n_batch", "n_points", "n_msg_features")), - (x_node, ("n_batch", "n_points", "n_node_features")), - (msk_f, ("n_batch", "n_points", 1)), - ] - ) + # tf.debugging.assert_shapes( + # [ + # (x_msg, ("n_batch", "n_points", "n_msg_features")), + # (x_node, ("n_batch", "n_points", "n_node_features")), + # (msk_f, ("n_batch", "n_points", 1)), + # ] + # ) shp = tf.shape(x_msg) n_points = shp[1] @@ -605,16 +624,16 @@ def dobin(): # n_points must be divisible by bin_size exactly due to the use of reshape n_bins = tf.math.floordiv(n_points, self.bin_size) - tf.debugging.assert_greater( - n_bins, - 0, - "number of points (dim 1) must be greater than bin_size={}".format(self.bin_size), - ) - tf.debugging.assert_equal( - tf.math.floormod(n_points, self.bin_size), - 0, - "number of points (dim 1) must be an integer multiple of bin_size={}".format(self.bin_size), - ) + # tf.debugging.assert_greater( + # n_bins, + # 0, + # "number of points (dim 1) must be greater than bin_size={}".format(self.bin_size), + # ) + # tf.debugging.assert_equal( + # tf.math.floormod(n_points, self.bin_size), + # 0, + # "number of points (dim 1) must be an integer multiple of bin_size={}".format(self.bin_size), + # ) mul = tf.linalg.matmul( x_msg, self.codebook_random_rotations[:, : tf.math.maximum(1, 
n_bins // 2)], @@ -674,39 +693,39 @@ def nobin(): msk_col = tf.cast(tf.reshape(msk_f_binned_squeeze, rshp_col), dm.dtype) dm = tf.math.multiply(dm, msk_row) dm = tf.math.multiply(dm, msk_col) - tf.debugging.assert_shapes( - [ - ( - x_msg_binned, - ( - "n_batch", - "n_bins", - "n_points_bin", - "n_msg_features", - ), - ), - ( - x_features_binned, - ( - "n_batch", - "n_bins", - "n_points_bin", - "n_node_features", - ), - ), - (msk_f_binned, ("n_batch", "n_bins", "n_points_bin", 1)), - ( - dm, - ( - "n_batch", - "n_bins", - "n_points_bin", - "n_points_bin", - 1, - ), - ), - ] - ) + # tf.debugging.assert_shapes( + # [ + # ( + # x_msg_binned, + # ( + # "n_batch", + # "n_bins", + # "n_points_bin", + # "n_msg_features", + # ), + # ), + # ( + # x_features_binned, + # ( + # "n_batch", + # "n_bins", + # "n_points_bin", + # "n_node_features", + # ), + # ), + # (msk_f_binned, ("n_batch", "n_bins", "n_points_bin", 1)), + # ( + # dm, + # ( + # "n_batch", + # "n_bins", + # "n_points_bin", + # "n_points_bin", + # 1, + # ), + # ), + # ] + # ) return bins_split, x_features_binned, dm, msk_f_binned @@ -1057,44 +1076,44 @@ def call(self, x, msk, training=False): # tf.print("CombinedGraphLayer.call:dm", dm.shape) # tf.print("CombinedGraphLayer.call:msk_f", msk_f.shape) - tf.debugging.assert_shapes( - [ - (bins_split, ("n_batch", "n_bins", "n_points_bin")), - ( - x, - ( - "n_batch", - "n_bins", - "n_points_bin", - "n_node_features", - ), - ), - ( - dm, - ( - "n_batch", - "n_bins", - "n_points_bin", - "n_points_bin", - 1, - ), - ), - (msk_f, ("n_batch", "n_bins", "n_points_bin", 1)), - ] - ) + # tf.debugging.assert_shapes( + # [ + # (bins_split, ("n_batch", "n_bins", "n_points_bin")), + # ( + # x, + # ( + # "n_batch", + # "n_bins", + # "n_points_bin", + # "n_node_features", + # ), + # ), + # ( + # dm, + # ( + # "n_batch", + # "n_bins", + # "n_points_bin", + # "n_points_bin", + # 1, + # ), + # ), + # (msk_f, ("n_batch", "n_bins", "n_points_bin", 1)), + # ] + # ) # run the node update with message passing for msg in self.message_passing_layers: x_out = msg((x, dm, msk_f)) - tf.debugging.assert_shapes( - [ - (x, ("n_batch", "n_bins", "n_points_bin", "feat_in")), - ( - x_out, - ("n_batch", "n_bins", "n_points_bin", "feat_out"), - ), - ] - ) + # tf.debugging.assert_shapes( + # [ + # (x, ("n_batch", "n_bins", "n_points_bin", "feat_in")), + # ( + # x_out, + # ("n_batch", "n_bins", "n_points_bin", "feat_out"), + # ), + # ] + # ) x = x_out if self.dropout_layer: x = self.dropout_layer(x, training=training) @@ -1194,7 +1213,10 @@ def __init__( def call(self, inputs, training=False): Xorig = inputs + # tf.print(tf.shape(Xorig)) + # normalize all features except the PFElement type (feature 0) + # X = Xorig X = tf.concat([Xorig[:, :, 0:1], tf.cast(self.normalizer(Xorig[:, :, 1:]), dtype=Xorig.dtype)], axis=-1) X = tf.where(tf.math.is_inf(X), tf.zeros_like(X), X) @@ -1418,12 +1440,12 @@ def __init__( event_set_output=False, met_output=False, cls_output_as_logits=False, - num_layers_encoder=2, - num_layers_decoder_reg=2, - num_layers_decoder_cls=2, + num_layers_encoder=4, + num_layers_decoder_reg=4, + num_layers_decoder_cls=4, hidden_dim=256, num_heads=8, - num_random_features=128, + num_random_features=256, ): super(PFNetTransformer, self).__init__() @@ -1492,6 +1514,7 @@ def __init__( def call(self, inputs, training=False): Xorig = inputs + # X = Xorig X = tf.concat([Xorig[:, :, 0:1], self.normalizer(Xorig[:, :, 1:])], axis=-1) # tf.print("\nX.shape=", tf.shape(X), "\n") @@ -1513,6 +1536,7 @@ def call(self, inputs, 
training=False): for enc in self.encoders: X_enc = enc([X_enc, X_enc, msk], training=training) * msk_input + # initialize the classification and regression latent state with identity X_cls = tf.identity(X_enc) X_reg = tf.identity(X_enc) diff --git a/mlpf/tfmodel/model_setup.py b/mlpf/tfmodel/model_setup.py index 0a4c5c2a3..95ddbc437 100644 --- a/mlpf/tfmodel/model_setup.py +++ b/mlpf/tfmodel/model_setup.py @@ -11,11 +11,11 @@ import pickle from pathlib import Path +import time import awkward import fastjet import numpy as np import tensorflow as tf -import tensorflow_addons as tfa import vector from plotting.plot_utils import ( compute_distances, @@ -223,15 +223,16 @@ def get_checkpoint_history_callback(outdir, config, dataset, comet_experiment, h is_hpo_run=is_hpo_run, ) - callbacks += [cb] + if config.get("do_validation_callback", True): + callbacks += [cb] + tb = CustomTensorBoard( log_dir=outdir + "/logs", histogram_freq=config["callbacks"]["tensorboard"]["hist_freq"], write_graph=False, write_images=False, update_freq="batch", - # profile_batch=(10,200), - profile_batch=0, + # profile_batch=(50,100), dump_history=config["callbacks"]["tensorboard"]["dump_history"], ) # Change the class name of CustomTensorBoard TensorBoard to make keras_tuner recognise it @@ -470,6 +471,27 @@ def model_output(ret): full_model = tf.function(lambda x: model_output(model(x, training=False))) + niter = 10 + nfeat = config["dataset"]["num_input_features"] + + if "combined_graph_layer" in config["parameters"]: + bin_size = config["parameters"]["combined_graph_layer"]["bin_size"] + elem_range = list(range(bin_size, 5 * bin_size, bin_size)) + else: + elem_range = range(100, 1000, 200) + + for ibatch in [1, 2, 4]: + for nptcl in elem_range: + X = np.random.rand(ibatch, nptcl, nfeat) + full_model(X) + + t0 = time.time() + for i in range(niter): + full_model(X) + t1 = time.time() + + print(ibatch, nptcl, (t1 - t0) / niter) + # we need to use opset 12 for the version of ONNXRuntime in CMSSW # the warnings "RuntimeError: Opset (12) must be >= 13 for operator 'batch_dot'." do not seem to be critical model_proto, _ = tf2onnx.convert.from_function( @@ -523,7 +545,10 @@ def configure_model_weights(model, trainable_layers): def make_focal_loss(config): def loss(x, y): - return tfa.losses.sigmoid_focal_crossentropy( + + from .tfa import sigmoid_focal_crossentropy + + return sigmoid_focal_crossentropy( x, y, alpha=float(config["setup"].get("focal_loss_alpha", 0.25)), diff --git a/mlpf/tfmodel/tfa.py b/mlpf/tfmodel/tfa.py new file mode 100644 index 000000000..7f45399eb --- /dev/null +++ b/mlpf/tfmodel/tfa.py @@ -0,0 +1,168 @@ +import tensorflow as tf +import tensorflow.keras.backend as K + + +def is_tensor_or_variable(x): + return tf.is_tensor(x) or isinstance(x, tf.Variable) + + +class LossFunctionWrapper(tf.keras.losses.Loss): + """Wraps a loss function in the `Loss` class.""" + + def __init__(self, fn, reduction=tf.keras.losses.Reduction.AUTO, name=None, **kwargs): + """Initializes `LossFunctionWrapper` class. + + Args: + fn: The loss function to wrap, with signature `fn(y_true, y_pred, + **kwargs)`. + reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to + loss. Default value is `AUTO`. `AUTO` indicates that the reduction + option will be determined by the usage context. For almost all cases + this defaults to `SUM_OVER_BATCH_SIZE`. 
When used with + `tf.distribute.Strategy`, outside of built-in training loops such as + `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` + will raise an error. Please see this custom training [tutorial]( + https://www.tensorflow.org/tutorials/distribute/custom_training) + for more details. + name: (Optional) name for the loss. + **kwargs: The keyword arguments that are passed on to `fn`. + """ + super().__init__(reduction=reduction, name=name) + self.fn = fn + self._fn_kwargs = kwargs + + def call(self, y_true, y_pred): + """Invokes the `LossFunctionWrapper` instance. + + Args: + y_true: Ground truth values. + y_pred: The predicted values. + + Returns: + Loss values per sample. + """ + return self.fn(y_true, y_pred, **self._fn_kwargs) + + def get_config(self): + config = {} + for k, v in iter(self._fn_kwargs.items()): + config[k] = tf.keras.backend.eval(v) if is_tensor_or_variable(v) else v + base_config = super().get_config() + return {**base_config, **config} + + +class SigmoidFocalCrossEntropy(LossFunctionWrapper): + """Implements the focal loss function. + + Focal loss was first introduced in the RetinaNet paper + (https://arxiv.org/pdf/1708.02002.pdf). Focal loss is extremely useful for + classification when you have highly imbalanced classes. It down-weights + well-classified examples and focuses on hard examples. The loss value is + much higher for a sample which is misclassified by the classifier as compared + to the loss value corresponding to a well-classified example. One of the + best use-cases of focal loss is its usage in object detection where the + imbalance between the background class and other classes is extremely high. + + Usage: + + >>> fl = tfa.losses.SigmoidFocalCrossEntropy() + >>> loss = fl( + ... y_true = [[1.0], [1.0], [0.0]],y_pred = [[0.97], [0.91], [0.03]]) + >>> loss + + + Usage with `tf.keras` API: + + >>> model = tf.keras.Model() + >>> model.compile('sgd', loss=tfa.losses.SigmoidFocalCrossEntropy()) + + Args: + alpha: balancing factor, default value is 0.25. + gamma: modulating factor, default value is 2.0. + + Returns: + Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same + shape as `y_true`; otherwise, it is scalar. + + Raises: + ValueError: If the shape of `sample_weight` is invalid or value of + `gamma` is less than zero. + """ + + def __init__( + self, + from_logits: bool = False, + alpha=0.25, + gamma=2.0, + reduction: str = tf.keras.losses.Reduction.NONE, + name: str = "sigmoid_focal_crossentropy", + ): + super().__init__( + sigmoid_focal_crossentropy, + name=name, + reduction=reduction, + from_logits=from_logits, + alpha=alpha, + gamma=gamma, + ) + + +@tf.function +def sigmoid_focal_crossentropy( + y_true, + y_pred, + alpha=0.25, + gamma=2.0, + from_logits: bool = False, +) -> tf.Tensor: + """Implements the focal loss function. + + Focal loss was first introduced in the RetinaNet paper + (https://arxiv.org/pdf/1708.02002.pdf). Focal loss is extremely useful for + classification when you have highly imbalanced classes. It down-weights + well-classified examples and focuses on hard examples. The loss value is + much higher for a sample which is misclassified by the classifier as compared + to the loss value corresponding to a well-classified example. One of the + best use-cases of focal loss is its usage in object detection where the + imbalance between the background class and other classes is extremely high. + + Args: + y_true: true targets tensor. + y_pred: predictions tensor. + alpha: balancing factor. 
+ gamma: modulating factor. + + Returns: + Weighted loss float `Tensor`. If `reduction` is `NONE`,this has the + same shape as `y_true`; otherwise, it is scalar. + """ + if gamma and gamma < 0: + raise ValueError("Value of gamma should be greater than or equal to zero.") + + y_pred = tf.convert_to_tensor(y_pred) + y_true = tf.cast(y_true, dtype=y_pred.dtype) + + # Get the cross_entropy for each entry + ce = K.binary_crossentropy(y_true, y_pred, from_logits=from_logits) + + # If logits are provided then convert the predictions into probabilities + if from_logits: + pred_prob = tf.sigmoid(y_pred) + else: + pred_prob = y_pred + + p_t = (y_true * pred_prob) + ((1 - y_true) * (1 - pred_prob)) + alpha_factor = 1.0 + modulating_factor = 1.0 + + if alpha: + alpha = tf.cast(alpha, dtype=y_true.dtype) + alpha_factor = y_true * alpha + (1 - y_true) * (1 - alpha) + + if gamma: + gamma = tf.cast(gamma, dtype=y_true.dtype) + modulating_factor = tf.pow((1.0 - p_t), gamma) + + # compute the final loss and return + return tf.reduce_sum(alpha_factor * modulating_factor * ce, axis=-1) diff --git a/mlpf/tfmodel/utils.py b/mlpf/tfmodel/utils.py index 602d2c963..aa382a7e1 100644 --- a/mlpf/tfmodel/utils.py +++ b/mlpf/tfmodel/utils.py @@ -15,7 +15,6 @@ import tensorflow as tf -import tensorflow_addons as tfa import yaml from tensorflow.keras import mixed_precision @@ -196,13 +195,10 @@ def get_strategy(num_cpus=None): tf.config.threading.set_inter_op_parallelism_threads(num_cpus) tf.config.threading.set_intra_op_parallelism_threads(num_cpus) - device = "cpu" if "CUDA_VISIBLE_DEVICES" in os.environ: num_gpus, gpus = get_num_gpus("CUDA_VISIBLE_DEVICES") - device = "cuda" elif "ROCR_VISIBLE_DEVICES" in os.environ: num_gpus, gpus = get_num_gpus("ROCR_VISIBLE_DEVICES") - device = "roc" else: logging.warning( "CUDA/ROC variable is empty. 
\ @@ -213,13 +209,7 @@ def get_strategy(num_cpus=None): if num_gpus > 1: # multiple GPUs selected logging.info("Attempting to use multiple GPUs with tf.distribute.MirroredStrategy()...") - - # For ROCM devices, I was getting errors from Adam/NcclAllReduce on multiple GPUs - cross_device_ops = tf.distribute.NcclAllReduce() - if device == "roc": - cross_device_ops = tf.distribute.HierarchicalCopyAllReduce() - - strategy = tf.distribute.MirroredStrategy(cross_device_ops=cross_device_ops) + strategy = tf.distribute.MirroredStrategy() elif num_gpus == 1: # single GPU logging.info("Using a single GPU with tf.distribute.OneDeviceStrategy()") @@ -292,13 +282,6 @@ def get_optimizer(config, lr_schedule=None): cfg_adam = config["optimizer"]["adam"] opt = tf.keras.optimizers.legacy.Adam(learning_rate=lr, amsgrad=cfg_adam["amsgrad"]) return opt - elif config["setup"]["optimizer"] == "adamw": - cfg_adamw = config["optimizer"]["adamw"] - return tfa.optimizers.AdamW( - learning_rate=lr, - weight_decay=cfg_adamw["weight_decay"], - amsgrad=cfg_adamw["amsgrad"], - ) elif config["setup"]["optimizer"] == "sgd": cfg_sgd = config["optimizer"]["sgd"] return tf.keras.optimizers.legacy.SGD( @@ -389,9 +372,22 @@ def load_and_interleave( # use dynamic batching depending on the sequence length if config["batching"]["bucket_by_sequence_length"]: - bucket_batch_sizes = [[float(v) for v in x.split(",")] for x in config["batching"]["bucket_batch_sizes"]] + if config["batching"]["bucket_batch_sizes"] == "auto": + if "combined_graph_layer" in config["parameters"]: + bin_size = config["parameters"]["combined_graph_layer"]["bin_size"] + else: + bin_size = 256 + + # generate (max_elems, batch_size) pairs + # scale from bin_size to max_elems in steps of bin_size + max_elems = 75 * bin_size + max_n = 75 + reduction_factor = 125 + bucket_batch_sizes = [(bin_size * (n + 1) + 1, (max_elems) / (n + 1) // reduction_factor) for n in range(max_n)] + else: + bucket_batch_sizes = [[float(v) for v in x.split(",")] for x in config["batching"]["bucket_batch_sizes"]] - assert bucket_batch_sizes[-1][0] == float("inf") + # assert bucket_batch_sizes[-1][0] == float("inf") bucket_boundaries = [int(x[0]) for x in bucket_batch_sizes[:-1]] bucket_batch_sizes = [ @@ -408,6 +404,7 @@ def load_and_interleave( bucket_boundaries=bucket_boundaries, # for multi-GPU, we need to multiply the batch size by the number of GPUs bucket_batch_sizes=bucket_batch_sizes, + pad_to_bucket_boundary=True, drop_remainder=True, ) # use fixed-size batching @@ -479,27 +476,16 @@ def set_config_loss(config, trainable): return config -def get_class_loss(config): - if config["setup"]["classification_loss_type"] == "categorical_cross_entropy": - cls_loss = tf.keras.losses.CategoricalCrossentropy( - from_logits=False, - label_smoothing=config["setup"].get("classification_label_smoothing", 0.0), - ) - elif config["setup"]["classification_loss_type"] == "sigmoid_focal_crossentropy": - cls_loss = tfa.losses.sigmoid_focal_crossentropy - else: - raise KeyError("Unknown classification loss type: {}".format(config["setup"]["classification_loss_type"])) - return cls_loss - - def get_loss_from_params(input_dict): input_dict = input_dict.copy() loss_type = input_dict.pop("type") - if loss_type == "PinballLoss": - loss_cls = getattr(tfa.losses, loss_type) + if loss_type == "SigmoidFocalCrossEntropy": + from .tfa import SigmoidFocalCrossEntropy + + loss_cls = SigmoidFocalCrossEntropy else: loss_cls = getattr(tf.keras.losses, loss_type) - return loss_cls(**input_dict) + return 
loss_cls(**input_dict, reduction=tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE) # batched version of https://github.com/VinAIResearch/DSW/blob/master/gsw.py#L19 @@ -677,7 +663,7 @@ def gen_jet_logcosh_loss(y_true, y_pred): def get_loss_dict(config): - cls_loss = get_class_loss(config) + cls_loss = get_loss_from_params(config["loss"].get("cls_loss")) default_loss = {"type": "MeanSquaredError"} loss_dict = { @@ -759,6 +745,11 @@ def model_scope(config, total_steps, weights=None, horovod_enabled=False): policy = mixed_precision.Policy("mixed_float16") mixed_precision.set_global_policy(policy) opt = mixed_precision.LossScaleOptimizer(opt) + elif config["setup"]["dtype"] == "bfloat16": + model_dtype = tf.dtypes.bfloat16 + policy = mixed_precision.Policy("mixed_bfloat16") + mixed_precision.set_global_policy(policy) + opt = mixed_precision.LossScaleOptimizer(opt) else: model_dtype = tf.dtypes.float32 diff --git a/notebooks/clic-visualize.ipynb b/notebooks/clic-visualize.ipynb index c734655f5..2be2ca68c 100644 --- a/notebooks/clic-visualize.ipynb +++ b/notebooks/clic-visualize.ipynb @@ -225,7 +225,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -239,7 +239,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.8.10" } }, "nbformat": 4, diff --git a/notebooks/clic.ipynb b/notebooks/clic.ipynb index 59278b2c9..7d8ea0724 100644 --- a/notebooks/clic.ipynb +++ b/notebooks/clic.ipynb @@ -135,6 +135,24 @@ " \"X\": X[msk],\n", " \"ygen\": ygen[msk],\n", " \"ycand\": ycand[msk]\n", + " }\n", + "\n", + "def load_data_hits(path, num_files):\n", + " ret = []\n", + " filelist = list(glob.glob(path))[:num_files]\n", + " print(len(filelist))\n", + "\n", + " X_hit = []\n", + "\n", + " for fn in tqdm.tqdm(filelist):\n", + " dd = ak.from_parquet(fn)\n", + "\n", + " X_hit.append(dd[\"X_hit\"])\n", + " \n", + " X_hit = ak.concatenate(X_hit)\n", + "\n", + " return {\n", + " \"X_hit\": X_hit,\n", " }" ] }, @@ -151,6 +169,27 @@ "data_ww = load_data(\"/media/joosep/data/mlpf/clic_edm4hep_2023_02_27/p8_ee_WW_fullhad_ecm380/*.parquet\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "8622c8fc", + "metadata": {}, + "outputs": [], + "source": [ + "data_tt_pu10 = load_data(\"/media/joosep/data/mlpf/clic_edm4hep_2023_03_03/p8_ee_tt_ecm380_PU10/*.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "af6ce420", + "metadata": {}, + "outputs": [], + "source": [ + "data_tt_hits = load_data_hits(\"/media/joosep/data/mlpf_hits/clic_edm4hep/p8_ee_tt_ecm380/*.parquet\", 100)\n", + "data_qq_hits = load_data_hits(\"/media/joosep/data/mlpf_hits/clic_edm4hep/p8_ee_qq_ecm380/*.parquet\", 100)" + ] + }, { "cell_type": "markdown", "id": "1a5ecc39", @@ -168,21 +207,26 @@ }, "outputs": [], "source": [ - "b = np.linspace(0, 100, 51)\n", + "b = np.linspace(0, 200, 101)\n", "\n", "h1 = to_bh(ak.num(data_tt[\"X_track\"]), b)\n", "h2 = to_bh(ak.num(data_qcd[\"X_track\"]), b)\n", "h3 = to_bh(ak.num(data_zh[\"X_track\"]), b)\n", "h4 = to_bh(ak.num(data_ww[\"X_track\"]), b)\n", + "h5 = to_bh(ak.num(data_tt_pu10[\"X_track\"]), b)\n", + "\n", + "fig = plt.figure()\n", + "ax = plt.axes()\n", "\n", "mplhep.histplot(h1, histtype=\"step\", lw=2, label=label_tt)\n", "mplhep.histplot(h2, histtype=\"step\", lw=2, label=label_qq)\n", "mplhep.histplot(h3, histtype=\"step\", lw=2, label=label_zh)\n", "mplhep.histplot(h4, 
histtype=\"step\", lw=2, label=label_ww)\n", + "mplhep.histplot(h5*10, histtype=\"step\", lw=2, label=label_tt + \" PU10\")\n", "plt.xlabel(\"Number of tracks / event\")\n", "plt.ylabel(\"Number of events\")\n", "plt.legend()\n", - "plt.ylim(0, 15*num_files)\n", + "plt.ylim(0, 10*num_files)\n", "plt.ticklabel_format(axis=\"y\", style=\"sci\", scilimits=(0,0))\n", "plt.savefig(\"plots/clic/num_tracks.pdf\")" ] @@ -194,17 +238,20 @@ "metadata": {}, "outputs": [], "source": [ - "b = np.linspace(0, 200, 51)\n", + "b = np.linspace(0, 500, 101)\n", "\n", "h1 = to_bh(ak.num(data_tt[\"X_cluster\"]), b)\n", "h2 = to_bh(ak.num(data_qcd[\"X_cluster\"]), b)\n", "h3 = to_bh(ak.num(data_zh[\"X_cluster\"]), b)\n", "h4 = to_bh(ak.num(data_ww[\"X_cluster\"]), b)\n", + "h5 = to_bh(ak.num(data_tt_pu10[\"X_cluster\"]), b)\n", "\n", "mplhep.histplot(h1, histtype=\"step\", lw=2, label=label_tt)\n", "mplhep.histplot(h2, histtype=\"step\", lw=2, label=label_qq)\n", "mplhep.histplot(h3, histtype=\"step\", lw=2, label=label_zh)\n", "mplhep.histplot(h4, histtype=\"step\", lw=2, label=label_ww)\n", + "mplhep.histplot(h5*10, histtype=\"step\", lw=2, label=label_tt + \" PU10\")\n", + "\n", "plt.xlabel(\"Number of clusters / event\")\n", "plt.ylabel(\"Number of events\")\n", "plt.legend()\n", @@ -216,71 +263,87 @@ { "cell_type": "code", "execution_count": null, - "id": "4ea58410", + "id": "6d0bad8b", "metadata": {}, "outputs": [], "source": [ - "gen_pt1 = ak.flatten(data_tt[\"ygen\"][data_tt[\"ygen\"][:, :, 0]!=0][:, :, 2])\n", - "gen_pt2 = ak.flatten(data_qcd[\"ygen\"][data_qcd[\"ygen\"][:, :, 0]!=0][:, :, 2])\n", - "gen_pt3 = ak.flatten(data_zh[\"ygen\"][data_zh[\"ygen\"][:, :, 0]!=0][:, :, 2])\n", - "gen_pt4 = ak.flatten(data_ww[\"ygen\"][data_ww[\"ygen\"][:, :, 0]!=0][:, :, 2])\n", - "\n", - "b = np.logspace(-2,3,100)\n", - "h1 = to_bh(gen_pt1, b)\n", - "h2 = to_bh(gen_pt2, b)\n", - "h3 = to_bh(gen_pt3, b)\n", - "h4 = to_bh(gen_pt4, b)\n", + "b = np.linspace(0, 15000, 101)\n", "\n", - "fig = plt.figure()\n", - "ax = plt.axes()\n", + "h1 = to_bh(ak.num(data_tt_hits[\"X_hit\"]), b)\n", + "h2 = to_bh(ak.num(data_qq_hits[\"X_hit\"]), b)\n", "\n", "mplhep.histplot(h1, histtype=\"step\", lw=2, label=label_tt)\n", "mplhep.histplot(h2, histtype=\"step\", lw=2, label=label_qq)\n", - "mplhep.histplot(h3, histtype=\"step\", lw=2, label=label_zh)\n", - "mplhep.histplot(h4, histtype=\"step\", lw=2, label=label_ww)\n", - "plt.xscale(\"log\")\n", - "plt.xlabel(\"particle $p_T$ [GeV]\")\n", - "plt.ylabel(\"Number of particles / bin\")\n", - "plt.text(0.03, 0.97, \"stable generator particles\", transform=ax.transAxes, va=\"top\", ha=\"left\")\n", + "\n", + "plt.xlabel(\"Number of calorimeter hits / event\")\n", + "plt.ylabel(\"Number of events\")\n", "plt.legend()\n", - "plt.ylim(0,500*num_files)\n", + "plt.ylim(0,500)\n", "plt.ticklabel_format(axis=\"y\", style=\"sci\", scilimits=(0,0))\n", - "plt.savefig(\"plots/clic/gen_particle_pt.pdf\")" + "plt.savefig(\"plots/clic/num_hits.pdf\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "f0a55d22", - "metadata": {}, + "id": "4ea58410", + "metadata": { + "scrolled": false + }, "outputs": [], "source": [ - "gen_pt1 = ak.flatten(data_tt[\"ycand\"][data_tt[\"ycand\"][:, :, 0]!=0][:, :, 2])\n", - "gen_pt2 = ak.flatten(data_qcd[\"ycand\"][data_qcd[\"ycand\"][:, :, 0]!=0][:, :, 2])\n", - "gen_pt3 = ak.flatten(data_zh[\"ycand\"][data_zh[\"ycand\"][:, :, 0]!=0][:, :, 2])\n", - "gen_pt4 = ak.flatten(data_ww[\"ycand\"][data_ww[\"ycand\"][:, :, 0]!=0][:, :, 2])\n", + "gen_pt1 = 
ak.flatten(data_tt[\"ygen\"][data_tt[\"ygen\"][:, :, 0]!=0][:, :, 2])\n", + "gen_pt2 = ak.flatten(data_qcd[\"ygen\"][data_qcd[\"ygen\"][:, :, 0]!=0][:, :, 2])\n", + "gen_pt3 = ak.flatten(data_zh[\"ygen\"][data_zh[\"ygen\"][:, :, 0]!=0][:, :, 2])\n", + "gen_pt4 = ak.flatten(data_ww[\"ygen\"][data_ww[\"ygen\"][:, :, 0]!=0][:, :, 2])\n", + "gen_pt5 = ak.flatten(data_tt_pu10[\"ygen\"][data_tt_pu10[\"ygen\"][:, :, 0]!=0][:, :, 2])\n", + "\n", + "cand_pt1 = ak.flatten(data_tt[\"ycand\"][data_tt[\"ycand\"][:, :, 0]!=0][:, :, 2])\n", + "cand_pt2 = ak.flatten(data_qcd[\"ycand\"][data_qcd[\"ycand\"][:, :, 0]!=0][:, :, 2])\n", + "cand_pt3 = ak.flatten(data_zh[\"ycand\"][data_zh[\"ycand\"][:, :, 0]!=0][:, :, 2])\n", + "cand_pt4 = ak.flatten(data_ww[\"ycand\"][data_ww[\"ycand\"][:, :, 0]!=0][:, :, 2])\n", + "cand_pt5 = ak.flatten(data_tt_pu10[\"ycand\"][data_tt_pu10[\"ycand\"][:, :, 0]!=0][:, :, 2])\n", + "\n", "\n", "b = np.logspace(-2,3,100)\n", "h1 = to_bh(gen_pt1, b)\n", "h2 = to_bh(gen_pt2, b)\n", "h3 = to_bh(gen_pt3, b)\n", - "# h2 = to_bh(cand_pt, b)\n", + "h4 = to_bh(gen_pt4, b)\n", + "h5 = to_bh(gen_pt5, b)\n", + "\n", + "h1c = to_bh(cand_pt1, b)\n", + "h2c = to_bh(cand_pt2, b)\n", + "h3c = to_bh(cand_pt3, b)\n", + "h4c = to_bh(cand_pt4, b)\n", + "h5c = to_bh(cand_pt5, b)\n", "\n", "fig = plt.figure()\n", "ax = plt.axes()\n", "\n", - "mplhep.histplot(h1, histtype=\"step\", lw=2, label=label_tt)\n", - "mplhep.histplot(h2, histtype=\"step\", lw=2, label=label_qq)\n", - "mplhep.histplot(h3, histtype=\"step\", lw=2, label=label_zh)\n", - "mplhep.histplot(h4, histtype=\"step\", lw=2, label=label_ww)\n", + "prev = mplhep.histplot(h1, histtype=\"step\", lw=1, label=label_tt, ls=\"--\")\n", + "mplhep.histplot(h1c, histtype=\"step\", lw=2, color=prev[0].errorbar.get_children()[0].get_color())\n", + "\n", + "prev = mplhep.histplot(h2, histtype=\"step\", lw=1, label=label_qq, ls=\"--\")\n", + "mplhep.histplot(h2c, histtype=\"step\", lw=2, color=prev[0].errorbar.get_children()[0].get_color())\n", + "\n", + "prev = mplhep.histplot(h3, histtype=\"step\", lw=1, label=label_zh, ls=\"--\")\n", + "mplhep.histplot(h3c, histtype=\"step\", lw=2, color=prev[0].errorbar.get_children()[0].get_color())\n", + "\n", + "prev = mplhep.histplot(h4, histtype=\"step\", lw=1, label=label_ww, ls=\"--\")\n", + "mplhep.histplot(h4c, histtype=\"step\", lw=2, color=prev[0].errorbar.get_children()[0].get_color())\n", + "\n", + "prev = mplhep.histplot(h5, histtype=\"step\", lw=1, label=label_tt + \" PU10\", ls=\"--\")\n", + "mplhep.histplot(h5c, histtype=\"step\", lw=2, color=prev[0].errorbar.get_children()[0].get_color())\n", + "\n", "plt.xscale(\"log\")\n", "plt.xlabel(\"particle $p_T$ [GeV]\")\n", "plt.ylabel(\"Number of particles / bin\")\n", - "plt.text(0.03, 0.97, \"Pandora PF particles\", transform=ax.transAxes, va=\"top\", ha=\"left\")\n", "plt.legend()\n", + "plt.text(0.03, 0.97, \"dashed - stable generator particles\\nsolid - reconstructed Pandora PF particles\", transform=ax.transAxes, va=\"top\", ha=\"left\", fontsize=16)\n", "plt.ylim(0,500*num_files)\n", "plt.ticklabel_format(axis=\"y\", style=\"sci\", scilimits=(0,0))\n", - "plt.savefig(\"plots/clic/pf_particle_pt.pdf\")" + "plt.savefig(\"plots/clic/gen_cand_particle_pt.pdf\")" ] }, { diff --git a/notebooks/paper_plots_2023_ml_training.ipynb b/notebooks/paper_plots_2023_ml_training.ipynb new file mode 100644 index 000000000..0989c597c --- /dev/null +++ b/notebooks/paper_plots_2023_ml_training.ipynb @@ -0,0 +1,700 @@ +{ + "cells": [ + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "from pathlib import Path\n", + "import numpy as np\n", + "import json\n", + "from uncertainties import ufloat\n", + "import glob\n", + "import pandas\n", + "import json\n", + "\n", + "import mplhep\n", + "mplhep.style.use(mplhep.style.CMS)\n", + "\n", + "import sys\n", + "sys.path.append(\"../mlpf/\")\n", + "from plotting.plot_utils import pid_to_text, format_dataset_name" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!mkdir -f plots_mlpf_clic_2023" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_combined_array(histories, key):\n", + " combined_array = np.array(histories[0][key])\n", + " for ii in range(1, len(histories)):\n", + " combined_array = np.vstack([combined_array, np.array(histories[ii][key])])\n", + " return combined_array\n", + "\n", + "\n", + "def get_full_history(hist_dir, verbose=False):\n", + " jsons = list(hist_dir.glob(\"history*.json\"))\n", + " if verbose:\n", + " print(f\"{hist_dir.parent} has {len(jsons)} hisotries\")\n", + " if len(jsons) == 0:\n", + " return {}, 0\n", + " jsons.sort(key=lambda x: int(x.name.split(\"_\")[1].split(\".\")[0])) # sort according to epoch number\n", + "\n", + " # initialize a dict with correct keys and empty lists as values\n", + " with open(jsons[0]) as h:\n", + " keys = json.load(h).keys()\n", + " full_history = {key: [] for key in keys}\n", + "\n", + " # join epoch values to a full history\n", + " for path in jsons:\n", + " with open(path) as h:\n", + " epoch = json.load(h)\n", + " for key in epoch.keys():\n", + " full_history[key].append(epoch[key])\n", + "\n", + " reg_loss = np.sum(\n", + " np.array([full_history[\"{}_loss\".format(l)] for l in [\"energy\", \"pt\", \"eta\", \"sin_phi\", \"cos_phi\", \"charge\"]]),\n", + " axis=0,\n", + " )\n", + " val_reg_loss = np.sum(\n", + " np.array(\n", + " [full_history[\"val_{}_loss\".format(l)] for l in [\"energy\", \"pt\", \"eta\", \"sin_phi\", \"cos_phi\", \"charge\"]]\n", + " ),\n", + " axis=0,\n", + " )\n", + " full_history.update({\"reg_loss\": reg_loss})\n", + " full_history.update({\"val_reg_loss\": val_reg_loss})\n", + "\n", + " return full_history, len(jsons)\n", + "\n", + "\n", + "def get_histories(train_dirs):\n", + " train_dirs = [Path(train_dir) for train_dir in train_dirs]\n", + " histories = []\n", + "\n", + " for train_dir in train_dirs:\n", + " hist, N = get_full_history(hist_dir=train_dir / \"logs/history\")\n", + " if N > 0:\n", + " histories.append(hist)\n", + "\n", + " return histories" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "histories_gnn_before = get_histories(list(glob.glob(\"/home/joosep/particleflow/experiments-archive/hypertuning/clic_gnn_beforeHPO/*\")))\n", + "histories_gnn_after = get_histories(list(glob.glob(\"/home/joosep/particleflow/experiments-archive/hypertuning//clic_gnn_afterHPO/*\")))\n", + "\n", + "histories_tf_before = get_histories(list(glob.glob(\"/home/joosep/particleflow/experiments-archive/hypertuning//clic_transformer_beforeHPO/*\")))\n", + "histories_tf_after = get_histories(list(glob.glob(\"/home/joosep/particleflow/experiments-archive/hypertuning//clic_transformer_afterHPO/*\")))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "ret = {\n", + " \"gnn\": {\n", + " \"before\": {\n", + " \"val_loss\": get_combined_array(histories_gnn_before,\"val_loss\"),\n", + " \"jet_iqr\": get_combined_array(histories_gnn_before,\"val_jet_iqr\"),\n", + " \"met_iqr\": get_combined_array(histories_gnn_before,\"val_met_iqr\"),\n", + " },\n", + " \"after\": {\n", + " \"val_loss\": get_combined_array(histories_gnn_after,\"val_loss\"),\n", + " \"jet_iqr\": get_combined_array(histories_gnn_after,\"val_jet_iqr\"),\n", + " \"met_iqr\": get_combined_array(histories_gnn_after,\"val_met_iqr\"),\n", + " }\n", + " },\n", + " \"transformer\": {\n", + " \"before\": {\n", + " \"val_loss\": get_combined_array(histories_tf_before,\"val_loss\"),\n", + " \"jet_iqr\": get_combined_array(histories_tf_before,\"val_jet_iqr\"),\n", + " \"met_iqr\": get_combined_array(histories_tf_before,\"val_met_iqr\"),\n", + " },\n", + " \"after\": {\n", + " \"val_loss\": get_combined_array(histories_tf_after,\"val_loss\"),\n", + " \"jet_iqr\": get_combined_array(histories_tf_after,\"val_jet_iqr\"),\n", + " \"met_iqr\": get_combined_array(histories_tf_after,\"val_met_iqr\"),\n", + " }\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def sigdigits(mean, std):\n", + " return \"{:L}\".format(ufloat(mean, std))\n", + "\n", + "\n", + "def run_label(x=0.67, y=0.90, fz=12):\n", + " plt.figtext(x, y, r'tt+qq', wrap=False, horizontalalignment='right', fontsize=fz)\n", + "\n", + "\n", + "def cms_label(x0=0.12, y=0.90, s=None, fz=22):\n", + " # plt.figtext(x0, y,'CMS',fontweight='bold', wrap=True, horizontalalignment='left', fontsize=fz)\n", + " # plt.figtext(x0+0.09, y,'Simulation Preliminary', style='italic', wrap=True, horizontalalignment='left', fontsize=fz-3)\n", + " if s is not None:\n", + " t = plt.figtext(x=x0, y=y-0.15, s=s[:-1], fontsize=fz-6)\n", + "\n", + "\n", + "def plot_variance_curve(array_list,\n", + " labels,\n", + " colors_styles,\n", + " skip=0,\n", + " ylim=None,\n", + " save_path=None,\n", + " x=0.45,\n", + " y=0.53,\n", + " loc=None,\n", + " ylabel=None,\n", + " custom_info=None,\n", + " threshold=None\n", + " ):\n", + " \n", + " fig = plt.figure()\n", + " final_means = []\n", + " final_stds = []\n", + " for ii, array in enumerate(array_list):\n", + " print(f\"{labels[ii]} is averaged over {array.shape[0]} trainings.\")\n", + " xx = np.array(range(array.shape[1])) + 1 # Epochs\n", + "\n", + " xx = xx[skip:]\n", + " array = array[:, skip:]\n", + "\n", + " std = np.std(array, axis=0)\n", + " mean = np.mean(array, axis=0)\n", + "\n", + " col, sty = colors_styles[ii]\n", + " plt.plot(xx, mean, label=labels[ii], color=col, ls=sty)\n", + " plt.fill_between(xx, mean - std, mean + std, alpha=0.4, facecolor=col)\n", + "\n", + " # Add individual loss curves\n", + " # plt.plot(np.tile(xx, reps=[10,1]).transpose(), array.transpose(), linewidth=0.2)\n", + "\n", + " print(labels[ii] + \": {:s}\".format(sigdigits(mean[-1], std[-1])))\n", + " final_means.append(mean[-1])\n", + " final_stds.append(std[-1])\n", + "\n", + " if threshold:\n", + " plt.axhline(threshold, ls=\"--\", color=\"black\", label=\"baseline PF\") \n", + " \n", + "# plt.legend(bbox_to_anchor=(0.98, 0.78), loc=\"center right\")\n", + " if loc is not None:\n", + " plt.legend(loc=loc)\n", + " else:\n", + " plt.legend()\n", + " plt.xlabel(\"Epochs\")\n", + " if ylabel:\n", + " plt.ylabel(ylabel)\n", + "\n", + " s=\"Mean and stddev of {:d} 
trainings\\n\".format(array.shape[0])\n", + " for ii, label in enumerate(labels):\n", + " if custom_info:\n", + " s += \"Final {}:${:s}$\\n\".format(label, sigdigits(custom_info[ii]['mean'], custom_info[ii][\"std\"]))\n", + " else:\n", + " s += \"Final {}:${:s}$\\n\".format(label, sigdigits(final_means[ii], final_stds[ii]))\n", + "\n", + " if ylim:\n", + " plt.ylim(top=ylim[1], bottom=ylim[0])\n", + "\n", + " plt.subplots_adjust(left=0.14)\n", + " \n", + " cms_label(x0=x, y=y, s=s, fz=24)\n", + " run_label(x=0.9, y=0.89, fz=22)\n", + " if save_path:\n", + " plt.savefig(Path(save_path).with_suffix('.png'))\n", + " plt.savefig(Path(save_path).with_suffix('.pdf'))\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Figure 4: hypertuning" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "plot_variance_curve([ret[\"gnn\"][\"before\"][\"val_loss\"], ret[\"gnn\"][\"after\"][\"val_loss\"],\n", + " ret[\"transformer\"][\"before\"][\"val_loss\"], ret[\"transformer\"][\"after\"][\"val_loss\"]],\n", + " [\"GNN\", \"GNN-HPO\",\"TF\", \"TF-HPO\"],\n", + " [(\"red\", \"--\"), (\"red\", \"-\"), (\"blue\", \"--\"), (\"blue\", \"-\")],\n", + " skip=1,\n", + " ylim=[0, 20],\n", + " save_path=\"plots_mlpf_clic_2023/loss.png\",\n", + " x=0.25,\n", + " y=0.85,\n", + " ylabel=\"Total validation loss (a.u.)\"\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_variance_curve([ret[\"gnn\"][\"before\"][\"jet_iqr\"], ret[\"gnn\"][\"after\"][\"jet_iqr\"],\n", + " ret[\"transformer\"][\"before\"][\"jet_iqr\"], ret[\"transformer\"][\"after\"][\"jet_iqr\"]],\n", + " [\"GNN\", \"GNN-HPO\",\"TF\", \"TF-HPO\"],\n", + " [(\"red\", \"--\"), (\"red\", \"-\"), (\"blue\", \"--\"), (\"blue\", \"-\")],\n", + " skip=1,\n", + " save_path=\"plots_mlpf_clic_2023/jet_iqr.png\",\n", + " x=0.25,\n", + " y=0.85,\n", + " ylim=(0, 0.3),\n", + " ylabel=r\"jet response IQR\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_variance_curve([ret[\"gnn\"][\"before\"][\"met_iqr\"], ret[\"gnn\"][\"after\"][\"met_iqr\"],\n", + " ret[\"transformer\"][\"before\"][\"met_iqr\"], ret[\"transformer\"][\"after\"][\"met_iqr\"]],\n", + " [\"GNN\", \"GNN-HPO\",\"TF\", \"TF-HPO\"],\n", + " [(\"red\", \"--\"), (\"red\", \"-\"), (\"blue\", \"--\"), (\"blue\", \"-\")],\n", + " skip=1,\n", + " save_path=\"plots_mlpf_clic_2023/met_iqr.png\",\n", + " x=0.25,\n", + " y=0.85,\n", + " ylim=(0, 2),\n", + " ylabel=r\"MET response IQR\"\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Figure 5: scaling of timing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "timing_data_gpu_1 = open(\"/home/joosep/particleflow/experiments-archive/timing/mlpf-gnn/gpu_timing_1.txt\").read()\n", + "timing_data_gpu_2 = open(\"/home/joosep/particleflow/experiments-archive/timing/mlpf-gnn/gpu_timing_2.txt\").read()\n", + "timing_data_gpu_3 = open(\"/home/joosep/particleflow/experiments-archive/timing/mlpf-gnn/gpu_timing_2.txt\").read()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "batches = []\n", + "nptcls = []\n", + "ts = []\n", + "\n", + "for line in timing_data_gpu_1.strip().split(\"\\n\") + timing_data_gpu_2.strip().split(\"\\n\") + 
timing_data_gpu_3.strip().split(\"\\n\"):\n", + " batch, nptcl, t = line.split()\n", + " batches.append(int(batch))\n", + " nptcls.append(int(nptcl))\n", + " ts.append(float(t))\n", + " \n", + "df = pandas.DataFrame()\n", + "df[\"batch\"] = batches\n", + "df[\"nptcl\"] = nptcls\n", + "df[\"t\"] = ts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_agg = df.groupby(['batch', 'nptcl'], as_index=False).agg({'t':['mean','std']})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sf = df_agg[(df_agg[\"batch\"]==16) & (df_agg[\"nptcl\"]==256)][\"t\"][\"mean\"].values[0]/16\n", + "\n", + "plt.plot([256,20*256], [1,20], color=\"black\", ls=\"--\", lw=2, label=\"linear scaling\")\n", + "\n", + "markers = [\"o\", \"^\", \"v\", \"s\", \".\"]\n", + "for batch, elem in df_agg.groupby(\"batch\"):\n", + " m = markers.pop(0)\n", + " plt.errorbar(\n", + " elem[\"nptcl\"],\n", + " elem[\"t\"][\"mean\"]/sf/batch,\n", + " elem[\"t\"][\"std\"]/sf/batch,\n", + " label=\"B={}\".format(batch),\n", + " marker=m)\n", + "plt.legend(loc=\"best\")\n", + "\n", + "plt.legend(loc=\"best\")\n", + "plt.ylabel(\"relative time per event\\nT(N,B) / T(256,16)\")\n", + "plt.xlabel(\"number of input elements per event, N\")\n", + "plt.title(\"MLPF-GNN on 8GB GPU\")\n", + "plt.savefig(\"plots_mlpf_clic_2023/mlpf_gnn.png\")\n", + "plt.savefig(\"plots_mlpf_clic_2023/mlpf_gnn.pdf\")\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#cd experiments-archive/timing/pandora\n", + "#grep TIMER gun_np* | grep MyDDMarlinPandora\n", + "\n", + "timing_data_cpu = \"\"\"\n", + "gun_np_100_1.txt:TIMER.TIMER INFO MyDDMarlinPandora | 13836.000 | 13836.583 | 11041.204 18211.2 2452.37 | 10 | 138.366 |\n", + "gun_np_100_2.txt:TIMER.TIMER INFO MyDDMarlinPandora | 13863.000 | 13861.665 | 11451.365 17552.7 1725.47 | 10 | 138.617 |\n", + "gun_np_100_3.txt:TIMER.TIMER INFO MyDDMarlinPandora | 12829.000 | 12828.634 | 8948.882 15546.3 2064.03 | 10 | 128.286 |\n", + "gun_np_100_4.txt:TIMER.TIMER INFO MyDDMarlinPandora | 14907.000 | 14908.701 | 10741.859 18102.9 2524.65 | 10 | 149.087 |\n", + "gun_np_100_5.txt:TIMER.TIMER INFO MyDDMarlinPandora | 13174.000 | 13173.000 | 9291.507 23493.5 4326.01 | 10 | 131.730 |\n", + "gun_np_100_6.txt:TIMER.TIMER INFO MyDDMarlinPandora | 12383.000 | 12383.906 | 10438.694 14086.3 1404.91 | 10 | 123.839 |\n", + "gun_np_100_7.txt:TIMER.TIMER INFO MyDDMarlinPandora | 12956.000 | 12955.911 | 10072.747 16893.5 2782.78 | 10 | 129.559 |\n", + "gun_np_200_1.txt:TIMER.TIMER INFO MyDDMarlinPandora | 47395.000 | 47397.637 | 35578.270 55634.0 5259.41 | 10 | 473.976 |\n", + "gun_np_200_2.txt:TIMER.TIMER INFO MyDDMarlinPandora | 49098.000 | 49099.168 | 43919.848 55204.7 3801.20 | 10 | 490.992 |\n", + "gun_np_200_3.txt:TIMER.TIMER INFO MyDDMarlinPandora | 47285.000 | 47283.594 | 34430.031 52283.8 5227.59 | 10 | 472.836 |\n", + "gun_np_200_4.txt:TIMER.TIMER INFO MyDDMarlinPandora | 45921.000 | 45919.754 | 31380.205 56530.5 8713.73 | 10 | 459.198 |\n", + "gun_np_200_5.txt:TIMER.TIMER INFO MyDDMarlinPandora | 46047.000 | 46047.980 | 37939.055 57653.4 5632.91 | 10 | 460.480 |\n", + "gun_np_200_6.txt:TIMER.TIMER INFO MyDDMarlinPandora | 46928.000 | 46930.746 | 36946.914 62604.0 8045.00 | 10 | 469.307 |\n", + "gun_np_200_7.txt:TIMER.TIMER INFO MyDDMarlinPandora | 44988.000 | 44989.551 | 39393.648 48307.4 2795.09 | 10 | 449.896 
|\n", + "gun_np_25_1.txt:TIMER.TIMER INFO MyDDMarlinPandora | 1278.000 | 1275.958 | 890.380 1750.9 258.73 | 10 | 12.760 |\n", + "gun_np_25_2.txt:TIMER.TIMER INFO MyDDMarlinPandora | 1611.000 | 1611.889 | 1061.231 2250.4 371.55 | 10 | 16.119 |\n", + "gun_np_25_3.txt:TIMER.TIMER INFO MyDDMarlinPandora | 1511.000 | 1513.813 | 821.452 2323.8 449.85 | 10 | 15.138 |\n", + "gun_np_25_4.txt:TIMER.TIMER INFO MyDDMarlinPandora | 1391.000 | 1393.518 | 884.898 2606.6 475.22 | 10 | 13.935 |\n", + "gun_np_25_5.txt:TIMER.TIMER INFO MyDDMarlinPandora | 1458.000 | 1457.076 | 843.642 2644.6 584.55 | 10 | 14.571 |\n", + "gun_np_25_6.txt:TIMER.TIMER INFO MyDDMarlinPandora | 1705.000 | 1706.138 | 906.869 3667.7 810.81 | 10 | 17.061 |\n", + "gun_np_25_7.txt:TIMER.TIMER INFO MyDDMarlinPandora | 1598.000 | 1598.356 | 1074.817 1955.7 286.26 | 10 | 15.984 |\n", + "gun_np_50_1.txt:TIMER.TIMER INFO MyDDMarlinPandora | 3962.000 | 3962.514 | 2568.144 6292.2 1138.24 | 10 | 39.625 |\n", + "gun_np_50_2.txt:TIMER.TIMER INFO MyDDMarlinPandora | 3771.000 | 3771.321 | 3111.184 4891.2 606.17 | 10 | 37.713 |\n", + "gun_np_50_3.txt:TIMER.TIMER INFO MyDDMarlinPandora | 4266.000 | 4266.345 | 3128.854 5726.2 918.44 | 10 | 42.663 |\n", + "gun_np_50_4.txt:TIMER.TIMER INFO MyDDMarlinPandora | 4008.000 | 4007.004 | 3067.614 6363.1 935.73 | 10 | 40.070 |\n", + "gun_np_50_5.txt:TIMER.TIMER INFO MyDDMarlinPandora | 3833.000 | 3834.250 | 2535.937 4735.0 781.68 | 10 | 38.343 |\n", + "gun_np_50_6.txt:TIMER.TIMER INFO MyDDMarlinPandora | 3658.000 | 3662.497 | 2667.986 5553.0 1050.26 | 10 | 36.625 |\n", + "gun_np_50_7.txt:TIMER.TIMER INFO MyDDMarlinPandora | 3844.000 | 3845.911 | 2562.266 6607.6 1196.24 | 10 | 38.459 |\n", + "\"\"\"\n", + "\n", + "timing_data = {}\n", + "for line in timing_data_cpu.strip().split(\"\\n\"):\n", + " lspl = line.split()\n", + " nptcl = int(lspl[0].split(\":\")[0].split(\"_\")[2])\n", + " dt = float(lspl[4])\n", + " if not (nptcl in timing_data):\n", + " timing_data[nptcl] = []\n", + " timing_data[nptcl].append(dt)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "means = []\n", + "stds = []\n", + "xs = []\n", + "for k in sorted(timing_data.keys()):\n", + " means.append(np.mean(timing_data[k]))\n", + " stds.append(np.std(timing_data[k]))\n", + " xs.append(k)\n", + "xs = np.array(xs)\n", + "means = np.array(means)\n", + "stds = np.array(stds)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.errorbar(xs, means/means[0], stds/means[0], marker=\"o\", label=\"baseline PF\")\n", + "plt.plot([25,200], [1,8], color=\"black\", label=\"linear scaling\", ls=\"--\")\n", + "plt.legend()\n", + "plt.ylabel(\"relative time per event, $T(N)/T(25)$\")\n", + "plt.xlabel(\"number of $\\pi^-$ particles per event, $N$\")\n", + "#plt.xlim(0,100)\n", + "#plt.ylim(0,10)\n", + "plt.title(\"Baseline PF on CPU\")\n", + "plt.savefig(\"plots_mlpf_clic_2023/baseline_pf.png\")\n", + "plt.savefig(\"plots_mlpf_clic_2023/baseline_pf.pdf\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# gpu_scaling_x = np.array([1,2,4,8])\n", + "# gpu_scaling_y = np.array([443.72, 209.913, 92.476, 34.263])\n", + "# gpu_scaling_y /= gpu_scaling_y[0]\n", + "# plt.plot(gpu_scaling_x, 1.0/gpu_scaling_y, lw=0, marker=\"o\")\n", + "# plt.plot([1,8],[1,8], color=\"black\", ls=\"--\", label=\"linear scaling\")\n", + "# plt.xlabel(\"Number of GPUs\")\n", 
+ "# plt.ylabel(\"Training epoch throughput\\nincrease over 1 GPU\")\n", + "# plt.title(\"Scaling test on LUMI HPC: MI250X\")\n", + "# plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Figure 6: mixed precision" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hist_fp32 = get_histories(list(glob.glob(\"/home/joosep/particleflow/experiments/clic_fp32_bs1_*\")))\n", + "hist_bf16 = get_histories(list(glob.glob(\"/home/joosep/particleflow/experiments/clic_bf16_bs1_*\")))\n", + "hist_bf16_bs2 = get_histories(list(glob.glob(\"/home/joosep/particleflow/experiments/clic_bf16_bs2_*\")))\n", + "hist_fp16 = get_histories(list(glob.glob(\"/home/joosep/particleflow/experiments/clic_fp16_bs1_*\")))\n", + "hist_fp16_bs2 = get_histories(list(glob.glob(\"/home/joosep/particleflow/experiments/clic_fp16_bs2_*\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_losses(hist, label, color, ls, marker, scaler=1.0):\n", + " vals = np.stack([hist[i][\"loss\"] for i in range(len(hist))])\n", + " m = np.mean(vals, axis=0)\n", + " s = np.std(vals, axis=0)\n", + " xs = np.arange(len(m))\n", + " plt.errorbar(xs+1, m/scaler, s/scaler, label=label, marker=marker, color=color, ls=ls)\n", + " \n", + "def plot_time(hists, colors):\n", + " ms = []\n", + " ss = []\n", + " for hist in hists:\n", + " vals = np.stack([hist[i][\"time\"] for i in range(len(hist))])\n", + " dt = vals[:, -1] - vals[:, 0]\n", + " m = np.mean(dt, axis=0)\n", + " s = np.std(dt, axis=0)\n", + " ms.append(m)\n", + " ss.append(s)\n", + " ms = np.array(ms)\n", + " ss = np.array(ss)\n", + " ms0 = ms[0]\n", + " ms /= ms0\n", + " ss /= ms0\n", + " plt.bar(range(len(ms)), ms, color=colors)\n", + " plt.errorbar(range(len(ms)), ms, ss, linewidth=0, elinewidth=2.0, color=\"black\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "loss_scaler = np.mean([hist_fp32[i][\"loss\"][-1] for i in range(len(hist_fp32))])\n", + "\n", + "plot_losses(hist_fp32, \"FP32\", \"green\", \"-\", \"o\", loss_scaler)\n", + "plot_losses(hist_bf16, \"BF16\", \"blue\", \"-\", \"^\", loss_scaler)\n", + "plot_losses(hist_bf16_bs2, \"BF16, Bx2\", \"blue\", \"--\", \"^\", loss_scaler)\n", + "#plot_losses(hist_fp16, \"FP16\", \"red\", \"-\", \"v\", loss_scaler)\n", + "#plot_losses(hist_fp16_bs2, \"FP16, Bx2\", \"red\", \"--\", \"v\", loss_scaler)\n", + "\n", + "#plt.yscale(\"log\")\n", + "plt.axhline(1.0, color=\"black\", ls=\"--\")\n", + "plt.ylabel(\"Relative loss wrt. FP32 @ epoch 10\")\n", + "plt.xlabel(\"Training epoch\")\n", + "plt.ylim(0.5,1.5)\n", + "plt.xlim(2,10)\n", + "plt.legend()\n", + "plt.savefig(\"plots_mlpf_clic_2023/mixed_precision_loss_scaling.png\")\n", + "plt.savefig(\"plots_mlpf_clic_2023/mixed_precision_loss_scaling.pdf\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_time([hist_fp32, hist_fp16, hist_fp16_bs2, hist_bf16, hist_bf16_bs2], [\"green\", \"red\", \"red\", \"blue\", \"blue\"])\n", + "plt.xticks([0,1,2,3,4], [\"FP32\", \"FP16\", \"FP16, Bx2\", \"BF16\", \"BF16, Bx2\"])\n", + "plt.axvline(2.5, color=\"black\", ls=\"--\")\n", + "plt.text(2.75, 2.15, \"BF16 ops not yet fully\\nsupported on GPU by TF\", fontsize=16)\n", + "plt.ylim(0,2.5)\n", + "plt.ylabel(\"Relative training time wrt. 
FP32\")\n", + "plt.savefig(\"plots_mlpf_clic_2023/mixed_precision_timing.png\")\n", + "plt.savefig(\"plots_mlpf_clic_2023/mixed_precision_timing.pdf\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hists = sorted(glob.glob(\"/home/joosep/particleflow/experiments-archive/hits/clic-hits-ln_*/logs/history/history_*.json\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# loss = []\n", + "# val_loss = []\n", + "\n", + "# for hist in hists:\n", + "# val_loss.append(json.load(open(hist))[\"val_loss\"])\n", + "# loss.append(json.load(open(hist))[\"loss\"])\n", + "\n", + "# loss = np.array(loss)\n", + "# val_loss = np.array(val_loss)\n", + "\n", + "# plt.plot(loss, label=\"train\", marker=\"o\")\n", + "# plt.plot(val_loss, label=\"val\", marker=\"o\")\n", + "# plt.legend(title=format_dataset_name(\"clic_edm_ttbar_pf\"))\n", + "# plt.ylim(0.0,0.3)\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "jet_iqr_vals = []\n", + "met_iqr_vals = []\n", + "\n", + "for hist in hists:\n", + " jet_iqr_vals.append(json.load(open(hist))[\"val_jet_iqr\"])\n", + " met_iqr_vals.append(json.load(open(hist))[\"val_met_iqr\"])\n", + "\n", + "jet_iqr_vals = np.array(jet_iqr_vals)\n", + "met_iqr_vals = np.array(met_iqr_vals)\n", + "\n", + "plt.plot(jet_iqr_vals, label=\"jet response IQR\", marker=\"o\")\n", + "plt.plot(met_iqr_vals/5, label=\"MET response IQR / 5\", marker=\"o\")\n", + "plt.legend(title=format_dataset_name(\"clic_edm_ttbar_pf\"))\n", + "plt.ylim(0,0.2)\n", + "plt.xlim(0, 12)\n", + "plt.ylabel(\"Response IQR (a.u.)\")\n", + "plt.xlabel(\"Training epoch\")\n", + "#plt.title(\"Training on tracks and calorimeter hits\")\n", + "plt.savefig(\"plots_mlpf_clic_2023/hitbased_res_iqr.png\")\n", + "plt.savefig(\"plots_mlpf_clic_2023/hitbased_res_iqr.pdf\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "jet_med_vals = []\n", + "met_med_vals = []\n", + "\n", + "for hist in hists:\n", + " jet_med_vals.append(json.load(open(hist))[\"val_jet_med\"])\n", + " met_med_vals.append(json.load(open(hist))[\"val_met_med\"])\n", + "\n", + "jet_med_vals = np.array(jet_med_vals)\n", + "met_med_vals = np.array(met_med_vals)\n", + "\n", + "plt.plot(jet_med_vals, label=\"jet response median\", marker=\"o\")\n", + "plt.plot(met_med_vals, label=\"MET response median\", marker=\"o\")\n", + "plt.legend(title=format_dataset_name(\"clic_edm_ttbar_pf\"))\n", + "plt.axhline(1.0, color=\"black\", ls=\"--\")\n", + "plt.ylim(0.8,1.2)\n", + "plt.xlim(0, 12)\n", + "\n", + "plt.ylabel(\"Response median (a.u.)\")\n", + "plt.xlabel(\"Training epoch\")\n", + "#plt.title(\"Training on tracks and calorimeter hits\")\n", + "plt.savefig(\"plots_mlpf_clic_2023/hitbased_res_med.png\")\n", + "plt.savefig(\"plots_mlpf_clic_2023/hitbased_res_med.pdf\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + 
"version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/parameters/clic-hits-ln.yaml b/parameters/clic-hits-ln.yaml new file mode 100644 index 000000000..c515b7993 --- /dev/null +++ b/parameters/clic-hits-ln.yaml @@ -0,0 +1,285 @@ +backend: tensorflow + +dataset: + schema: clic + target_particles: gen + num_input_features: 15 + #(none=0, track=1, hit=2) + num_input_classes: 3 + #(none=0, ch.had=1, n.had=2, gamma=3, e=4, mu=5) + num_output_classes: 6 + cls_weight_by_pt: no + reg_weight_by_pt: no + +loss: + classification_loss_coef: 100.0 + charge_loss_coef: 1.0 + pt_loss_coef: 10.0 + eta_loss_coef: 10.0 + sin_phi_loss_coef: 10.0 + cos_phi_loss_coef: 10.0 + energy_loss_coef: 10.0 + cls_loss: + type: SigmoidFocalCrossEntropy + from_logits: yes + gamma: 2.0 + charge_loss: + type: CategoricalCrossentropy + from_logits: yes + energy_loss: + type: Huber + pt_loss: + type: Huber + sin_phi_loss: + type: Huber + cos_phi_loss: + type: Huber + eta_loss: + type: Huber + event_loss: none #none, sliced_wasserstein, gen_jet_logcosh, gen_jet_mse, hist_2d + event_loss_coef: 0.0 + met_loss: none + met_loss_coef: 1.0 + +tensorflow: + eager: no + +setup: + train: yes + weights: + weights_config: + lr: 0.0001 + num_epochs: 20 + dtype: float32 + trainable: + lr_schedule: cosinedecay # cosinedecay, exponentialdecay, onecycle, none + optimizer: adam # adam, adamw, sgd + horovod_enabled: no + cls_output_as_logits: yes + small_graph_opt: no + normalizer_cache: parameters/clic_hits_normalizations + +batching: + # if enabled, use dynamic batching instead of the fixed-size batches configured in batch_per_gpu + bucket_by_sequence_length: yes + # these sizes were sort of tuned for an 8GB GPU + # - max_sequence_length, batch_size_per_gpu + + bucket_batch_sizes: auto + # use this batch multiplier to increase all batch sizes by a constant factor + batch_multiplier: 1 + +optimizer: + adam: + amsgrad: no + adamw: + amsgrad: yes + weight_decay: 0.001 + sgd: + nesterov: no + momentum: 0.9 + +# LR Schedules +exponentialdecay: + decay_steps: 2000 + decay_rate: 0.99 + staircase: yes +onecycle: + mom_min: 0.85 + mom_max: 0.95 + warmup_ratio: 0.3 + div_factor: 25.0 + final_div: 100000.0 + +parameters: + model: gnn_dense + input_encoding: clic + node_update_mode: additive + do_node_encoding: yes + node_encoding_hidden_dim: 512 + + combined_graph_layer: + bin_size: 256 + max_num_bins: 200 + distance_dim: 128 + layernorm: yes + dropout: 0.2 + dist_activation: elu + ffn_dist_num_layers: 2 + ffn_dist_hidden_dim: 128 + + # MPNN + #kernel: + # type: NodePairTrainableKernel + # activation: elu + #num_node_messages: 1 + #node_message: + # type: NodeMessageLearnable + # output_dim: 64 + # hidden_dim: 128 + # num_layers: 2 + # activation: elu + #activation: elu + + # GCN + kernel: + type: NodePairGaussianKernel + dist_mult: 0.1 + clip_value_low: 0.0 + dist_norm: l2 + num_node_messages: 2 + node_message: + type: GHConvDense + output_dim: 512 + activation: elu + #if this is enabled, it will break float16 training + normalize_degrees: no + activation: elu + + num_graph_layers_id: 6 + num_graph_layers_reg: 6 + output_decoding: + activation: elu + regression_use_classification: yes + dropout: 0.2 + + pt_as_correction: no + + id_dim_decrease: yes + charge_dim_decrease: yes + pt_dim_decrease: yes + eta_dim_decrease: yes + phi_dim_decrease: yes + energy_dim_decrease: yes + + id_hidden_dim: 256 + charge_hidden_dim: 256 + pt_hidden_dim: 256 + eta_hidden_dim: 256 + phi_hidden_dim: 256 + energy_hidden_dim: 256 + + 
id_num_layers: 2 + charge_num_layers: 2 + pt_num_layers: 2 + eta_num_layers: 2 + phi_num_layers: 2 + energy_num_layers: 2 + layernorm: yes + mask_reg_cls0: yes + + skip_connection: no + debug: no + +timing: + num_ev: 100 + num_iter: 3 + +callbacks: + checkpoint: + monitor: "val_loss" + plot_freq: 1 + tensorboard: + dump_history: yes + hist_freq: 1 + +hypertune: + algorithm: hyperband # random, bayesian, hyperband + random: + objective: val_loss + max_trials: 100 + bayesian: + objective: val_loss + max_trials: 100 + num_initial_points: 2 + hyperband: + objective: val_loss + max_epochs: 10 + factor: 3 + iterations: 1 + executions_per_trial: 1 + +raytune: + local_dir: # Note: please specify an absolute path + sched: asha # asha, hyperband + search_alg: # bayes, bohb, hyperopt, nevergrad, scikit + default_metric: "val_loss" + default_mode: "min" + # Tune schedule specific parameters + asha: + max_t: 200 + reduction_factor: 4 + brackets: 1 + grace_period: 10 + hyperband: + max_t: 200 + reduction_factor: 4 + hyperopt: + n_random_steps: 10 + nevergrad: + n_random_steps: 10 + +train_test_datasets: + physical: + batch_per_gpu: 1 + datasets: + - clic_edm_ttbar_hits_pf + - clic_edm_qq_hits_pf + gun: + batch_per_gpu: 5 + datasets: + - clic_edm_single_kaon0l_hits_pf + - clic_edm_single_pi_hits_pf + - clic_edm_single_pi0_hits_pf + - clic_edm_single_neutron_hits_pf + - clic_edm_single_electron_hits_pf + - clic_edm_single_muon_hits_pf + +validation_dataset: clic_edm_ttbar_hits_pf +validation_batch_size: 20 +validation_num_events: 2000 + +evaluation_datasets: + clic_edm_ttbar_hits_pf: + batch_size: 10 + num_events: 10000 + +evaluation_jet_algo: ee_genkt_algorithm + +datasets: + clic_edm_ttbar_hits_pf: + version: 1.2.0 + data_dir: + manual_dir: + clic_edm_qq_hits_pf: + version: 1.2.0 + data_dir: + manual_dir: + clic_edm_single_kaon0l_hits_pf: + version: 1.2.0 + data_dir: + manual_dir: + clic_edm_single_gamma_hits_pf: + version: 1.2.0 + data_dir: + manual_dir: + clic_edm_single_pi_hits_pf: + version: 1.2.0 + data_dir: + manual_dir: + clic_edm_single_pi0_hits_pf: + version: 1.2.0 + data_dir: + manual_dir: + clic_edm_single_neutron_hits_pf: + version: 1.2.0 + data_dir: + manual_dir: + clic_edm_single_electron_hits_pf: + version: 1.2.0 + data_dir: + manual_dir: + clic_edm_single_muon_hits_pf: + version: 1.2.0 + data_dir: + manual_dir: diff --git a/parameters/clic-hits.yaml b/parameters/clic-hits.yaml index 4dfe43f0c..88ca88362 100644 --- a/parameters/clic-hits.yaml +++ b/parameters/clic-hits.yaml @@ -3,7 +3,7 @@ backend: tensorflow dataset: schema: clic target_particles: gen - num_input_features: 18 + num_input_features: 15 #(none=0, track=1, hit=2) num_input_classes: 3 #(none=0, ch.had=1, n.had=2, gamma=3, e=4, mu=5) @@ -19,8 +19,12 @@ loss: sin_phi_loss_coef: 10.0 cos_phi_loss_coef: 10.0 energy_loss_coef: 10.0 + cls_loss: + type: SigmoidFocalCrossEntropy + from_logits: yes + gamma: 2.0 charge_loss: - type: BinaryCrossentropy + type: CategoricalCrossentropy from_logits: yes energy_loss: type: Huber @@ -44,37 +48,21 @@ setup: train: yes weights: weights_config: - lr: 0.001 + lr: 0.0005 num_epochs: 20 dtype: float32 trainable: - classification_loss_type: sigmoid_focal_crossentropy lr_schedule: cosinedecay # cosinedecay, exponentialdecay, onecycle, none optimizer: adam # adam, adamw, sgd horovod_enabled: no cls_output_as_logits: yes small_graph_opt: no + normalizer_cache: parameters/clic_hits_normalizations batching: # if enabled, use dynamic batching instead of the fixed-size batches configured in batch_per_gpu 
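+  # "auto" derives the bucket list from the model bin_size (see load_and_interleave):
+  # bucket boundaries at bin_size*(n+1)+1 for n in 0..74, with a per-bucket batch size
+  # of roughly (75*bin_size)/(n+1)//125, so buckets holding longer events use smaller batches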
- bucket_by_sequence_length: no - # these sizes were sort of tuned for an 8GB GPU - # - max_sequence_length, batch_size_per_gpu - -#on 8GB GPU - bucket_batch_sizes: - - 25, 200 - - 50, 100 - - 100, 50 - - 200, 20 - - 500, 10 - - 1000, 5 - - 2000, 3 - - 3000, 2 - - 4000, 2 - - 5000, 1 - - 6000, 1 - - inf, 1 + bucket_by_sequence_length: yes + bucket_batch_sizes: auto # use this batch multiplier to increase all batch sizes by a constant factor batch_multiplier: 1 @@ -105,7 +93,7 @@ parameters: input_encoding: clic node_update_mode: additive do_node_encoding: yes - node_encoding_hidden_dim: 256 + node_encoding_hidden_dim: 512 combined_graph_layer: bin_size: 256 @@ -114,8 +102,8 @@ parameters: layernorm: yes dropout: 0.0 dist_activation: elu - ffn_dist_num_layers: 3 - ffn_dist_hidden_dim: 64 + ffn_dist_num_layers: 2 + ffn_dist_hidden_dim: 128 # MPNN #kernel: @@ -139,7 +127,7 @@ parameters: num_node_messages: 2 node_message: type: GHConvDense - output_dim: 256 + output_dim: 512 activation: elu #if this is enabled, it will break float16 training normalize_degrees: no @@ -161,19 +149,19 @@ parameters: phi_dim_decrease: yes energy_dim_decrease: yes - id_hidden_dim: 128 - charge_hidden_dim: 128 - pt_hidden_dim: 128 - eta_hidden_dim: 128 - phi_hidden_dim: 128 - energy_hidden_dim: 128 + id_hidden_dim: 256 + charge_hidden_dim: 256 + pt_hidden_dim: 256 + eta_hidden_dim: 256 + phi_hidden_dim: 256 + energy_hidden_dim: 256 - id_num_layers: 1 - charge_num_layers: 1 - pt_num_layers: 1 - eta_num_layers: 1 - phi_num_layers: 1 - energy_num_layers: 1 + id_num_layers: 2 + charge_num_layers: 2 + pt_num_layers: 2 + eta_num_layers: 2 + phi_num_layers: 2 + energy_num_layers: 2 layernorm: yes mask_reg_cls0: yes @@ -234,9 +222,18 @@ train_test_datasets: datasets: - clic_edm_ttbar_hits_pf - clic_edm_qq_hits_pf + gun: + batch_per_gpu: 5 + datasets: + - clic_edm_single_kaon0l_hits_pf + - clic_edm_single_pi_hits_pf + - clic_edm_single_pi0_hits_pf + - clic_edm_single_neutron_hits_pf + - clic_edm_single_electron_hits_pf + - clic_edm_single_muon_hits_pf validation_dataset: clic_edm_ttbar_hits_pf -validation_batch_size: 10 +validation_batch_size: 20 validation_num_events: 2000 evaluation_datasets: @@ -248,10 +245,38 @@ evaluation_jet_algo: ee_genkt_algorithm datasets: clic_edm_ttbar_hits_pf: - version: 1.0.0 + version: 1.2.0 data_dir: manual_dir: clic_edm_qq_hits_pf: - version: 1.0.0 + version: 1.2.0 + data_dir: + manual_dir: + clic_edm_single_kaon0l_hits_pf: + version: 1.2.0 + data_dir: + manual_dir: + clic_edm_single_gamma_hits_pf: + version: 1.2.0 + data_dir: + manual_dir: + clic_edm_single_pi_hits_pf: + version: 1.2.0 + data_dir: + manual_dir: + clic_edm_single_pi0_hits_pf: + version: 1.2.0 + data_dir: + manual_dir: + clic_edm_single_neutron_hits_pf: + version: 1.2.0 + data_dir: + manual_dir: + clic_edm_single_electron_hits_pf: + version: 1.2.0 + data_dir: + manual_dir: + clic_edm_single_muon_hits_pf: + version: 1.2.0 data_dir: manual_dir: diff --git a/parameters/clic.yaml b/parameters/clic.yaml index 9fab7e31f..58f0ecd2f 100644 --- a/parameters/clic.yaml +++ b/parameters/clic.yaml @@ -19,8 +19,12 @@ loss: sin_phi_loss_coef: 10.0 cos_phi_loss_coef: 10.0 energy_loss_coef: 10.0 + cls_loss: + type: SigmoidFocalCrossEntropy + from_logits: yes + gamma: 2.0 charge_loss: - type: BinaryCrossentropy + type: CategoricalCrossentropy from_logits: yes energy_loss: type: Huber @@ -48,12 +52,12 @@ setup: num_epochs: 100 dtype: float32 trainable: - classification_loss_type: sigmoid_focal_crossentropy - lr_schedule: cosinedecay # 
cosinedecay, exponentialdecay, onecycle, none + lr_schedule: cosinedecay # cosinedecay, exponentialdecay, onecycle, none optimizer: adam # adam, adamw, sgd horovod_enabled: no cls_output_as_logits: yes - small_graph_opt: yes + small_graph_opt: no + normalizer_cache: parameters/clic_normalizations batching: # if enabled, use dynamic batching instead of the fixed-size batches configured in batch_per_gpu @@ -260,22 +264,22 @@ evaluation_jet_algo: ee_genkt_algorithm datasets: clic_edm_ttbar_pf: - version: 1.3.0 + version: 1.4.0 data_dir: manual_dir: clic_edm_ttbar_pu10_pf: - version: 1.3.0 + version: 1.4.0 data_dir: manual_dir: clic_edm_qq_pf: - version: 1.3.1 + version: 1.4.0 data_dir: manual_dir: clic_edm_ww_fullhad_pf: - version: 1.3.0 + version: 1.4.0 data_dir: manual_dir: clic_edm_zh_tautau_pf: - version: 1.3.0 + version: 1.4.0 data_dir: manual_dir: diff --git a/parameters/cms-gen.yaml b/parameters/cms-gen.yaml index c27e2f4ec..382e30a34 100644 --- a/parameters/cms-gen.yaml +++ b/parameters/cms-gen.yaml @@ -31,8 +31,12 @@ loss: sin_phi_loss_coef: 1.0 cos_phi_loss_coef: 1.0 energy_loss_coef: 1.0 + cls_loss: + type: SigmoidFocalCrossEntropy + from_logits: yes + gamma: 2.0 charge_loss: - type: BinaryCrossentropy + type: CategoricalCrossentropy from_logits: yes energy_loss: type: MeanSquaredLogarithmicError @@ -60,16 +64,17 @@ setup: weights: weights_config: lr: 0.0001 - num_events_validation: 500 num_epochs: 50 dtype: float32 trainable: - classification_loss_type: sigmoid_focal_crossentropy lr_schedule: none # cosinedecay, exponentialdecay, onecycle, none optimizer: adam # adam, adamw, sgd horovod_enabled: no cls_output_as_logits: yes - small_graph_opt: yes + #if enabled, do not create LSH bins for small graphs (less than one bin size) + #enabling results in some speedup for gun samples, but must be disabled for XLA + small_graph_opt: no + normalizer_cache: parameters/cms_normalizations batching: # if enabled, use dynamic batching instead of the fixed-size batches configured in batch_per_gpu diff --git a/parameters/delphes.yaml b/parameters/delphes.yaml index 919710900..0ebf8e673 100644 --- a/parameters/delphes.yaml +++ b/parameters/delphes.yaml @@ -19,8 +19,12 @@ loss: sin_phi_loss_coef: 1.0 cos_phi_loss_coef: 1.0 energy_loss_coef: 1.0 + cls_loss: + type: SigmoidFocalCrossEntropy + from_logits: yes + gamma: 2.0 charge_loss: - type: BinaryCrossentropy + type: CategoricalCrossentropy from_logits: yes energy_loss: type: Huber @@ -50,19 +54,15 @@ setup: weights: weights_config: lr: 1e-5 - num_events_train: 45000 - num_events_test: 5000 - num_events_validation: 10 num_epochs: 50 - num_val_files: 5 dtype: float32 trainable: - classification_loss_type: categorical_cross_entropy lr_schedule: exponentialdecay # exponentialdecay, onecycle optimizer: adam # adam, adamw, sgd horovod_enabled: False - cls_output_as_logits: False + cls_output_as_logits: yes small_graph_opt: no + normalizer_cache: parameters/delphes_normalizations batching: # if enabled, use dynamic batching instead of the fixed-size batches configured in batch_per_gpu diff --git a/parameters/clic-finetune.yaml b/parameters/mixedprecision/clic_bf16_bs1.yaml similarity index 72% rename from parameters/clic-finetune.yaml rename to parameters/mixedprecision/clic_bf16_bs1.yaml index 2efe15121..de6563abb 100644 --- a/parameters/clic-finetune.yaml +++ b/parameters/mixedprecision/clic_bf16_bs1.yaml @@ -13,14 +13,18 @@ dataset: loss: classification_loss_coef: 200.0 - charge_loss_coef: 0.0001 - pt_loss_coef: 0.0001 - eta_loss_coef: 0.0001 - 
sin_phi_loss_coef: 0.0001 - cos_phi_loss_coef: 0.0001 - energy_loss_coef: 0.0001 + charge_loss_coef: 1.0 + pt_loss_coef: 10.0 + eta_loss_coef: 10.0 + sin_phi_loss_coef: 10.0 + cos_phi_loss_coef: 10.0 + energy_loss_coef: 10.0 + cls_loss: + type: SigmoidFocalCrossEntropy + from_logits: yes + gamma: 2.0 charge_loss: - type: BinaryCrossentropy + type: CategoricalCrossentropy from_logits: yes energy_loss: type: Huber @@ -42,41 +46,21 @@ tensorflow: setup: train: yes - weights: experiments/clic_20230412_155159_717751.gpu1.local/weights/weights-100-9.948204.hdf5 + weights: weights_config: lr: 0.0005 - num_epochs: 200 - dtype: float32 + num_epochs: 100 + dtype: bfloat16 trainable: - classification_loss_type: sigmoid_focal_crossentropy - lr_schedule: cosinedecay # cosinedecay, exponentialdecay, onecycle, none + lr_schedule: cosinedecay # cosinedecay, exponentialdecay, onecycle, none optimizer: adam # adam, adamw, sgd horovod_enabled: no cls_output_as_logits: yes - small_graph_opt: yes + small_graph_opt: no + normalizer_cache: parameters/clic_normalizations batching: - # if enabled, use dynamic batching instead of the fixed-size batches configured in batch_per_gpu - bucket_by_sequence_length: yes - # these sizes were sort of tuned for an 8GB GPU - # - max_sequence_length, batch_size_per_gpu - -#on 8GB GPU - bucket_batch_sizes: - - 25, 200 - - 50, 100 - - 100, 50 - - 200, 20 - - 500, 10 - - 1000, 5 - - 2000, 3 - - 3000, 2 - - 4000, 2 - - 5000, 1 - - 6000, 1 - - inf, 1 - # use this batch multiplier to increase all batch sizes by a constant factor - batch_multiplier: 1 + bucket_by_sequence_length: no optimizer: adam: @@ -108,14 +92,14 @@ parameters: node_encoding_hidden_dim: 256 combined_graph_layer: - bin_size: 640 + bin_size: 256 max_num_bins: 200 distance_dim: 128 layernorm: yes dropout: 0.0 dist_activation: elu - ffn_dist_num_layers: 2 - ffn_dist_hidden_dim: 128 + ffn_dist_num_layers: 3 + ffn_dist_hidden_dim: 64 # MPNN #kernel: @@ -145,8 +129,8 @@ parameters: normalize_degrees: no activation: elu - num_graph_layers_id: 5 - num_graph_layers_reg: 5 + num_graph_layers_id: 6 + num_graph_layers_reg: 6 output_decoding: activation: elu regression_use_classification: yes @@ -161,19 +145,19 @@ parameters: phi_dim_decrease: yes energy_dim_decrease: yes - id_hidden_dim: 512 - charge_hidden_dim: 256 - pt_hidden_dim: 256 - eta_hidden_dim: 256 - phi_hidden_dim: 256 - energy_hidden_dim: 256 + id_hidden_dim: 128 + charge_hidden_dim: 128 + pt_hidden_dim: 128 + eta_hidden_dim: 128 + phi_hidden_dim: 128 + energy_hidden_dim: 128 - id_num_layers: 3 - charge_num_layers: 2 - pt_num_layers: 2 - eta_num_layers: 2 - phi_num_layers: 2 - energy_num_layers: 2 + id_num_layers: 1 + charge_num_layers: 1 + pt_num_layers: 1 + eta_num_layers: 1 + phi_num_layers: 1 + energy_num_layers: 1 layernorm: yes mask_reg_cls0: yes @@ -230,11 +214,11 @@ raytune: train_test_datasets: physical: - batch_per_gpu: 1 + batch_per_gpu: 20 datasets: - clic_edm_ttbar_pf - - clic_edm_qq_pf +do_validation_callback: false validation_dataset: clic_edm_ttbar_pf validation_batch_size: 100 validation_num_events: 2000 @@ -260,22 +244,22 @@ evaluation_jet_algo: ee_genkt_algorithm datasets: clic_edm_ttbar_pf: - version: 1.3.0 + version: 1.4.0 data_dir: manual_dir: clic_edm_ttbar_pu10_pf: - version: 1.3.0 + version: 1.4.0 data_dir: manual_dir: clic_edm_qq_pf: - version: 1.3.1 + version: 1.4.0 data_dir: manual_dir: clic_edm_ww_fullhad_pf: - version: 1.3.0 + version: 1.4.0 data_dir: manual_dir: clic_edm_zh_tautau_pf: - version: 1.3.0 + version: 1.4.0 data_dir: 
manual_dir: diff --git a/parameters/mixedprecision/clic_bf16_bs2.yaml b/parameters/mixedprecision/clic_bf16_bs2.yaml new file mode 100644 index 000000000..81024750b --- /dev/null +++ b/parameters/mixedprecision/clic_bf16_bs2.yaml @@ -0,0 +1,265 @@ +backend: tensorflow + +dataset: + schema: clic + target_particles: gen + num_input_features: 17 + #(none=0, track=1, cluster=2) + num_input_classes: 3 + #(none=0, ch.had=1, n.had=2, gamma=3, e=4, mu=5) + num_output_classes: 6 + cls_weight_by_pt: no + reg_weight_by_pt: no + +loss: + classification_loss_coef: 200.0 + charge_loss_coef: 1.0 + pt_loss_coef: 10.0 + eta_loss_coef: 10.0 + sin_phi_loss_coef: 10.0 + cos_phi_loss_coef: 10.0 + energy_loss_coef: 10.0 + cls_loss: + type: SigmoidFocalCrossEntropy + from_logits: yes + gamma: 2.0 + charge_loss: + type: CategoricalCrossentropy + from_logits: yes + energy_loss: + type: Huber + pt_loss: + type: Huber + sin_phi_loss: + type: Huber + cos_phi_loss: + type: Huber + eta_loss: + type: Huber + event_loss: none #none, sliced_wasserstein, gen_jet_logcosh, gen_jet_mse, hist_2d + event_loss_coef: 0.0 + met_loss: none + met_loss_coef: 1.0 + +tensorflow: + eager: no + +setup: + train: yes + weights: + weights_config: + lr: 0.001 + num_epochs: 100 + dtype: bfloat16 + trainable: + lr_schedule: cosinedecay # cosinedecay, exponentialdecay, onecycle, none + optimizer: adam # adam, adamw, sgd + horovod_enabled: no + cls_output_as_logits: yes + small_graph_opt: no + normalizer_cache: parameters/clic_normalizations + +batching: + bucket_by_sequence_length: no + +optimizer: + adam: + amsgrad: no + adamw: + amsgrad: yes + weight_decay: 0.001 + sgd: + nesterov: no + momentum: 0.9 + +# LR Schedules +exponentialdecay: + decay_steps: 2000 + decay_rate: 0.99 + staircase: yes +onecycle: + mom_min: 0.85 + mom_max: 0.95 + warmup_ratio: 0.3 + div_factor: 25.0 + final_div: 100000.0 + +parameters: + model: gnn_dense + input_encoding: clic + node_update_mode: additive + do_node_encoding: yes + node_encoding_hidden_dim: 256 + + combined_graph_layer: + bin_size: 256 + max_num_bins: 200 + distance_dim: 128 + layernorm: yes + dropout: 0.0 + dist_activation: elu + ffn_dist_num_layers: 3 + ffn_dist_hidden_dim: 64 + + # MPNN + #kernel: + # type: NodePairTrainableKernel + # activation: elu + #num_node_messages: 1 + #node_message: + # type: NodeMessageLearnable + # output_dim: 64 + # hidden_dim: 128 + # num_layers: 2 + # activation: elu + #activation: elu + + # GCN + kernel: + type: NodePairGaussianKernel + dist_mult: 0.1 + clip_value_low: 0.0 + dist_norm: l2 + num_node_messages: 2 + node_message: + type: GHConvDense + output_dim: 256 + activation: elu + #if this is enabled, it will break float16 training + normalize_degrees: no + activation: elu + + num_graph_layers_id: 6 + num_graph_layers_reg: 6 + output_decoding: + activation: elu + regression_use_classification: yes + dropout: 0.1 + + pt_as_correction: no + + id_dim_decrease: yes + charge_dim_decrease: yes + pt_dim_decrease: yes + eta_dim_decrease: yes + phi_dim_decrease: yes + energy_dim_decrease: yes + + id_hidden_dim: 128 + charge_hidden_dim: 128 + pt_hidden_dim: 128 + eta_hidden_dim: 128 + phi_hidden_dim: 128 + energy_hidden_dim: 128 + + id_num_layers: 1 + charge_num_layers: 1 + pt_num_layers: 1 + eta_num_layers: 1 + phi_num_layers: 1 + energy_num_layers: 1 + layernorm: yes + mask_reg_cls0: yes + + skip_connection: no + debug: no + +timing: + num_ev: 100 + num_iter: 3 + +callbacks: + checkpoint: + monitor: "val_loss" + plot_freq: 1 + tensorboard: + dump_history: yes + hist_freq: 1 + 
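+# NOTE: the hypertune/raytune sections below mirror the base CLIC config and are
+# presumably not exercised by these fixed-configuration mixed-precision runs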
+hypertune: + algorithm: hyperband # random, bayesian, hyperband + random: + objective: val_loss + max_trials: 100 + bayesian: + objective: val_loss + max_trials: 100 + num_initial_points: 2 + hyperband: + objective: val_loss + max_epochs: 10 + factor: 3 + iterations: 1 + executions_per_trial: 1 + +raytune: + local_dir: # Note: please specify an absolute path + sched: asha # asha, hyperband + search_alg: # bayes, bohb, hyperopt, nevergrad, scikit + default_metric: "val_loss" + default_mode: "min" + # Tune schedule specific parameters + asha: + max_t: 200 + reduction_factor: 4 + brackets: 1 + grace_period: 10 + hyperband: + max_t: 200 + reduction_factor: 4 + hyperopt: + n_random_steps: 10 + nevergrad: + n_random_steps: 10 + +train_test_datasets: + physical: + batch_per_gpu: 40 + datasets: + - clic_edm_ttbar_pf + +do_validation_callback: false +validation_dataset: clic_edm_ttbar_pf +validation_batch_size: 100 +validation_num_events: 2000 + +evaluation_datasets: + clic_edm_qq_pf: + batch_size: 50 + num_events: -1 + clic_edm_ttbar_pf: + batch_size: 50 + num_events: -1 + clic_edm_ttbar_pu10_pf: + batch_size: 50 + num_events: -1 + clic_edm_zh_tautau_pf: + batch_size: 50 + num_events: -1 + clic_edm_ww_fullhad_pf: + batch_size: 50 + num_events: -1 + +evaluation_jet_algo: ee_genkt_algorithm + +datasets: + clic_edm_ttbar_pf: + version: 1.4.0 + data_dir: + manual_dir: + clic_edm_ttbar_pu10_pf: + version: 1.4.0 + data_dir: + manual_dir: + clic_edm_qq_pf: + version: 1.4.0 + data_dir: + manual_dir: + clic_edm_ww_fullhad_pf: + version: 1.4.0 + data_dir: + manual_dir: + clic_edm_zh_tautau_pf: + version: 1.4.0 + data_dir: + manual_dir: diff --git a/parameters/test-eventloss/met.yaml b/parameters/mixedprecision/clic_fp16_bs1.yaml similarity index 56% rename from parameters/test-eventloss/met.yaml rename to parameters/mixedprecision/clic_fp16_bs1.yaml index 92142f65c..2fbee5413 100644 --- a/parameters/test-eventloss/met.yaml +++ b/parameters/mixedprecision/clic_fp16_bs1.yaml @@ -1,56 +1,44 @@ backend: tensorflow dataset: - schema: cms + schema: clic target_particles: gen - num_input_features: 41 - num_output_features: 7 -# NONE = 0, -# TRACK = 1, -# PS1 = 2, -# PS2 = 3, -# ECAL = 4, -# HCAL = 5, -# GSF = 6, -# BREM = 7, -# HFEM = 8, -# HFHAD = 9, -# SC = 10, -# HO = 11, - num_input_classes: 12 - #(none=0, ch.had=1, n.had=2, hfem=3, hfhad=4, gamma=5, e=6, mu=7, tau=8) - num_output_classes: 9 - padded_num_elem_size: 6400 - #(pt, eta, sin phi, cos phi, E) - num_momentum_outputs: 5 + num_input_features: 17 + #(none=0, track=1, cluster=2) + num_input_classes: 3 + #(none=0, ch.had=1, n.had=2, gamma=3, e=4, mu=5) + num_output_classes: 6 cls_weight_by_pt: no + reg_weight_by_pt: no loss: - classification_loss_coef: 1.0 + classification_loss_coef: 200.0 charge_loss_coef: 1.0 - pt_loss_coef: 1.0 - eta_loss_coef: 1.0 - sin_phi_loss_coef: 1.0 - cos_phi_loss_coef: 1.0 - energy_loss_coef: 1.0 + pt_loss_coef: 10.0 + eta_loss_coef: 10.0 + sin_phi_loss_coef: 10.0 + cos_phi_loss_coef: 10.0 + energy_loss_coef: 10.0 + cls_loss: + type: SigmoidFocalCrossEntropy + from_logits: yes + gamma: 2.0 + charge_loss: + type: CategoricalCrossentropy + from_logits: yes energy_loss: type: Huber pt_loss: type: Huber sin_phi_loss: type: Huber - delta: 0.1 cos_phi_loss: type: Huber - delta: 0.1 eta_loss: type: Huber - delta: 0.1 - event_loss: none + event_loss: none #none, sliced_wasserstein, gen_jet_logcosh, gen_jet_mse, hist_2d event_loss_coef: 0.0 - met_loss: - type: Huber - delta: 10.0 + met_loss: none met_loss_coef: 1.0 tensorflow: @@ 
-61,20 +49,22 @@ setup: weights: weights_config: lr: 0.0005 - num_events_validation: 200 - num_epochs: 50 - dtype: float32 + num_epochs: 100 + dtype: float16 trainable: - classification_loss_type: sigmoid_focal_crossentropy - lr_schedule: none # exponentialdecay, onecycle, none + lr_schedule: cosinedecay # cosinedecay, exponentialdecay, onecycle, none optimizer: adam # adam, adamw, sgd - horovod_enabled: False + horovod_enabled: no + cls_output_as_logits: yes + small_graph_opt: no + normalizer_cache: parameters/clic_normalizations + +batching: + bucket_by_sequence_length: no optimizer: adam: amsgrad: no - #pcgrad does not work with LossScaleOptimizer, so it must be disabled for float16 - pcgrad: yes adamw: amsgrad: yes weight_decay: 0.001 @@ -96,20 +86,20 @@ onecycle: parameters: model: gnn_dense - input_encoding: cms - node_update_mode: concat - do_node_encoding: no - node_encoding_hidden_dim: 128 + input_encoding: clic + node_update_mode: additive + do_node_encoding: yes + node_encoding_hidden_dim: 256 combined_graph_layer: - bin_size: 100 + bin_size: 256 max_num_bins: 200 - distance_dim: 64 + distance_dim: 128 layernorm: yes dropout: 0.0 dist_activation: elu - ffn_dist_num_layers: 2 - ffn_dist_hidden_dim: 128 + ffn_dist_num_layers: 3 + ffn_dist_hidden_dim: 64 # MPNN #kernel: @@ -133,18 +123,20 @@ parameters: num_node_messages: 2 node_message: type: GHConvDense - output_dim: 128 + output_dim: 256 activation: elu #if this is enabled, it will break float16 training - normalize_degrees: yes + normalize_degrees: no activation: elu - num_graph_layers_id: 2 - num_graph_layers_reg: 2 + num_graph_layers_id: 6 + num_graph_layers_reg: 6 output_decoding: activation: elu regression_use_classification: yes - dropout: 0.0 + dropout: 0.1 + + pt_as_correction: no id_dim_decrease: yes charge_dim_decrease: yes @@ -153,23 +145,23 @@ parameters: phi_dim_decrease: yes energy_dim_decrease: yes - id_hidden_dim: 256 - charge_hidden_dim: 256 - pt_hidden_dim: 256 - eta_hidden_dim: 256 - phi_hidden_dim: 256 - energy_hidden_dim: 256 - - id_num_layers: 2 - charge_num_layers: 2 - pt_num_layers: 2 - eta_num_layers: 2 - phi_num_layers: 2 - energy_num_layers: 2 + id_hidden_dim: 128 + charge_hidden_dim: 128 + pt_hidden_dim: 128 + eta_hidden_dim: 128 + phi_hidden_dim: 128 + energy_hidden_dim: 128 + + id_num_layers: 1 + charge_num_layers: 1 + pt_num_layers: 1 + eta_num_layers: 1 + phi_num_layers: 1 + energy_num_layers: 1 layernorm: yes - mask_reg_cls0: no + mask_reg_cls0: yes - skip_connection: yes + skip_connection: no debug: no timing: @@ -222,30 +214,52 @@ raytune: train_test_datasets: physical: - batch_per_gpu: 5 + batch_per_gpu: 20 datasets: - - cms_pf_ttbar - - cms_pf_ztt - - cms_pf_qcd - - cms_pf_qcd_high_pt + - clic_edm_ttbar_pf -validation_datasets: - - cms_pf_qcd_high_pt +do_validation_callback: false +validation_dataset: clic_edm_ttbar_pf +validation_batch_size: 100 +validation_num_events: 2000 + +evaluation_datasets: + clic_edm_qq_pf: + batch_size: 50 + num_events: -1 + clic_edm_ttbar_pf: + batch_size: 50 + num_events: -1 + clic_edm_ttbar_pu10_pf: + batch_size: 50 + num_events: -1 + clic_edm_zh_tautau_pf: + batch_size: 50 + num_events: -1 + clic_edm_ww_fullhad_pf: + batch_size: 50 + num_events: -1 + +evaluation_jet_algo: ee_genkt_algorithm datasets: - cms_pf_ttbar: + clic_edm_ttbar_pf: + version: 1.4.0 + data_dir: + manual_dir: + clic_edm_ttbar_pu10_pf: version: 1.4.0 data_dir: manual_dir: - cms_pf_ztt: + clic_edm_qq_pf: version: 1.4.0 data_dir: manual_dir: - cms_pf_qcd: + clic_edm_ww_fullhad_pf: version: 1.4.0 
data_dir: manual_dir: - cms_pf_qcd_high_pt: + clic_edm_zh_tautau_pf: version: 1.4.0 data_dir: manual_dir: diff --git a/parameters/test-eventloss/baseline.yaml b/parameters/mixedprecision/clic_fp16_bs2.yaml similarity index 56% rename from parameters/test-eventloss/baseline.yaml rename to parameters/mixedprecision/clic_fp16_bs2.yaml index 5edbebb65..31d8fd7fa 100644 --- a/parameters/test-eventloss/baseline.yaml +++ b/parameters/mixedprecision/clic_fp16_bs2.yaml @@ -1,55 +1,45 @@ backend: tensorflow dataset: - schema: cms + schema: clic target_particles: gen - num_input_features: 41 - num_output_features: 7 -# NONE = 0, -# TRACK = 1, -# PS1 = 2, -# PS2 = 3, -# ECAL = 4, -# HCAL = 5, -# GSF = 6, -# BREM = 7, -# HFEM = 8, -# HFHAD = 9, -# SC = 10, -# HO = 11, - num_input_classes: 12 - #(none=0, ch.had=1, n.had=2, hfem=3, hfhad=4, gamma=5, e=6, mu=7, tau=8) - num_output_classes: 9 - padded_num_elem_size: 6400 - #(pt, eta, sin phi, cos phi, E) - num_momentum_outputs: 5 + num_input_features: 17 + #(none=0, track=1, cluster=2) + num_input_classes: 3 + #(none=0, ch.had=1, n.had=2, gamma=3, e=4, mu=5) + num_output_classes: 6 cls_weight_by_pt: no + reg_weight_by_pt: no loss: - classification_loss_coef: 1.0 + classification_loss_coef: 200.0 charge_loss_coef: 1.0 - pt_loss_coef: 1.0 - eta_loss_coef: 1.0 - sin_phi_loss_coef: 1.0 - cos_phi_loss_coef: 1.0 - energy_loss_coef: 1.0 + pt_loss_coef: 10.0 + eta_loss_coef: 10.0 + sin_phi_loss_coef: 10.0 + cos_phi_loss_coef: 10.0 + energy_loss_coef: 10.0 + cls_loss: + type: SigmoidFocalCrossEntropy + from_logits: yes + gamma: 2.0 + charge_loss: + type: CategoricalCrossentropy + from_logits: yes energy_loss: type: Huber pt_loss: type: Huber sin_phi_loss: type: Huber - delta: 0.1 cos_phi_loss: type: Huber - delta: 0.1 eta_loss: type: Huber - delta: 0.1 - event_loss: none + event_loss: none #none, sliced_wasserstein, gen_jet_logcosh, gen_jet_mse, hist_2d event_loss_coef: 0.0 met_loss: none - met_loss_coef: 0.0 + met_loss_coef: 1.0 tensorflow: eager: no @@ -58,21 +48,23 @@ setup: train: yes weights: weights_config: - lr: 0.0005 - num_events_validation: 200 - num_epochs: 50 - dtype: float32 + lr: 0.001 + num_epochs: 100 + dtype: float16 trainable: - classification_loss_type: sigmoid_focal_crossentropy - lr_schedule: none # exponentialdecay, onecycle, none + lr_schedule: cosinedecay # cosinedecay, exponentialdecay, onecycle, none optimizer: adam # adam, adamw, sgd - horovod_enabled: False + horovod_enabled: no + cls_output_as_logits: yes + small_graph_opt: no + normalizer_cache: parameters/clic_normalizations + +batching: + bucket_by_sequence_length: no optimizer: adam: amsgrad: no - #pcgrad does not work with LossScaleOptimizer, so it must be disabled for float16 - pcgrad: yes adamw: amsgrad: yes weight_decay: 0.001 @@ -94,20 +86,20 @@ onecycle: parameters: model: gnn_dense - input_encoding: cms - node_update_mode: concat - do_node_encoding: no - node_encoding_hidden_dim: 128 + input_encoding: clic + node_update_mode: additive + do_node_encoding: yes + node_encoding_hidden_dim: 256 combined_graph_layer: - bin_size: 100 + bin_size: 256 max_num_bins: 200 - distance_dim: 64 + distance_dim: 128 layernorm: yes dropout: 0.0 dist_activation: elu - ffn_dist_num_layers: 2 - ffn_dist_hidden_dim: 128 + ffn_dist_num_layers: 3 + ffn_dist_hidden_dim: 64 # MPNN #kernel: @@ -131,18 +123,20 @@ parameters: num_node_messages: 2 node_message: type: GHConvDense - output_dim: 128 + output_dim: 256 activation: elu #if this is enabled, it will break float16 training - normalize_degrees: yes 
+ normalize_degrees: no activation: elu - num_graph_layers_id: 2 - num_graph_layers_reg: 2 + num_graph_layers_id: 6 + num_graph_layers_reg: 6 output_decoding: activation: elu regression_use_classification: yes - dropout: 0.0 + dropout: 0.1 + + pt_as_correction: no id_dim_decrease: yes charge_dim_decrease: yes @@ -151,23 +145,23 @@ parameters: phi_dim_decrease: yes energy_dim_decrease: yes - id_hidden_dim: 256 - charge_hidden_dim: 256 - pt_hidden_dim: 256 - eta_hidden_dim: 256 - phi_hidden_dim: 256 - energy_hidden_dim: 256 - - id_num_layers: 2 - charge_num_layers: 2 - pt_num_layers: 2 - eta_num_layers: 2 - phi_num_layers: 2 - energy_num_layers: 2 + id_hidden_dim: 128 + charge_hidden_dim: 128 + pt_hidden_dim: 128 + eta_hidden_dim: 128 + phi_hidden_dim: 128 + energy_hidden_dim: 128 + + id_num_layers: 1 + charge_num_layers: 1 + pt_num_layers: 1 + eta_num_layers: 1 + phi_num_layers: 1 + energy_num_layers: 1 layernorm: yes - mask_reg_cls0: no + mask_reg_cls0: yes - skip_connection: yes + skip_connection: no debug: no timing: @@ -220,30 +214,52 @@ raytune: train_test_datasets: physical: - batch_per_gpu: 5 + batch_per_gpu: 40 datasets: - - cms_pf_ttbar - - cms_pf_ztt - - cms_pf_qcd - - cms_pf_qcd_high_pt + - clic_edm_ttbar_pf + +do_validation_callback: false +validation_dataset: clic_edm_ttbar_pf +validation_batch_size: 100 +validation_num_events: 2000 -validation_datasets: - - cms_pf_qcd_high_pt +evaluation_datasets: + clic_edm_qq_pf: + batch_size: 50 + num_events: -1 + clic_edm_ttbar_pf: + batch_size: 50 + num_events: -1 + clic_edm_ttbar_pu10_pf: + batch_size: 50 + num_events: -1 + clic_edm_zh_tautau_pf: + batch_size: 50 + num_events: -1 + clic_edm_ww_fullhad_pf: + batch_size: 50 + num_events: -1 + +evaluation_jet_algo: ee_genkt_algorithm datasets: - cms_pf_ttbar: + clic_edm_ttbar_pf: + version: 1.4.0 + data_dir: + manual_dir: + clic_edm_ttbar_pu10_pf: version: 1.4.0 data_dir: manual_dir: - cms_pf_ztt: + clic_edm_qq_pf: version: 1.4.0 data_dir: manual_dir: - cms_pf_qcd: + clic_edm_ww_fullhad_pf: version: 1.4.0 data_dir: manual_dir: - cms_pf_qcd_high_pt: + clic_edm_zh_tautau_pf: version: 1.4.0 data_dir: manual_dir: diff --git a/parameters/clic-fp16.yaml b/parameters/mixedprecision/clic_fp32_bs1.yaml similarity index 77% rename from parameters/clic-fp16.yaml rename to parameters/mixedprecision/clic_fp32_bs1.yaml index 66ae4250f..53622e9ef 100644 --- a/parameters/clic-fp16.yaml +++ b/parameters/mixedprecision/clic_fp32_bs1.yaml @@ -12,15 +12,19 @@ dataset: reg_weight_by_pt: no loss: - classification_loss_coef: 100.0 + classification_loss_coef: 200.0 charge_loss_coef: 1.0 pt_loss_coef: 10.0 eta_loss_coef: 10.0 sin_phi_loss_coef: 10.0 cos_phi_loss_coef: 10.0 energy_loss_coef: 10.0 + cls_loss: + type: SigmoidFocalCrossEntropy + from_logits: yes + gamma: 2.0 charge_loss: - type: BinaryCrossentropy + type: CategoricalCrossentropy from_logits: yes energy_loss: type: Huber @@ -44,39 +48,20 @@ setup: train: yes weights: weights_config: - lr: 0.0002 - num_epochs: 200 - dtype: float16 + lr: 0.0005 + num_epochs: 100 + dtype: float32 trainable: - classification_loss_type: sigmoid_focal_crossentropy - lr_schedule: cosinedecay # cosinedecay, exponentialdecay, onecycle, none + lr_schedule: cosinedecay # cosinedecay, exponentialdecay, onecycle, none optimizer: adam # adam, adamw, sgd horovod_enabled: no cls_output_as_logits: yes - small_graph_opt: yes + small_graph_opt: no + normalizer_cache: parameters/clic_normalizations batching: # if enabled, use dynamic batching instead of the fixed-size batches configured in 
batch_per_gpu - bucket_by_sequence_length: yes - # these sizes were sort of tuned for an 8GB GPU - # - max_sequence_length, batch_size_per_gpu - -#on 8GB GPU - bucket_batch_sizes: - - 25, 200 - - 50, 100 - - 100, 50 - - 200, 20 - - 500, 10 - - 1000, 5 - - 2000, 3 - - 3000, 2 - - 4000, 2 - - 5000, 1 - - 6000, 1 - - inf, 1 - # use this batch multiplier to increase all batch sizes by a constant factor - batch_multiplier: 1 + bucket_by_sequence_length: no optimizer: adam: @@ -108,14 +93,14 @@ parameters: node_encoding_hidden_dim: 256 combined_graph_layer: - bin_size: 640 + bin_size: 256 max_num_bins: 200 distance_dim: 128 layernorm: yes dropout: 0.0 dist_activation: elu - ffn_dist_num_layers: 2 - ffn_dist_hidden_dim: 128 + ffn_dist_num_layers: 3 + ffn_dist_hidden_dim: 64 # MPNN #kernel: @@ -145,8 +130,8 @@ parameters: normalize_degrees: no activation: elu - num_graph_layers_id: 5 - num_graph_layers_reg: 5 + num_graph_layers_id: 6 + num_graph_layers_reg: 6 output_decoding: activation: elu regression_use_classification: yes @@ -161,19 +146,19 @@ parameters: phi_dim_decrease: yes energy_dim_decrease: yes - id_hidden_dim: 512 - charge_hidden_dim: 256 - pt_hidden_dim: 256 - eta_hidden_dim: 256 - phi_hidden_dim: 256 - energy_hidden_dim: 256 + id_hidden_dim: 128 + charge_hidden_dim: 128 + pt_hidden_dim: 128 + eta_hidden_dim: 128 + phi_hidden_dim: 128 + energy_hidden_dim: 128 - id_num_layers: 3 - charge_num_layers: 2 - pt_num_layers: 2 - eta_num_layers: 2 - phi_num_layers: 2 - energy_num_layers: 2 + id_num_layers: 1 + charge_num_layers: 1 + pt_num_layers: 1 + eta_num_layers: 1 + phi_num_layers: 1 + energy_num_layers: 1 layernorm: yes mask_reg_cls0: yes @@ -230,11 +215,11 @@ raytune: train_test_datasets: physical: - batch_per_gpu: 1 + batch_per_gpu: 20 datasets: - clic_edm_ttbar_pf - - clic_edm_qq_pf +do_validation_callback: false validation_dataset: clic_edm_ttbar_pf validation_batch_size: 100 validation_num_events: 2000 @@ -246,6 +231,9 @@ evaluation_datasets: clic_edm_ttbar_pf: batch_size: 50 num_events: -1 + clic_edm_ttbar_pu10_pf: + batch_size: 50 + num_events: -1 clic_edm_zh_tautau_pf: batch_size: 50 num_events: -1 @@ -257,18 +245,22 @@ evaluation_jet_algo: ee_genkt_algorithm datasets: clic_edm_ttbar_pf: - version: 1.3.0 + version: 1.4.0 + data_dir: + manual_dir: + clic_edm_ttbar_pu10_pf: + version: 1.4.0 data_dir: manual_dir: clic_edm_qq_pf: - version: 1.3.1 + version: 1.4.0 data_dir: manual_dir: clic_edm_ww_fullhad_pf: - version: 1.3.0 + version: 1.4.0 data_dir: manual_dir: clic_edm_zh_tautau_pf: - version: 1.3.0 + version: 1.4.0 data_dir: manual_dir: diff --git a/parameters/test-eventloss/genjet_logcosh.yaml b/parameters/test-eventloss/genjet_logcosh.yaml deleted file mode 100644 index 5ba5adb13..000000000 --- a/parameters/test-eventloss/genjet_logcosh.yaml +++ /dev/null @@ -1,249 +0,0 @@ -backend: tensorflow - -dataset: - schema: cms - target_particles: gen - num_input_features: 41 - num_output_features: 7 -# NONE = 0, -# TRACK = 1, -# PS1 = 2, -# PS2 = 3, -# ECAL = 4, -# HCAL = 5, -# GSF = 6, -# BREM = 7, -# HFEM = 8, -# HFHAD = 9, -# SC = 10, -# HO = 11, - num_input_classes: 12 - #(none=0, ch.had=1, n.had=2, hfem=3, hfhad=4, gamma=5, e=6, mu=7, tau=8) - num_output_classes: 9 - padded_num_elem_size: 6400 - #(pt, eta, sin phi, cos phi, E) - num_momentum_outputs: 5 - cls_weight_by_pt: no - -loss: - classification_loss_coef: 1.0 - charge_loss_coef: 1.0 - pt_loss_coef: 1.0 - eta_loss_coef: 1.0 - sin_phi_loss_coef: 1.0 - cos_phi_loss_coef: 1.0 - energy_loss_coef: 1.0 - energy_loss: - type: Huber 
- pt_loss: - type: Huber - sin_phi_loss: - type: Huber - delta: 0.1 - cos_phi_loss: - type: Huber - delta: 0.1 - eta_loss: - type: Huber - delta: 0.1 - event_loss: gen_jet_logcosh - event_loss_coef: 1.0 - met_loss: none - met_loss_coef: 0.0 - -tensorflow: - eager: no - -setup: - train: yes - weights: - weights_config: - lr: 0.0005 - num_events_validation: 200 - num_epochs: 50 - dtype: float32 - trainable: - classification_loss_type: sigmoid_focal_crossentropy - lr_schedule: none # exponentialdecay, onecycle, none - optimizer: adam # adam, adamw, sgd - horovod_enabled: False - -optimizer: - adam: - amsgrad: no - #pcgrad does not work with LossScaleOptimizer, so it must be disabled for float16 - pcgrad: yes - adamw: - amsgrad: yes - weight_decay: 0.001 - sgd: - nesterov: no - momentum: 0.9 - -# LR Schedules -exponentialdecay: - decay_steps: 2000 - decay_rate: 0.99 - staircase: yes -onecycle: - mom_min: 0.85 - mom_max: 0.95 - warmup_ratio: 0.3 - div_factor: 25.0 - final_div: 100000.0 - -parameters: - model: gnn_dense - input_encoding: cms - node_update_mode: concat - do_node_encoding: no - node_encoding_hidden_dim: 128 - - combined_graph_layer: - bin_size: 100 - max_num_bins: 200 - distance_dim: 64 - layernorm: yes - dropout: 0.0 - dist_activation: elu - ffn_dist_num_layers: 2 - ffn_dist_hidden_dim: 128 - - # MPNN - #kernel: - # type: NodePairTrainableKernel - # activation: elu - #num_node_messages: 1 - #node_message: - # type: NodeMessageLearnable - # output_dim: 64 - # hidden_dim: 128 - # num_layers: 2 - # activation: elu - #activation: elu - - # GCN - kernel: - type: NodePairGaussianKernel - dist_mult: 0.1 - clip_value_low: 0.0 - dist_norm: l2 - num_node_messages: 2 - node_message: - type: GHConvDense - output_dim: 128 - activation: elu - #if this is enabled, it will break float16 training - normalize_degrees: yes - activation: elu - - num_graph_layers_id: 2 - num_graph_layers_reg: 2 - output_decoding: - activation: elu - regression_use_classification: yes - dropout: 0.0 - - id_dim_decrease: yes - charge_dim_decrease: yes - pt_dim_decrease: yes - eta_dim_decrease: yes - phi_dim_decrease: yes - energy_dim_decrease: yes - - id_hidden_dim: 256 - charge_hidden_dim: 256 - pt_hidden_dim: 256 - eta_hidden_dim: 256 - phi_hidden_dim: 256 - energy_hidden_dim: 256 - - id_num_layers: 2 - charge_num_layers: 2 - pt_num_layers: 2 - eta_num_layers: 2 - phi_num_layers: 2 - energy_num_layers: 2 - layernorm: yes - mask_reg_cls0: no - - skip_connection: yes - debug: no - -timing: - num_ev: 100 - num_iter: 3 - -callbacks: - checkpoint: - monitor: "val_loss" - plot_freq: 1 - tensorboard: - dump_history: yes - hist_freq: 1 - -hypertune: - algorithm: hyperband # random, bayesian, hyperband - random: - objective: val_loss - max_trials: 100 - bayesian: - objective: val_loss - max_trials: 100 - num_initial_points: 2 - hyperband: - objective: val_loss - max_epochs: 10 - factor: 3 - iterations: 1 - executions_per_trial: 1 - -raytune: - local_dir: # Note: please specify an absolute path - sched: asha # asha, hyperband - search_alg: # bayes, bohb, hyperopt, nevergrad, scikit - default_metric: "val_loss" - default_mode: "min" - # Tune schedule specific parameters - asha: - max_t: 200 - reduction_factor: 4 - brackets: 1 - grace_period: 10 - hyperband: - max_t: 200 - reduction_factor: 4 - hyperopt: - n_random_steps: 10 - nevergrad: - n_random_steps: 10 - -train_test_datasets: - physical: - batch_per_gpu: 5 - datasets: - - cms_pf_ttbar - - cms_pf_ztt - - cms_pf_qcd - - cms_pf_qcd_high_pt - -validation_datasets: - - 
cms_pf_qcd_high_pt - -datasets: - cms_pf_ttbar: - version: 1.4.0 - data_dir: - manual_dir: - cms_pf_ztt: - version: 1.4.0 - data_dir: - manual_dir: - cms_pf_qcd: - version: 1.4.0 - data_dir: - manual_dir: - cms_pf_qcd_high_pt: - version: 1.4.0 - data_dir: - manual_dir: diff --git a/parameters/test-eventloss/h2d.yaml b/parameters/test-eventloss/h2d.yaml deleted file mode 100644 index 71a0e9487..000000000 --- a/parameters/test-eventloss/h2d.yaml +++ /dev/null @@ -1,249 +0,0 @@ -backend: tensorflow - -dataset: - schema: cms - target_particles: gen - num_input_features: 41 - num_output_features: 7 -# NONE = 0, -# TRACK = 1, -# PS1 = 2, -# PS2 = 3, -# ECAL = 4, -# HCAL = 5, -# GSF = 6, -# BREM = 7, -# HFEM = 8, -# HFHAD = 9, -# SC = 10, -# HO = 11, - num_input_classes: 12 - #(none=0, ch.had=1, n.had=2, hfem=3, hfhad=4, gamma=5, e=6, mu=7, tau=8) - num_output_classes: 9 - padded_num_elem_size: 6400 - #(pt, eta, sin phi, cos phi, E) - num_momentum_outputs: 5 - cls_weight_by_pt: no - -loss: - classification_loss_coef: 1.0 - charge_loss_coef: 1.0 - pt_loss_coef: 1.0 - eta_loss_coef: 1.0 - sin_phi_loss_coef: 1.0 - cos_phi_loss_coef: 1.0 - energy_loss_coef: 1.0 - energy_loss: - type: Huber - pt_loss: - type: Huber - sin_phi_loss: - type: Huber - delta: 0.1 - cos_phi_loss: - type: Huber - delta: 0.1 - eta_loss: - type: Huber - delta: 0.1 - event_loss: hist_2d - event_loss_coef: 1.0 - met_loss: none - met_loss_coef: 0.0 - -tensorflow: - eager: no - -setup: - train: yes - weights: - weights_config: - lr: 0.0005 - num_events_validation: 200 - num_epochs: 50 - dtype: float32 - trainable: - classification_loss_type: sigmoid_focal_crossentropy - lr_schedule: none # exponentialdecay, onecycle, none - optimizer: adam # adam, adamw, sgd - horovod_enabled: False - -optimizer: - adam: - amsgrad: no - #pcgrad does not work with LossScaleOptimizer, so it must be disabled for float16 - pcgrad: yes - adamw: - amsgrad: yes - weight_decay: 0.001 - sgd: - nesterov: no - momentum: 0.9 - -# LR Schedules -exponentialdecay: - decay_steps: 2000 - decay_rate: 0.99 - staircase: yes -onecycle: - mom_min: 0.85 - mom_max: 0.95 - warmup_ratio: 0.3 - div_factor: 25.0 - final_div: 100000.0 - -parameters: - model: gnn_dense - input_encoding: cms - node_update_mode: concat - do_node_encoding: no - node_encoding_hidden_dim: 128 - - combined_graph_layer: - bin_size: 100 - max_num_bins: 200 - distance_dim: 64 - layernorm: yes - dropout: 0.0 - dist_activation: elu - ffn_dist_num_layers: 2 - ffn_dist_hidden_dim: 128 - - # MPNN - #kernel: - # type: NodePairTrainableKernel - # activation: elu - #num_node_messages: 1 - #node_message: - # type: NodeMessageLearnable - # output_dim: 64 - # hidden_dim: 128 - # num_layers: 2 - # activation: elu - #activation: elu - - # GCN - kernel: - type: NodePairGaussianKernel - dist_mult: 0.1 - clip_value_low: 0.0 - dist_norm: l2 - num_node_messages: 2 - node_message: - type: GHConvDense - output_dim: 128 - activation: elu - #if this is enabled, it will break float16 training - normalize_degrees: yes - activation: elu - - num_graph_layers_id: 2 - num_graph_layers_reg: 2 - output_decoding: - activation: elu - regression_use_classification: yes - dropout: 0.0 - - id_dim_decrease: yes - charge_dim_decrease: yes - pt_dim_decrease: yes - eta_dim_decrease: yes - phi_dim_decrease: yes - energy_dim_decrease: yes - - id_hidden_dim: 256 - charge_hidden_dim: 256 - pt_hidden_dim: 256 - eta_hidden_dim: 256 - phi_hidden_dim: 256 - energy_hidden_dim: 256 - - id_num_layers: 2 - charge_num_layers: 2 - pt_num_layers: 2 - 
eta_num_layers: 2 - phi_num_layers: 2 - energy_num_layers: 2 - layernorm: yes - mask_reg_cls0: no - - skip_connection: yes - debug: no - -timing: - num_ev: 100 - num_iter: 3 - -callbacks: - checkpoint: - monitor: "val_loss" - plot_freq: 1 - tensorboard: - dump_history: yes - hist_freq: 1 - -hypertune: - algorithm: hyperband # random, bayesian, hyperband - random: - objective: val_loss - max_trials: 100 - bayesian: - objective: val_loss - max_trials: 100 - num_initial_points: 2 - hyperband: - objective: val_loss - max_epochs: 10 - factor: 3 - iterations: 1 - executions_per_trial: 1 - -raytune: - local_dir: # Note: please specify an absolute path - sched: asha # asha, hyperband - search_alg: # bayes, bohb, hyperopt, nevergrad, scikit - default_metric: "val_loss" - default_mode: "min" - # Tune schedule specific parameters - asha: - max_t: 200 - reduction_factor: 4 - brackets: 1 - grace_period: 10 - hyperband: - max_t: 200 - reduction_factor: 4 - hyperopt: - n_random_steps: 10 - nevergrad: - n_random_steps: 10 - -train_test_datasets: - physical: - batch_per_gpu: 5 - datasets: - - cms_pf_ttbar - - cms_pf_ztt - - cms_pf_qcd - - cms_pf_qcd_high_pt - -validation_datasets: - - cms_pf_qcd_high_pt - -datasets: - cms_pf_ttbar: - version: 1.4.0 - data_dir: - manual_dir: - cms_pf_ztt: - version: 1.4.0 - data_dir: - manual_dir: - cms_pf_qcd: - version: 1.4.0 - data_dir: - manual_dir: - cms_pf_qcd_high_pt: - version: 1.4.0 - data_dir: - manual_dir: diff --git a/parameters/test-eventloss/swd.yaml b/parameters/test-eventloss/swd.yaml deleted file mode 100644 index 8be1dc3d4..000000000 --- a/parameters/test-eventloss/swd.yaml +++ /dev/null @@ -1,249 +0,0 @@ -backend: tensorflow - -dataset: - schema: cms - target_particles: gen - num_input_features: 41 - num_output_features: 7 -# NONE = 0, -# TRACK = 1, -# PS1 = 2, -# PS2 = 3, -# ECAL = 4, -# HCAL = 5, -# GSF = 6, -# BREM = 7, -# HFEM = 8, -# HFHAD = 9, -# SC = 10, -# HO = 11, - num_input_classes: 12 - #(none=0, ch.had=1, n.had=2, hfem=3, hfhad=4, gamma=5, e=6, mu=7, tau=8) - num_output_classes: 9 - padded_num_elem_size: 6400 - #(pt, eta, sin phi, cos phi, E) - num_momentum_outputs: 5 - cls_weight_by_pt: no - -loss: - classification_loss_coef: 1.0 - charge_loss_coef: 1.0 - pt_loss_coef: 1.0 - eta_loss_coef: 1.0 - sin_phi_loss_coef: 1.0 - cos_phi_loss_coef: 1.0 - energy_loss_coef: 1.0 - energy_loss: - type: Huber - pt_loss: - type: Huber - sin_phi_loss: - type: Huber - delta: 0.1 - cos_phi_loss: - type: Huber - delta: 0.1 - eta_loss: - type: Huber - delta: 0.1 - event_loss: sliced_wasserstein - event_loss_coef: 1.0 - met_loss: none - met_loss_coef: 0.0 - -tensorflow: - eager: no - -setup: - train: yes - weights: - weights_config: - lr: 0.0005 - num_events_validation: 200 - num_epochs: 50 - dtype: float32 - trainable: - classification_loss_type: sigmoid_focal_crossentropy - lr_schedule: none # exponentialdecay, onecycle, none - optimizer: adam # adam, adamw, sgd - horovod_enabled: False - -optimizer: - adam: - amsgrad: no - #pcgrad does not work with LossScaleOptimizer, so it must be disabled for float16 - pcgrad: yes - adamw: - amsgrad: yes - weight_decay: 0.001 - sgd: - nesterov: no - momentum: 0.9 - -# LR Schedules -exponentialdecay: - decay_steps: 2000 - decay_rate: 0.99 - staircase: yes -onecycle: - mom_min: 0.85 - mom_max: 0.95 - warmup_ratio: 0.3 - div_factor: 25.0 - final_div: 100000.0 - -parameters: - model: gnn_dense - input_encoding: cms - node_update_mode: concat - do_node_encoding: no - node_encoding_hidden_dim: 128 - - combined_graph_layer: - 
bin_size: 100 - max_num_bins: 200 - distance_dim: 64 - layernorm: yes - dropout: 0.0 - dist_activation: elu - ffn_dist_num_layers: 2 - ffn_dist_hidden_dim: 128 - - # MPNN - #kernel: - # type: NodePairTrainableKernel - # activation: elu - #num_node_messages: 1 - #node_message: - # type: NodeMessageLearnable - # output_dim: 64 - # hidden_dim: 128 - # num_layers: 2 - # activation: elu - #activation: elu - - # GCN - kernel: - type: NodePairGaussianKernel - dist_mult: 0.1 - clip_value_low: 0.0 - dist_norm: l2 - num_node_messages: 2 - node_message: - type: GHConvDense - output_dim: 128 - activation: elu - #if this is enabled, it will break float16 training - normalize_degrees: yes - activation: elu - - num_graph_layers_id: 2 - num_graph_layers_reg: 2 - output_decoding: - activation: elu - regression_use_classification: yes - dropout: 0.0 - - id_dim_decrease: yes - charge_dim_decrease: yes - pt_dim_decrease: yes - eta_dim_decrease: yes - phi_dim_decrease: yes - energy_dim_decrease: yes - - id_hidden_dim: 256 - charge_hidden_dim: 256 - pt_hidden_dim: 256 - eta_hidden_dim: 256 - phi_hidden_dim: 256 - energy_hidden_dim: 256 - - id_num_layers: 2 - charge_num_layers: 2 - pt_num_layers: 2 - eta_num_layers: 2 - phi_num_layers: 2 - energy_num_layers: 2 - layernorm: yes - mask_reg_cls0: no - - skip_connection: yes - debug: no - -timing: - num_ev: 100 - num_iter: 3 - -callbacks: - checkpoint: - monitor: "val_loss" - plot_freq: 1 - tensorboard: - dump_history: yes - hist_freq: 1 - -hypertune: - algorithm: hyperband # random, bayesian, hyperband - random: - objective: val_loss - max_trials: 100 - bayesian: - objective: val_loss - max_trials: 100 - num_initial_points: 2 - hyperband: - objective: val_loss - max_epochs: 10 - factor: 3 - iterations: 1 - executions_per_trial: 1 - -raytune: - local_dir: # Note: please specify an absolute path - sched: asha # asha, hyperband - search_alg: # bayes, bohb, hyperopt, nevergrad, scikit - default_metric: "val_loss" - default_mode: "min" - # Tune schedule specific parameters - asha: - max_t: 200 - reduction_factor: 4 - brackets: 1 - grace_period: 10 - hyperband: - max_t: 200 - reduction_factor: 4 - hyperopt: - n_random_steps: 10 - nevergrad: - n_random_steps: 10 - -train_test_datasets: - physical: - batch_per_gpu: 5 - datasets: - - cms_pf_ttbar - - cms_pf_ztt - - cms_pf_qcd - - cms_pf_qcd_high_pt - -validation_datasets: - - cms_pf_qcd_high_pt - -datasets: - cms_pf_ttbar: - version: 1.4.0 - data_dir: - manual_dir: - cms_pf_ztt: - version: 1.4.0 - data_dir: - manual_dir: - cms_pf_qcd: - version: 1.4.0 - data_dir: - manual_dir: - cms_pf_qcd_high_pt: - version: 1.4.0 - data_dir: - manual_dir: diff --git a/parameters/test-gnn/cms-0l.yaml b/parameters/test-gnn/cms-0l.yaml deleted file mode 100644 index 5977abbc6..000000000 --- a/parameters/test-gnn/cms-0l.yaml +++ /dev/null @@ -1,149 +0,0 @@ -backend: tensorflow - -dataset: - schema: cms - target_particles: cand - num_input_features: 15 - num_output_features: 7 -# NONE = 0, -# TRACK = 1, -# PS1 = 2, -# PS2 = 3, -# ECAL = 4, -# HCAL = 5, -# GSF = 6, -# BREM = 7, -# HFEM = 8, -# HFHAD = 9, -# SC = 10, -# HO = 11, - num_input_classes: 12 - #(none=0, ch.had=1, n.had=2, hfem=3, hfhad=4, gamma=5, e=6, mu=7) - num_output_classes: 8 - padded_num_elem_size: 6400 - #(pt, eta, sin phi, cos phi, E) - num_momentum_outputs: 5 - classification_loss_coef: 1.0 - charge_loss_coef: 0.01 - pt_loss_coef: 0.0001 - eta_loss_coef: 100.0 - sin_phi_loss_coef: 10.0 - cos_phi_loss_coef: 10.0 - energy_loss_coef: 0.0001 - raw_path: 
data/TTbar_14TeV_TuneCUETP8M1_cfi/raw/*.pkl* - processed_path: data/TTbar_14TeV_TuneCUETP8M1_cfi/tfr_cand/*.tfrecords - num_files_per_chunk: 1 - validation_file_path: data/TTbar_14TeV_TuneCUETP8M1_cfi/val/*.pkl* - energy_loss: - type: Huber - pt_loss: - type: Huber - sin_phi_loss: - type: Huber - delta: 0.1 - cos_phi_loss: - type: Huber - delta: 0.1 - eta_loss: - type: Huber - delta: 0.1 - -tensorflow: - eager: no - -setup: - train: yes - weights: - weights_config: - lr: 1e-3 - batch_size: 20 - num_events_train: 1000 - num_events_test: 1000 - num_epochs: 50 - num_val_files: 20 - dtype: float32 - trainable: classification - classification_loss_type: categorical_cross_entropy - lr_schedule: exponentialdecay # exponentialdecay, onecycle - -sample_weights: - cls: inverse_sqrt - charge: signal_only - pt: signal_only - eta: signal_only - sin_phi: signal_only - cos_phi: signal_only - energy: signal_only - -parameters: - model: gnn_dense - input_encoding: cms - do_node_encoding: no - hidden_dim: 128 - dropout: 0.0 - activation: gelu - combined_graph_layer: - do_lsh: no - bin_size: 160 - max_num_bins: 100 - distance_dim: 128 - layernorm: no - dropout: 0.0 - dist_activation: gelu - kernel: - type: NodePairGaussianKernel - dist_mult: 0.1 - clip_value_low: 0.0 - num_node_messages: 1 - node_message: - type: GHConvDense - output_dim: 128 - activation: gelu - normalize_degrees: yes - hidden_dim: 128 - activation: gelu - num_graph_layers_common: 0 - num_graph_layers_energy: 0 - output_decoding: - activation: gelu - regression_use_classification: yes - dropout: 0.0 - - pt_skip_gate: no - eta_skip_gate: yes - phi_skip_gate: yes - - id_dim_decrease: yes - charge_dim_decrease: yes - pt_dim_decrease: yes - eta_dim_decrease: yes - phi_dim_decrease: yes - energy_dim_decrease: yes - - id_hidden_dim: 256 - charge_hidden_dim: 256 - pt_hidden_dim: 256 - eta_hidden_dim: 256 - phi_hidden_dim: 256 - energy_hidden_dim: 256 - - id_num_layers: 2 - charge_num_layers: 2 - pt_num_layers: 2 - eta_num_layers: 2 - phi_num_layers: 2 - energy_num_layers: 2 - layernorm: no - mask_reg_cls0: no - - skip_connection: yes - debug: no - -timing: - num_ev: 100 - num_iter: 3 - -exponentialdecay: - decay_steps: 1000 - decay_rate: 0.98 - staircase: yes diff --git a/parameters/test-gnn/cms-lsh-1l.yaml b/parameters/test-gnn/cms-lsh-1l.yaml deleted file mode 100644 index c8c4dfb7e..000000000 --- a/parameters/test-gnn/cms-lsh-1l.yaml +++ /dev/null @@ -1,149 +0,0 @@ -backend: tensorflow - -dataset: - schema: cms - target_particles: cand - num_input_features: 15 - num_output_features: 7 -# NONE = 0, -# TRACK = 1, -# PS1 = 2, -# PS2 = 3, -# ECAL = 4, -# HCAL = 5, -# GSF = 6, -# BREM = 7, -# HFEM = 8, -# HFHAD = 9, -# SC = 10, -# HO = 11, - num_input_classes: 12 - #(none=0, ch.had=1, n.had=2, hfem=3, hfhad=4, gamma=5, e=6, mu=7) - num_output_classes: 8 - padded_num_elem_size: 6400 - #(pt, eta, sin phi, cos phi, E) - num_momentum_outputs: 5 - classification_loss_coef: 1.0 - charge_loss_coef: 0.01 - pt_loss_coef: 0.0001 - eta_loss_coef: 100.0 - sin_phi_loss_coef: 10.0 - cos_phi_loss_coef: 10.0 - energy_loss_coef: 0.0001 - raw_path: data/TTbar_14TeV_TuneCUETP8M1_cfi/raw/*.pkl* - processed_path: data/TTbar_14TeV_TuneCUETP8M1_cfi/tfr_cand/*.tfrecords - num_files_per_chunk: 1 - validation_file_path: data/TTbar_14TeV_TuneCUETP8M1_cfi/val/*.pkl* - energy_loss: - type: Huber - pt_loss: - type: Huber - sin_phi_loss: - type: Huber - delta: 0.1 - cos_phi_loss: - type: Huber - delta: 0.1 - eta_loss: - type: Huber - delta: 0.1 - -tensorflow: - eager: no - 
-setup: - train: yes - weights: - weights_config: - lr: 1e-3 - batch_size: 10 - num_events_train: 1000 - num_events_test: 1000 - num_epochs: 50 - num_val_files: 20 - dtype: float32 - trainable: classification - classification_loss_type: categorical_cross_entropy - lr_schedule: exponentialdecay # exponentialdecay, onecycle - -sample_weights: - cls: inverse_sqrt - charge: signal_only - pt: signal_only - eta: signal_only - sin_phi: signal_only - cos_phi: signal_only - energy: signal_only - -parameters: - model: gnn_dense - input_encoding: cms - do_node_encoding: no - hidden_dim: 128 - dropout: 0.0 - activation: gelu - combined_graph_layer: - do_lsh: yes - bin_size: 160 - max_num_bins: 100 - distance_dim: 128 - layernorm: no - dropout: 0.0 - dist_activation: gelu - kernel: - type: NodePairGaussianKernel - dist_mult: 0.1 - clip_value_low: 0.0 - num_node_messages: 1 - node_message: - type: GHConvDense - output_dim: 128 - activation: gelu - normalize_degrees: yes - hidden_dim: 128 - activation: gelu - num_graph_layers_common: 1 - num_graph_layers_energy: 1 - output_decoding: - activation: gelu - regression_use_classification: yes - dropout: 0.0 - - pt_skip_gate: no - eta_skip_gate: yes - phi_skip_gate: yes - - id_dim_decrease: yes - charge_dim_decrease: yes - pt_dim_decrease: yes - eta_dim_decrease: yes - phi_dim_decrease: yes - energy_dim_decrease: yes - - id_hidden_dim: 256 - charge_hidden_dim: 256 - pt_hidden_dim: 256 - eta_hidden_dim: 256 - phi_hidden_dim: 256 - energy_hidden_dim: 256 - - id_num_layers: 2 - charge_num_layers: 2 - pt_num_layers: 2 - eta_num_layers: 2 - phi_num_layers: 2 - energy_num_layers: 2 - layernorm: no - mask_reg_cls0: no - - skip_connection: yes - debug: no - -timing: - num_ev: 100 - num_iter: 3 - -exponentialdecay: - decay_steps: 1000 - decay_rate: 0.98 - staircase: yes diff --git a/parameters/test-gnn/cms-lsh-2l.yaml b/parameters/test-gnn/cms-lsh-2l.yaml deleted file mode 100644 index 5eb0a83f2..000000000 --- a/parameters/test-gnn/cms-lsh-2l.yaml +++ /dev/null @@ -1,149 +0,0 @@ -backend: tensorflow - -dataset: - schema: cms - target_particles: cand - num_input_features: 15 - num_output_features: 7 -# NONE = 0, -# TRACK = 1, -# PS1 = 2, -# PS2 = 3, -# ECAL = 4, -# HCAL = 5, -# GSF = 6, -# BREM = 7, -# HFEM = 8, -# HFHAD = 9, -# SC = 10, -# HO = 11, - num_input_classes: 12 - #(none=0, ch.had=1, n.had=2, hfem=3, hfhad=4, gamma=5, e=6, mu=7) - num_output_classes: 8 - padded_num_elem_size: 6400 - #(pt, eta, sin phi, cos phi, E) - num_momentum_outputs: 5 - classification_loss_coef: 1.0 - charge_loss_coef: 0.01 - pt_loss_coef: 0.0001 - eta_loss_coef: 100.0 - sin_phi_loss_coef: 10.0 - cos_phi_loss_coef: 10.0 - energy_loss_coef: 0.0001 - raw_path: data/TTbar_14TeV_TuneCUETP8M1_cfi/raw/*.pkl* - processed_path: data/TTbar_14TeV_TuneCUETP8M1_cfi/tfr_cand/*.tfrecords - num_files_per_chunk: 1 - validation_file_path: data/TTbar_14TeV_TuneCUETP8M1_cfi/val/*.pkl* - energy_loss: - type: Huber - pt_loss: - type: Huber - sin_phi_loss: - type: Huber - delta: 0.1 - cos_phi_loss: - type: Huber - delta: 0.1 - eta_loss: - type: Huber - delta: 0.1 - -tensorflow: - eager: no - -setup: - train: yes - weights: - weights_config: - lr: 1e-3 - batch_size: 5 - num_events_train: 1000 - num_events_test: 1000 - num_epochs: 50 - num_val_files: 20 - dtype: float32 - trainable: classification - classification_loss_type: categorical_cross_entropy - lr_schedule: exponentialdecay # exponentialdecay, onecycle - -sample_weights: - cls: inverse_sqrt - charge: signal_only - pt: signal_only - eta: signal_only - 
sin_phi: signal_only - cos_phi: signal_only - energy: signal_only - -parameters: - model: gnn_dense - input_encoding: cms - do_node_encoding: no - hidden_dim: 128 - dropout: 0.0 - activation: gelu - combined_graph_layer: - do_lsh: yes - bin_size: 160 - max_num_bins: 100 - distance_dim: 128 - layernorm: no - dropout: 0.0 - dist_activation: gelu - kernel: - type: NodePairGaussianKernel - dist_mult: 0.1 - clip_value_low: 0.0 - num_node_messages: 1 - node_message: - type: GHConvDense - output_dim: 128 - activation: gelu - normalize_degrees: yes - hidden_dim: 128 - activation: gelu - num_graph_layers_common: 2 - num_graph_layers_energy: 2 - output_decoding: - activation: gelu - regression_use_classification: yes - dropout: 0.0 - - pt_skip_gate: no - eta_skip_gate: yes - phi_skip_gate: yes - - id_dim_decrease: yes - charge_dim_decrease: yes - pt_dim_decrease: yes - eta_dim_decrease: yes - phi_dim_decrease: yes - energy_dim_decrease: yes - - id_hidden_dim: 256 - charge_hidden_dim: 256 - pt_hidden_dim: 256 - eta_hidden_dim: 256 - phi_hidden_dim: 256 - energy_hidden_dim: 256 - - id_num_layers: 2 - charge_num_layers: 2 - pt_num_layers: 2 - eta_num_layers: 2 - phi_num_layers: 2 - energy_num_layers: 2 - layernorm: no - mask_reg_cls0: no - - skip_connection: yes - debug: no - -timing: - num_ev: 100 - num_iter: 3 - -exponentialdecay: - decay_steps: 1000 - decay_rate: 0.98 - staircase: yes diff --git a/parameters/test-gnn/cms-lsh-3l.yaml b/parameters/test-gnn/cms-lsh-3l.yaml deleted file mode 100644 index 6ac8b76c7..000000000 --- a/parameters/test-gnn/cms-lsh-3l.yaml +++ /dev/null @@ -1,149 +0,0 @@ -backend: tensorflow - -dataset: - schema: cms - target_particles: cand - num_input_features: 15 - num_output_features: 7 -# NONE = 0, -# TRACK = 1, -# PS1 = 2, -# PS2 = 3, -# ECAL = 4, -# HCAL = 5, -# GSF = 6, -# BREM = 7, -# HFEM = 8, -# HFHAD = 9, -# SC = 10, -# HO = 11, - num_input_classes: 12 - #(none=0, ch.had=1, n.had=2, hfem=3, hfhad=4, gamma=5, e=6, mu=7) - num_output_classes: 8 - padded_num_elem_size: 6400 - #(pt, eta, sin phi, cos phi, E) - num_momentum_outputs: 5 - classification_loss_coef: 1.0 - charge_loss_coef: 0.01 - pt_loss_coef: 0.0001 - eta_loss_coef: 100.0 - sin_phi_loss_coef: 10.0 - cos_phi_loss_coef: 10.0 - energy_loss_coef: 0.0001 - raw_path: data/TTbar_14TeV_TuneCUETP8M1_cfi/raw/*.pkl* - processed_path: data/TTbar_14TeV_TuneCUETP8M1_cfi/tfr_cand/*.tfrecords - num_files_per_chunk: 1 - validation_file_path: data/TTbar_14TeV_TuneCUETP8M1_cfi/val/*.pkl* - energy_loss: - type: Huber - pt_loss: - type: Huber - sin_phi_loss: - type: Huber - delta: 0.1 - cos_phi_loss: - type: Huber - delta: 0.1 - eta_loss: - type: Huber - delta: 0.1 - -tensorflow: - eager: no - -setup: - train: yes - weights: - weights_config: - lr: 1e-3 - batch_size: 5 - num_events_train: 1000 - num_events_test: 1000 - num_epochs: 50 - num_val_files: 20 - dtype: float32 - trainable: classification - classification_loss_type: categorical_cross_entropy - lr_schedule: exponentialdecay # exponentialdecay, onecycle - -sample_weights: - cls: inverse_sqrt - charge: signal_only - pt: signal_only - eta: signal_only - sin_phi: signal_only - cos_phi: signal_only - energy: signal_only - -parameters: - model: gnn_dense - input_encoding: cms - do_node_encoding: no - hidden_dim: 128 - dropout: 0.0 - activation: gelu - combined_graph_layer: - do_lsh: yes - bin_size: 160 - max_num_bins: 100 - distance_dim: 128 - layernorm: no - dropout: 0.0 - dist_activation: gelu - kernel: - type: NodePairGaussianKernel - dist_mult: 0.1 - clip_value_low: 0.0 
- num_node_messages: 1 - node_message: - type: GHConvDense - output_dim: 128 - activation: gelu - normalize_degrees: yes - hidden_dim: 128 - activation: gelu - num_graph_layers_common: 3 - num_graph_layers_energy: 3 - output_decoding: - activation: gelu - regression_use_classification: yes - dropout: 0.0 - - pt_skip_gate: no - eta_skip_gate: yes - phi_skip_gate: yes - - id_dim_decrease: yes - charge_dim_decrease: yes - pt_dim_decrease: yes - eta_dim_decrease: yes - phi_dim_decrease: yes - energy_dim_decrease: yes - - id_hidden_dim: 256 - charge_hidden_dim: 256 - pt_hidden_dim: 256 - eta_hidden_dim: 256 - phi_hidden_dim: 256 - energy_hidden_dim: 256 - - id_num_layers: 2 - charge_num_layers: 2 - pt_num_layers: 2 - eta_num_layers: 2 - phi_num_layers: 2 - energy_num_layers: 2 - layernorm: no - mask_reg_cls0: no - - skip_connection: yes - debug: no - -timing: - num_ev: 100 - num_iter: 3 - -exponentialdecay: - decay_steps: 1000 - decay_rate: 0.98 - staircase: yes diff --git a/parameters/test-gnn/cms-lsh-mpnn.yaml b/parameters/test-gnn/cms-lsh-mpnn.yaml deleted file mode 100644 index 291cd98a5..000000000 --- a/parameters/test-gnn/cms-lsh-mpnn.yaml +++ /dev/null @@ -1,153 +0,0 @@ -backend: tensorflow - -dataset: - schema: cms - target_particles: cand - num_input_features: 15 - num_output_features: 7 -# NONE = 0, -# TRACK = 1, -# PS1 = 2, -# PS2 = 3, -# ECAL = 4, -# HCAL = 5, -# GSF = 6, -# BREM = 7, -# HFEM = 8, -# HFHAD = 9, -# SC = 10, -# HO = 11, - num_input_classes: 12 - #(none=0, ch.had=1, n.had=2, hfem=3, hfhad=4, gamma=5, e=6, mu=7) - num_output_classes: 8 - padded_num_elem_size: 6400 - #(pt, eta, sin phi, cos phi, E) - num_momentum_outputs: 5 - classification_loss_coef: 1.0 - charge_loss_coef: 0.01 - pt_loss_coef: 0.0001 - eta_loss_coef: 100.0 - sin_phi_loss_coef: 10.0 - cos_phi_loss_coef: 10.0 - energy_loss_coef: 0.0001 - raw_path: data/TTbar_14TeV_TuneCUETP8M1_cfi/raw/*.pkl* - processed_path: data/TTbar_14TeV_TuneCUETP8M1_cfi/tfr_cand/*.tfrecords - num_files_per_chunk: 1 - validation_file_path: data/TTbar_14TeV_TuneCUETP8M1_cfi/val/*.pkl* - energy_loss: - type: Huber - pt_loss: - type: Huber - sin_phi_loss: - type: Huber - delta: 0.1 - cos_phi_loss: - type: Huber - delta: 0.1 - eta_loss: - type: Huber - delta: 0.1 - -tensorflow: - eager: no - -setup: - train: yes - weights: - weights_config: - lr: 1e-3 - batch_size: 4 - num_events_train: 1000 - num_events_test: 1000 - num_epochs: 50 - num_val_files: 20 - dtype: float32 - trainable: classification - classification_loss_type: categorical_cross_entropy - lr_schedule: exponentialdecay # exponentialdecay, onecycle - -sample_weights: - cls: inverse_sqrt - charge: signal_only - pt: signal_only - eta: signal_only - sin_phi: signal_only - cos_phi: signal_only - energy: signal_only - -parameters: - model: gnn_dense - input_encoding: cms - do_node_encoding: no - hidden_dim: 128 - dropout: 0.0 - activation: gelu - combined_graph_layer: - do_lsh: yes - bin_size: 32 - max_num_bins: 500 - distance_dim: 128 - layernorm: no - dropout: 0.0 - dist_activation: linear - kernel: - type: NodePairTrainableKernel - output_dim: 8 - hidden_dim: 32 - num_layers: 2 - activation: gelu - node_message: - type: NodeMessageLearnable - output_dim: 256 - hidden_dim: 128 - num_layers: 2 - activation: gelu - aggregation_direction: src - num_node_messages: 1 - hidden_dim: 256 - activation: gelu - num_graph_layers_common: 2 - num_graph_layers_energy: 2 - output_decoding: - activation: gelu - regression_use_classification: yes - dropout: 0.0 - - pt_skip_gate: no - 
eta_skip_gate: yes - phi_skip_gate: yes - - id_dim_decrease: yes - charge_dim_decrease: yes - pt_dim_decrease: yes - eta_dim_decrease: yes - phi_dim_decrease: yes - energy_dim_decrease: yes - - id_hidden_dim: 256 - charge_hidden_dim: 256 - pt_hidden_dim: 256 - eta_hidden_dim: 256 - phi_hidden_dim: 256 - energy_hidden_dim: 256 - - id_num_layers: 2 - charge_num_layers: 2 - pt_num_layers: 2 - eta_num_layers: 2 - phi_num_layers: 2 - energy_num_layers: 2 - layernorm: no - mask_reg_cls0: no - - skip_connection: yes - debug: no - -timing: - num_ev: 100 - num_iter: 3 - -exponentialdecay: - decay_steps: 1000 - decay_rate: 0.98 - staircase: yes diff --git a/parameters/test-gnn/cms-nolsh-1l.yaml b/parameters/test-gnn/cms-nolsh-1l.yaml deleted file mode 100644 index 697aac9ed..000000000 --- a/parameters/test-gnn/cms-nolsh-1l.yaml +++ /dev/null @@ -1,149 +0,0 @@ -backend: tensorflow - -dataset: - schema: cms - target_particles: cand - num_input_features: 15 - num_output_features: 7 -# NONE = 0, -# TRACK = 1, -# PS1 = 2, -# PS2 = 3, -# ECAL = 4, -# HCAL = 5, -# GSF = 6, -# BREM = 7, -# HFEM = 8, -# HFHAD = 9, -# SC = 10, -# HO = 11, - num_input_classes: 12 - #(none=0, ch.had=1, n.had=2, hfem=3, hfhad=4, gamma=5, e=6, mu=7) - num_output_classes: 8 - padded_num_elem_size: 6400 - #(pt, eta, sin phi, cos phi, E) - num_momentum_outputs: 5 - classification_loss_coef: 1.0 - charge_loss_coef: 0.01 - pt_loss_coef: 0.0001 - eta_loss_coef: 100.0 - sin_phi_loss_coef: 10.0 - cos_phi_loss_coef: 10.0 - energy_loss_coef: 0.0001 - raw_path: data/TTbar_14TeV_TuneCUETP8M1_cfi/raw/*.pkl* - processed_path: data/TTbar_14TeV_TuneCUETP8M1_cfi/tfr_cand/*.tfrecords - num_files_per_chunk: 1 - validation_file_path: data/TTbar_14TeV_TuneCUETP8M1_cfi/val/*.pkl* - energy_loss: - type: Huber - pt_loss: - type: Huber - sin_phi_loss: - type: Huber - delta: 0.1 - cos_phi_loss: - type: Huber - delta: 0.1 - eta_loss: - type: Huber - delta: 0.1 - -tensorflow: - eager: no - -setup: - train: yes - weights: - weights_config: - lr: 1e-3 - batch_size: 2 - num_events_train: 1000 - num_events_test: 1000 - num_epochs: 50 - num_val_files: 20 - dtype: float32 - trainable: classification - classification_loss_type: categorical_cross_entropy - lr_schedule: exponentialdecay # exponentialdecay, onecycle - -sample_weights: - cls: inverse_sqrt - charge: signal_only - pt: signal_only - eta: signal_only - sin_phi: signal_only - cos_phi: signal_only - energy: signal_only - -parameters: - model: gnn_dense - input_encoding: cms - do_node_encoding: no - hidden_dim: 128 - dropout: 0.0 - activation: gelu - combined_graph_layer: - do_lsh: no - bin_size: 160 - max_num_bins: 100 - distance_dim: 128 - layernorm: no - dropout: 0.0 - dist_activation: gelu - kernel: - type: NodePairGaussianKernel - dist_mult: 0.1 - clip_value_low: 0.0 - num_node_messages: 1 - node_message: - type: GHConvDense - output_dim: 128 - activation: gelu - normalize_degrees: yes - hidden_dim: 128 - activation: gelu - num_graph_layers_common: 1 - num_graph_layers_energy: 1 - output_decoding: - activation: gelu - regression_use_classification: yes - dropout: 0.0 - - pt_skip_gate: no - eta_skip_gate: yes - phi_skip_gate: yes - - id_dim_decrease: yes - charge_dim_decrease: yes - pt_dim_decrease: yes - eta_dim_decrease: yes - phi_dim_decrease: yes - energy_dim_decrease: yes - - id_hidden_dim: 256 - charge_hidden_dim: 256 - pt_hidden_dim: 256 - eta_hidden_dim: 256 - phi_hidden_dim: 256 - energy_hidden_dim: 256 - - id_num_layers: 2 - charge_num_layers: 2 - pt_num_layers: 2 - eta_num_layers: 2 - 
phi_num_layers: 2 - energy_num_layers: 2 - layernorm: no - mask_reg_cls0: no - - skip_connection: yes - debug: no - -timing: - num_ev: 100 - num_iter: 3 - -exponentialdecay: - decay_steps: 1000 - decay_rate: 0.98 - staircase: yes diff --git a/requirements.txt b/requirements.txt index 739c0fc2f..b3d2f1257 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,6 +20,7 @@ pandas papermill plotly pre-commit +protobuf==3.20.3 pyarrow ray[default] ray[tune] @@ -29,11 +30,9 @@ scipy seaborn setGPU tensorflow -tensorflow-addons tensorflow-datasets==4.8.0 tensorflow-estimator tensorflow-probability -tensorflow-text tf-models-official tf2onnx tqdm diff --git a/scripts/generate_tfds.sh b/scripts/generate_tfds.sh index cdd747a74..b642fbe28 100755 --- a/scripts/generate_tfds.sh +++ b/scripts/generate_tfds.sh @@ -30,7 +30,7 @@ export CMD="singularity exec -B /local -B /scratch/persistent --env PYTHONPATH=$ # wait # CLIC cluster-based -# export MANUAL_DIR=/local/joosep/mlpf/clic_edm4hep_2023_02_27 +# export MANUAL_DIR=/local/joosep/mlpf/clic_edm4hep_2023_05_09/ # $CMD mlpf/heptfds/clic_pf_edm4hep/qq --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_qq.log & # $CMD mlpf/heptfds/clic_pf_edm4hep/ttbar --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_ttbar.log & # $CMD mlpf/heptfds/clic_pf_edm4hep/zh --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_zh.log & @@ -39,9 +39,16 @@ export CMD="singularity exec -B /local -B /scratch/persistent --env PYTHONPATH=$ # wait # CLIC hit-based -# export MANUAL_DIR=/local/joosep/mlpf_hits/clic_edm4hep_2023_02_27 -# $CMD mlpf/heptfds/clic_pf_edm4hep_hits/qq --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_qq.log & -# $CMD mlpf/heptfds/clic_pf_edm4hep_hits/ttbar --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_ttbar.log & +# export MANUAL_DIR=/local/joosep/mlpf_hits/clic_edm4hep/ +# $CMD mlpf/heptfds/clic_pf_edm4hep_hits/qq --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_qq_hits.log & +# $CMD mlpf/heptfds/clic_pf_edm4hep_hits/ttbar --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_ttbar_hits.log & +# $CMD mlpf/heptfds/clic_pf_edm4hep_hits/single_kaon0L --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_single_kaon0L_hits.log & +# $CMD mlpf/heptfds/clic_pf_edm4hep_hits/single_ele --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_single_ele_hits.log & +# $CMD mlpf/heptfds/clic_pf_edm4hep_hits/single_pi0 --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_single_pi0_hits.log & +# $CMD mlpf/heptfds/clic_pf_edm4hep_hits/single_pi --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_single_pi_hits.log & +# $CMD mlpf/heptfds/clic_pf_edm4hep_hits/single_neutron --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_single_neutron_hits.log & +# $CMD mlpf/heptfds/clic_pf_edm4hep_hits/single_gamma --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_single_gamma_hits.log & +# $CMD mlpf/heptfds/clic_pf_edm4hep_hits/single_mu --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_single_mu_hits.log & # wait # Delphes diff --git a/scripts/local_test_clic_hits_pipeline.sh b/scripts/local_test_clic_hits_pipeline.sh new file mode 100755 index 000000000..48bb2f9e1 --- /dev/null +++ b/scripts/local_test_clic_hits_pipeline.sh @@ -0,0 +1,24 @@ +#!/bin/bash +set -e +export TFDS_DATA_DIR=`pwd`/tensorflow_datasets 
+export PYTHONPATH=`pwd`/mlpf:$PYTHONPATH + +rm -Rf data/p8_ee_tt_ecm380 +mkdir -p data/p8_ee_tt_ecm380 +cd data/p8_ee_tt_ecm380 + +#download some test data +wget -q --no-check-certificate -nc https://jpata.web.cern.ch/jpata/clic_edm4hep_2023_02_27/p8_ee_tt_ecm380/reco_p8_ee_tt_ecm380_1.root +wget -q --no-check-certificate -nc https://jpata.web.cern.ch/jpata/clic_edm4hep_2023_02_27/p8_ee_tt_ecm380/reco_p8_ee_tt_ecm380_2.root + +cd ../.. + +python3 fcc/postprocessing_hits.py data/p8_ee_tt_ecm380/reco_p8_ee_tt_ecm380_1.root data/p8_ee_tt_ecm380/reco_p8_ee_tt_ecm380_1.parquet +python3 fcc/postprocessing_hits.py data/p8_ee_tt_ecm380/reco_p8_ee_tt_ecm380_2.root data/p8_ee_tt_ecm380/reco_p8_ee_tt_ecm380_2.parquet + +tfds build mlpf/heptfds/clic_pf_edm4hep_hits/ttbar --manual_dir data + +# #Train, evaluate and make plots +python mlpf/pipeline.py train --config parameters/clic-hits.yaml --nepochs 1 --customize pipeline_test --ntrain 2 --ntest 2 +python mlpf/pipeline.py evaluate --nevents 10 --customize pipeline_test --train-dir ./experiments/clic* --weights ./experiments/clic*/weights/weights-01-*.hdf5 +python mlpf/pipeline.py plots --train-dir ./experiments/clic* diff --git a/scripts/local_test_clic_pipeline.sh b/scripts/local_test_clic_pipeline.sh index f6e25eb7d..5e825e9b5 100755 --- a/scripts/local_test_clic_pipeline.sh +++ b/scripts/local_test_clic_pipeline.sh @@ -19,9 +19,9 @@ python3 fcc/postprocessing.py data/p8_ee_tt_ecm380/reco_p8_ee_tt_ecm380_2.root d tfds build mlpf/heptfds/clic_pf_edm4hep/ttbar --manual_dir data # #Train, evaluate and make plots -python mlpf/pipeline.py train --config parameters/clic.yaml --nepochs 1 --customize pipeline_test -python mlpf/pipeline.py evaluate --nevents 100 --customize pipeline_test --train-dir ./experiments/clic* --weights ./experiments/clic*/weights/weights-01-*.hdf5 +python mlpf/pipeline.py train --config parameters/clic.yaml --nepochs 1 --customize pipeline_test --ntrain 10 --ntest 10 +python mlpf/pipeline.py evaluate --nevents 10 --customize pipeline_test --train-dir ./experiments/clic* --weights ./experiments/clic*/weights/weights-01-*.hdf5 python mlpf/pipeline.py plots --train-dir ./experiments/clic* #try to train a fp16 model -python mlpf/pipeline.py train --config parameters/clic-fp16.yaml --nepochs 1 --customize pipeline_test +python mlpf/pipeline.py train --config parameters/mixedprecision/clic_fp16_bs1.yaml --nepochs 1 --customize pipeline_test --ntrain 10 --ntest 10
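
Note on the float16 configurations introduced above (parameters/mixedprecision/*.yaml): the combination of dtype: float16, cls_output_as_logits: yes, and the removal of the pcgrad option follows the usual TensorFlow mixed-precision recipe. The sketch below is illustrative only and is not code from this patch; the toy model and its dimensions are placeholders chosen to mirror the CLIC settings (17 input features per element, 6 output classes), not the repository's actual model factory.

    import tensorflow as tf
    from tensorflow.keras import mixed_precision

    # compute in float16, keep variables in float32
    mixed_precision.set_global_policy("mixed_float16")

    # hypothetical toy model: (num_elements, 17) inputs per event, 6-class output
    inputs = tf.keras.Input(shape=(None, 17))
    x = tf.keras.layers.Dense(128, activation="elu")(inputs)
    # force the classification head to emit float32 logits (cls_output_as_logits: yes),
    # which keeps the focal/cross-entropy loss numerically stable under float16 compute
    logits = tf.keras.layers.Dense(6, dtype="float32")(x)
    model = tf.keras.Model(inputs, logits)

    opt = tf.keras.optimizers.Adam(learning_rate=5e-4)
    # dynamic loss scaling is what makes fp16 gradients usable; gradient-surgery
    # wrappers such as pcgrad do not work with LossScaleOptimizer, which is why
    # the pcgrad flag is dropped from these configs
    opt = mixed_precision.LossScaleOptimizer(opt)
    model.compile(optimizer=opt,
                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True))

Under the mixed_float16 policy, Keras wraps any unwrapped optimizer in a LossScaleOptimizer at compile time anyway, so the explicit wrap above only makes the incompatibility with pcgrad visible.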
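Note on the batching block: several configs now set bucket_by_sequence_length: no, and the former clic-fp16.yaml drops the (max_sequence_length, batch_size) table that had been tuned for an 8 GB GPU. For context, a minimal sketch of how such length-bucketed batching is typically expressed with tf.data is given below; the generator and feature count are hypothetical stand-ins, not this repository's dataset code, while the boundaries and batch sizes reproduce the table removed in this patch.

    import tensorflow as tf

    # toy ragged "events": each element is a (num_elements, 17) float tensor
    def gen():
        for n in [30, 120, 700, 2500]:
            yield tf.zeros([n, 17], tf.float32)

    ds = tf.data.Dataset.from_generator(
        gen, output_signature=tf.TensorSpec(shape=[None, 17], dtype=tf.float32))

    # small events are packed into large batches, large events into small ones,
    # so peak GPU memory per batch stays roughly constant
    boundaries = [25, 50, 100, 200, 500, 1000, 2000, 3000, 4000, 5000, 6000]
    batch_sizes = [200, 100, 50, 20, 10, 5, 3, 2, 2, 1, 1, 1]  # len(boundaries) + 1

    ds = ds.bucket_by_sequence_length(
        element_length_func=lambda ev: tf.shape(ev)[0],
        bucket_boundaries=boundaries,
        bucket_batch_sizes=batch_sizes,
        pad_to_bucket_boundary=False,
    )

With bucketing disabled, training falls back to the fixed batch_per_gpu values set in these configs (e.g. 20 or 40 in the mixedprecision variants).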