diff --git a/fcc/p8_ee_ZH_Htautau_ecm380.cmd b/fcc/p8_ee_ZH_Htautau_ecm380.cmd
new file mode 100644
index 000000000..142564055
--- /dev/null
+++ b/fcc/p8_ee_ZH_Htautau_ecm380.cmd
@@ -0,0 +1,27 @@
+Random:setSeed = on
+Main:numberOfEvents = 1000         ! number of events to generate
+Main:timesAllowErrors = 5          ! how many aborts before run stops
+
+! 2) Settings related to output in init(), next() and stat().
+Init:showChangedSettings = on      ! list changed settings
+Init:showChangedParticleData = off ! list changed particle data
+Next:numberCount = 100             ! print message every n events
+Next:numberShowInfo = 1            ! print event information n times
+Next:numberShowProcess = 1         ! print process record n times
+Next:numberShowEvent = 0           ! print event record n times
+
+Beams:idA = 11                   ! first beam, e+ = 11
+Beams:idB = -11                   ! second beam, e- = -11
+
+! 3) Hard process : ZH at 365 GeV
+Beams:eCM = 380  ! CM energy of collision
+HiggsSM:ffbar2HZ = on
+
+! 4) Settings for the event generation process in the Pythia8 library.
+PartonLevel:ISR = on               ! initial-state radiation
+PartonLevel:FSR = on               ! final-state radiation
+
+! 5) Non-standard settings; exemplifies tuning possibilities.
+25:m0        = 125.0               ! Higgs mass
+25:onMode    = off
+25:onIfAny   = 15
\ No newline at end of file
diff --git a/fcc/p8_ee_gg_ecm365.cmd b/fcc/p8_ee_gg_ecm365.cmd
new file mode 100644
index 000000000..95c2a39a8
--- /dev/null
+++ b/fcc/p8_ee_gg_ecm365.cmd
@@ -0,0 +1,27 @@
+Random:setSeed = on
+Main:numberOfEvents = 3000         ! number of events to generate
+Main:timesAllowErrors = 5          ! how many aborts before run stops
+
+
+! 2) Settings related to output in init(), next() and stat().
+Init:showChangedSettings = on      ! list changed settings
+Init:showChangedParticleData = off ! list changed particle data
+Next:numberCount = 100            ! print message every n events
+Next:numberShowInfo = 1            ! print event information n times
+Next:numberShowProcess = 1         ! print process record n times
+Next:numberShowEvent = 0           ! print event record n times
+Stat:showPartonLevel = off
+
+! 3) Beam parameter settings. Values below agree with default ones.
+Beams:idA = 11                   ! first beam, e- = 11
+Beams:idB = -11                  ! second beam, e+ = -11
+
+! 4) Hard process : photon collisions at 365
+Beams:eCM = 365
+PhotonCollision:gmgm2qqbar = on
+PhotonCollision:gmgm2ccbar = on
+PhotonCollision:gmgm2bbbar = on
+
+PartonLevel:ISR = on               ! initial-state radiation
+PartonLevel:FSR = on               ! final-state radiation
+
diff --git a/fcc/p8_ee_qcd_ecm365.cmd b/fcc/p8_ee_qcd_ecm365.cmd
new file mode 100644
index 000000000..8d2bfb389
--- /dev/null
+++ b/fcc/p8_ee_qcd_ecm365.cmd
@@ -0,0 +1,27 @@
+Random:setSeed = on
+Main:numberOfEvents = 6000         ! number of events to generate
+Main:timesAllowErrors = 5          ! how many aborts before run stops
+
+
+! 2) Settings related to output in init(), next() and stat().
+Init:showChangedSettings = on      ! list changed settings
+Init:showChangedParticleData = off ! list changed particle data
+Next:numberCount = 100            ! print message every n events
+Next:numberShowInfo = 100            ! print event information n times
+Next:numberShowProcess = 100         ! print process record n times
+Next:numberShowEvent = 100           ! print event record n times
+Stat:showPartonLevel = off
+
+! 3) Beam parameter settings.
+Beams:idA = 11                   ! first beam, e- = 11
+Beams:idB = -11                  ! second beam, e+ = -11
+
+! s-channel gamma/Z 
+Beams:eCM = 365               ! CM energy of collision
+HardQCD:all = on
+WeakSingleBoson:ffbar2ffbar(s:gmZ) = on
+
+! Decay Z to quarks
+23:onMode   = off
+23:onIfAny = 1 2 3 4 5 6
+
diff --git a/fcc/p8_ee_qcd_ecm380.cmd b/fcc/p8_ee_qcd_ecm380.cmd
new file mode 100644
index 000000000..35c64fae1
--- /dev/null
+++ b/fcc/p8_ee_qcd_ecm380.cmd
@@ -0,0 +1,22 @@
+Random:setSeed = on
+Main:numberOfEvents = 6000         ! number of events to generate
+Main:timesAllowErrors = 5          ! how many aborts before run stops
+
+
+! 2) Settings related to output in init(), next() and stat().
+Init:showChangedSettings = on      ! list changed settings
+Init:showChangedParticleData = off ! list changed particle data
+Next:numberCount = 100            ! print message every n events
+Next:numberShowInfo = 100            ! print event information n times
+Next:numberShowProcess = 100         ! print process record n times
+Next:numberShowEvent = 100           ! print event record n times
+Stat:showPartonLevel = off
+
+! 3) Beam parameter settings.
+Beams:idA = 11                   ! first beam, e- = 11
+Beams:idB = -11                  ! second beam, e+ = -11
+
+! s-channel gamma/Z 
+Beams:eCM = 380               ! CM energy of collision
+HardQCD:all = on
+WeakSingleBoson:ffbar2ffbar(s:gmZ) = on
diff --git a/fcc/postprocessing.py b/fcc/postprocessing.py
new file mode 100644
index 000000000..75daf8e76
--- /dev/null
+++ b/fcc/postprocessing.py
@@ -0,0 +1,759 @@
+import bz2
+import numpy as np
+import awkward
+import matplotlib.pyplot as plt
+import uproot
+import vector
+import glob
+import networkx as nx
+import tqdm
+import numba
+import sys
+from scipy.sparse import coo_matrix
+
+track_coll = "SiTracks_Refitted"
+mc_coll = "MCParticles"
+
+#the feature matrices will be saved in this order
+particle_feature_order = ["PDG", "charge", "pt", "eta", "phi", "energy"]
+
+#arrange track and cluster features such that pt (et), eta, phi, p (energy) are in the same spot
+#so we can easily use them in skip connections
+track_feature_order = [
+    "type", "pt", "eta", "phi", "p",
+    "chi2", "ndf", "dEdx", "dEdxError",
+    "radiusOfInnermostHit", "tanLambda", "D0", "omega",
+    "Z0", "time"
+]
+cluster_feature_order = [
+    "type", "et", "eta", "phi", "energy",
+    "position.x", "position.y", "position.z", "iTheta",
+    "energy_ecal", "energy_hcal", "energy_other", "num_hits",
+    "sigma_x", "sigma_y", "sigma_z"
+]
+
+def track_pt(omega):
+    a = 3 * 10**-4
+    b = 4  # B-field in tesla, from clicRec_e4h_input
+
+    return a * np.abs(b / omega)
+
+def map_pdgid_to_candid(pdgid, charge):
+    if pdgid == 0:
+        return 0
+
+    #photon, electron, muon
+    if pdgid in [22, 11, 13]:
+        return pdgid
+
+    # charged hadron
+    if abs(charge) > 0:
+        return 211
+
+    # neutral hadron
+    return 130
+
+def map_charged_to_neutral(pdg):
+    if pdg == 0:
+        return 0
+    if pdg == 11 or pdg == 22:
+        return 22
+    return 130
+
+def map_neutral_to_charged(pdg):
+    if pdg == 130 or pdg == 22:
+        return 211
+    return pdg
+
+def sanitize(arr):
+    arr[np.isnan(arr)] = 0.0 
+    arr[np.isinf(arr)] = 0.0 
+
+class EventData:
+    def __init__(self,
+        gen_features,
+        hit_features,
+        cluster_features,
+        track_features,
+        genparticle_to_hit,
+        genparticle_to_track,
+        hit_to_cluster,
+        ):
+        self.gen_features = gen_features 
+        self.hit_features = hit_features 
+        self.cluster_features = cluster_features 
+        self.track_features = track_features 
+        self.genparticle_to_hit = genparticle_to_hit 
+        self.genparticle_to_track = genparticle_to_track 
+        self.hit_to_cluster = hit_to_cluster 
+
+def get_cluster_subdet_energies(hit_list, hit_data, collectionIDs_reverse, iev):
+    """
+    This function calculates the energy contribution from each of four subdetectors in a particle physics experiment, based on a list of hits and their corresponding data.
+
+    Args:
+    hit_list: a list of tuples, where each tuple contains a collection ID and a hit index
+    hit_data: a dictionary containing data for each hit in the experiment, organized by collection
+    collectionIDs_reverse: a dictionary mapping collection IDs to collection names
+    iev: the event number for the current event
+
+    Returns:
+    A tuple containing the energy contributions from each of the four subdetectors:
+    (ecal_energy, hcal_energy, muon_energy, other_energy)
+    """
+
+    ecal_energy = 0.0
+    hcal_energy = 0.0
+    muon_energy = 0.0
+    other_energy = 0.0
+
+    for coll_id, hit_idx in hit_list:
+        coll = collectionIDs_reverse[coll_id]
+        hit_energy = hit_data[coll][iev][coll+".energy"][hit_idx]
+
+        if coll.startswith("ECAL"):
+            ecal_energy += hit_energy
+        elif coll.startswith("HCAL"):
+            hcal_energy += hit_energy
+        elif coll == "MUON":
+            muon_energy += hit_energy
+        else:
+            other_energy += hit_energy
+
+    return ecal_energy, hcal_energy, muon_energy, other_energy
+
+def hits_to_features(hit_data, iev, coll, feats):
+    feat_arr = {f: hit_data[coll + "." + f][iev] for f in feats}
+
+    sdcoll = "subdetector"
+    feat_arr[sdcoll] = np.zeros(len(feat_arr["type"]), dtype=np.int32)
+    if coll.startswith("ECAL"):
+        feat_arr[sdcoll][:] = 0
+    elif coll.startswith("HCAL"):
+        feat_arr[sdcoll][:] = 1
+    else:
+        feat_arr[sdcoll][:] = 2
+    return awkward.Record(feat_arr)
+
+def get_calohit_matrix_and_genadj(hit_data, calohit_links, iev):
+    feats = ["type", "cellID", "energy", "energyError", "time", "position.x", "position.y", "position.z"]
+    
+    hit_idx_global = 0
+    hit_idx_global_to_local = {}
+    hit_feature_matrix = []
+    for col in sorted(hit_data.keys()):
+        icol = collectionIDs[col]
+        hit_features = hits_to_features(hit_data[col], iev, col, feats)
+        hit_feature_matrix.append(hit_features)
+        for ihit in range(len(hit_data[col][col+".energy"][iev])):
+            hit_idx_global_to_local[hit_idx_global] = (icol, ihit)
+            hit_idx_global += 1
+    hit_idx_local_to_global = {v: k for k, v in hit_idx_global_to_local.items()}
+    hit_feature_matrix = awkward.Record({
+        k: awkward.concatenate([hit_feature_matrix[i][k] for i in range(len(hit_feature_matrix))]) for k in hit_feature_matrix[0].fields})
+
+    #add all edges from genparticle to calohit
+    calohit_to_gen_weight = calohit_links["CalohitMCTruthLink"]["CalohitMCTruthLink.weight"][iev]
+    calohit_to_gen_calo_colid = calohit_links["CalohitMCTruthLink#0"]["CalohitMCTruthLink#0.collectionID"][iev]
+    calohit_to_gen_gen_colid = calohit_links["CalohitMCTruthLink#1"]["CalohitMCTruthLink#1.collectionID"][iev]
+    calohit_to_gen_calo_idx = calohit_links["CalohitMCTruthLink#0"]["CalohitMCTruthLink#0.index"][iev]
+    calohit_to_gen_gen_idx = calohit_links["CalohitMCTruthLink#1"]["CalohitMCTruthLink#1.index"][iev]
+    genparticle_to_hit_matrix_coo0 = []
+    genparticle_to_hit_matrix_coo1 = []
+    genparticle_to_hit_matrix_w = []
+    for calo_colid, calo_idx, gen_colid, gen_idx, w in zip(calohit_to_gen_calo_colid, calohit_to_gen_calo_idx, calohit_to_gen_gen_colid, calohit_to_gen_gen_idx, calohit_to_gen_weight):
+        genparticle_to_hit_matrix_coo0.append(gen_idx)
+        genparticle_to_hit_matrix_coo1.append(hit_idx_local_to_global[(calo_colid, calo_idx)])
+        genparticle_to_hit_matrix_w.append(w)
+
+    return hit_feature_matrix, (genparticle_to_hit_matrix_coo0, genparticle_to_hit_matrix_coo1, genparticle_to_hit_matrix_w), hit_idx_local_to_global
+
+def hit_cluster_adj(prop_data, hit_idx_local_to_global, iev):
+    coll_arr = prop_data["PandoraClusters#1"]["PandoraClusters#1.collectionID"][iev]
+    idx_arr = prop_data["PandoraClusters#1"]["PandoraClusters#1.index"][iev]
+    hits_begin = prop_data["PandoraClusters"]["PandoraClusters.hits_begin"][iev]
+    hits_end = prop_data["PandoraClusters"]["PandoraClusters.hits_end"][iev]
+
+    #index in the array of all hits
+    hit_to_cluster_matrix_coo0 = []
+    #index in the cluster array
+    hit_to_cluster_matrix_coo1 = []
+
+    #weight
+    hit_to_cluster_matrix_w = []
+
+    #loop over all clusters
+    for icluster in range(len(hits_begin)):
+
+        #get the slice in the hit array corresponding to this cluster
+        hbeg = hits_begin[icluster]
+        hend = hits_end[icluster]
+        idx_range = idx_arr[hbeg:hend]
+        coll_range = coll_arr[hbeg:hend]
+
+        #add edges from hit to cluster
+        for icol, idx in zip(coll_range, idx_range):
+            hit_to_cluster_matrix_coo0.append(hit_idx_local_to_global[(icol, idx)])
+            hit_to_cluster_matrix_coo1.append(icluster)
+            hit_to_cluster_matrix_w.append(1.0)
+    return hit_to_cluster_matrix_coo0, hit_to_cluster_matrix_coo1, hit_to_cluster_matrix_w
+
+def gen_to_features(prop_data, iev):
+    gen_arr = prop_data[mc_coll][iev]
+    gen_arr = {k.replace(mc_coll+".", ""): gen_arr[k] for k in gen_arr.fields}
+
+    MCParticles_p4 = vector.awk(awkward.zip({
+        "mass": gen_arr["mass"],
+        "x": gen_arr["momentum.x"],
+        "y": gen_arr["momentum.y"],
+        "z": gen_arr["momentum.z"]}))
+    gen_arr["pt"] = MCParticles_p4.pt
+    gen_arr["eta"] = MCParticles_p4.eta
+    gen_arr["phi"] = MCParticles_p4.phi
+    gen_arr["energy"] = MCParticles_p4.energy
+
+    return awkward.Record({
+        "PDG": gen_arr["PDG"],
+        "generatorStatus": gen_arr["generatorStatus"],
+        "charge": gen_arr["charge"],
+        "pt": gen_arr["pt"],
+        "eta": gen_arr["eta"],
+        "phi": gen_arr["phi"],
+        "energy": gen_arr["energy"],
+        })
+
+def genparticle_track_adj(sitrack_links):
+    trk_to_gen_trkidx = sitrack_links["SiTracksMCTruthLink#0"]["SiTracksMCTruthLink#0.index"][iev]
+    trk_to_gen_genidx = sitrack_links["SiTracksMCTruthLink#1"]["SiTracksMCTruthLink#1.index"][iev]
+    trk_to_gen_w = sitrack_links["SiTracksMCTruthLink"]["SiTracksMCTruthLink.weight"][iev]
+
+    genparticle_to_track_matrix_coo0 = awkward.to_numpy(trk_to_gen_genidx)
+    genparticle_to_track_matrix_coo1 = awkward.to_numpy(trk_to_gen_trkidx)
+    genparticle_to_track_matrix_w = awkward.to_numpy(trk_to_gen_w)
+    
+    return genparticle_to_track_matrix_coo0, genparticle_to_track_matrix_coo1, genparticle_to_track_matrix_w
+
+def cluster_to_features(prop_data, hit_features, hit_to_cluster, iev):
+    cluster_arr = prop_data["PandoraClusters"][iev]
+    feats = ["type", "position.x", "position.y", "position.z", "iTheta", "phi", "energy"]
+    ret = {feat: cluster_arr["PandoraClusters." + feat] for feat in feats}
+
+    hit_idx = np.array(hit_to_cluster[0])
+    cluster_idx = np.array(hit_to_cluster[1])
+    cl_energy_ecal = []
+    cl_energy_hcal = []
+    cl_energy_other = []
+    num_hits = []
+    cl_sigma_x = []
+    cl_sigma_y = []
+    cl_sigma_z = []
+
+    n_cl = len(ret["energy"])
+    for cl in range(n_cl):
+        msk_cl = cluster_idx == cl
+        hits = hit_idx[msk_cl]
+
+        num_hits.append(len(hits))
+
+        subdets = hit_features["subdetector"][hits]
+
+        hits_energy = hit_features["energy"][hits]
+
+        hits_posx = hit_features["position.x"][hits]
+        hits_posy = hit_features["position.y"][hits]
+        hits_posz = hit_features["position.z"][hits]
+
+        energy_ecal = np.sum(hits_energy[subdets==0])
+        energy_hcal = np.sum(hits_energy[subdets==1])
+        energy_other = np.sum(hits_energy[subdets==2])
+
+        cl_energy_ecal.append(energy_ecal)
+        cl_energy_hcal.append(energy_hcal)
+        cl_energy_other.append(energy_other)
+
+        cl_sigma_x.append(np.std(hits_posx))
+        cl_sigma_y.append(np.std(hits_posy))
+        cl_sigma_z.append(np.std(hits_posz))
+
+    ret["energy_ecal"] = np.array(cl_energy_ecal)
+    ret["energy_hcal"] = np.array(cl_energy_hcal)
+    ret["energy_other"] = np.array(cl_energy_other)
+    ret["num_hits"] = np.array(num_hits)
+    ret["sigma_x"] = np.array(cl_sigma_x)
+    ret["sigma_y"] = np.array(cl_sigma_y)
+    ret["sigma_z"] = np.array(cl_sigma_z)
+
+    tt = np.tan(ret["iTheta"] / 2.0)
+    eta = awkward.to_numpy(-np.log(tt, where=tt>0))
+    eta[tt<=0] = 0.0
+    ret["eta"] = eta
+
+    costheta = np.cos(ret["iTheta"])
+    ez = ret["energy"]*costheta
+    ret["et"]  = np.sqrt(ret["energy"]**2 - ez**2)
+
+    #override cluster type with 1
+    ret["type"] = 2*np.ones(n_cl, dtype=np.float32)
+
+    return awkward.Record(ret)
+
+def track_to_features(prop_data, iev):
+    track_arr = prop_data[track_coll][iev]
+    feats_from_track = ["type", "chi2", "ndf", "dEdx", "dEdxError", "radiusOfInnermostHit"]
+    ret = {feat: track_arr[track_coll + "." + feat] for feat in feats_from_track}
+    n_tr = len(ret["type"])
+
+    #FIXME: add additional track features from track state
+
+    #get the index of the first track state
+    trackstate_idx = prop_data[track_coll][track_coll + ".trackStates_begin"][iev]
+    #get the properties of the track at the first track state (at the origin)
+    for k in ["tanLambda", "D0", "phi", "omega", "Z0", "time"]:
+        ret[k] = prop_data["SiTracks_1"]["SiTracks_1." + k][iev][trackstate_idx]
+
+    ret["pt"] = track_pt(ret["omega"])
+    ret["px"] = np.cos(ret["phi"]) * ret["pt"]
+    ret["py"] = np.sin(ret["phi"]) * ret["pt"]
+    ret["pz"] = ret["tanLambda"] * ret["pt"]
+    ret["p"] = np.sqrt(ret["px"]**2 + ret["py"]**2 + ret["pz"]**2)
+    cos_theta = np.divide(ret["pz"], ret["p"], where=ret["p"]>0)
+    theta = np.arccos(cos_theta)
+    tt = np.tan(theta / 2.0)
+    eta = awkward.to_numpy(-np.log(tt, where=tt>0))
+    eta[tt<=0] = 0.0
+    ret["eta"] = eta
+
+    #override track type with 1
+    ret["type"] = 1*np.ones(n_tr, dtype=np.float32)
+
+    return awkward.Record(ret)
+
+def filter_adj(adj, all_to_filtered):
+    i0s_new = []
+    i1s_new = []
+    ws_new = []
+    for i0, i1, w in zip(*adj):
+        if i0 in all_to_filtered:
+            i0_new = all_to_filtered[i0]
+            i0s_new.append(i0_new)
+            i1s_new.append(i1)
+            ws_new.append(w)
+    return np.array(i0s_new), np.array(i1s_new), np.array(ws_new)
+
+def get_genparticles_and_adjacencies(prop_data, hit_data, calohit_links, sitrack_links, iev):
+    gen_features = gen_to_features(prop_data, iev)
+    hit_features, genparticle_to_hit, hit_idx_local_to_global = get_calohit_matrix_and_genadj(hit_data, calohit_links, iev)
+    hit_to_cluster = hit_cluster_adj(prop_data, hit_idx_local_to_global, iev)
+    cluster_features = cluster_to_features(prop_data, hit_features, hit_to_cluster, iev)
+    track_features = track_to_features(prop_data, iev)
+    genparticle_to_track = genparticle_track_adj(sitrack_links)
+
+    n_gp = awkward.count(gen_features["PDG"])
+    n_track = awkward.count(track_features["type"])
+    n_hit = awkward.count(hit_features["type"])
+    n_cluster = awkward.count(cluster_features["type"])
+
+    if len(genparticle_to_track[0])>0:
+        gp_to_track = coo_matrix(
+            (genparticle_to_track[2],
+            (genparticle_to_track[0], genparticle_to_track[1])),
+            shape=(n_gp, n_track)
+        ).max(axis=1).todense()
+    else:
+        gp_to_track = np.zeros((n_gp, 1))
+
+    gp_to_calohit = coo_matrix(
+        (genparticle_to_hit[2],
+        (genparticle_to_hit[0], genparticle_to_hit[1])),
+        shape=(n_gp, n_hit)
+    )
+    calohit_to_cluster = coo_matrix(
+        (hit_to_cluster[2],
+        (hit_to_cluster[0], hit_to_cluster[1])),
+        shape=(n_hit, n_cluster)
+    )
+    gp_to_cluster = (gp_to_calohit*calohit_to_cluster).sum(axis=1)
+
+    #60% of the hits of a track must come from the genparticle
+    gp_in_tracker = np.array(gp_to_track>=0.6)[:, 0]
+
+    #at least 10% of the energy of the genparticle should be matched to a calorimeter cluster
+    gp_in_calo = (np.array(gp_to_cluster)[:, 0]/gen_features["energy"])>0.1
+
+    gp_interacted_with_detector = gp_in_tracker | gp_in_calo
+
+    mask_visible = (
+        (gen_features["generatorStatus"]==1) & 
+        (gen_features["PDG"]!=12) & 
+        (gen_features["PDG"]!=14) & 
+        (gen_features["PDG"]!=16) & 
+        (gen_features["energy"]>0.01) &
+        gp_interacted_with_detector
+    )
+    idx_all_masked = np.where(mask_visible)[0]
+    genpart_idx_all_to_filtered = {idx_all: idx_filtered for idx_filtered, idx_all in enumerate(idx_all_masked)}
+
+    gen_features = awkward.Record({
+        feat: gen_features[feat][mask_visible] for feat in gen_features.fields
+    })
+
+    genparticle_to_hit = filter_adj(genparticle_to_hit, genpart_idx_all_to_filtered)
+    genparticle_to_track = filter_adj(genparticle_to_track, genpart_idx_all_to_filtered)
+
+    return EventData(
+        gen_features,
+        hit_features,
+        cluster_features,
+        track_features,
+        genparticle_to_hit,
+        genparticle_to_track,
+        hit_to_cluster
+    )
+
+def assign_genparticles_to_obj_and_merge(gpdata):
+
+    n_gp = awkward.count(gpdata.gen_features["PDG"])
+    n_track = awkward.count(gpdata.track_features["type"])
+    n_hit = awkward.count(gpdata.hit_features["type"])
+    n_cluster = awkward.count(gpdata.cluster_features["type"])
+
+    gp_to_track = np.array(coo_matrix(
+        (gpdata.genparticle_to_track[2],
+        (gpdata.genparticle_to_track[0], gpdata.genparticle_to_track[1])),
+        shape=(n_gp, n_track)
+    ).todense())
+
+    gp_to_calohit = coo_matrix(
+        (gpdata.genparticle_to_hit[2],
+        (gpdata.genparticle_to_hit[0], gpdata.genparticle_to_hit[1])),
+        shape=(n_gp, n_hit)
+    )
+    calohit_to_cluster = coo_matrix(
+        (gpdata.hit_to_cluster[2],
+        (gpdata.hit_to_cluster[0], gpdata.hit_to_cluster[1])),
+        shape=(n_hit, n_cluster)
+    )
+
+    gp_to_cluster = np.array((gp_to_calohit*calohit_to_cluster).todense())
+
+    #map each genparticle to a track or a cluster
+    gp_to_obj = -1*np.ones((n_gp, 2), dtype=np.int32)
+    set_used_tracks = set([])
+    set_used_clusters = set([])
+    gps_sorted_energy = sorted(range(n_gp), key=lambda x: gpdata.gen_features["energy"][x], reverse=True)
+
+    for igp in gps_sorted_energy:
+
+        #first check if we can match the genparticle to a track
+        matched_tracks = gp_to_track[igp]
+        trks = np.where(matched_tracks)[0]
+        trks = sorted(trks, key=lambda x: matched_tracks[x], reverse=True)
+        for trk in trks:
+            #if the track was not already used for something else
+            if trk not in set_used_tracks:
+                gp_to_obj[igp, 0] = trk
+                set_used_tracks.add(trk)
+                break
+
+        #if there was no matched track, try a cluster
+        if gp_to_obj[igp, 0] == -1:
+            matched_clusters = gp_to_cluster[igp]
+            clusters = np.where(matched_clusters)[0]
+            clusters = sorted(clusters, key=lambda x: matched_clusters[x], reverse=True)
+            for cl in clusters:
+                if cl not in set_used_clusters:
+                    gp_to_obj[igp, 1] = cl
+                    set_used_clusters.add(cl)
+                    break
+
+    #the genparticles that could not be matched to a track or cluster are merged to the closest genparticle
+    unmatched = np.where((gp_to_obj[:, 0]==-1) & (gp_to_obj[:, 1]==-1))[0]
+    mask_gp_unmatched = np.ones(n_gp, dtype=bool)
+
+    pt_arr = np.array(awkward.to_numpy(gpdata.gen_features["pt"]))
+    eta_arr = np.array(awkward.to_numpy(gpdata.gen_features["eta"]))
+    phi_arr = np.array(awkward.to_numpy(gpdata.gen_features["phi"]))
+    energy_arr = np.array(awkward.to_numpy(gpdata.gen_features["energy"]))
+
+    #now merge unmatched genparticles to their closest genparticle 
+    for igp_unmatched in unmatched:
+        mask_gp_unmatched[igp_unmatched] = False
+        idx_best_cluster = np.argmax(gp_to_cluster[igp_unmatched])
+        idx_gp_bestcluster = np.where(gp_to_obj[:, 1]==idx_best_cluster)[0]
+
+        #if the genparticle is not matched to any cluster, then it left a few hits to some other track
+        #this is rare, happens only for low-pT particles and we don"t want to try to reconstruct it
+        if (len(idx_gp_bestcluster)!=1):
+            print("unmatched pt=", pt_arr[igp_unmatched])
+            continue
+
+        idx_gp_bestcluster = idx_gp_bestcluster[0]
+
+        vec0 = vector.obj(
+            pt=gpdata.gen_features["pt"][igp_unmatched],
+            eta=gpdata.gen_features["eta"][igp_unmatched],
+            phi=gpdata.gen_features["phi"][igp_unmatched],
+            e=gpdata.gen_features["energy"][igp_unmatched],
+        )
+        vec1 = vector.obj(
+            pt=gpdata.gen_features["pt"][idx_gp_bestcluster],
+            eta=gpdata.gen_features["eta"][idx_gp_bestcluster],
+            phi=gpdata.gen_features["phi"][idx_gp_bestcluster],
+            e=gpdata.gen_features["energy"][idx_gp_bestcluster],
+        )
+        vec = vec0+vec1
+        pt_arr[idx_gp_bestcluster] = vec.pt
+        eta_arr[idx_gp_bestcluster] = vec.eta
+        phi_arr[idx_gp_bestcluster] = vec.phi
+        energy_arr[idx_gp_bestcluster] = vec.energy
+
+    gen_features_new = {
+        "PDG": np.abs(gpdata.gen_features["PDG"][mask_gp_unmatched]),
+        "charge": gpdata.gen_features["charge"][mask_gp_unmatched],
+        "pt": pt_arr[mask_gp_unmatched],
+        "eta": eta_arr[mask_gp_unmatched],
+        "phi": phi_arr[mask_gp_unmatched],
+        "energy": energy_arr[mask_gp_unmatched],
+    }
+    assert((np.sum(gen_features_new["energy"])-np.sum(gpdata.gen_features["energy"])) < 1e-2)
+
+    idx_all_masked = np.where(mask_gp_unmatched)[0]
+    genpart_idx_all_to_filtered = {idx_all: idx_filtered for idx_filtered, idx_all in enumerate(idx_all_masked)}
+    genparticle_to_hit = filter_adj(gpdata.genparticle_to_hit, genpart_idx_all_to_filtered)
+    genparticle_to_track = filter_adj(gpdata.genparticle_to_track, genpart_idx_all_to_filtered)
+    gp_to_obj = gp_to_obj[mask_gp_unmatched]
+
+    return EventData(
+        gen_features_new,
+        gpdata.hit_features,
+        gpdata.cluster_features,
+        gpdata.track_features,
+        genparticle_to_hit,
+        genparticle_to_track,
+        gpdata.hit_to_cluster
+    ), gp_to_obj
+
+
+#for each PF element (track, cluster), get the index of the best-matched particle (gen or reco)
+#if the PF element has no best-matched particle, returns -1
+def assign_to_recoobj(n_obj, obj_to_ptcl, used_particles):
+    obj_to_ptcl_all = -1 * np.ones(n_obj, dtype=np.int64)
+    for iobj in range(n_obj):
+        if iobj in obj_to_ptcl:
+            iptcl = obj_to_ptcl[iobj]
+            obj_to_ptcl_all[iobj] = iptcl
+            assert(used_particles[iptcl] == 0)
+            used_particles[iptcl] = 1
+    return obj_to_ptcl_all
+
+def get_recoptcl_to_obj(n_rps, reco_arr):
+    track_to_rp = {}
+    cluster_to_rp = {}
+    for irp in range(n_rps):
+        assigned = False
+        trks_begin = reco_arr["tracks_begin"][irp]
+        trks_end = reco_arr["tracks_end"][irp]
+        for itrk in range(trks_begin, trks_end):
+            assert(itrk not in track_to_rp)
+            track_to_rp[itrk] = irp
+            assigned = True
+
+        #only look for clusters if tracks were not found
+        if not assigned:
+            cls_begin = reco_arr["clusters_begin"][irp]
+            cls_end = reco_arr["clusters_end"][irp]
+            for icls in range(cls_begin, cls_end):
+                assert(icls not in cluster_to_rp)
+                cluster_to_rp[icls] = irp
+    return track_to_rp, cluster_to_rp
+
+def get_reco_properties(prop_data, iev):
+    reco_arr = prop_data["MergedRecoParticles"][iev]
+    reco_arr = {k.replace("MergedRecoParticles.", ""): reco_arr[k] for k in reco_arr.fields}
+
+    reco_p4 = vector.awk(awkward.zip({
+        "mass": reco_arr["mass"],
+        "x": reco_arr["momentum.x"],
+        "y": reco_arr["momentum.y"],
+        "z": reco_arr["momentum.z"]}))
+    reco_arr["pt"] = reco_p4.pt
+    reco_arr["eta"] = reco_p4.eta
+    reco_arr["phi"] = reco_p4.phi
+    reco_arr["energy"] = reco_p4.energy
+
+    msk = reco_arr["type"]!=0
+    reco_arr = awkward.Record({k: reco_arr[k][msk] for k in reco_arr.keys()})
+    return reco_arr
+
+def get_particle_feature_matrix(pfelem_to_particle, feature_dict, features):
+    feats = []
+    for feat in features:
+        feat_arr = feature_dict[feat]
+        if len(feat_arr)==0:
+            feat_arr_reordered = feat_arr
+        else:
+            feat_arr_reordered = awkward.to_numpy(feat_arr[pfelem_to_particle])
+            feat_arr_reordered[pfelem_to_particle==-1] = 0.0
+        feats.append(feat_arr_reordered)
+    feats = np.array(feats)
+    return feats.T
+
+def get_feature_matrix(feature_dict, features):
+    feats = []
+    for feat in features:
+        feat_arr = awkward.to_numpy(feature_dict[feat])
+        feats.append(feat_arr)
+    feats = np.array(feats)
+    return feats.T
+
+if __name__ == "__main__":
+
+    fn = sys.argv[1]
+    fi = uproot.open(fn)
+    
+    arrs = fi["events"]
+    
+    collectionIDs = {k: v for k, v in
+        zip(fi.get("metadata").arrays("CollectionIDs")["CollectionIDs"]["m_names"][0],
+        fi.get("metadata").arrays("CollectionIDs")["CollectionIDs"]["m_collectionIDs"][0])}
+    collectionIDs_reverse = {v: k for k, v in collectionIDs.items()}
+    
+    prop_data = arrs.arrays([mc_coll, track_coll, "SiTracks_1", "PandoraClusters", "PandoraClusters#1", "PandoraClusters#0", "MergedRecoParticles"])
+    calohit_links = arrs.arrays(["CalohitMCTruthLink", "CalohitMCTruthLink#0", "CalohitMCTruthLink#1"])
+    sitrack_links = arrs.arrays(["SiTracksMCTruthLink", "SiTracksMCTruthLink#0", "SiTracksMCTruthLink#1"])
+
+    hit_data = {
+        "ECALBarrel": arrs["ECALBarrel"].array(),
+        "ECALEndcap": arrs["ECALEndcap"].array(),
+        "ECALOther": arrs["ECALOther"].array(),
+        "HCALBarrel": arrs["HCALBarrel"].array(),
+        "HCALEndcap": arrs["HCALEndcap"].array(),
+        "HCALOther": arrs["HCALOther"].array(),
+        "MUON": arrs["MUON"].array(),
+    }
+
+    ret = []
+    for iev in tqdm.tqdm(range(arrs.num_entries)):
+
+        #get the reco particles
+        reco_arr = get_reco_properties(prop_data, iev)
+        reco_type = np.abs(reco_arr["type"])
+        n_rps = len(reco_type)
+        reco_features = awkward.Record({
+            "PDG": np.abs(reco_type),
+            "charge": reco_arr["charge"],
+            "pt": reco_arr["pt"],
+            "eta": reco_arr["eta"],
+            "phi": reco_arr["phi"],
+            "energy": reco_arr["energy"]
+        })
+
+        #get the genparticles and the links between genparticles and tracks/clusters
+        gpdata = get_genparticles_and_adjacencies(prop_data, hit_data, calohit_links, sitrack_links, iev)
+
+        #find the reconstructable genparticles and associate them to the best track/cluster
+        gpdata_cleaned, gp_to_obj = assign_genparticles_to_obj_and_merge(gpdata)
+
+        n_tracks = len(gpdata_cleaned.track_features["type"])
+        n_clusters = len(gpdata_cleaned.cluster_features["type"])
+        n_gps = len(gpdata_cleaned.gen_features["PDG"])
+
+        assert(len(gp_to_obj) == len(gpdata_cleaned.gen_features["PDG"]))
+        assert(gp_to_obj.shape[1] == 2)
+        
+        #for each reco particle, find the tracks and clusters associated with it
+        #construct track/cluster -> recoparticle maps
+        track_to_rp, cluster_to_rp = get_recoptcl_to_obj(n_rps, reco_arr)
+
+        #get the track/cluster -> genparticle map
+        track_to_gp = {itrk: igp for igp, itrk in enumerate(gp_to_obj[:, 0]) if itrk != -1}
+        cluster_to_gp = {icl: igp for igp, icl in enumerate(gp_to_obj[:, 1]) if icl != -1}
+
+        used_gps = np.zeros(n_gps, dtype=np.int64)
+        track_to_gp_all = assign_to_recoobj(n_tracks, track_to_gp, used_gps)
+        cluster_to_gp_all = assign_to_recoobj(n_clusters, cluster_to_gp, used_gps)
+        #all genparticles must be assigned to some PFElement
+        assert(np.all(used_gps == 1))
+
+        used_rps = np.zeros(n_rps, dtype=np.int64)
+        track_to_rp_all = assign_to_recoobj(n_tracks, track_to_rp, used_rps)
+        cluster_to_rp_all = assign_to_recoobj(n_clusters, cluster_to_rp, used_rps)
+        #all reco particles must be assigned to some PFElement
+        assert(np.all(used_rps == 1))
+
+        gps_track = get_particle_feature_matrix(
+            track_to_gp_all,
+            gpdata_cleaned.gen_features,
+            particle_feature_order
+        )
+        gps_track[:, 0] = np.array([
+            map_neutral_to_charged(map_pdgid_to_candid(p, c)) for p, c in zip(gps_track[:, 0], gps_track[:, 1])]
+        )
+        gps_cluster = get_particle_feature_matrix(
+            cluster_to_gp_all,
+            gpdata_cleaned.gen_features,
+            particle_feature_order
+        )
+        gps_cluster[:, 0] = np.array([
+            map_charged_to_neutral(map_pdgid_to_candid(p, c)) for p, c in zip(gps_cluster[:, 0], gps_cluster[:, 1])]
+        )
+        gps_cluster[:, 1] = 0
+
+        rps_track = get_particle_feature_matrix(
+            track_to_rp_all,
+            reco_features,
+            particle_feature_order
+        )
+        rps_track[:, 0] = np.array([
+            map_neutral_to_charged(map_pdgid_to_candid(p, c)) for p, c in zip(rps_track[:, 0], rps_track[:, 1])]
+        )
+        rps_cluster = get_particle_feature_matrix(
+            cluster_to_rp_all,
+            reco_features,
+            particle_feature_order
+        )
+        rps_cluster[:, 0] = np.array([
+            map_charged_to_neutral(map_pdgid_to_candid(p, c)) for p, c in zip(rps_cluster[:, 0], rps_cluster[:, 1])]
+        )
+        rps_cluster[:, 1] = 0
+
+        #all initial gen/reco particle energy must be reconstructable
+        assert(abs(
+            np.sum(gps_track[:, 5]) + np.sum(gps_cluster[:, 5]) - np.sum(gpdata_cleaned.gen_features["energy"])
+            ) < 1e-2)
+
+        assert(abs(
+            np.sum(rps_track[:, 5]) + np.sum(rps_cluster[:, 5]) - np.sum(reco_features["energy"])
+            ) < 1e-2)
+
+
+        #we don"t want to try to reconstruct charged particles from primary clusters, make sure the charge is 0
+        assert(np.all(gps_cluster[:, 1] == 0))
+        assert(np.all(rps_cluster[:, 1] == 0))
+
+        X_track = get_feature_matrix(gpdata_cleaned.track_features, track_feature_order)
+        X_cluster = get_feature_matrix(gpdata_cleaned.cluster_features, cluster_feature_order)
+        ygen_track = gps_track
+        ygen_cluster = gps_cluster
+        ycand_track = rps_track
+        ycand_cluster = rps_cluster
+
+        sanitize(X_track)
+        sanitize(X_cluster)
+        sanitize(ygen_track)
+        sanitize(ygen_cluster)
+        sanitize(ycand_track)
+        sanitize(ycand_cluster)
+
+        this_ev = awkward.Record({
+            "X_track": X_track,
+            "X_cluster": X_cluster,
+            "ygen_track": ygen_track,
+            "ygen_cluster": ygen_cluster,
+            "ycand_track": ycand_track,
+            "ycand_cluster": ycand_cluster
+        })
+        ret.append(this_ev)
+
+    ret = awkward.Record({k: awkward.from_iter([r[k] for r in ret]) for k in ret[0].fields})
+    awkward.to_parquet(ret, fn.replace(".root", ".parquet"))
diff --git a/fcc/run_sim.sh b/fcc/run_sim.sh
index 5a1c4dd52..361117d97 100755
--- a/fcc/run_sim.sh
+++ b/fcc/run_sim.sh
@@ -17,7 +17,11 @@ NUM=$1
 
 #SAMPLE=p8_ee_Z_Ztautau_ecm125
 #SAMPLE=p8_ee_tt_ecm365
-SAMPLE=p8_ee_ZZ_fullhad_ecm365
+#SAMPLE=p8_ee_ZZ_fullhad_ecm365
+#SAMPLE=p8_ee_qcd_ecm365
+#SAMPLE=p8_ee_qcd_ecm380
+#SAMPLE=p8_ee_ZH_Htautau_ecm380
+SAMPLE=p8_ee_qcd_ecm380
 
 mkdir -p $OUTDIR/$SAMPLE
 
diff --git a/mlpf/customizations.py b/mlpf/customizations.py
index d8597c79b..7dde643cb 100644
--- a/mlpf/customizations.py
+++ b/mlpf/customizations.py
@@ -16,12 +16,12 @@ def customize_pipeline_test(config):
         config["evaluation_datasets"] = {"cms_pf_ttbar": {"batch_size": 2, "num_events": -1}}
 
     # For CLIC, keep only ttbar
-    if "clic_ttbar_pf" in config["datasets"]:
-        config["train_test_datasets"]["physical"]["datasets"] = ["clic_ttbar_pf"]
+    if "clic_edm_ttbar_pf" in config["datasets"]:
+        config["train_test_datasets"]["physical"]["datasets"] = ["clic_edm_ttbar_pf"]
         config["train_test_datasets"] = {"physical": config["train_test_datasets"]["physical"]}
         config["train_test_datasets"]["physical"]["batch_per_gpu"] = 50
-        config["validation_dataset"] = "clic_ttbar_pf"
-        config["evaluation_datasets"] = {"clic_ttbar_pf": {"batch_size": 50, "num_events": -1}}
+        config["validation_dataset"] = "clic_edm_ttbar_pf"
+        config["evaluation_datasets"] = {"clic_edm_ttbar_pf": {"batch_size": 50, "num_events": -1}}
 
     # validate only on a small number of events
     config["validation_num_events"] = config["validation_batch_size"] * 2
diff --git a/mlpf/heptfds/clic_pf/higgsbb.py b/mlpf/heptfds/clic_pf/higgsbb.py
index 7fc93212e..799cfc526 100644
--- a/mlpf/heptfds/clic_pf/higgsbb.py
+++ b/mlpf/heptfds/clic_pf/higgsbb.py
@@ -20,11 +20,14 @@
 
 
 class ClicHiggsBbbarPf(tfds.core.GeneratorBasedBuilder):
-    VERSION = tfds.core.Version("1.0.0")
+    VERSION = tfds.core.Version("1.1.0")
     RELEASE_NOTES = {
         "1.0.0": "Initial release.",
+        "1.1.0": "Fix postprocessing bug with charge",
     }
     MANUAL_DOWNLOAD_INSTRUCTIONS = """
+    mkdir -p data
+    rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/clic $MANUAL_DIR/
     """
 
     def _info(self) -> tfds.core.DatasetInfo:
@@ -56,7 +59,8 @@ def _info(self) -> tfds.core.DatasetInfo:
         )
 
     def _split_generators(self, dl_manager: tfds.download.DownloadManager):
-        return split_sample(Path("data/clic/gev380ee_pythia6_higgs_bbar_full201/"))
+        path = dl_manager.manual_dir
+        return split_sample(Path(path / "gev380ee_pythia6_higgs_bbar_full201"))
 
     def _generate_examples(self, files):
         return generate_examples(files)
diff --git a/mlpf/heptfds/clic_pf/higgsgg.py b/mlpf/heptfds/clic_pf/higgsgg.py
index 08fa20f2b..709e43bca 100644
--- a/mlpf/heptfds/clic_pf/higgsgg.py
+++ b/mlpf/heptfds/clic_pf/higgsgg.py
@@ -20,11 +20,14 @@
 
 
 class ClicHiggsGgPf(tfds.core.GeneratorBasedBuilder):
-    VERSION = tfds.core.Version("1.0.0")
+    VERSION = tfds.core.Version("1.1.0")
     RELEASE_NOTES = {
         "1.0.0": "Initial release.",
+        "1.1.0": "Fix postprocessing bug with charge",
     }
     MANUAL_DOWNLOAD_INSTRUCTIONS = """
+    mkdir -p data
+    rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/clic $MANUAL_DIR/
     """
 
     def _info(self) -> tfds.core.DatasetInfo:
@@ -56,7 +59,8 @@ def _info(self) -> tfds.core.DatasetInfo:
         )
 
     def _split_generators(self, dl_manager: tfds.download.DownloadManager):
-        return split_sample(Path("data/clic/gev380ee_pythia6_higgs_gamgam_full201/"))
+        path = dl_manager.manual_dir
+        return split_sample(Path(path / "gev380ee_pythia6_higgs_gamgam_full201"))
 
     def _generate_examples(self, files):
         return generate_examples(files)
diff --git a/mlpf/heptfds/clic_pf/higgszz4l.py b/mlpf/heptfds/clic_pf/higgszz4l.py
index d82ebe45a..94351aec6 100644
--- a/mlpf/heptfds/clic_pf/higgszz4l.py
+++ b/mlpf/heptfds/clic_pf/higgszz4l.py
@@ -20,11 +20,14 @@
 
 
 class ClicHiggsZz4lPf(tfds.core.GeneratorBasedBuilder):
-    VERSION = tfds.core.Version("1.0.0")
+    VERSION = tfds.core.Version("1.1.0")
     RELEASE_NOTES = {
         "1.0.0": "Initial release.",
+        "1.1.0": "Fix postprocessing bug with charge",
     }
     MANUAL_DOWNLOAD_INSTRUCTIONS = """
+    mkdir -p data
+    rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/clic $MANUAL_DIR/
     """
 
     def _info(self) -> tfds.core.DatasetInfo:
@@ -56,7 +59,8 @@ def _info(self) -> tfds.core.DatasetInfo:
         )
 
     def _split_generators(self, dl_manager: tfds.download.DownloadManager):
-        return split_sample(Path("data/clic/gev380ee_pythia6_higgs_zz_4l_full201/"))
+        path = dl_manager.manual_dir
+        return split_sample(Path(path / "gev380ee_pythia6_higgs_zz_4l_full201"))
 
     def _generate_examples(self, files):
         return generate_examples(files)
diff --git a/mlpf/heptfds/clic_pf/qcd.py b/mlpf/heptfds/clic_pf/qcd.py
index 78fd71c55..a571b25ef 100644
--- a/mlpf/heptfds/clic_pf/qcd.py
+++ b/mlpf/heptfds/clic_pf/qcd.py
@@ -20,11 +20,14 @@
 
 
 class ClicQcdPf(tfds.core.GeneratorBasedBuilder):
-    VERSION = tfds.core.Version("1.0.0")
+    VERSION = tfds.core.Version("1.1.0")
     RELEASE_NOTES = {
         "1.0.0": "Initial release.",
+        "1.1.0": "Fix postprocessing bug with charge",
     }
     MANUAL_DOWNLOAD_INSTRUCTIONS = """
+    mkdir -p data
+    rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/clic $MANUAL_DIR/
     """
 
     def _info(self) -> tfds.core.DatasetInfo:
@@ -56,7 +59,8 @@ def _info(self) -> tfds.core.DatasetInfo:
         )
 
     def _split_generators(self, dl_manager: tfds.download.DownloadManager):
-        return split_sample(Path("data/clic/gev380ee_pythia6_qcd_all_rfull201/"))
+        path = dl_manager.manual_dir
+        return split_sample(Path(path / "gev380ee_pythia6_qcd_all_rfull201"))
 
     def _generate_examples(self, files):
         return generate_examples(files)
diff --git a/mlpf/heptfds/clic_pf/ttbar.py b/mlpf/heptfds/clic_pf/ttbar.py
index 2e6182e49..150b9ed4a 100644
--- a/mlpf/heptfds/clic_pf/ttbar.py
+++ b/mlpf/heptfds/clic_pf/ttbar.py
@@ -20,11 +20,14 @@
 
 
 class ClicTtbarPf(tfds.core.GeneratorBasedBuilder):
-    VERSION = tfds.core.Version("1.0.0")
+    VERSION = tfds.core.Version("1.1.0")
     RELEASE_NOTES = {
         "1.0.0": "Initial release.",
+        "1.1.0": "Fix postprocessing bug with charge",
     }
     MANUAL_DOWNLOAD_INSTRUCTIONS = """
+    mkdir -p data
+    rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/clic $MANUAL_DIR/
     """
 
     def _info(self) -> tfds.core.DatasetInfo:
@@ -56,7 +59,8 @@ def _info(self) -> tfds.core.DatasetInfo:
         )
 
     def _split_generators(self, dl_manager: tfds.download.DownloadManager):
-        return split_sample(Path("data/clic/gev380ee_pythia6_ttbar_rfull201/"))
+        path = dl_manager.manual_dir
+        return split_sample(Path(path / "gev380ee_pythia6_ttbar_rfull201"))
 
     def _generate_examples(self, files):
         return generate_examples(files)
diff --git a/mlpf/heptfds/clic_pf/zpoleee.py b/mlpf/heptfds/clic_pf/zpoleee.py
index 38be9eb3c..19e57432a 100644
--- a/mlpf/heptfds/clic_pf/zpoleee.py
+++ b/mlpf/heptfds/clic_pf/zpoleee.py
@@ -20,11 +20,14 @@
 
 
 class ClicZpoleeePf(tfds.core.GeneratorBasedBuilder):
-    VERSION = tfds.core.Version("1.0.0")
+    VERSION = tfds.core.Version("1.1.0")
     RELEASE_NOTES = {
         "1.0.0": "Initial release.",
+        "1.1.0": "Fix postprocessing bug with charge",
     }
     MANUAL_DOWNLOAD_INSTRUCTIONS = """
+    mkdir -p data
+    rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/clic $MANUAL_DIR/
     """
 
     def _info(self) -> tfds.core.DatasetInfo:
@@ -56,7 +59,8 @@ def _info(self) -> tfds.core.DatasetInfo:
         )
 
     def _split_generators(self, dl_manager: tfds.download.DownloadManager):
-        return split_sample(Path("data/clic/gev380ee_pythia6_zpole_ee_rfull201/"))
+        path = dl_manager.manual_dir
+        return split_sample(Path(path / "gev380ee_pythia6_zpole_ee_rfull201"))
 
     def _generate_examples(self, files):
         return generate_examples(files)
diff --git a/mlpf/heptfds/clic_pf_edm4hep/qcd.py b/mlpf/heptfds/clic_pf_edm4hep/qcd.py
new file mode 100644
index 000000000..a720f12d7
--- /dev/null
+++ b/mlpf/heptfds/clic_pf_edm4hep/qcd.py
@@ -0,0 +1,62 @@
+from pathlib import Path
+
+import tensorflow as tf
+from utils_edm import (
+    X_FEATURES_CL,
+    X_FEATURES_TRK,
+    Y_FEATURES,
+    generate_examples,
+    split_sample,
+)
+
+import tensorflow_datasets as tfds
+
+_DESCRIPTION = """
+CLIC EDM4HEP dataset with ee -> gamma/Z* -> quarks
+"""
+
+_CITATION = """
+"""
+
+
+class ClicEdmQcdPf(tfds.core.GeneratorBasedBuilder):
+    VERSION = tfds.core.Version("1.0.0")
+    RELEASE_NOTES = {
+        "1.0.0": "Initial release.",
+    }
+    MANUAL_DOWNLOAD_INSTRUCTIONS = """
+    """
+
+    def _info(self) -> tfds.core.DatasetInfo:
+        """Returns the dataset metadata."""
+        return tfds.core.DatasetInfo(
+            builder=self,
+            description=_DESCRIPTION,
+            features=tfds.features.FeaturesDict(
+                {
+                    "X": tfds.features.Tensor(
+                        shape=(
+                            None,
+                            max(len(X_FEATURES_TRK), len(X_FEATURES_CL)),
+                        ),
+                        dtype=tf.float32,
+                    ),
+                    "ygen": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=tf.float32),
+                    "ycand": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=tf.float32),
+                }
+            ),
+            supervised_keys=None,
+            homepage="",
+            citation=_CITATION,
+            metadata=tfds.core.MetadataDict(
+                x_features_track=X_FEATURES_TRK,
+                x_features_cluster=X_FEATURES_CL,
+                y_features=Y_FEATURES,
+            ),
+        )
+
+    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
+        return split_sample(Path("data/p8_ee_qcd_ecm365/"))
+
+    def _generate_examples(self, files):
+        return generate_examples(files)
diff --git a/mlpf/heptfds/clic_pf_edm4hep/ttbar.py b/mlpf/heptfds/clic_pf_edm4hep/ttbar.py
new file mode 100644
index 000000000..e054e5a21
--- /dev/null
+++ b/mlpf/heptfds/clic_pf_edm4hep/ttbar.py
@@ -0,0 +1,62 @@
+from pathlib import Path
+
+import tensorflow as tf
+from utils_edm import (
+    X_FEATURES_CL,
+    X_FEATURES_TRK,
+    Y_FEATURES,
+    generate_examples,
+    split_sample,
+)
+
+import tensorflow_datasets as tfds
+
+_DESCRIPTION = """
+CLIC EDM4HEP dataset with ttbar
+"""
+
+_CITATION = """
+"""
+
+
+class ClicEdmTtbarPf(tfds.core.GeneratorBasedBuilder):
+    VERSION = tfds.core.Version("1.0.0")
+    RELEASE_NOTES = {
+        "1.0.0": "Initial release.",
+    }
+    MANUAL_DOWNLOAD_INSTRUCTIONS = """
+    """
+
+    def _info(self) -> tfds.core.DatasetInfo:
+        """Returns the dataset metadata."""
+        return tfds.core.DatasetInfo(
+            builder=self,
+            description=_DESCRIPTION,
+            features=tfds.features.FeaturesDict(
+                {
+                    "X": tfds.features.Tensor(
+                        shape=(
+                            None,
+                            max(len(X_FEATURES_TRK), len(X_FEATURES_CL)),
+                        ),
+                        dtype=tf.float32,
+                    ),
+                    "ygen": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=tf.float32),
+                    "ycand": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=tf.float32),
+                }
+            ),
+            supervised_keys=None,
+            homepage="",
+            citation=_CITATION,
+            metadata=tfds.core.MetadataDict(
+                x_features_track=X_FEATURES_TRK,
+                x_features_cluster=X_FEATURES_CL,
+                y_features=Y_FEATURES,
+            ),
+        )
+
+    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
+        return split_sample(Path("data/p8_ee_tt_ecm365/"))
+
+    def _generate_examples(self, files):
+        return generate_examples(files)
diff --git a/mlpf/heptfds/clic_pf_edm4hep/utils_edm.py b/mlpf/heptfds/clic_pf_edm4hep/utils_edm.py
new file mode 100644
index 000000000..877446b86
--- /dev/null
+++ b/mlpf/heptfds/clic_pf_edm4hep/utils_edm.py
@@ -0,0 +1,169 @@
+import awkward as ak
+import fastjet
+import numpy as np
+import vector
+
+# from fcc/postprocessing.py
+X_FEATURES_TRK = [
+    "type",
+    "pt",
+    "eta",
+    "phi",
+    "p",
+    "chi2",
+    "ndf",
+    "dEdx",
+    "dEdxError",
+    "radiusOfInnermostHit",
+    "tanLambda",
+    "D0",
+    "omega",
+    "Z0",
+    "time",
+]
+X_FEATURES_CL = [
+    "type",
+    "et",
+    "eta",
+    "phi",
+    "energy",
+    "position.x",
+    "position.y",
+    "position.z",
+    "iTheta",
+    "energy_ecal",
+    "energy_hcal",
+    "energy_other",
+    "num_hits",
+    "sigma_x",
+    "sigma_y",
+    "sigma_z",
+]
+
+Y_FEATURES = ["PDG", "charge", "pt", "eta", "phi", "energy", "jet_idx"]
+labels = [0, 211, 130, 22, 11, 13]
+
+
+def split_sample(path, test_frac=0.8):
+    files = sorted(list(path.glob("*.parquet")))
+    print("Found {} files in {}".format(files, path))
+    assert len(files) > 0
+    idx_split = int(test_frac * len(files))
+    files_train = files[:idx_split]
+    files_test = files[idx_split:]
+    assert len(files_train) > 0
+    assert len(files_test) > 0
+    return {
+        "train": generate_examples(files_train),
+        "test": generate_examples(files_test),
+    }
+
+
+def generate_examples(files):
+    jetdef = fastjet.JetDefinition(fastjet.antikt_algorithm, 0.4)
+    min_jet_pt = 1.0  # GeV
+    for fi in files:
+        ret = ak.from_parquet(fi)
+
+        X_track = ret["X_track"]
+        X_cluster = ret["X_cluster"]
+
+        assert len(X_track) == len(X_cluster)
+        nev = len(X_track)
+
+        for iev in range(nev):
+
+            X1 = ak.to_numpy(X_track[iev])
+            X2 = ak.to_numpy(X_cluster[iev])
+
+            if len(X1) == 0 or len(X2) == 0:
+                continue
+
+            ygen_track = ak.to_numpy(ret["ygen_track"][iev])
+            ygen_cluster = ak.to_numpy(ret["ygen_cluster"][iev])
+            ycand_track = ak.to_numpy(ret["ycand_track"][iev])
+            ycand_cluster = ak.to_numpy(ret["ycand_cluster"][iev])
+
+            if len(ygen_track) == 0 or len(ygen_cluster) == 0:
+                continue
+
+            # pad feature dim between tracks and clusters to the same size
+            if X1.shape[1] < X2.shape[1]:
+                X1 = np.pad(X1, [[0, 0], [0, X2.shape[1] - X1.shape[1]]])
+            if X2.shape[1] < X1.shape[1]:
+                X2 = np.pad(X2, [[0, 0], [0, X1.shape[1] - X2.shape[1]]])
+
+            # concatenate tracks and clusters in features and targets
+            X = np.concatenate([X1, X2])
+            ygen = np.concatenate([ygen_track, ygen_cluster])
+            ycand = np.concatenate([ycand_track, ycand_cluster])
+
+            assert ygen.shape[0] == X.shape[0]
+            assert ycand.shape[0] == X.shape[0]
+
+            # add jet_idx column
+            ygen = np.concatenate(
+                [
+                    ygen.astype(np.float32),
+                    np.zeros((len(ygen), 1), dtype=np.float32),
+                ],
+                axis=-1,
+            )
+            ycand = np.concatenate(
+                [
+                    ycand.astype(np.float32),
+                    np.zeros((len(ycand), 1), dtype=np.float32),
+                ],
+                axis=-1,
+            )
+
+            # replace PID with index in labels array
+            arr = np.array([labels.index(p) for p in ygen[:, 0]])
+            ygen[:, 0][:] = arr[:]
+            arr = np.array([labels.index(p) for p in ycand[:, 0]])
+            ycand[:, 0][:] = arr[:]
+
+            # prepare gen candidates for clustering
+            cls_id = ygen[..., 0]
+            valid = cls_id != 0
+            # save mapping of index after masking -> index before masking as numpy array
+            # inspired from:
+            # https://stackoverflow.com/questions/432112/1044443#comment54747416_1044443
+            cumsum = np.cumsum(valid) - 1
+            _, index_mapping = np.unique(cumsum, return_index=True)
+
+            pt = ygen[valid, Y_FEATURES.index("pt")]
+            eta = ygen[valid, Y_FEATURES.index("eta")]
+            phi = ygen[valid, Y_FEATURES.index("phi")]
+            energy = ygen[valid, Y_FEATURES.index("energy")]
+            vec = vector.awk(ak.zip({"pt": pt, "eta": eta, "phi": phi, "energy": energy}))
+
+            # cluster jets, sort jet indices in descending order by pt
+            cluster = fastjet.ClusterSequence(vec.to_xyzt(), jetdef)
+            jets = vector.awk(cluster.inclusive_jets(min_pt=min_jet_pt))
+            sorted_jet_idx = ak.argsort(jets.pt, axis=-1, ascending=False).to_list()
+            # retrieve corresponding indices of constituents
+            constituent_idx = cluster.constituent_index(min_pt=min_jet_pt).to_list()
+
+            # add index information to ygen and ycand
+            # index jets in descending order by pt starting from 1:
+            # 0 is null (unclustered),
+            # 1 is 1st highest-pt jet,
+            # 2 is 2nd highest-pt jet, ...
+            for jet_idx in sorted_jet_idx:
+                jet_constituents = [
+                    index_mapping[idx] for idx in constituent_idx[jet_idx]
+                ]  # map back to constituent index *before* masking
+                ygen[jet_constituents, Y_FEATURES.index("jet_idx")] = jet_idx + 1  # jet index starts from 1
+                ycand[jet_constituents, Y_FEATURES.index("jet_idx")] = jet_idx + 1
+
+            yield str(fi) + "_" + str(iev), {
+                "X": X.astype(np.float32),
+                "ygen": ygen,
+                "ycand": ycand,
+            }
+
+
+if __name__ == "__main__":
+    for ex in generate_examples(["data/p8_ee_ZZ_fullhad_ecm365/reco_p8_ee_ZZ_fullhad_ecm365_1.parquet"]):
+        print(ex[0], ex[1]["X"].shape, ex[1]["ygen"].shape, ex[1]["ycand"].shape)
diff --git a/mlpf/heptfds/clic_pf_edm4hep/zh_htautau.py b/mlpf/heptfds/clic_pf_edm4hep/zh_htautau.py
new file mode 100644
index 000000000..579e4c690
--- /dev/null
+++ b/mlpf/heptfds/clic_pf_edm4hep/zh_htautau.py
@@ -0,0 +1,62 @@
+from pathlib import Path
+
+import tensorflow as tf
+from utils_edm import (
+    X_FEATURES_CL,
+    X_FEATURES_TRK,
+    Y_FEATURES,
+    generate_examples,
+    split_sample,
+)
+
+import tensorflow_datasets as tfds
+
+_DESCRIPTION = """
+CLIC EDM4HEP dataset with ZH, H->tautau
+"""
+
+_CITATION = """
+"""
+
+
+class ClicEdmZhHtautauPf(tfds.core.GeneratorBasedBuilder):
+    VERSION = tfds.core.Version("1.0.0")
+    RELEASE_NOTES = {
+        "1.0.0": "Initial release.",
+    }
+    MANUAL_DOWNLOAD_INSTRUCTIONS = """
+    """
+
+    def _info(self) -> tfds.core.DatasetInfo:
+        """Returns the dataset metadata."""
+        return tfds.core.DatasetInfo(
+            builder=self,
+            description=_DESCRIPTION,
+            features=tfds.features.FeaturesDict(
+                {
+                    "X": tfds.features.Tensor(
+                        shape=(
+                            None,
+                            max(len(X_FEATURES_TRK), len(X_FEATURES_CL)),
+                        ),
+                        dtype=tf.float32,
+                    ),
+                    "ygen": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=tf.float32),
+                    "ycand": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=tf.float32),
+                }
+            ),
+            supervised_keys=None,
+            homepage="",
+            citation=_CITATION,
+            metadata=tfds.core.MetadataDict(
+                x_features_track=X_FEATURES_TRK,
+                x_features_cluster=X_FEATURES_CL,
+                y_features=Y_FEATURES,
+            ),
+        )
+
+    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
+        return split_sample(Path("data/p8_ee_ZH_Htautau_ecm380/"))
+
+    def _generate_examples(self, files):
+        return generate_examples(files)
diff --git a/mlpf/heptfds/clic_pf_edm4hep/zzfullhad.py b/mlpf/heptfds/clic_pf_edm4hep/zzfullhad.py
new file mode 100644
index 000000000..8fb70abed
--- /dev/null
+++ b/mlpf/heptfds/clic_pf_edm4hep/zzfullhad.py
@@ -0,0 +1,62 @@
+from pathlib import Path
+
+import tensorflow as tf
+from utils_edm import (
+    X_FEATURES_CL,
+    X_FEATURES_TRK,
+    Y_FEATURES,
+    generate_examples,
+    split_sample,
+)
+
+import tensorflow_datasets as tfds
+
+_DESCRIPTION = """
+CLIC EDM4HEP dataset with ZZ->fullhadronic
+"""
+
+_CITATION = """
+"""
+
+
+class ClicEdmZzFullhadPf(tfds.core.GeneratorBasedBuilder):
+    VERSION = tfds.core.Version("1.0.0")
+    RELEASE_NOTES = {
+        "1.0.0": "Initial release.",
+    }
+    MANUAL_DOWNLOAD_INSTRUCTIONS = """
+    """
+
+    def _info(self) -> tfds.core.DatasetInfo:
+        """Returns the dataset metadata."""
+        return tfds.core.DatasetInfo(
+            builder=self,
+            description=_DESCRIPTION,
+            features=tfds.features.FeaturesDict(
+                {
+                    "X": tfds.features.Tensor(
+                        shape=(
+                            None,
+                            max(len(X_FEATURES_TRK), len(X_FEATURES_CL)),
+                        ),
+                        dtype=tf.float32,
+                    ),
+                    "ygen": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=tf.float32),
+                    "ycand": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=tf.float32),
+                }
+            ),
+            supervised_keys=None,
+            homepage="",
+            citation=_CITATION,
+            metadata=tfds.core.MetadataDict(
+                x_features_track=X_FEATURES_TRK,
+                x_features_cluster=X_FEATURES_CL,
+                y_features=Y_FEATURES,
+            ),
+        )
+
+    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
+        return split_sample(Path("data/p8_ee_ZZ_fullhad_ecm365/"))
+
+    def _generate_examples(self, files):
+        return generate_examples(files)
diff --git a/mlpf/plotting/plot_utils.py b/mlpf/plotting/plot_utils.py
index 2b5853a88..f2307c881 100644
--- a/mlpf/plotting/plot_utils.py
+++ b/mlpf/plotting/plot_utils.py
@@ -69,6 +69,9 @@
     "cms_pf_qcd_high_pt": r"CMS high-$p_T$ QCD+PU events",
     "cms_pf_ttbar": r"CMS $\mathrm{t}\overline{\mathrm{t}}$+PU events",
     "cms_pf_single_neutron": r"CMS single neutron particle gun events",
+    "clic_edm_ttbar_pf": r"CLIC $ee \rightarrow \mathrm{t}\overline{\mathrm{t}}$",
+    "clic_edm_qcd_pf": r"CLIC $ee \rightarrow \gamma/\mathrm{Z}^* \rightarrow \mathrm{hadrons}$",
+    "clic_edm_zz_fullhad_pf": r"CLIC $ee \rightarrow \mathrm{ZZ} \rightarrow \mathrm{hadrons}$",
 }
 
 
diff --git a/mlpf/tfmodel/datasets/BaseDatasetFactory.py b/mlpf/tfmodel/datasets/BaseDatasetFactory.py
index 9cd37f2f1..1495bd6b4 100644
--- a/mlpf/tfmodel/datasets/BaseDatasetFactory.py
+++ b/mlpf/tfmodel/datasets/BaseDatasetFactory.py
@@ -53,20 +53,13 @@ def unpack_target_cms(y, num_output_classes, config):
 def unpack_target_clic(y, num_output_classes, config):
     msk_pid = tf.cast(y[..., 0:1] != 0, tf.float32)
 
-    px = y[..., 2:3] * msk_pid
-    py = y[..., 3:4] * msk_pid
-    pz = y[..., 4:5] * msk_pid
+    pt = y[..., 2:3] * msk_pid
+    eta = y[..., 3:4] * msk_pid
+    phi = y[..., 4:5] * msk_pid
     energy = y[..., 5:6] * msk_pid
 
-    pt = tf.math.sqrt(px**2 + py**2) * msk_pid
-    p = tf.math.sqrt(px**2 + py**2 + pz**2) * msk_pid
-
-    cos_theta = tf.math.divide_no_nan(pz, p)
-    theta = tf.math.acos(cos_theta)
-    eta = -tf.math.log(tf.math.tan(theta / 2.0)) * msk_pid
-
-    sin_phi = tf.math.divide_no_nan(py, pt) * msk_pid
-    cos_phi = tf.math.divide_no_nan(px, pt) * msk_pid
+    sin_phi = tf.math.sin(phi) * msk_pid
+    cos_phi = tf.math.cos(phi) * msk_pid
 
     ret = {
         "cls": tf.one_hot(tf.cast(y[..., 0], tf.int32), num_output_classes),
@@ -81,7 +74,7 @@ def unpack_target_clic(y, num_output_classes, config):
     if config["loss"]["event_loss"] != "none":
         jet_idx = y[..., 6:7] * msk_pid
         pt_e_eta_phi = tf.concat([pt, energy, eta, sin_phi, cos_phi, jet_idx, msk_pid], axis=-1)
-        ret["pt_e_eta_phi"] = pt_e_eta_phi * msk_pid
+        ret["pt_e_eta_phi"] = pt_e_eta_phi
 
     if config["loss"]["met_loss"] != "none":
         px = pt * cos_phi
@@ -122,6 +115,9 @@ def func(data_item):
         X = data_item["X"]
         y = data_item["y{}".format(target_particles)]
 
+        X = tf.clip_by_value(X, -1e12, 1e12)
+        y = tf.clip_by_value(y, -1e12, 1e12)
+
         # mask to keep only nonzero (not zero-padded due to batching) elements
         msk_elems = tf.cast(X[..., 0:1] != 0, tf.float32)
 
diff --git a/mlpf/tfmodel/model.py b/mlpf/tfmodel/model.py
index b64b028b1..7492be892 100644
--- a/mlpf/tfmodel/model.py
+++ b/mlpf/tfmodel/model.py
@@ -202,19 +202,7 @@ def call(self, X):
         # X[:, :, 1:] - all the other non-categorical features
         Xprop = X[:, :, 1:]
 
-        px = X[:, :, 1:2]
-        py = X[:, :, 2:3]
-        pz = X[:, :, 3:4]
-        pt = tf.math.sqrt(px**2 + py**2)
-        p = tf.math.sqrt(px**2 + py**2 + pz**2)
-        cos_theta = tf.math.divide_no_nan(pz, p)
-        theta = tf.math.acos(cos_theta)
-        eta = -tf.math.log(tf.math.tan(theta / 2.0))
-
-        sin_phi = tf.math.divide_no_nan(py, pt)
-        cos_phi = tf.math.divide_no_nan(px, pt)
-
-        return tf.concat([Xid, pt, sin_phi, cos_phi, eta, Xprop], axis=-1)
+        return tf.concat([Xid, Xprop], axis=-1)
 
 
 """
@@ -876,32 +864,19 @@ def call(self, args, training=False):
         out_charge = self.ffn_charge(X_encoded, training=training)
         out_charge = out_charge * msk_input_outtype
 
+        orig_pt = tf.cast(X_input[:, :, 1:2], out_dtype)
         orig_eta = tf.cast(X_input[:, :, 2:3], out_dtype)
 
         # FIXME: better schema propagation between hep_tfds
         # skip connection from raw input values
-        if self.schema == "cms":
+        if self.schema == "cms" or self.schema == "clic":
             orig_sin_phi = tf.cast(tf.math.sin(X_input[:, :, 3:4]) * msk_input, out_dtype)
             orig_cos_phi = tf.cast(tf.math.cos(X_input[:, :, 3:4]) * msk_input, out_dtype)
             orig_energy = tf.cast(X_input[:, :, 4:5] * msk_input, out_dtype)
-            orig_pt = X_input[:, :, 1:2]
         elif self.schema == "delphes":
             orig_sin_phi = tf.cast(X_input[:, :, 3:4] * msk_input, out_dtype)
             orig_cos_phi = tf.cast(X_input[:, :, 4:5] * msk_input, out_dtype)
             orig_energy = tf.cast(X_input[:, :, 5:6] * msk_input, out_dtype)
-            orig_pt = X_input[:, :, 1:2]
-        elif self.schema == "clic":
-            px = X_input[:, :, 1:2]
-            py = X_input[:, :, 2:3]
-            pz = X_input[:, :, 3:4]
-            orig_pt = tf.math.sqrt(px**2 + py**2)
-            p = tf.math.sqrt(px**2 + py**2 + pz**2)
-            cos_theta = tf.math.divide_no_nan(pz, p)
-            theta = tf.math.acos(cos_theta)
-            orig_eta = -tf.math.log(tf.math.tan(theta / 2.0))
-            orig_sin_phi = tf.math.divide_no_nan(py, orig_pt)
-            orig_cos_phi = tf.math.divide_no_nan(px, orig_pt)
-            orig_energy = p
 
         if self.regression_use_classification:
             X_encoded = tf.concat(
@@ -920,9 +895,11 @@ def call(self, args, training=False):
         pred_eta = orig_eta + pred_eta_corr[:, :, 0:1]
         pred_sin_phi = orig_sin_phi + pred_phi_corr[:, :, 0:1]
         pred_cos_phi = orig_cos_phi + pred_phi_corr[:, :, 1:2]
-        # pred_eta = tf.clip_by_value(pred_eta, -7,7)
-        # pred_sin_phi = tf.clip_by_value(pred_sin_phi, -1,1)
-        # pred_cos_phi = tf.clip_by_value(pred_cos_phi, -1,1)
+
+        # FIXME: check that this is helpful
+        pred_eta = tf.clip_by_value(pred_eta, -7, 7)
+        pred_sin_phi = tf.clip_by_value(pred_sin_phi, -1, 1)
+        pred_cos_phi = tf.clip_by_value(pred_cos_phi, -1, 1)
 
         X_encoded_energy = tf.concat([X_encoded, X_encoded_energy], axis=-1)
         if self.regression_use_classification:
diff --git a/mlpf/tfmodel/model_setup.py b/mlpf/tfmodel/model_setup.py
index f18438746..818ed0b61 100644
--- a/mlpf/tfmodel/model_setup.py
+++ b/mlpf/tfmodel/model_setup.py
@@ -115,7 +115,7 @@ def epoch_end(self, epoch, logs, comet_experiment=None):
         N_jets = len(awkward.flatten(yvals["jets_gen_pt"]))
         N_jets_matched_pred = len(yvals["jet_gen_to_pred_genpt"])
         for name, val in [
-            ("jet_matched_frac", N_jets_matched_pred / N_jets),
+            ("jet_matched_frac", N_jets_matched_pred / N_jets if N_jets > 0 else float("nan")),
             ("jet_wd", jet_distances["wd"]),
             ("jet_iqr", jet_distances["iqr"]),
             ("jet_med", jet_distances["p50"]),
@@ -486,6 +486,9 @@ def eval_model(
             energy = awkward.from_iter([np.array(v[m], np.float32) for v, m in zip(awkvals[typ]["energy"], valid)])
             phi = awkward.from_iter([np.array(v[m], np.float32) for v, m in zip(phi, valid)])
 
+            if verbose:
+                print(typ, pt)
+
             # If there were no particles, build dummy arrays with the correct datatype
             if len(awkward.flatten(pt)) == 0:
                 pt = build_dummy_array(len(pt), np.float64)
diff --git a/notebooks/clic.ipynb b/notebooks/clic.ipynb
index c3ca29d94..1d15f457a 100644
--- a/notebooks/clic.ipynb
+++ b/notebooks/clic.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "id": "foster-monte",
    "metadata": {},
    "outputs": [],
@@ -23,7 +23,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "id": "ad0a7854",
    "metadata": {
     "tags": [
@@ -37,16 +37,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "id": "935316c1",
    "metadata": {
     "scrolled": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 1/1 [00:38<00:00, 38.65s/it]\n"
+     ]
+    }
+   ],
    "source": [
     "# Load the datasets, process to flattened (X,ygen,ycand) format\n",
     "ret = []\n",
-    "filelist = list(glob.glob(\"{}/*.parquet\".format(path)))\n",
+    "filelist = list(glob.glob(\"{}/*.parquet\".format(path)))[:1]\n",
     "print(len(filelist))\n",
     "for fi in tqdm.tqdm(filelist):\n",
     "    ret += postprocessing.prepare_data_clic(fi)"
@@ -54,7 +69,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "id": "47c33634",
    "metadata": {},
    "outputs": [],
@@ -64,6 +79,27 @@
     "ycand = awkward.from_iter([r[2] for r in ret])"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "ae5a2ea8",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(array([0., 1., 2., 3., 4., 5.]), array([9625, 6801,  661, 4465,  166,  128]))"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "np.unique(awkward.to_numpy(awkward.flatten(ygen[:, :, 0])), return_counts=True)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "1a5ecc39",
diff --git a/parameters/clic.yaml b/parameters/clic.yaml
index ede8aaa11..e836893b0 100644
--- a/parameters/clic.yaml
+++ b/parameters/clic.yaml
@@ -3,13 +3,13 @@ backend: tensorflow
 dataset:
   schema: clic
   target_particles: gen
-  num_input_features: 12
+  num_input_features: 16
   num_output_features: 5
   #(none=0, track=1, cluster=2)
   num_input_classes: 3
   #(none=0, ch.had=1, n.had=2, gamma=3, e=4, mu=5)
   num_output_classes: 6
-  #(px, py, pz, E)
+  #(pt, eta, phi, E)
   num_momentum_outputs: 4
   cls_weight_by_pt: no
   reg_weight_by_pt: no
@@ -235,44 +235,34 @@ train_test_datasets:
   physical:
     batch_per_gpu: 1
     datasets:
-      - clic_ttbar_pf
-      - clic_qcd_pf
-      - clic_higgs_bbbar_pf
-      - clic_higgs_gg_pf
-      - clic_higgs_zz4l_pf
-      - clic_zpoleee_pf
+      - clic_edm_ttbar_pf
+      - clic_edm_zz_fullhad_pf
+      - clic_edm_qcd_pf
+      - clic_edm_zh_htautau_pf
 
-validation_dataset: clic_ttbar_pf
+validation_dataset: clic_edm_ttbar_pf
 validation_batch_size: 50
 validation_num_events: 500
 
 evaluation_datasets:
-  clic_ttbar_pf:
+  clic_edm_ttbar_pf:
     batch_size: 50
     num_events: -1
 
 datasets:
-  clic_ttbar_pf:
+  clic_edm_ttbar_pf:
     version: 1.0.0
     data_dir:
     manual_dir:
-  clic_qcd_pf:
+  clic_edm_zz_fullhad_pf:
     version: 1.0.0
     data_dir:
     manual_dir:
-  clic_higgs_bbbar_pf:
+  clic_edm_qcd_pf:
     version: 1.0.0
     data_dir:
     manual_dir:
-  clic_higgs_gg_pf:
-    version: 1.0.0
-    data_dir:
-    manual_dir:
-  clic_higgs_zz4l_pf:
-    version: 1.0.0
-    data_dir:
-    manual_dir:
-  clic_zpoleee_pf:
+  clic_edm_zh_htautau_pf:
     version: 1.0.0
     data_dir:
     manual_dir:
diff --git a/scripts/generate_tfds.sh b/scripts/generate_tfds.sh
index f68b07e14..486c34787 100755
--- a/scripts/generate_tfds.sh
+++ b/scripts/generate_tfds.sh
@@ -1,10 +1,11 @@
 #!/bin/bash
 
 # Tallinn
-MANUAL_DIR=/local/joosep/mlpf/cms/v2/
-DATA_DIR=/scratch-persistent/joosep/tensorflow_datasets
-IMG=/home/software/singularity/tf-2.10.0.simg
-CMD="singularity exec -B /local -B /scratch-persistent--env PYTHONPATH=$PYTHONPATH $IMG tfds build "
+export MANUAL_DIR=/local/joosep/mlpf/cms/v2
+export DATA_DIR=/scratch-persistent/joosep/tensorflow_datasets
+export IMG=/home/software/singularity/tf-2.11.0.simg
+export PYTHONPATH=`pwd`/mlpf
+export CMD="singularity exec -B /local -B /scratch-persistent --env PYTHONPATH=$PYTHONPATH $IMG tfds build "
 
 # Desktop
 # IMG=/home/joosep/HEP-KBFI/singularity/tf-2.10.0.simg
@@ -29,12 +30,11 @@ $CMD mlpf/heptfds/cms_pf/singletau --data_dir $DATA_DIR --manual_dir $MANUAL_DIR
 wait
 
 # CLIC
-$CMD mlpf/heptfds/clic_pf/qcd --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_qcd.log &
-$CMD mlpf/heptfds/clic_pf/ttbar --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_ttbar.log &
-$CMD mlpf/heptfds/clic_pf/zpoleee --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_zpoleee.log &
-$CMD mlpf/heptfds/clic_pf/higgsbb --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_higgsbb.log &
-$CMD mlpf/heptfds/clic_pf/higgszz4l --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_higgszz4l.log &
-$CMD mlpf/heptfds/clic_pf/higgsgg --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_higgsgg.log &
+export MANUAL_DIR=/local/joosep/mlpf/clic_edm4hep
+$CMD mlpf/heptfds/clic_pf_edm4hep/qcd --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_qcd.log &
+$CMD mlpf/heptfds/clic_pf_edm4hep/ttbar --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_ttbar.log &
+$CMD mlpf/heptfds/clic_pf_edm4hep/zh_htautau --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_zh_htautau.log &
+$CMD mlpf/heptfds/clic_pf/zzfullhad --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_zzfullhad.log &
 wait
 
 $CMD mlpf/heptfds/delphes_pf/delphes_pf &> logs/tfds_delphes.log &
diff --git a/scripts/local_test_clic_pipeline.sh b/scripts/local_test_clic_pipeline.sh
index dcddc5bd1..bb3c57b6b 100755
--- a/scripts/local_test_clic_pipeline.sh
+++ b/scripts/local_test_clic_pipeline.sh
@@ -3,27 +3,27 @@ set -e
 export TFDS_DATA_DIR=`pwd`/tensorflow_datasets
 export PYTHONPATH=`pwd`/mlpf:$PYTHONPATH
 
-rm -Rf data/clic/gev380ee_pythia6_ttbar_rfull201
-mkdir -p data/clic/gev380ee_pythia6_ttbar_rfull201
-cd data/clic/gev380ee_pythia6_ttbar_rfull201
+rm -Rf data/p8_ee_tt_ecm365
+mkdir -p data/p8_ee_tt_ecm365
+cd data/p8_ee_tt_ecm365
 
 #download some test data
-wget -q --no-check-certificate -nc https://jpata.web.cern.ch/jpata/mlpf/clic/gev380ee_pythia6_ttbar_rfull201/pythia6_ttbar_0001_pandora.parquet
-wget -q --no-check-certificate -nc https://jpata.web.cern.ch/jpata/mlpf/clic/gev380ee_pythia6_ttbar_rfull201/pythia6_ttbar_0002_pandora.parquet
-wget -q --no-check-certificate -nc https://jpata.web.cern.ch/jpata/mlpf/clic/gev380ee_pythia6_ttbar_rfull201/pythia6_ttbar_0003_pandora.parquet
-wget -q --no-check-certificate -nc https://jpata.web.cern.ch/jpata/mlpf/clic/gev380ee_pythia6_ttbar_rfull201/pythia6_ttbar_0004_pandora.parquet
-wget -q --no-check-certificate -nc https://jpata.web.cern.ch/jpata/mlpf/clic/gev380ee_pythia6_ttbar_rfull201/pythia6_ttbar_0005_pandora.parquet
+wget -q --no-check-certificate -nc https://jpata.web.cern.ch/jpata/mlpf/clic_edm4hep/reco_p8_ee_tt_ecm365_1.root
+wget -q --no-check-certificate -nc https://jpata.web.cern.ch/jpata/mlpf/clic_edm4hep/reco_p8_ee_tt_ecm365_2.root
 
-cd ../../..
+cd ../..
 
-#run the clic data validation notebook
-cd notebooks
-papermill --inject-output-path --log-output -p path ../data/clic/gev380ee_pythia6_ttbar_rfull201/ clic.ipynb ./out.ipynb
-cd ..
+python3 fcc/postprocessing.py data/p8_ee_tt_ecm365/reco_p8_ee_tt_ecm365_1.root
+python3 fcc/postprocessing.py data/p8_ee_tt_ecm365/reco_p8_ee_tt_ecm365_2.root
 
-tfds build mlpf/heptfds/clic_pf/ttbar --manual_dir `pwd`
+tfds build mlpf/heptfds/clic_pf_edm4hep/ttbar
 
-#Train, evaluate and make plots
+# #run the clic data validation notebook
+# cd notebooks
+# papermill --inject-output-path --log-output -p path ../data/clic/gev380ee_pythia6_ttbar_rfull201/ clic.ipynb ./out.ipynb
+# cd ..
+
+# #Train, evaluate and make plots
 python mlpf/pipeline.py train --config parameters/clic.yaml --nepochs 1 --customize pipeline_test
 python mlpf/pipeline.py evaluate --nevents 5 --customize pipeline_test --train-dir ./experiments/clic* --weights ./experiments/clic*/weights/weights-01-*.hdf5
 python mlpf/pipeline.py plots --train-dir ./experiments/clic*