diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d62240322..cf16ce5eb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -24,7 +24,8 @@ jobs: tensorflow-probability==0.12.2 tensorflow-addons==0.13.0 \ tqdm click tensorflow-datasets 'ray[default]' 'ray[tune]' \ tf-models-official tensorflow-text \ - tf2onnx onnxruntime zenodo_get seaborn scikit-optimize nevergrad + tf2onnx onnxruntime zenodo_get seaborn scikit-optimize nevergrad \ + tensorflow-estimator==2.6.0 keras==2.6.0 git submodule init git submodule update - name: Run delphes TF model @@ -45,7 +46,8 @@ jobs: tensorflow-probability==0.12.2 tensorflow-addons==0.13.0 \ tqdm click tensorflow-datasets 'ray[default]' 'ray[tune]' \ tf-models-official tensorflow-text \ - tf2onnx onnxruntime zenodo_get seaborn scikit-optimize nevergrad + tf2onnx onnxruntime zenodo_get seaborn scikit-optimize nevergrad \ + tensorflow-estimator==2.6.0 keras==2.6.0 git submodule init git submodule update - name: Run CMS TF model using the pipeline diff --git a/mlpf/pipeline.py b/mlpf/pipeline.py index 182071712..ee72dfb72 100644 --- a/mlpf/pipeline.py +++ b/mlpf/pipeline.py @@ -87,6 +87,7 @@ def customize_pipeline_test(config): if "physical" in config["train_test_datasets"]: config["train_test_datasets"]["physical"]["datasets"] = ["cms_pf_ttbar"] config["train_test_datasets"] = {"physical": config["train_test_datasets"]["physical"]} + config["train_test_datasets"]["physical"]["batch_per_gpu"] = 5 return config diff --git a/mlpf/plotting/cms_fwlite.py b/mlpf/plotting/cms_fwlite.py index f638fc961..81eec7d21 100644 --- a/mlpf/plotting/cms_fwlite.py +++ b/mlpf/plotting/cms_fwlite.py @@ -34,6 +34,32 @@ def get(self, event): ("energy", "[o.energy() for o in obj]"), ] )) + expressions.append(Expression( + "ak4PFJetsPuppi", + "vector", + [ + ("pt", "[o.pt() for o in obj]"), + ("eta", "[o.eta() for o in obj]"), + ("phi", "[o.phi() for o in obj]"), + ("energy", "[o.energy() for o in obj]"), + ] + )) + expressions.append(Expression( + "pfMet", + "vector", + [ + ("pt", "[o.pt() for o in obj]"), + ("phi", "[o.phi() for o in obj]"), + ] + )) + expressions.append(Expression( + "pfMetPuppi", + "vector", + [ + ("pt", "[o.pt() for o in obj]"), + ("phi", "[o.phi() for o in obj]"), + ] + )) expressions.append(Expression( "particleFlow", "vector", diff --git a/mlpf/tfmodel/timing.py b/mlpf/tfmodel/timing.py index eaebf425b..ba6cd8913 100644 --- a/mlpf/tfmodel/timing.py +++ b/mlpf/tfmodel/timing.py @@ -1,6 +1,7 @@ import numpy as np import time import pynvml +import sys #pip install only onnxruntime_gpu, not onnxruntime! 
import onnxruntime @@ -17,7 +18,7 @@ mem_initial = mem.used/1000/1000 print("mem_initial", mem_initial) - onnx_sess = onnxruntime.InferenceSession("model.onnx", providers=EP_list) + onnx_sess = onnxruntime.InferenceSession(sys.argv[1], providers=EP_list) time.sleep(5) mem = pynvml.nvmlDeviceGetMemoryInfo(handle) @@ -32,7 +33,7 @@ for i in range(100): #allocate array in system RAM - X = np.array(np.random.randn(1, num_elems, 18), np.float32) + X = np.array(np.random.randn(1, num_elems, 25), np.float32) #transfer data to GPU, run model, transfer data back t0 = time.time() diff --git a/notebooks/cms-mlpf.ipynb b/notebooks/cms-mlpf.ipynb index af2a616e9..0b49617e2 100644 --- a/notebooks/cms-mlpf.ipynb +++ b/notebooks/cms-mlpf.ipynb @@ -19,7 +19,9 @@ "import scipy\n", "import mplhep\n", "\n", - "import pandas" + "import pandas\n", + "import boost_histogram as bh\n", + "import itertools" ] }, { @@ -40,13 +42,19 @@ "metadata": {}, "outputs": [], "source": [ - "def cms_label(x0=0.12, x1=0.23, x2=0.67, y=0.90):\n", + "def cms_label(ax, x0=0.01, x1=0.1, x2=0.98, y=0.97):\n", + " plt.figtext(x0, y,'CMS',fontweight='bold', wrap=True, horizontalalignment='left', fontsize=12, transform=ax.transAxes)\n", + " plt.figtext(x1, y,'Simulation Preliminary', style='italic', wrap=True, horizontalalignment='left', fontsize=10, transform=ax.transAxes)\n", + " plt.figtext(x2, y,'Run 3 (14 TeV)', wrap=False, horizontalalignment='right', fontsize=10, transform=ax.transAxes)\n", + "\n", + "def cms_label_sample_label(x0=0.12, x1=0.23, x2=0.67, y=0.90):\n", " plt.figtext(x0, y,'CMS',fontweight='bold', wrap=True, horizontalalignment='left', fontsize=12)\n", " plt.figtext(x1, y,'Simulation Preliminary', style='italic', wrap=True, horizontalalignment='left', fontsize=10)\n", " plt.figtext(x2, y,'Run 3 (14 TeV), $\\mathrm{t}\\overline{\\mathrm{t}}$ events', wrap=False, horizontalalignment='left', fontsize=10)\n", "\n", - "def sample_label(ax, x=0.03, y=0.98):\n", - " plt.text(x, y, \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\", va=\"top\", ha=\"left\", size=10, transform=ax.transAxes)\n" + " \n", + "def sample_label(ax, x=0.01, y=0.93):\n", + " plt.text(x, y, \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\", ha=\"left\", size=10, transform=ax.transAxes)" ] }, { @@ -94,7 +102,9 @@ "ELEM_NAMES_CMS = [\"NONE\", \"TRACK\", \"PS1\", \"PS2\", \"ECAL\", \"HCAL\", \"GSF\", \"BREM\", \"HFEM\", \"HFHAD\", \"SC\", \"HO\"]\n", "\n", "CLASS_LABELS_CMS = [0, 211, 130, 1, 2, 22, 11, 13]\n", - "CLASS_NAMES_CMS = [\"none\", \"ch.had\", \"n.had\", \"HFEM\", \"HFHAD\", \"gamma\", \"ele\", \"mu\"]" + "CLASS_NAMES_CMS = [\"none\", \"ch.had\", \"n.had\", \"HFEM\", \"HFHAD\", \"$\\gamma$\", \"$e^\\pm$\", \"$\\mu^\\pm$\"]\n", + "\n", + "class_names = {k: v for k, v in zip(CLASS_LABELS_CMS, CLASS_NAMES_CMS)}" ] }, { @@ -104,7 +114,7 @@ "metadata": {}, "outputs": [], "source": [ - "path = \"../experiments/cms_20210929_223058_191573.gpu0.local/evaluation/\"" + "path = \"../experiments/all_data_cms-best-of-asha-scikit_20211026_042043_178263.workergpu010/evaluation/\"" ] }, { @@ -152,109 +162,117 @@ "metadata": {}, "outputs": [], "source": [ - "def plot_distribution(prefix, bins, var, particle_label, labels):\n", + "def get_distribution(prefix, bins, var):\n", "\n", " hists = []\n", - " for icls in range(1,8):\n", + " for pid in [13,11,22,1,2,130,211]:\n", + " icls = CLASS_LABELS_CMS.index(pid)\n", " msk_pid = (yvals_f[prefix+\"_cls_id\"]==icls)\n", - " h = np.histogram(yvals_f[prefix + \"_\" + var][msk_pid], bins=bins)\n", + " h = 
bh.Histogram(bh.axis.Variable(bins))\n", + " d = yvals_f[prefix + \"_\" + var][msk_pid]\n", + " h.fill(d.flatten())\n", " hists.append(h)\n", - " \n", - " plt.figure(figsize=(5,5))\n", - " ax = plt.axes()\n", + " return hists\n", + "\n", + "# plt.figure(figsize=(5,5))\n", + "# ax = plt.axes()\n", " \n", - " mplhep.histplot(\n", - " [h[0] for h in hists], bins=hists[0][1], ax=ax, stack=True, histtype=\"fill\",\n", - " label=labels\n", - " )\n", - " plt.legend(ncol=2, frameon=False)\n", - " plt.xlabel(var)\n", - " cms_label(x1=0.22, x2=0.55)\n", - " plt.ylabel(\"Number of particles / bin\")\n", - " plt.text(0.02, 0.95, particle_label, transform=ax.transAxes)\n", - " return ax" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d41ecf82", - "metadata": {}, - "outputs": [], - "source": [ - "plot_distribution(\"true\", np.linspace(0,2000,61), \"energy\", \"PF\", CLASS_NAMES_CMS[1:])\n", - "plt.yscale(\"log\")\n", - "plt.ylim(top=1e9)\n", - "plt.savefig(\"energy_true.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "67e4ada0", - "metadata": {}, - "outputs": [], - "source": [ - "plot_distribution(\"pred\", np.linspace(0,2000,61), \"energy\", \"MLPF\", CLASS_NAMES_CMS[1:])\n", - "plt.yscale(\"log\")\n", - "plt.ylim(top=1e9)\n", - "plt.savefig(\"energy_pred.pdf\", bbox_inches=\"tight\")" + "# mplhep.histplot(\n", + "# [h[0] for h in hists], bins=hists[0][1], ax=ax, stack=True, histtype=\"fill\",\n", + "# label=labels\n", + "# )\n", + "# plt.legend(ncol=2, frameon=False)\n", + "# plt.xlabel(var)\n", + "# cms_label(x1=0.22, x2=0.55)\n", + "# plt.ylabel(\"Number of particles / bin\")\n", + "# plt.text(0.02, 0.95, particle_label, transform=ax.transAxes)\n", + "# return ax" ] }, { "cell_type": "code", "execution_count": null, - "id": "03a0dd55", + "id": "d5b04426", "metadata": {}, "outputs": [], "source": [ - "plot_distribution(\"true\", np.linspace(0,200,61), \"pt\", \"PF\", CLASS_NAMES_CMS[1:])\n", + "hists_true = get_distribution(\"true\", np.linspace(0,200,61), \"pt\")\n", + "hists_pred = get_distribution(\"pred\", np.linspace(0,200,61), \"pt\")\n", + "\n", + "plt.figure(figsize=(7, 7))\n", + "ax = plt.axes()\n", + "v1 = mplhep.histplot([h[bh.rebin(2)] for h in hists_true], stack=True, label=[class_names[k] for k in [13,11,22,1,2,130,211]], lw=1)\n", + "v2 = mplhep.histplot([h[bh.rebin(2)] for h in hists_pred], stack=True, color=[x.stairs.get_edgecolor() for x in v1][::-1], lw=2, histtype=\"errorbar\")\n", + "\n", + "legend1 = plt.legend(v1, [x.legend_artist.get_label() for x in v1], loc=(0.60, 0.6), title=\"PF\")\n", + "legend2 = plt.legend(v2, [x.legend_artist.get_label() for x in v1], loc=(0.8, 0.6), title=\"MLPF\")\n", + "plt.gca().add_artist(legend1)\n", + "plt.ylabel(\"Total number of particles / bin\")\n", + "cms_label(ax)\n", + "sample_label(ax)\n", + "\n", "plt.yscale(\"log\")\n", "plt.ylim(top=1e9)\n", "plt.xlabel(\"PFCandidate $p_T$ [GeV]\")\n", - "plt.savefig(\"pt_true.pdf\", bbox_inches=\"tight\")" + "\n", + "plt.savefig(\"pt_true_vs_pred.pdf\", bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "851fcbd1", + "id": "d41ecf82", "metadata": {}, "outputs": [], "source": [ - "plot_distribution(\"pred\", np.linspace(0,200,61), \"pt\", \"MLPF\", CLASS_NAMES_CMS[1:])\n", + "hists_true = get_distribution(\"true\", np.linspace(0,2000,61), \"energy\")\n", + "hists_pred = get_distribution(\"pred\", np.linspace(0,2000,61), \"energy\")\n", + "\n", + "plt.figure(figsize=(7, 7))\n", + "ax = 
plt.axes()\n", + "v1 = mplhep.histplot([h[bh.rebin(2)] for h in hists_true], stack=True, label=[class_names[k] for k in [13,11,22,1,2,130,211]], lw=1)\n", + "v2 = mplhep.histplot([h[bh.rebin(2)] for h in hists_pred], stack=True, color=[x.stairs.get_edgecolor() for x in v1][::-1], lw=2, histtype=\"errorbar\")\n", + "\n", + "legend1 = plt.legend(v1, [x.legend_artist.get_label() for x in v1], loc=(0.60, 0.64), title=\"PF\")\n", + "legend2 = plt.legend(v2, [x.legend_artist.get_label() for x in v1], loc=(0.8, 0.64), title=\"MLPF\")\n", + "plt.gca().add_artist(legend1)\n", + "plt.ylabel(\"Total number of particles / bin\")\n", + "cms_label(ax)\n", + "sample_label(ax)\n", + "\n", "plt.yscale(\"log\")\n", "plt.ylim(top=1e9)\n", - "plt.xlabel(\"MLPFCandidate $p_T$ [GeV]\")\n", - "plt.savefig(\"pt_pred.pdf\", bbox_inches=\"tight\")" + "plt.xlabel(\"PFCandidate $E$ [GeV]\")\n", + "\n", + "plt.savefig(\"energy_true_vs_pred.pdf\", bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "80ab845d", + "id": "03a0dd55", "metadata": {}, "outputs": [], "source": [ - "plot_distribution(\"true\", np.linspace(-5,5,61), \"eta\", \"PF\", CLASS_NAMES_CMS[1:])\n", + "hists_true = get_distribution(\"true\", np.linspace(-6,6,61), \"eta\")\n", + "hists_pred = get_distribution(\"pred\", np.linspace(-6,6,61), \"eta\")\n", + "\n", + "plt.figure(figsize=(7, 7))\n", + "ax = plt.axes()\n", + "v1 = mplhep.histplot([h[bh.rebin(2)] for h in hists_true], stack=True, label=[class_names[k] for k in [13,11,22,1,2,130,211]], lw=1)\n", + "v2 = mplhep.histplot([h[bh.rebin(2)] for h in hists_pred], stack=True, color=[x.stairs.get_edgecolor() for x in v1][::-1], lw=2, histtype=\"errorbar\")\n", + "\n", + "legend1 = plt.legend(v1, [x.legend_artist.get_label() for x in v1], loc=(0.60, 0.6), title=\"PF\")\n", + "legend2 = plt.legend(v2, [x.legend_artist.get_label() for x in v1], loc=(0.8, 0.6), title=\"MLPF\")\n", + "plt.gca().add_artist(legend1)\n", + "plt.ylabel(\"Total number of particles / bin\")\n", + "cms_label(ax)\n", + "sample_label(ax)\n", + "\n", "plt.yscale(\"log\")\n", - "plt.ylim(bottom=1e4, top=1e6)\n", + "plt.ylim(top=2e9)\n", "plt.xlabel(\"PFCandidate $\\eta$\")\n", - "plt.savefig(\"eta_true.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6f9414f8", - "metadata": {}, - "outputs": [], - "source": [ - "plot_distribution(\"pred\", np.linspace(-5,5,61), \"eta\", \"MLPF\", CLASS_NAMES_CMS[1:])\n", - "plt.yscale(\"log\")\n", - "plt.ylim(bottom=1e4, top=1e6)\n", - "plt.xlabel(\"MLPFCandidate $\\eta$\")\n", - "plt.savefig(\"eta_pred.pdf\", bbox_inches=\"tight\")" + "plt.savefig(\"eta_true_vs_pred.pdf\", bbox_inches=\"tight\")" ] }, { @@ -309,7 +327,8 @@ "for icls in range(1,8):\n", " npred = np.sum(yvals[\"pred_cls_id\"] == icls, axis=1)\n", " ncand = np.sum(yvals[\"true_cls_id\"] == icls, axis=1)\n", - " plt.figure(figsize=(6,6))\n", + " plt.figure(figsize=(7,7))\n", + " ax = plt.axes()\n", " plt.scatter(ncand, npred, marker=\".\", alpha=0.8)\n", " a = 0.5*min(np.min(npred), np.min(ncand))\n", " b = 1.5*max(np.max(npred), np.max(ncand))\n", @@ -319,9 +338,10 @@ " plt.title(CLASS_NAMES_CMS[icls],y=1.05)\n", " plt.xlabel(\"number of PFCandidates\")\n", " plt.ylabel(\"number of MLPFCandidates\")\n", - " cms_label(x2=0.6, y=0.89)\n", + " cms_label(ax)\n", + " sample_label(ax)\n", " plt.savefig(\"num_cls{}.pdf\".format(icls), bbox_inches=\"tight\")\n", - " plt.savefig(\"num_cls{}.png\".format(icls), bbox_inches=\"tight\", dpi=300)\n" + " 
plt.savefig(\"num_cls{}.png\".format(icls), bbox_inches=\"tight\", dpi=300)" ] }, { @@ -509,9 +529,9 @@ " plt.plot(test_smooth, color=p1[0].get_color(), lw=2, label=\"test\")\n", " \n", " plt.ylim(test[-1]*(1.0-margin), test[-1]*(1.0+margin))\n", - " plt.legend(loc=\"best\", frameon=False)\n", + " plt.legend(loc=3, frameon=False)\n", " plt.xlabel(\"epoch\")\n", - " cms_label(x1=0.18)" + " cms_label(ax,y=0.95)" ] }, { @@ -586,44 +606,29 @@ "metadata": {}, "outputs": [], "source": [ - "plt.figure(figsize=(8, 8))\n", + "plt.figure(figsize=(7,7))\n", "ax = plt.axes()\n", "plt.imshow(cm_norm, cmap=\"Blues\")\n", "plt.colorbar()\n", "\n", - "cms_label(x1=0.18, x2=0.52, y=0.82)\n", + "thresh = cm_norm.max() / 1.5\n", + "for i, j in itertools.product(range(cm_norm.shape[0]), range(cm_norm.shape[1])):\n", + " plt.text(j, i, \"{:0.2f}\".format(cm_norm[i, j]),\n", + " horizontalalignment=\"center\",\n", + " color=\"white\" if cm_norm[i, j] > thresh else \"black\")\n", + "\n", + "cms_label(ax, x1=0.12)\n", + "#cms_label_sample_label(x1=0.18, x2=0.52, y=0.82)\n", "plt.xticks(range(len(CLASS_NAMES_CMS)-1), CLASS_NAMES_CMS[1:]);\n", "plt.yticks(range(len(CLASS_NAMES_CMS)-1), CLASS_NAMES_CMS[1:]);\n", - "plt.xlabel(\"Predicted PFCandidate\")\n", - "plt.ylabel(\"True PFCandidate\")\n", - "plt.title(\"MLPF trained on PF\", y=1.03)\n", + "plt.xlabel(\"MLPF candidate ID\")\n", + "plt.ylabel(\"PF candidate ID\")\n", + "plt.ylim(-0.5, 6.9)\n", + "plt.title(\"MLPF trained on PF\")\n", "plt.savefig(\"cm_normed.pdf\", bbox_inches=\"tight\")\n", "plt.savefig(\"cm_normed.png\", bbox_inches=\"tight\", dpi=300)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "prepared-fruit", - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(8, 8))\n", - "ax = plt.axes()\n", - "plt.imshow(cm/np.sum(cm), cmap=\"Blues\")\n", - "plt.colorbar()\n", - "\n", - "cms_label(x1=0.18, x2=0.52, y=0.82)\n", - "#sample_label(ax, x=0.8, y=1.0)\n", - "plt.xticks(range(len(CLASS_NAMES_CMS)-1), CLASS_NAMES_CMS[1:]);\n", - "plt.yticks(range(len(CLASS_NAMES_CMS)-1), CLASS_NAMES_CMS[1:]);\n", - "plt.xlabel(\"Predicted PFCandidate\")\n", - "plt.ylabel(\"True PFCandidate\")\n", - "plt.title(\"MLPF trained on PF\", y=1.03)\n", - "plt.savefig(\"cm.pdf\", bbox_inches=\"tight\")\n", - "plt.savefig(\"cm.png\", bbox_inches=\"tight\", dpi=300)" - ] - }, { "cell_type": "code", "execution_count": null, @@ -649,7 +654,7 @@ " plt.title(CLASS_NAMES_CMS[icls], y=1.05)\n", " plt.xlabel(ivar)\n", " plt.ylabel(\"Number of particles / bin\")\n", - " cms_label(x1=0.2, x2=0.6)\n", + " cms_label(ax)\n", " plt.savefig(\"distribution_icls{}_{}.pdf\".format(icls, ivar))\n", " plt.savefig(\"distribution_icls{}_{}.png\".format(icls, ivar), dpi=300)" ] @@ -669,7 +674,7 @@ " maxval=3,\n", " norm=matplotlib.colors.LogNorm()):\n", " \n", - " plt.figure(figsize=(6,5))\n", + " plt.figure(figsize=(8,7))\n", " ax = plt.axes()\n", " \n", " bins = np.linspace(minval, maxval, 100)\n", @@ -693,8 +698,8 @@ " plt.plot([minval, maxval], [minval, maxval], color=\"black\", ls=\"--\", lw=0.5)\n", " plt.xlim(minval, maxval)\n", " plt.ylim(minval, maxval)\n", - " cms_label(x1=0.2, x2=0.48)\n", - " plt.text(0.02, 0.95, particle_label, transform=ax.transAxes)\n", + " cms_label(ax)\n", + " plt.text(0.02, 0.94, particle_label, transform=ax.transAxes)\n", " ax.set_xticks(ax.get_yticks());" ] }, @@ -761,7 +766,7 @@ "metadata": {}, "outputs": [], "source": [ - "plot_particle_regression(ivar=\"energy\", icls=3, particle_label=\"HF\", minval=0.0, maxval=4)\n", + 
"plot_particle_regression(ivar=\"energy\", icls=3, particle_label=\"HFEM\", minval=0.0, maxval=4)\n", "plt.xlabel(\"PFCandidate $\\log_{10}$ E/GeV\")\n", "plt.ylabel(\"MLPFCandidate $\\log_{10}$ E/GeV\")\n", "plt.savefig(\"energy_corr_cls3_log.pdf\", bbox_inches=\"tight\")\n", @@ -775,7 +780,7 @@ "metadata": {}, "outputs": [], "source": [ - "plot_particle_regression(ivar=\"energy\", icls=4, particle_label=\"HF\", minval=0.0, maxval=4)\n", + "plot_particle_regression(ivar=\"energy\", icls=4, particle_label=\"HFHAD\", minval=0.0, maxval=4)\n", "plt.xlabel(\"PFCandidate $\\log_{10}$ E/GeV\")\n", "plt.ylabel(\"MLPFCandidate $\\log_{10}$ E/GeV\")\n", "plt.savefig(\"energy_corr_cls4_log.pdf\", bbox_inches=\"tight\")\n", @@ -803,12 +808,34 @@ "metadata": {}, "outputs": [], "source": [ - "plot_particle_regression(ivar=\"energy\", icls=5, particle_label=\"gamma\", minval=0.0, maxval=4)\n", + "plot_particle_regression(ivar=\"energy\", icls=6, particle_label=\"e\", minval=0.0, maxval=4)\n", "plt.xlabel(\"PFCandidate $\\log_{10}$ E/GeV\")\n", "plt.ylabel(\"MLPFCandidate $\\log_{10}$ E/GeV\")\n", - "plt.savefig(\"energy_corr_cls5_log.pdf\", bbox_inches=\"tight\")\n", - "plt.savefig(\"energy_corr_cls5_log.png\", bbox_inches=\"tight\", dpi=300)" + "plt.savefig(\"energy_corr_cls6_log.pdf\", bbox_inches=\"tight\")\n", + "plt.savefig(\"energy_corr_cls6_log.png\", bbox_inches=\"tight\", dpi=300)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51617bae", + "metadata": {}, + "outputs": [], + "source": [ + "plot_particle_regression(ivar=\"energy\", icls=7, particle_label=\"mu\", minval=0.0, maxval=4)\n", + "plt.xlabel(\"PFCandidate $\\log_{10}$ E/GeV\")\n", + "plt.ylabel(\"MLPFCandidate $\\log_{10}$ E/GeV\")\n", + "plt.savefig(\"energy_corr_cls7_log.pdf\", bbox_inches=\"tight\")\n", + "plt.savefig(\"energy_corr_cls7_log.png\", bbox_inches=\"tight\", dpi=300)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17609f40", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/notebooks/cmssw.ipynb b/notebooks/cmssw.ipynb index c52a4a9d2..eeec151df 100644 --- a/notebooks/cmssw.ipynb +++ b/notebooks/cmssw.ipynb @@ -9,56 +9,54 @@ "source": [ "import pickle\n", "import numpy as np\n", - "import mplhep\n", "import awkward\n", "import matplotlib.pyplot as plt\n", "import matplotlib.patches as mpatches\n", "\n", "import uproot\n", - "import boost_histogram as bh\n" + "import boost_histogram as bh\n", + "import mplhep\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "aa92c191", + "id": "4f940835", "metadata": {}, "outputs": [], "source": [ - "physics_process = \"qcd\"\n", + "CMS_PF_CLASS_NAMES = [\"none\" \"charged hadron\", \"neutral hadron\", \"hfem\", \"hfhad\", \"photon\", \"electron\", \"muon\"]\n", "\n", - "data_baseline = awkward.Array(pickle.load(open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11843.0/out.pkl\", \"rb\")))\n", - "data_mlpf = awkward.Array(pickle.load(open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11843.13/out.pkl\", \"rb\")))\n", + "ELEM_LABELS_CMS = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]\n", + "ELEM_NAMES_CMS = [\"NONE\", \"TRACK\", \"PS1\", \"PS2\", \"ECAL\", \"HCAL\", \"GSF\", \"BREM\", \"HFEM\", \"HFHAD\", \"SC\", \"HO\"]\n", "\n", - "fi1 = uproot.open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11843.0/DQM_V0001_R000000001__Global__CMSSW_X_Y_Z__RECO.root\")\n", - "fi2 = uproot.open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11843.13/DQM_V0001_R000000001__Global__CMSSW_X_Y_Z__RECO.root\")" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "id": "extreme-exhibit", - "metadata": {}, - "outputs": [], - "source": [ - "physics_process = \"ttbar\"\n", - "data_mlpf = awkward.Array(pickle.load(open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11834.13/out.pkl\", \"rb\")))\n", - "data_baseline = awkward.Array(pickle.load(open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11834.0/out.pkl\", \"rb\")))\n", + "CLASS_LABELS_CMS = [0, 211, 130, 1, 2, 22, 11, 13]\n", + "CLASS_NAMES_CMS = [\"none\", \"ch.had\", \"n.had\", \"HFEM\", \"HFHAD\", \"$\\gamma$\", \"$e^\\pm$\", \"$\\mu^\\pm$\"]\n", "\n", - "fi1 = uproot.open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11834.0/DQM_V0001_R000000001__Global__CMSSW_X_Y_Z__RECO.root\")\n", - "fi2 = uproot.open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11834.13/DQM_V0001_R000000001__Global__CMSSW_X_Y_Z__RECO.root\")" + "class_names = {k: v for k, v in zip(CLASS_LABELS_CMS, CLASS_NAMES_CMS)}" ] }, { "cell_type": "code", "execution_count": null, - "id": "excited-shepherd", + "id": "aa92c191", "metadata": {}, "outputs": [], "source": [ - "# physics_process = \"singlepi\"\n", - "# data_mlpf = awkward.Array(pickle.load(open(\"/home/joosep/reco/mlpf/CMSSW_11_3_0_pre2/11688.0_mlpf/out.pkl\", \"rb\")))\n", - "# data_baseline = awkward.Array(pickle.load(open(\"/home/joosep/reco/mlpf/CMSSW_11_3_0_pre2/11688.0_baseline/out.pkl\", \"rb\")))" + "physics_process = \"ttbar\" #\"ttbar\", \"qcd\"\n", + "\n", + "if physics_process == \"qcd\":\n", + " data_baseline = awkward.Array(pickle.load(open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11843.0/out.pkl\", \"rb\")))\n", + " data_mlpf = awkward.Array(pickle.load(open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11843.13/out.pkl\", \"rb\")))\n", + "\n", + " fi1 = uproot.open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11843.0/DQM_V0001_R000000001__Global__CMSSW_X_Y_Z__RECO.root\")\n", + " fi2 = uproot.open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11843.13/DQM_V0001_R000000001__Global__CMSSW_X_Y_Z__RECO.root\")\n", + "elif physics_process == \"ttbar\":\n", + " data_mlpf = awkward.Array(pickle.load(open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11834.13/out.pkl\", \"rb\")))\n", + " data_baseline = awkward.Array(pickle.load(open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11834.0/out.pkl\", \"rb\")))\n", + "\n", + " fi1 = uproot.open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11834.0/DQM_V0001_R000000001__Global__CMSSW_X_Y_Z__RECO.root\")\n", + " fi2 = uproot.open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11834.13/DQM_V0001_R000000001__Global__CMSSW_X_Y_Z__RECO.root\")" ] }, { @@ -68,489 +66,307 @@ "metadata": {}, "outputs": [], "source": [ - "def cms_label(x0=0.12, x1=0.23, x2=0.67, y=0.90):\n", - " plt.figtext(x0, y,'CMS',fontweight='bold', wrap=True, horizontalalignment='left', fontsize=12)\n", - " plt.figtext(x1, y,'Simulation Preliminary', style='italic', wrap=True, horizontalalignment='left', fontsize=10)\n", - " plt.figtext(x2, y,'Run 3 (14 TeV)', wrap=True, horizontalalignment='left', fontsize=10)\n", + "def cms_label(ax, x0=0.01, x1=0.1, x2=0.98, y=0.97):\n", + " plt.figtext(x0, y,'CMS',fontweight='bold', wrap=True, horizontalalignment='left', fontsize=12, transform=ax.transAxes)\n", + " plt.figtext(x1, y,'Simulation Preliminary', style='italic', wrap=True, horizontalalignment='left', fontsize=10, transform=ax.transAxes)\n", + " plt.figtext(x2, y,'Run 3 (14 TeV)', wrap=False, horizontalalignment='right', fontsize=10, transform=ax.transAxes)\n", + " \n", + "def sample_label(ax, physics_process=physics_process, x=0.01, 
y=0.93):\n", + " plt.text(x, y, physics_process_str[physics_process], ha=\"left\", size=10, transform=ax.transAxes)\n", " \n", "physics_process_str = {\n", " \"ttbar\": \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\",\n", " \"singlepi\": \"single $\\pi^{\\pm}$ events\",\n", - " \"qcd\": \"QCD\",\n", - "}\n", - "\n", - "def sample_label(ax, x=0.03, y=0.98, additional_text=\"\", physics_process=physics_process):\n", - " plt.text(x, y,\n", - " physics_process_str[physics_process]+additional_text,\n", - " va=\"top\", ha=\"left\", size=10, transform=ax.transAxes)\n" + " \"qcd\": \"QCD events\",\n", + "}" ] }, { "cell_type": "code", "execution_count": null, - "id": "speaking-contents", + "id": "ccfdab54", "metadata": {}, "outputs": [], "source": [ - "plt.figure(figsize=(5, 5))\n", - "ax = plt.axes()\n", + "def plot_candidates_pf_vs_mlpf(variable, varname, bins):\n", + " plt.figure(figsize=(12,12))\n", + " ax = plt.axes()\n", "\n", - "bins = np.linspace(0, 500, 61)\n", - "plt.hist(awkward.flatten(data_baseline[\"ak4PFJetsCHS\"][\"pt\"]), bins=bins, histtype=\"step\", lw=2, label=\"PF\");\n", - "plt.hist(awkward.flatten(data_mlpf[\"ak4PFJetsCHS\"][\"pt\"]), bins=bins, histtype=\"step\", lw=2, label=\"MLPF\");\n", - "plt.yscale(\"log\")\n", - "plt.ylim(top=1e5)\n", - "cms_label()\n", - "sample_label(ax, x=0.02)\n", - "plt.xlabel(\"ak4PFJetsCHS $p_T$ [GeV]\")\n", - "plt.ylabel(\"Number of jets\")\n", - "plt.legend(loc=\"best\")\n", + " hists_baseline = []\n", + " hists_mlpf = []\n", + " iplot = 1\n", + " for pid in [13,11,22,1,2,130,211]:\n", + " msk1 = np.abs(data_baseline[\"particleFlow\"][\"pdgId\"]) == pid\n", + " msk2 = np.abs(data_mlpf[\"particleFlow\"][\"pdgId\"]) == pid\n", "\n", - "plt.savefig(\"ak4jet_pt_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "formed-single", - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5, 5))\n", + " d1 = awkward.flatten(data_baseline[\"particleFlow\"][variable][msk1])\n", + " d2 = awkward.flatten(data_mlpf[\"particleFlow\"][variable][msk2])\n", + " \n", + " h1 = bh.Histogram(bh.axis.Variable(bins))\n", + " h1.fill(d1)\n", + " h2 = bh.Histogram(bh.axis.Variable(bins))\n", + " h2.fill(d2)\n", + " \n", + " ax = plt.subplot(3,3,iplot)\n", + " plt.sca(ax)\n", "\n", - "bins = np.linspace(0, 2500, 61)\n", - "plt.hist(awkward.flatten(data_baseline[\"ak4PFJetsCHS\"][\"energy\"]), bins=bins, histtype=\"step\", lw=2, label=\"PF\");\n", - "plt.hist(awkward.flatten(data_mlpf[\"ak4PFJetsCHS\"][\"energy\"]), bins=bins, histtype=\"step\", lw=2, label=\"MLPF\");\n", - "plt.yscale(\"log\")\n", - "plt.ylim(top=1e5)\n", - "cms_label()\n", - "sample_label(ax, x=0.02)\n", + " mplhep.histplot(h1, histtype=\"step\", lw=2, label=\"PF\");\n", + " mplhep.histplot(h2, histtype=\"step\", lw=2, label=\"MLPF\");\n", + " \n", + " if variable!=\"eta\":\n", + " plt.yscale(\"log\")\n", "\n", - "plt.xlabel(\"ak4PFJetsCHS $E$ [GeV]\")\n", - "plt.ylabel(\"Number of jets\")\n", - "plt.legend(loc=\"best\")\n", + " plt.legend(loc=\"best\", frameon=False, title=class_names[pid])\n", + " plt.xlabel(varname)\n", + " plt.ylabel(\"Number of particles / bin\")\n", + " sample_label(ax, x=0.08)\n", "\n", - "plt.savefig(\"ak4jet_energy_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "nonprofit-polish", - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5, 5))\n", + " iplot += 1\n", + " \n", + " 
hists_baseline.append(h1)\n", + " hists_mlpf.append(h2)\n", + " plt.tight_layout()\n", + " return hists_baseline, hists_mlpf\n", "\n", - "bins = np.linspace(-6, 6, 101)\n", - "plt.hist(awkward.flatten(data_baseline[\"ak4PFJetsCHS\"][\"eta\"]), bins=bins, histtype=\"step\", lw=2, label=\"PF\");\n", - "plt.hist(awkward.flatten(data_mlpf[\"ak4PFJetsCHS\"][\"eta\"]), bins=bins, histtype=\"step\", lw=2, label=\"MLPF\");\n", - "#plt.yscale(\"log\")\n", - "cms_label()\n", - "sample_label(ax)\n", - "plt.ylim(top=2000)\n", - "plt.xlabel(\"ak4PFJetsCHS $\\eta$\")\n", - "plt.ylabel(\"Number of jets\")\n", - "plt.legend(loc=\"best\")\n", + "def plot_candidates_pf_vs_mlpf_single(hists):\n", + " plt.figure(figsize=(7, 7))\n", + " ax = plt.axes()\n", + " v1 = mplhep.histplot([h[bh.rebin(2)] for h in hists[0]], stack=True, label=[class_names[k] for k in [13,11,22,1,2,130,211]], lw=1)\n", + " v2 = mplhep.histplot([h[bh.rebin(2)] for h in hists[1]], stack=True, color=[x.stairs.get_edgecolor() for x in v1][::-1], lw=2, histtype=\"errorbar\")\n", "\n", - "plt.savefig(\"ak4jet_eta_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" + " legend1 = plt.legend(v1, [x.legend_artist.get_label() for x in v1], loc=(0.60, 0.6), title=\"PF\")\n", + " legend2 = plt.legend(v2, [x.legend_artist.get_label() for x in v1], loc=(0.8, 0.6), title=\"MLPF\")\n", + " plt.gca().add_artist(legend1)\n", + " plt.ylabel(\"Total number of particles / bin\")\n", + " cms_label(ax)\n", + " sample_label(ax)" ] }, { "cell_type": "code", "execution_count": null, - "id": "center-heath", + "id": "a70c3657", "metadata": {}, "outputs": [], "source": [ - "color_map = {\n", - " 1: \"red\",\n", - " 2: \"blue\",\n", - " 11: \"orange\",\n", - " 22: \"cyan\",\n", - " 13: \"purple\",\n", - " 130: \"green\",\n", - " 211: \"black\"\n", - "}\n", - "\n", - "particle_labels = {\n", - " 1: \"HFEM\",\n", - " 2: \"HFHAD\",\n", - " 11: \"$e^\\pm$\",\n", - " 22: \"$\\gamma$\",\n", - " 13: \"$\\mu$\",\n", - " 130: \"neutral hadron\",\n", - " 211: \"charged hadron\"\n", - " \n", - "}" + "hists = plot_candidates_pf_vs_mlpf(\"pt\", \"PFCandidate $p_T$ [GeV]\", np.linspace(0,200,101))\n", + "# plt.savefig(\"candidates_pt_{}.pdf\".format(physics_process), bbox_inches=\"tight\")\n", + "# plt.savefig(\"candidates_pt_{}.png\".format(physics_process), dpi=400, bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "sufficient-medication", + "id": "1f85a942", "metadata": {}, "outputs": [], "source": [ - "def draw_event(iev):\n", - " pt_0 = data_mlpf[\"particleFlow\"][\"pt\"][iev]\n", - " energy_0 = data_mlpf[\"particleFlow\"][\"energy\"][iev]\n", - " eta_0 = data_mlpf[\"particleFlow\"][\"eta\"][iev]\n", - " phi_0 = data_mlpf[\"particleFlow\"][\"phi\"][iev]\n", - " pdgid_0 = np.abs(data_mlpf[\"particleFlow\"][\"pdgId\"][iev])\n", - " \n", - " pt_1 = data_baseline[\"particleFlow\"][\"pt\"][iev]\n", - " energy_1 = data_baseline[\"particleFlow\"][\"energy\"][iev]\n", - " eta_1 = data_baseline[\"particleFlow\"][\"eta\"][iev]\n", - " phi_1 = data_baseline[\"particleFlow\"][\"phi\"][iev]\n", - " pdgid_1 = np.abs(data_baseline[\"particleFlow\"][\"pdgId\"][iev])\n", - " \n", - " plt.figure(figsize=(5, 5))\n", - " ax = plt.axes()\n", - " plt.scatter(eta_0, phi_0, marker=\".\", s=energy_0, c=[color_map[p] for p in pdgid_0], alpha=0.6)\n", - "\n", - " pids = [211,130,1,2,22,11,13]\n", - " for p in pids:\n", - " plt.plot([], [], color=color_map[p], lw=0, marker=\"o\", label=particle_labels[p])\n", - " plt.legend(loc=8, frameon=False, ncol=3, 
fontsize=8)\n", + "plot_candidates_pf_vs_mlpf_single(hists)\n", + "plt.xlabel(\"PFCandidate $p_T$ [GeV]\")\n", + "plt.yscale(\"log\")\n", + "plt.ylim(top=1e7)\n", "\n", - " cms_label()\n", - " sample_label(ax)\n", - " plt.xlim(-6,6)\n", - " plt.ylim(-5,4)\n", - " plt.xlabel(\"PFCandidate $\\eta$\")\n", - " plt.ylabel(\"PFCandidate $\\phi$\")\n", - " plt.title(\"MLPF (trained on PF), CMSSW-ONNX inference\", y=1.05)\n", - " plt.savefig(\"event_mlpf_{}_iev{}.pdf\".format(physics_process, iev), bbox_inches=\"tight\")\n", - " plt.savefig(\"event_mlpf_{}_iev{}.png\".format(physics_process, iev), bbox_inches=\"tight\", dpi=300)\n", - " \n", - " plt.figure(figsize=(5, 5))\n", - " ax = plt.axes()\n", - " plt.scatter(eta_1, phi_1, marker=\".\", s=energy_1, c=[color_map[p] for p in pdgid_1], alpha=0.6)\n", - "# plt.scatter(\n", - "# data_baseline[\"ak4PFJetsCHS\"][\"eta\"][iev],\n", - "# data_baseline[\"ak4PFJetsCHS\"][\"phi\"][iev],\n", - "# s=data_baseline[\"ak4PFJetsCHS\"][\"energy\"][iev], color=\"gray\", alpha=0.3\n", - "# )\n", - " cms_label()\n", - " sample_label(ax)\n", - " plt.xlim(-6,6)\n", - " plt.ylim(-5,4)\n", - " plt.xlabel(\"PFCandidate $\\eta$\")\n", - " plt.ylabel(\"PFCandidate $\\phi$\")\n", - " plt.title(\"Standard PF, CMSSW\", y=1.05)\n", - " \n", - " pids = [211,130,1,2,22,11,13]\n", - " for p in pids:\n", - " plt.plot([], [], color=color_map[p], lw=0, marker=\"o\", label=particle_labels[p])\n", - " plt.legend(loc=8, frameon=False, ncol=3, fontsize=8)\n", - " \n", - " plt.savefig(\"event_pf_{}_iev{}.pdf\".format(physics_process, iev), bbox_inches=\"tight\")\n", - " plt.savefig(\"event_pf_{}_iev{}.png\".format(physics_process, iev), bbox_inches=\"tight\", dpi=300)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "comfortable-albert", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "draw_event(0)" + "plt.savefig(\"candidates_pt_single_{}.pdf\".format(physics_process), bbox_inches=\"tight\")\n", + "plt.savefig(\"candidates_pt_single_{}.png\".format(physics_process), dpi=400, bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "accompanied-wichita", + "id": "ae617dff", "metadata": {}, "outputs": [], "source": [ - "draw_event(1)" + "hists = plot_candidates_pf_vs_mlpf(\"eta\", \"PFCandidate $\\eta$\", np.linspace(-6, 6,101))\n", + "plt.savefig(\"candidates_eta_{}.pdf\".format(physics_process), bbox_inches=\"tight\")\n", + "plt.savefig(\"candidates_eta_{}.png\".format(physics_process), dpi=400, bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "curious-lover", - "metadata": { - "scrolled": false - }, + "id": "e7a574d5", + "metadata": {}, "outputs": [], "source": [ - "draw_event(2)" + "plot_candidates_pf_vs_mlpf_single(hists)\n", + "plt.xlabel(\"PFCandidate $\\eta$\")\n", + "plt.yscale(\"log\")\n", + "plt.ylim(top=1e8)\n", + "plt.savefig(\"candidates_eta_single_{}.pdf\".format(physics_process), bbox_inches=\"tight\")\n", + "plt.savefig(\"candidates_ete_single_{}.png\".format(physics_process), dpi=400, bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "3fe02a91", + "id": "d756ef80", "metadata": {}, "outputs": [], "source": [ - "def plot_dqm(key, title, rebin=None):\n", - " h1 = fi1.get(key).to_boost()\n", - " h2 = fi2.get(key).to_boost()\n", + "def plot_pf_vs_mlpf_jet(jetcoll, variable, bins):\n", + " plt.figure(figsize=(7,7))\n", + " ax = plt.axes()\n", "\n", - " fig, (ax1, ax2) = plt.subplots(2, 1)\n", - " plt.sca(ax1)\n", - " if 
rebin:\n", - " h1 = h1[bh.rebin(rebin)]\n", - " h2 = h2[bh.rebin(rebin)]\n", - " \n", - " mplhep.histplot(h1, yerr=0, label=\"PF\");\n", - " mplhep.histplot(h2, yerr=0, label=\"MLPF\");\n", - " plt.legend(frameon=False)\n", - " plt.ylabel(\"Number of particles / bin\")\n", - " sample_label(ax=ax1, additional_text=\", \"+title, physics_process=physics_process)\n", + " h1 = bh.Histogram(bh.axis.Variable(bins))\n", + " h1.fill(awkward.flatten(data_baseline[jetcoll][variable]))\n", "\n", - " plt.sca(ax2)\n", - " ratio_hist = h2/h1\n", - " vals_y = ratio_hist.values()\n", - " vals_y[np.isnan(vals_y)] = 0\n", - " plt.plot(ratio_hist.axes[0].centers, vals_y, color=\"gray\", lw=0, marker=\".\")\n", - " plt.ylim(0,2)\n", - " plt.axhline(1.0, color=\"black\", ls=\"--\")\n", - " plt.ylabel(\"MLPF / PF\")\n", - " \n", - " return ax1, ax2\n", - " \n", - "#plt.xscale(\"log\")\n", - "#plt.yscale(\"log\")\n", + " h2 = bh.Histogram(bh.axis.Variable(bins))\n", + " h2.fill(awkward.flatten(data_mlpf[jetcoll][variable]))\n", "\n", - "log10_pt = \"$\\log_{10}[p_T/\\mathrm{GeV}]$\"\n", - "eta = \"$\\eta$\"\n", - "\n", - "dqm_plots_ptcl = [\n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/chargedHadron/chargedHadronLog10Pt\",\n", - " \"ch.had.\", log10_pt, \"ch_had_logpt\"),\n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/chargedHadron/chargedHadronEta\",\n", - " \"ch.had.\", eta, \"ch_had_eta\"),\n", - " \n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/neutralHadron/neutralHadronLog10Pt\",\n", - " \"n.had.\", log10_pt, \"n_had_logpt\"),\n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/neutralHadron/neutralHadronPtLow\",\n", - " \"n.had.\", \"$p_T$ [GeV]\", \"n_had_ptlow\"),\n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/neutralHadron/neutralHadronPtMid\",\n", - " \"n.had.\", \"$p_T$ [GeV]\", \"n_had_ptmid\"),\n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/neutralHadron/neutralHadronEta\",\n", - " \"n.had.\", eta, \"n_had_eta\"),\n", - " \n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/HF_hadron/HF_hadronLog10Pt\",\n", - " \"HFHAD\", log10_pt, \"hfhad_logpt\"),\n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/HF_hadron/HF_hadronEta\",\n", - " \"HFHAD\", eta, \"hfhad_eta\"),\n", - " \n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/HF_EM_particle/HF_EM_particleLog10Pt\",\n", - " \"HFEM\", log10_pt, \"hfem_logpt\"),\n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/HF_EM_particle/HF_EM_particleEta\",\n", - " \"HFEM\", eta, \"hfem_eta\"),\n", - " \n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/photon/photonLog10Pt\",\n", - " \"photon\", log10_pt, \"photon_logpt\"),\n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/photon/photonEta\",\n", - " \"photon\", eta, \"photon_eta\"),\n", - " \n", - " \n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/electron/electronLog10Pt\",\n", - " \"electron\", log10_pt, \"electron_logpt\"),\n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/electron/electronEta\",\n", - " \"electron\", eta, \"electron_eta\"), \n", - " \n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/muon/muonLog10Pt\",\n", - " \"muon\", log10_pt, \"muon_logpt\"),\n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/muon/muonEta\",\n", - " \"muon\", eta, \"muon_eta\"),\n", - "]\n", + " mplhep.histplot(h1, histtype=\"step\", lw=2, 
label=\"PF\");\n", + " mplhep.histplot(h2, histtype=\"step\", lw=2, label=\"MLPF\");\n", + " cms_label(ax)\n", + " sample_label(ax, x=0.02)\n", "\n", - "dqm_plots_jetres = [\n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PFJetValidation/CompWithGenJet/mean_delta_et_Over_et_VS_et_\",\n", - " \"jets\", \"gen-jet $E_t$\", \"$\\Delta E_t / E_t$\"),\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "348eb94c", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "for key, title, xlabel, plot_label in dqm_plots_ptcl:\n", - " rh = plot_dqm(key, title)\n", - " plt.xlabel(xlabel)\n", - " cms_label()\n", - " plt.savefig(\"dqm_{}_{}.pdf\".format(plot_label, physics_process), bbox_inches=\"tight\")\n", - " plt.savefig(\"dqm_{}_{}.png\".format(plot_label, physics_process), bbox_inches=\"tight\", dpi=300)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "79d3fdfe", - "metadata": {}, - "outputs": [], - "source": [ - "ax1, ax2 = plot_dqm(\"DQMData/Run 1/JetMET/Run summary/Jet/Cleanedak4PFJetsCHS/Pt\", \"ak4PFCHS jets\")\n", - "ax2.set_xlabel(\"jet $p_t$ [GeV]\")\n", - "ax1.set_ylabel(\"number of jets / bin\")\n", - "#plt.xscale(\"log\")\n", - "#plt.ylim(bottom=1, top=1e4)\n", - "ax1.set_yscale(\"log\")\n", - "ax1.set_ylim(bottom=1, top=1e5)\n", - "#ax2.set_ylim(0,5)\n", + " plt.ylabel(\"Number of jets\")\n", + " plt.legend(loc=(0.8, 0.85), frameon=False)\n", "\n", - "cms_label()\n", - "plt.savefig(\"dqm_jet_pt_{}.pdf\".format(physics_process), bbox_inches=\"tight\")\n", - "plt.savefig(\"dqm_jet_pt_{}.png\".format(physics_process), bbox_inches=\"tight\", dpi=300)" + " plt.savefig(\"ak4jet_puppi_energy_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "67ee87dc", + "id": "a538baca", "metadata": {}, "outputs": [], "source": [ - "ax1, ax2 = plot_dqm(\"DQMData/Run 1/JetMET/Run summary/Jet/CleanedslimmedJetsPuppi/Pt\", \"ak4PFPuppi jets\")\n", - "ax2.set_xlabel(\"jet $p_t$ [GeV]\")\n", - "ax1.set_ylabel(\"number of jets / bin\")\n", - "#plt.xscale(\"log\")\n", - "#plt.ylim(bottom=1, top=1e4)\n", - "ax1.set_yscale(\"log\")\n", - "ax1.set_ylim(bottom=1, top=1e5)\n", - "#ax2.set_ylim(0,5)\n", - "\n", - "cms_label()\n", - "plt.savefig(\"dqm_jet_pt_puppi_{}.pdf\".format(physics_process), bbox_inches=\"tight\")\n", - "plt.savefig(\"dqm_jet_pt_puppi_{}.png\".format(physics_process), bbox_inches=\"tight\", dpi=300)" + "plot_pf_vs_mlpf_jet(\"ak4PFJetsCHS\", \"pt\", np.linspace(0,500,61))\n", + "plt.yscale(\"log\")\n", + "plt.ylim(top=1e5)\n", + "plt.xlabel(\"ak4PFJetsCHS $p_T$ [GeV]\")\n", + "plt.savefig(\"ak4jet_chs_pt_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "1665549f", + "id": "23271180", "metadata": {}, "outputs": [], "source": [ - "ax1, ax2 = plot_dqm(\"DQMData/Run 1/JetMET/Run summary/Jet/Cleanedak4PFJetsCHS/Eta\", \"ak4PFCHS jets\")\n", - "ax2.set_xlabel(\"jet $\\eta$\")\n", - "ax1.set_ylabel(\"number of jets / bin\")\n", - "#plt.xscale(\"log\")\n", - "#plt.ylim(bottom=1, top=1e4)\n", - "#ax1.set_yscale(\"log\")\n", - "ax1.set_ylim(bottom=0, top=1e3)\n", - "#ax2.set_ylim(0,5)\n", - "\n", - "cms_label()\n", - "plt.savefig(\"dqm_jet_eta_{}.pdf\".format(physics_process), bbox_inches=\"tight\")\n", - "plt.savefig(\"dqm_jet_eta_{}.png\".format(physics_process), bbox_inches=\"tight\", dpi=300)" + "plot_pf_vs_mlpf_jet(\"ak4PFJetsPuppi\", \"pt\", np.linspace(0,500,61))\n", + "plt.yscale(\"log\")\n", 
+ "plt.ylim(top=1e5)\n", + "plt.xlabel(\"ak4PFJetsPuppi $p_T$ [GeV]\")\n", + "plt.savefig(\"ak4jet_puppi_pt_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "7f320f04", + "id": "e30d3355", "metadata": {}, "outputs": [], "source": [ - "ax1, ax2 = plot_dqm(\"DQMData/Run 1/JetMET/Run summary/Jet/CleanedslimmedJetsPuppi/Eta\", \"ak4PFPuppi jets\")\n", - "ax2.set_xlabel(\"jet $\\eta$\")\n", - "ax1.set_ylabel(\"number of jets / bin\")\n", - "#plt.xscale(\"log\")\n", - "#plt.ylim(bottom=1, top=1e4)\n", - "#ax1.set_yscale(\"log\")\n", - "#ax1.set_ylim(bottom=0, top=20)\n", - "#ax2.set_ylim(0,5)\n", - "\n", - "cms_label()\n", - "plt.savefig(\"dqm_jet_eta_puppi_{}.pdf\".format(physics_process), bbox_inches=\"tight\")\n", - "plt.savefig(\"dqm_jet_eta_puppi_{}.png\".format(physics_process), bbox_inches=\"tight\", dpi=300)" + "plot_pf_vs_mlpf_jet(\"ak4PFJetsCHS\", \"eta\", np.linspace(-6, 6, 61))\n", + "plt.ylim(0,10000)\n", + "plt.xlabel(\"ak4PFJetsCHS $\\eta$\")\n", + "plt.savefig(\"ak4jet_chs_eta_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "7908e453", + "id": "2e220a66", "metadata": {}, "outputs": [], "source": [ - "# plot_dqm(\"DQMData/Run 1/ParticleFlow/Run summary/PFJetValidation/CompWithGenJet/mean_delta_et_Over_et_VS_et_\", \"AK4 PF jets\")\n", - "# plt.xlabel(\"gen-jet $E_t$ [GeV]\")\n", - "# plt.ylabel(\"profiled $\\mu(\\Delta E_t / E_t$)\")\n", - "# plt.xscale(\"log\")\n", - "# plt.ylim(0,3)\n", - "# cms_label()\n", - "# plt.savefig(\"dqm_jet_mean_delta_et_Over_et_VS_et.pdf\", bbox_inches=\"tight\")" + "plot_pf_vs_mlpf_jet(\"ak4PFJetsPuppi\", \"eta\", np.linspace(-6, 6, 61))\n", + "plt.ylim(0,2000)\n", + "plt.xlabel(\"ak4PFJetsPuppi $\\eta$\")\n", + "plt.savefig(\"ak4jet_puppi_eta_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "d3bdc5b1", + "id": "10d7a3d8", "metadata": {}, "outputs": [], "source": [ - "# plot_dqm(\"DQMData/Run 1/ParticleFlow/Run summary/PFJetValidation/CompWithGenJet/sigma_delta_et_Over_et_VS_et_\", \"AK4 PF jets\")\n", - "# plt.xlabel(\"gen-jet $E_t$ [GeV]\")\n", - "# plt.ylabel(\"profiled $\\sigma(\\Delta E_t / E_t)$\")\n", - "# plt.xscale(\"log\")\n", - "# plt.ylim(0,10)\n", - "# cms_label()\n", - "# plt.savefig(\"dqm_jet_sigma_delta_et_Over_et_VS_et.pdf\", bbox_inches=\"tight\")" + "plot_pf_vs_mlpf_jet(\"ak4PFJetsCHS\", \"energy\", np.linspace(0,2500,61))\n", + "plt.yscale(\"log\")\n", + "plt.ylim(top=1e5)\n", + "plt.xlabel(\"ak4PFJetsCHS $E$ [GeV]\")\n", + "plt.savefig(\"ak4jet_chs_energy_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "545c5575", + "id": "7dfa0975", "metadata": {}, "outputs": [], "source": [ - "ax1, ax2 = plot_dqm(\"DQMData/Run 1/JetMET/Run summary/METValidation/pfMet/MET\", \"PFMET\", rebin=1)\n", - "ax2.set_xlabel(\"$\\sum E_t$ [GeV]\")\n", - "ax1.set_ylabel(\"number of events / bin\")\n", - "#ax1.set_xscale(\"log\")\n", - "ax1.set_ylim(bottom=1, top=1000)\n", - "ax1.set_yscale(\"log\")\n", - "plt.savefig(\"dqm_met_sumet_{}.pdf\".format(physics_process), bbox_inches=\"tight\")\n", - "plt.savefig(\"dqm_met_sumet_{}.png\".format(physics_process), bbox_inches=\"tight\", dpi=300)" + "plot_pf_vs_mlpf_jet(\"ak4PFJetsPuppi\", \"energy\", np.linspace(0,2500,61))\n", + "plt.yscale(\"log\")\n", + "plt.ylim(top=1e5)\n", + "plt.xlabel(\"ak4PFJetsPuppi $E$ [GeV]\")\n", + 
"plt.savefig(\"ak4jet_puppi_energy_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "cb82fc75", + "id": "674700b6", "metadata": {}, "outputs": [], "source": [ - "# plot_dqm(\"DQMData/Run 1/ParticleFlow/Run summary/PFMETValidation/CompWithGenMET/profileRMS_delta_et_Over_et_VS_et_\", \"PFMET\")\n", - "# plt.xlabel(\"gen-MET $E_t$ [GeV]\")\n", - "# plt.ylabel(\"profiled RMS $\\Delta E_t / E_t$\")\n", - "# plt.xscale(\"log\")\n", - "# plt.ylim(0,3)\n", - "# cms_label()\n", - "# plt.savefig(\"dqm_met_profileRMS_delta_et_Over_et_VS_et.pdf\", bbox_inches=\"tight\")" + "plt.figure(figsize=(7,7))\n", + "ax = plt.axes()\n", + "\n", + "bins = np.linspace(0, 500, 41)\n", + "\n", + "h1 = bh.Histogram(bh.axis.Variable(bins))\n", + "h1.fill(awkward.flatten(data_baseline[\"pfMet\"][\"pt\"]))\n", + "\n", + "h2 = bh.Histogram(bh.axis.Variable(bins))\n", + "h2.fill(awkward.flatten(data_mlpf[\"pfMet\"][\"pt\"]))\n", + "\n", + "mplhep.histplot(h1, histtype=\"step\", lw=2, label=\"PF\");\n", + "mplhep.histplot(h2, histtype=\"step\", lw=2, label=\"MLPF\");\n", + "plt.yscale(\"log\")\n", + "plt.ylim(top=1e3)\n", + "cms_label(ax)\n", + "sample_label(ax, x=0.02)\n", + "plt.xlabel(\"pfMet $p_T$ [GeV]\")\n", + "plt.ylabel(\"Number of events\")\n", + "plt.legend(loc=(0.8, 0.85), frameon=False)\n", + "\n", + "plt.savefig(\"pfmet_pt_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "74a0e050", + "id": "114f3670", "metadata": {}, "outputs": [], "source": [ - "# plot_dqm(\"DQMData/Run 1/ParticleFlow/Run summary/PFMETValidation/CompWithGenMET/profile_delta_et_VS_et_\", \"PFMET\")\n", - "# plt.xlabel(\"gen-MET $E_t$ [GeV]\")\n", - "# plt.ylabel(\"profiled $\\Delta E_t$ [GeV]\")\n", - "# plt.xscale(\"log\")\n", - "# plt.ylim(0, 80)\n", - "# cms_label()\n", - "# plt.savefig(\"dqm_met_delta_et_VS_et.pdf\", bbox_inches=\"tight\")" + "plt.figure(figsize=(7,7))\n", + "ax = plt.axes()\n", + "\n", + "bins = np.linspace(0, 500, 41)\n", + "\n", + "h1 = bh.Histogram(bh.axis.Variable(bins))\n", + "h1.fill(awkward.flatten(data_baseline[\"pfMetPuppi\"][\"pt\"]))\n", + "\n", + "h2 = bh.Histogram(bh.axis.Variable(bins))\n", + "h2.fill(awkward.flatten(data_mlpf[\"pfMetPuppi\"][\"pt\"]))\n", + "\n", + "mplhep.histplot(h1, histtype=\"step\", lw=2, label=\"PF\");\n", + "mplhep.histplot(h2, histtype=\"step\", lw=2, label=\"MLPF\");\n", + "plt.yscale(\"log\")\n", + "plt.ylim(top=1e3)\n", + "cms_label(ax)\n", + "sample_label(ax, x=0.02)\n", + "plt.xlabel(\"pfMet PUPPI $p_T$ [GeV]\")\n", + "plt.ylabel(\"Number of events\")\n", + "plt.legend(loc=(0.8, 0.85), frameon=False)\n", + "\n", + "plt.savefig(\"pfmet_puppi_pt_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" ] }, { @@ -561,81 +377,81 @@ "outputs": [], "source": [ "timing_output = \"\"\"\n", - "Nelem=1600 mean_time=5.92 ms stddev_time=5.03 ms mem_used=1018 MB\n", - "Nelem=1920 mean_time=6.57 ms stddev_time=1.01 ms mem_used=1110 MB\n", - "Nelem=2240 mean_time=6.92 ms stddev_time=0.81 ms mem_used=1127 MB\n", - "Nelem=2560 mean_time=7.37 ms stddev_time=0.66 ms mem_used=1136 MB\n", - "Nelem=2880 mean_time=8.17 ms stddev_time=0.56 ms mem_used=1123 MB\n", - "Nelem=3200 mean_time=8.88 ms stddev_time=1.09 ms mem_used=1121 MB\n", - "Nelem=3520 mean_time=9.51 ms stddev_time=0.65 ms mem_used=1121 MB\n", - "Nelem=3840 mean_time=10.48 ms stddev_time=0.93 ms mem_used=1255 MB\n", - "Nelem=4160 mean_time=11.05 ms stddev_time=0.87 ms mem_used=1255 MB\n", - 
"Nelem=4480 mean_time=12.07 ms stddev_time=0.81 ms mem_used=1230 MB\n", - "Nelem=4800 mean_time=12.92 ms stddev_time=0.89 ms mem_used=1230 MB\n", - "Nelem=5120 mean_time=13.44 ms stddev_time=0.75 ms mem_used=1230 MB\n", - "Nelem=5440 mean_time=14.07 ms stddev_time=0.78 ms mem_used=1230 MB\n", - "Nelem=5760 mean_time=15.00 ms stddev_time=0.84 ms mem_used=1230 MB\n", - "Nelem=6080 mean_time=15.74 ms stddev_time=1.05 ms mem_used=1230 MB\n", - "Nelem=6400 mean_time=16.32 ms stddev_time=1.30 ms mem_used=1230 MB\n", - "Nelem=6720 mean_time=17.24 ms stddev_time=0.99 ms mem_used=1230 MB\n", - "Nelem=7040 mean_time=17.74 ms stddev_time=0.85 ms mem_used=1230 MB\n", - "Nelem=7360 mean_time=18.59 ms stddev_time=1.04 ms mem_used=1230 MB\n", - "Nelem=7680 mean_time=19.33 ms stddev_time=0.93 ms mem_used=1499 MB\n", - "Nelem=8000 mean_time=20.00 ms stddev_time=1.06 ms mem_used=1499 MB\n", - "Nelem=8320 mean_time=20.55 ms stddev_time=1.13 ms mem_used=1499 MB\n", - "Nelem=8640 mean_time=21.10 ms stddev_time=0.90 ms mem_used=1499 MB\n", - "Nelem=8960 mean_time=22.88 ms stddev_time=1.24 ms mem_used=1499 MB\n", - "Nelem=9280 mean_time=23.44 ms stddev_time=1.14 ms mem_used=1499 MB\n", - "Nelem=9600 mean_time=23.93 ms stddev_time=1.04 ms mem_used=1499 MB\n", - "Nelem=9920 mean_time=24.75 ms stddev_time=0.91 ms mem_used=1499 MB\n", - "Nelem=10240 mean_time=25.47 ms stddev_time=1.33 ms mem_used=1499 MB\n", - "Nelem=10560 mean_time=26.29 ms stddev_time=1.33 ms mem_used=1499 MB\n", - "Nelem=10880 mean_time=26.72 ms stddev_time=1.18 ms mem_used=1490 MB\n", - "Nelem=11200 mean_time=29.50 ms stddev_time=2.60 ms mem_used=1502 MB\n", - "Nelem=11520 mean_time=28.50 ms stddev_time=0.91 ms mem_used=1491 MB\n", - "Nelem=11840 mean_time=29.11 ms stddev_time=1.14 ms mem_used=1491 MB\n", - "Nelem=12160 mean_time=30.01 ms stddev_time=1.15 ms mem_used=1499 MB\n", - "Nelem=12480 mean_time=30.55 ms stddev_time=0.94 ms mem_used=1499 MB\n", - "Nelem=12800 mean_time=31.31 ms stddev_time=1.08 ms mem_used=1499 MB\n", - "Nelem=13120 mean_time=32.61 ms stddev_time=1.19 ms mem_used=1499 MB\n", - "Nelem=13440 mean_time=33.37 ms stddev_time=1.01 ms mem_used=1499 MB\n", - "Nelem=13760 mean_time=34.13 ms stddev_time=1.18 ms mem_used=1499 MB\n", - "Nelem=14080 mean_time=34.73 ms stddev_time=1.40 ms mem_used=1499 MB\n", - "Nelem=14400 mean_time=35.79 ms stddev_time=1.70 ms mem_used=2036 MB\n", - "Nelem=14720 mean_time=36.68 ms stddev_time=1.37 ms mem_used=2036 MB\n", - "Nelem=15040 mean_time=37.17 ms stddev_time=0.97 ms mem_used=2036 MB\n", - "Nelem=15360 mean_time=38.73 ms stddev_time=1.19 ms mem_used=2036 MB\n", - "Nelem=15680 mean_time=39.80 ms stddev_time=1.04 ms mem_used=2036 MB\n", - "Nelem=16000 mean_time=40.87 ms stddev_time=1.46 ms mem_used=1996 MB\n", - "Nelem=16320 mean_time=41.89 ms stddev_time=1.01 ms mem_used=1996 MB\n", - "Nelem=16640 mean_time=43.36 ms stddev_time=1.08 ms mem_used=1996 MB\n", - "Nelem=16960 mean_time=44.87 ms stddev_time=1.35 ms mem_used=1996 MB\n", - "Nelem=17280 mean_time=46.04 ms stddev_time=0.96 ms mem_used=1996 MB\n", - "Nelem=17600 mean_time=47.96 ms stddev_time=1.47 ms mem_used=1996 MB\n", - "Nelem=17920 mean_time=49.01 ms stddev_time=1.35 ms mem_used=1996 MB\n", - "Nelem=18240 mean_time=50.04 ms stddev_time=1.34 ms mem_used=1956 MB\n", - "Nelem=18560 mean_time=51.34 ms stddev_time=1.49 ms mem_used=1956 MB\n", - "Nelem=18880 mean_time=52.16 ms stddev_time=1.20 ms mem_used=1956 MB\n", - "Nelem=19200 mean_time=53.19 ms stddev_time=1.20 ms mem_used=1956 MB\n", - "Nelem=19520 mean_time=54.03 ms 
stddev_time=0.96 ms mem_used=1956 MB\n", - "Nelem=19840 mean_time=55.68 ms stddev_time=1.05 ms mem_used=1956 MB\n", - "Nelem=20160 mean_time=56.88 ms stddev_time=1.12 ms mem_used=1956 MB\n", - "Nelem=20480 mean_time=57.49 ms stddev_time=1.50 ms mem_used=1956 MB\n", - "Nelem=20800 mean_time=60.40 ms stddev_time=3.51 ms mem_used=1959 MB\n", - "Nelem=21120 mean_time=61.30 ms stddev_time=3.90 ms mem_used=1959 MB\n", - "Nelem=21440 mean_time=60.74 ms stddev_time=1.05 ms mem_used=1948 MB\n", - "Nelem=21760 mean_time=61.66 ms stddev_time=1.29 ms mem_used=1948 MB\n", - "Nelem=22080 mean_time=63.35 ms stddev_time=1.11 ms mem_used=1948 MB\n", - "Nelem=22400 mean_time=64.70 ms stddev_time=1.16 ms mem_used=1948 MB\n", - "Nelem=22720 mean_time=65.63 ms stddev_time=0.95 ms mem_used=1948 MB\n", - "Nelem=23040 mean_time=67.09 ms stddev_time=1.02 ms mem_used=1948 MB\n", - "Nelem=23360 mean_time=68.40 ms stddev_time=1.15 ms mem_used=1948 MB\n", - "Nelem=23680 mean_time=69.76 ms stddev_time=0.88 ms mem_used=1948 MB\n", - "Nelem=24000 mean_time=71.55 ms stddev_time=0.94 ms mem_used=1948 MB\n", - "Nelem=24320 mean_time=73.04 ms stddev_time=1.46 ms mem_used=1948 MB\n", - "Nelem=24640 mean_time=74.53 ms stddev_time=1.28 ms mem_used=1948 MB\n", - "Nelem=24960 mean_time=76.03 ms stddev_time=1.07 ms mem_used=1948 MB\n", - "Nelem=25280 mean_time=77.59 ms stddev_time=0.88 ms mem_used=1948 MB\n", + "Nelem=1600 mean_time=4.66 ms stddev_time=2.55 ms mem_used=711 MB\n", + "Nelem=1920 mean_time=4.74 ms stddev_time=0.52 ms mem_used=711 MB\n", + "Nelem=2240 mean_time=5.53 ms stddev_time=0.63 ms mem_used=711 MB\n", + "Nelem=2560 mean_time=5.88 ms stddev_time=0.52 ms mem_used=711 MB\n", + "Nelem=2880 mean_time=6.22 ms stddev_time=0.63 ms mem_used=745 MB\n", + "Nelem=3200 mean_time=6.50 ms stddev_time=0.64 ms mem_used=745 MB\n", + "Nelem=3520 mean_time=7.07 ms stddev_time=0.61 ms mem_used=745 MB\n", + "Nelem=3840 mean_time=7.53 ms stddev_time=0.68 ms mem_used=745 MB\n", + "Nelem=4160 mean_time=7.76 ms stddev_time=0.69 ms mem_used=745 MB\n", + "Nelem=4480 mean_time=8.66 ms stddev_time=0.72 ms mem_used=745 MB\n", + "Nelem=4800 mean_time=9.00 ms stddev_time=0.57 ms mem_used=745 MB\n", + "Nelem=5120 mean_time=9.22 ms stddev_time=0.84 ms mem_used=745 MB\n", + "Nelem=5440 mean_time=9.64 ms stddev_time=0.73 ms mem_used=812 MB\n", + "Nelem=5760 mean_time=10.39 ms stddev_time=1.06 ms mem_used=812 MB\n", + "Nelem=6080 mean_time=10.77 ms stddev_time=0.69 ms mem_used=812 MB\n", + "Nelem=6400 mean_time=11.33 ms stddev_time=0.75 ms mem_used=812 MB\n", + "Nelem=6720 mean_time=12.19 ms stddev_time=0.77 ms mem_used=812 MB\n", + "Nelem=7040 mean_time=12.54 ms stddev_time=0.72 ms mem_used=812 MB\n", + "Nelem=7360 mean_time=13.08 ms stddev_time=0.78 ms mem_used=812 MB\n", + "Nelem=7680 mean_time=13.71 ms stddev_time=0.81 ms mem_used=812 MB\n", + "Nelem=8000 mean_time=14.11 ms stddev_time=0.74 ms mem_used=812 MB\n", + "Nelem=8320 mean_time=14.85 ms stddev_time=0.86 ms mem_used=812 MB\n", + "Nelem=8640 mean_time=15.36 ms stddev_time=0.79 ms mem_used=812 MB\n", + "Nelem=8960 mean_time=16.76 ms stddev_time=1.06 ms mem_used=812 MB\n", + "Nelem=9280 mean_time=17.27 ms stddev_time=0.71 ms mem_used=812 MB\n", + "Nelem=9600 mean_time=17.97 ms stddev_time=0.85 ms mem_used=812 MB\n", + "Nelem=9920 mean_time=18.73 ms stddev_time=0.94 ms mem_used=812 MB\n", + "Nelem=10240 mean_time=19.26 ms stddev_time=0.89 ms mem_used=812 MB\n", + "Nelem=10560 mean_time=19.91 ms stddev_time=0.90 ms mem_used=946 MB\n", + "Nelem=10880 mean_time=20.55 ms stddev_time=0.87 ms 
mem_used=946 MB\n", + "Nelem=11200 mean_time=21.82 ms stddev_time=0.78 ms mem_used=940 MB\n", + "Nelem=11520 mean_time=22.48 ms stddev_time=0.75 ms mem_used=940 MB\n", + "Nelem=11840 mean_time=23.33 ms stddev_time=0.98 ms mem_used=940 MB\n", + "Nelem=12160 mean_time=24.28 ms stddev_time=0.85 ms mem_used=940 MB\n", + "Nelem=12480 mean_time=24.85 ms stddev_time=0.67 ms mem_used=940 MB\n", + "Nelem=12800 mean_time=25.58 ms stddev_time=0.68 ms mem_used=940 MB\n", + "Nelem=13120 mean_time=26.58 ms stddev_time=0.78 ms mem_used=940 MB\n", + "Nelem=13440 mean_time=27.15 ms stddev_time=0.63 ms mem_used=940 MB\n", + "Nelem=13760 mean_time=27.72 ms stddev_time=0.85 ms mem_used=940 MB\n", + "Nelem=14080 mean_time=28.08 ms stddev_time=0.66 ms mem_used=940 MB\n", + "Nelem=14400 mean_time=28.70 ms stddev_time=0.73 ms mem_used=940 MB\n", + "Nelem=14720 mean_time=29.22 ms stddev_time=0.66 ms mem_used=940 MB\n", + "Nelem=15040 mean_time=29.73 ms stddev_time=0.80 ms mem_used=940 MB\n", + "Nelem=15360 mean_time=30.71 ms stddev_time=0.85 ms mem_used=940 MB\n", + "Nelem=15680 mean_time=31.15 ms stddev_time=0.74 ms mem_used=940 MB\n", + "Nelem=16000 mean_time=31.74 ms stddev_time=0.80 ms mem_used=940 MB\n", + "Nelem=16320 mean_time=32.27 ms stddev_time=0.77 ms mem_used=940 MB\n", + "Nelem=16640 mean_time=33.07 ms stddev_time=1.08 ms mem_used=940 MB\n", + "Nelem=16960 mean_time=33.60 ms stddev_time=0.69 ms mem_used=940 MB\n", + "Nelem=17280 mean_time=34.43 ms stddev_time=0.64 ms mem_used=940 MB\n", + "Nelem=17600 mean_time=35.34 ms stddev_time=0.75 ms mem_used=940 MB\n", + "Nelem=17920 mean_time=35.84 ms stddev_time=0.68 ms mem_used=940 MB\n", + "Nelem=18240 mean_time=36.51 ms stddev_time=0.85 ms mem_used=940 MB\n", + "Nelem=18560 mean_time=37.23 ms stddev_time=0.87 ms mem_used=940 MB\n", + "Nelem=18880 mean_time=37.72 ms stddev_time=0.78 ms mem_used=940 MB\n", + "Nelem=19200 mean_time=38.33 ms stddev_time=0.87 ms mem_used=940 MB\n", + "Nelem=19520 mean_time=38.95 ms stddev_time=0.87 ms mem_used=940 MB\n", + "Nelem=19840 mean_time=39.73 ms stddev_time=0.74 ms mem_used=940 MB\n", + "Nelem=20160 mean_time=40.27 ms stddev_time=0.81 ms mem_used=940 MB\n", + "Nelem=20480 mean_time=40.86 ms stddev_time=0.74 ms mem_used=940 MB\n", + "Nelem=20800 mean_time=41.71 ms stddev_time=0.94 ms mem_used=940 MB\n", + "Nelem=21120 mean_time=42.35 ms stddev_time=1.38 ms mem_used=1209 MB\n", + "Nelem=21440 mean_time=42.91 ms stddev_time=1.18 ms mem_used=1209 MB\n", + "Nelem=21760 mean_time=43.40 ms stddev_time=0.98 ms mem_used=1184 MB\n", + "Nelem=22080 mean_time=44.43 ms stddev_time=1.04 ms mem_used=1184 MB\n", + "Nelem=22400 mean_time=45.22 ms stddev_time=1.02 ms mem_used=1184 MB\n", + "Nelem=22720 mean_time=45.57 ms stddev_time=0.94 ms mem_used=1184 MB\n", + "Nelem=23040 mean_time=46.21 ms stddev_time=0.86 ms mem_used=1184 MB\n", + "Nelem=23360 mean_time=46.85 ms stddev_time=0.95 ms mem_used=1184 MB\n", + "Nelem=23680 mean_time=47.52 ms stddev_time=1.57 ms mem_used=1184 MB\n", + "Nelem=24000 mean_time=48.31 ms stddev_time=0.74 ms mem_used=1184 MB\n", + "Nelem=24320 mean_time=48.92 ms stddev_time=0.75 ms mem_used=1184 MB\n", + "Nelem=24640 mean_time=49.70 ms stddev_time=0.92 ms mem_used=1184 MB\n", + "Nelem=24960 mean_time=50.26 ms stddev_time=0.93 ms mem_used=1184 MB\n", + "Nelem=25280 mean_time=50.98 ms stddev_time=0.89 ms mem_used=1184 MB\n", "\"\"\"" ] }, @@ -682,12 +498,12 @@ "metadata": {}, "outputs": [], "source": [ - "plt.figure(figsize=(5,5))\n", + "plt.figure(figsize=(7, 7))\n", "ax = plt.axes()\n", "plt.hist(nelem, 
bins=np.linspace(2000,6000,100));\n", "plt.ylabel(\"Number of events / bin\")\n", "plt.xlabel(\"PFElements per event\")\n", - "cms_label()\n", + "cms_label(ax)\n", "sample_label(ax, physics_process=\"ttbar\")" ] }, @@ -699,6 +515,7 @@ "outputs": [], "source": [ "plt.figure(figsize=(10, 3))\n", + "ax = plt.axes()\n", "plt.errorbar(time_x, time_y, yerr=time_y_err, marker=\".\", label=\"MLPF\")\n", "plt.axvline(np.mean(nelem)-np.std(nelem), color=\"black\", ls=\"--\", lw=1.0, label=r\"$t\\bar{t}$+PU Run 3\")\n", "plt.axvline(np.mean(nelem)+np.std(nelem), color=\"black\", ls=\"--\", lw=1.0)\n", @@ -707,8 +524,10 @@ "plt.ylim(0,100)\n", "plt.ylabel(\"Average runtime per event [ms]\")\n", "plt.xlabel(\"PFElements per event\")\n", - "plt.legend(frameon=False)\n", - "cms_label(x1=0.17, x2=0.8)\n", + "plt.legend(loc=4, frameon=False)\n", + "cms_label(ax, y=0.93, x1=0.07, x2=0.99)\n", + "plt.text(4000, 20, \"typical Run3 range\", rotation=90)\n", + "plt.text(6000, 70, \"Inference with ONNXRuntime in a single CPU thread,\\nsingle GPU stream on NVIDIA RTX2060S 8GB.\\nNot a production-like setup. Synthetic inputs.\")\n", "plt.savefig(\"runtime_scaling.pdf\", bbox_inches=\"tight\")\n", "plt.savefig(\"runtime_scaling.png\", bbox_inches=\"tight\", dpi=300)" ] @@ -721,6 +540,7 @@ "outputs": [], "source": [ "plt.figure(figsize=(10, 3))\n", + "ax = plt.axes()\n", "plt.plot(time_x, gpu_mem_use, marker=\".\", label=\"MLPF\")\n", "plt.axvline(np.mean(nelem)-np.std(nelem), color=\"black\", ls=\"--\", lw=1.0, label=r\"$t\\bar{t}$+PU Run 3\")\n", "plt.axvline(np.mean(nelem)+np.std(nelem), color=\"black\", ls=\"--\", lw=1.0)\n", @@ -729,8 +549,10 @@ "plt.ylim(0,3000)\n", "plt.ylabel(\"Maximum GPU memory used [MB]\")\n", "plt.xlabel(\"PFElements per event\")\n", - "plt.legend(frameon=False, loc=4)\n", - "cms_label(x1=0.17, x2=0.8)\n", + "plt.legend(loc=4, frameon=False)\n", + "cms_label(ax, y=0.93, x1=0.07, x2=0.99)\n", + "plt.text(4000, 500, \"typical Run3 range\", rotation=90)\n", + "plt.text(6000, 2100, \"Inference with ONNXRuntime in a single CPU thread,\\nsingle GPU stream on NVIDIA RTX2060S 8GB.\\nNot a production-like setup. 
Synthetic inputs.\")\n", "plt.savefig(\"memory_scaling.pdf\", bbox_inches=\"tight\")\n", "plt.savefig(\"memory_scaling.png\", bbox_inches=\"tight\", dpi=300)" ] @@ -738,7 +560,7 @@ { "cell_type": "code", "execution_count": null, - "id": "777ba9f3", + "id": "83f1f32d", "metadata": {}, "outputs": [], "source": [] diff --git a/notebooks/old/benchmarks.ipynb b/notebooks/old/benchmarks.ipynb deleted file mode 100644 index af935721b..000000000 --- a/notebooks/old/benchmarks.ipynb +++ /dev/null @@ -1,443 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import numba\n", - "import pickle\n", - "import glob\n", - "import pandas as pd\n", - "from matplotlib.colors import LogNorm" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def get_df(fl, name):\n", - " bls = []\n", - " for fi in fl:\n", - " d = pickle.load(open(fi, \"rb\"))\n", - " bls += [d[name]]\n", - " return pd.DataFrame(bls)\n", - "\n", - "def text_in_box(mat, thresh):\n", - " for i in range(len(mat)):\n", - " for j in range(len(mat)):\n", - " plt.text(i, j, \"{0:.3f}\".format(mat[i,j]), ha=\"center\", va=\"center\", color=\"white\" if mat[i, j] > thresh else \"black\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_num_blocks(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, sample):\n", - " plt.figure(figsize=(5,5))\n", - " plt.scatter(df_blocks[\"num_blocks_true\"], df_blocks[\"num_blocks_pred\"], marker=\".\", label=\"Edge classifier\", alpha=0.5)\n", - " plt.scatter(df_blocks_dummy[\"num_blocks_true\"], df_blocks_dummy[\"num_blocks_pred\"], marker=\"x\", label=\"PFBlockAlgo\", alpha=0.5)\n", - " plt.scatter(df_blocks_clue[\"num_blocks_true\"], df_blocks_clue[\"num_blocks_pred\"], marker=\"^\", label=\"CLUE\", alpha=0.5)\n", - " plt.scatter(df_blocks_gnn[\"num_blocks_true\"], df_blocks_gnn[\"num_blocks_pred\"], marker=\"^\", label=\"GNN\", alpha=0.5)\n", - " plt.xlim(0,5000)\n", - " plt.ylim(0,5000)\n", - " plt.plot([0,5000], [0,5000], color=\"black\", lw=1, ls=\"--\")\n", - " plt.xlabel(\"number of blocks (true)\")\n", - " plt.ylabel(\"number of blocks (pred)\")\n", - " plt.title(\"Number of blocks, {0}\".format(sample))\n", - " plt.legend(frameon=False, loc=\"best\")\n", - " plt.savefig(\"num_blocks_{0}.pdf\".format(sample), bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_block_size(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, sample):\n", - " plt.figure(figsize=(5,5))\n", - " plt.scatter(df_blocks[\"max_block_size_true\"], df_blocks[\"max_block_size_pred\"], marker=\".\", label=\"Edge classifier\", alpha=0.3)\n", - " plt.scatter(df_blocks_dummy[\"max_block_size_true\"], df_blocks_dummy[\"max_block_size_pred\"], marker=\"x\", label=\"PFBlockAlgo\", alpha=0.3)\n", - " plt.scatter(df_blocks_clue[\"max_block_size_true\"], df_blocks_clue[\"max_block_size_pred\"], marker=\"^\", label=\"CLUE\", alpha=0.3)\n", - " plt.scatter(df_blocks_gnn[\"max_block_size_true\"], df_blocks_gnn[\"max_block_size_pred\"], marker=\"^\", label=\"GNN\", alpha=0.3)\n", - " plt.xlim(0,3000)\n", - " plt.ylim(0,3000)\n", - " plt.plot([0,3000], [0,3000], 
color=\"black\", lw=1, ls=\"--\")\n", - " plt.xlabel(\"maximum block size (true)\")\n", - " plt.ylabel(\"maximum block size (pred)\")\n", - " plt.title(\"Block finder model, {0}\".format(sample))\n", - " plt.legend(frameon=False, loc=\"best\")\n", - " plt.savefig(\"block_size_{0}.pdf\".format(sample), bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_precision_recall(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, sample):\n", - " plt.figure(figsize=(5,5))\n", - " plt.scatter(df_blocks[\"edge_precision\"], df_blocks[\"edge_recall\"], marker=\".\", alpha=0.5, label=\"Edge classifier\")\n", - " plt.scatter(df_blocks_dummy[\"edge_precision\"], df_blocks_dummy[\"edge_recall\"], marker=\"x\", alpha=0.5, label=\"PFBlockAlgo\")\n", - " plt.scatter(df_blocks_clue[\"edge_precision\"], df_blocks_clue[\"edge_recall\"], marker=\"^\", alpha=0.5, label=\"CLUE\")\n", - " plt.scatter(df_blocks_gnn[\"edge_precision\"], df_blocks_gnn[\"edge_recall\"], marker=\"^\", alpha=0.5, label=\"GNN\")\n", - "\n", - " plt.xlim(0,1.2)\n", - " plt.ylim(0,1.2)\n", - "\n", - " plt.xlabel(\"edge precision: TP / (TP + FP)\")\n", - " plt.ylabel(\"edge recall: TP / (TP + FN)\")\n", - " plt.title(\"Edge classification, {0}\".format(sample))\n", - " plt.legend(frameon=False)\n", - " plt.savefig(\"edge_precision_recall_{0}.pdf\".format(sample), bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_block_size_histo(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, sample):\n", - " plt.figure(figsize=(5,5))\n", - " b = np.logspace(0.1, 4, 40)\n", - " plt.hist(df_blocks[\"max_block_size_pred\"], bins=b, histtype=\"step\", lw=2, label=\"Edge classifier, m={0:.0f}\".format(np.mean(df_blocks[\"max_block_size_pred\"])));\n", - " plt.hist(df_blocks_dummy[\"max_block_size_pred\"], bins=b, histtype=\"step\", lw=2, label=\"PFBlockAlgo, m={0:.0f}\".format(np.mean(df_blocks_dummy[\"max_block_size_pred\"])));\n", - " plt.hist(df_blocks_clue[\"max_block_size_pred\"], bins=b, histtype=\"step\", lw=2, label=\"GLUE, m={0:.0f}\".format(np.mean(df_blocks_clue[\"max_block_size_pred\"])));\n", - " plt.hist(df_blocks_gnn[\"max_block_size_pred\"], bins=b, histtype=\"step\", lw=2, label=\"GNN, m={0:.0f}\".format(np.mean(df_blocks_gnn[\"max_block_size_pred\"])));\n", - " plt.hist(df_blocks[\"max_block_size_true\"], bins=b, histtype=\"step\", lw=2, label=\"True blocks, m={0:.0f}\".format(np.mean(df_blocks[\"max_block_size_true\"])));\n", - " plt.xscale(\"log\")\n", - " plt.legend(frameon=False)\n", - " plt.title(\"Maximum block size, {0}\".format(sample))\n", - " plt.savefig(\"max_block_size_{0}.pdf\".format(sample), bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fl = glob.glob(\"../data/NuGun_run3/step3*.pkl\")\n", - "df_blocks = get_df(fl, \"blocks\")\n", - "df_blocks_dummy = get_df(fl, \"blocks_dummy\")\n", - "df_blocks_clue = get_df(fl, \"blocks_clue\")\n", - "\n", - "plot_num_blocks(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, \"NuGun-Run3\")\n", - "plot_block_size(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, \"NuGun-Run3\")\n", - "plot_block_size_histo(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, \"NuGun-Run3\")\n", - "plot_precision_recall(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, 
\"NuGun-Run3\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fl = glob.glob(\"../data/QCD_run3/step3*.pkl\")\n", - "df_blocks = get_df(fl, \"blocks\")\n", - "df_blocks_dummy = get_df(fl, \"blocks_dummy\")\n", - "df_blocks_clue = get_df(fl, \"blocks_clue\")\n", - "\n", - "plot_num_blocks(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, \"QCD-Run3\")\n", - "plot_block_size(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, \"QCD-Run3\")\n", - "plot_block_size_histo(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, \"QCD-Run3\")\n", - "plot_precision_recall(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, \"QCD-Run3\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fl = glob.glob(\"../data/TTbar_run3/step3*.pkl\")\n", - "df_blocks = get_df(fl, \"blocks\")\n", - "df_blocks_dummy = get_df(fl, \"blocks_dummy\")\n", - "df_blocks_clue = get_df(fl, \"blocks_clue\")\n", - "df_blocks_gnn = get_df(fl, \"blocks_gnn\")\n", - "\n", - "plot_num_blocks(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, \"TTbar-Run3\")\n", - "plot_block_size(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, \"TTbar-Run3\")\n", - "plot_block_size_histo(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, \"TTbar-Run3\")\n", - "plot_precision_recall(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, \"TTbar-Run3\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# b = np.linspace(0,1,100)\n", - "# plt.hist(df_blocks[\"adjusted_mutual_info_score\"], bins=b, label=\"Edge classifier\");\n", - "# plt.hist(df_blocks_dummy[\"adjusted_mutual_info_score\"], bins=b, label=\"PFBlockAlgo\");\n", - "# plt.xlabel(\"adjusted MI score\\n(higher is better)\")\n", - "# plt.legend(frameon=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_true_blocks = get_df(fl, \"cand_true_blocks\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_true_blocks.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "plt.scatter(df_true_blocks[\"num_cands_true\"], df_true_blocks[\"num_cands_pred\"], marker=\".\")\n", - "plt.xlim(0,4000)\n", - "plt.ylim(0,4000)\n", - "plt.title(\"True blocks, true vs. predicted candidates\")\n", - "plt.plot([0,4000], [0,4000], color=\"black\", lw=1, ls=\"--\")\n", - "\n", - "plt.xlabel(\"number of true candidates\")\n", - "plt.ylabel(\"number of predicted candidates\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "mat = df_true_blocks[\"ncand_confusion_matrix\"].sum()\n", - "mat = 100.0 * mat / np.sum(mat)\n", - "plt.imshow(mat, cmap=\"Blues\")\n", - "text_in_box(mat, 60)\n", - "plt.colorbar()\n", - "labels = range(4)\n", - "plt.xticks(range(len(labels)), labels=[int(x) for x in labels])\n", - "plt.yticks(range(len(labels)), labels=[int(x) for x in labels])\n", - "plt.xlim(-0.5,3.5)\n", - "plt.ylim(-0.5,3.5)\n", - "plt.title(\"True blocks, true vs. 
predicted candidates\")\n", - "plt.xlabel(\"Number of true candidates\")\n", - "plt.ylabel(\"Number of predicted candidates\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10,10))\n", - "mat = df_true_blocks[\"pdgid_confusion_matrix\"].sum()\n", - "mat = 100.0 * mat / np.sum(mat)\n", - "plt.imshow(mat, cmap=\"Blues\")\n", - "text_in_box(mat, 20)\n", - "plt.colorbar()\n", - "labels = [-211, -13, 0, 1, 2, 13, 22, 130, 211]\n", - "plt.xticks(range(len(labels)), labels=[int(x) for x in labels])\n", - "plt.yticks(range(len(labels)), labels=[int(x) for x in labels])\n", - "plt.xlim(-0.5,8.5)\n", - "plt.ylim(-0.5,8.5)\n", - "\n", - "plt.title(\"True blocks, true vs. predicted candidates (%)\")\n", - "plt.xlabel(\"pdgid of true candidates\")\n", - "plt.ylabel(\"pdgid of predicted candidates\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(0, 10, 20)\n", - "mat = df_true_blocks[\"pt_matrix\"].sum()\n", - "plt.title(\"True blocks, true vs. predicted candidates\")\n", - "plt.imshow(mat, norm=LogNorm(vmin=1, vmax=10*np.max(mat)), origin=\"lower\", cmap=\"Blues\", extent=(min(bins), max(bins), min(bins), max(bins)))\n", - "plt.colorbar()\n", - "\n", - "plt.xlabel(\"true candidate $p_T$ [GeV]\")\n", - "plt.ylabel(\"predicted candidate $p_T$ [GeV]\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(-6, 6, 20)\n", - "mat = df_true_blocks[\"eta_matrix\"].sum()\n", - "#mat = 100 * mat / np.sum(mat)\n", - "plt.imshow(mat, norm=LogNorm(vmin=1, vmax=10*np.max(mat)), origin=\"lower\", cmap=\"Blues\", extent=(min(bins), max(bins), min(bins), max(bins)))\n", - "plt.colorbar()\n", - "\n", - "plt.title(\"True blocks, true vs. predicted candidates\")\n", - "plt.xlabel(\"true candidate $\\eta$\")\n", - "plt.ylabel(\"predicted candidate $\\eta$\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mat = df_true_blocks[\"phi_matrix\"].sum()\n", - "bins = np.linspace(-4, 4, 20)\n", - "plt.imshow(mat, norm=LogNorm(vmin=1, vmax=10*np.max(mat)), origin=\"lower\", cmap=\"Blues\", extent=(min(bins), max(bins), min(bins), max(bins)))\n", - "plt.colorbar()\n", - "\n", - "plt.title(\"True blocks, true vs. predicted candidates\")\n", - "plt.xlabel(\"true candidate $\\phi$\")\n", - "plt.ylabel(\"predicted candidate $\\phi$\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_pred_blocks = get_df(fl, \"cand_pred_blocks\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_pred_blocks.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "plt.scatter(df_pred_blocks[\"num_cands_true\"], df_pred_blocks[\"num_cands_pred\"], marker=\".\")\n", - "plt.xlim(0,4000)\n", - "plt.ylim(0,4000)\n", - "plt.plot([0,4000], [0,4000], color=\"black\", lw=1, ls=\"--\")\n", - "\n", - "plt.title(\"True vs. 
predicted candidates\\nusing predicted blocks\")\n", - "\n", - "plt.xlabel(\"number of true blocks\")\n", - "plt.ylabel(\"number of predicted blocks\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "plt.scatter(df_pred_blocks[\"num_cands_true\"], df_pred_blocks[\"num_cands_matched\"], marker=\".\")\n", - "\n", - "plt.xlim(0,4000)\n", - "plt.ylim(0,4000)\n", - "plt.plot([0,4000], [0,4000], color=\"black\", lw=1, ls=\"--\")\n", - "\n", - "plt.title(\"True vs. predicted candidates\\nusing predicted blocks\")\n", - "\n", - "plt.xlabel(\"number of true candidates\")\n", - "plt.ylabel(\"number of matched candidates\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10,10))\n", - "mat = df_pred_blocks[\"pdgid_confusion_matrix\"].sum()\n", - "mat = 100.0 * mat / np.sum(mat)\n", - "plt.imshow(mat, cmap=\"Blues\")\n", - "text_in_box(mat, 20)\n", - "plt.colorbar()\n", - "labels = [-211, -13, 0, 1, 2, 13, 22, 130, 211]\n", - "plt.xticks(range(len(labels)), labels=[int(x) for x in labels])\n", - "plt.yticks(range(len(labels)), labels=[int(x) for x in labels])\n", - "plt.xlim(-0.5,8.5)\n", - "plt.ylim(-0.5,8.5)\n", - "\n", - "plt.title(\"Predicted blocks, true vs. predicted candidates (matched) (%)\")\n", - "plt.xlabel(\"pdgid of true candidates\")\n", - "plt.ylabel(\"pdgid of predicted candidates\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mat = df_pred_blocks[\"pt_matrix\"].sum()\n", - "bins = np.linspace(0, 10, 20)\n", - "\n", - "#mat = 100 * mat / np.sum(mat)\n", - "plt.title(\"Predicted blocks, true vs. 
matched candidates\")\n", - "plt.imshow(mat, norm=LogNorm(vmin=1, vmax=10*np.max(mat)), origin=\"lower\", cmap=\"Blues\", extent=(min(bins), max(bins), min(bins), max(bins)))\n", - "plt.colorbar()\n", - "\n", - "plt.xlabel(\"true candidate $p_T$ [GeV]\")\n", - "plt.ylabel(\"predicted candidate $p_T$ [GeV]\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/old/check_clustering.ipynb b/notebooks/old/check_clustering.ipynb deleted file mode 100644 index 8380899b0..000000000 --- a/notebooks/old/check_clustering.ipynb +++ /dev/null @@ -1,418 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import scipy\n", - "from matplotlib.colors import LogNorm\n", - "import networkx\n", - "import sklearn\n", - "import sklearn.cluster" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import matplotlib.pyplot as plt\n", - "import sys\n", - "\n", - "import numpy as np\n", - "import sklearn\n", - "import keras\n", - "\n", - "import os\n", - "os.chdir(\"..\")\n", - "\n", - "sys.path += [\"test\"]\n", - "from train_clustering import load_element_pairs, fill_target_matrix, fill_elem_pairs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "training_info = json.load(open(\"clustering.json\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.title(\"Edge classifier\")\n", - "plt.plot(training_info[\"loss\"])\n", - "plt.plot(training_info[\"val_loss\"])\n", - "plt.xlabel(\"epoch\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fn = \"data/TTbar/191009_155100/step3_AOD_{0}_ev{1}.npz\".format(1, 0)\n", - "data = np.load(fn)\n", - "els = data[\"elements\"]\n", - "els_blid = data[\"element_block_id\"]\n", - "\n", - "fi = open(fn.replace(\"ev\", \"dist\"), \"rb\")\n", - "dm = scipy.sparse.load_npz(fi).todense()\n", - "\n", - "#Create the matrix of elements thar are connected according to the miniblock id\n", - "target_matrix = np.zeros((len(els_blid), len(els_blid)), dtype=np.int32)\n", - "fill_target_matrix(target_matrix, els_blid)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "g = networkx.from_numpy_matrix(dm)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "ax = plt.axes()\n", - "networkx.draw(\n", - " g,\n", - " pos=[(els[i, 2], els[i, 3]) for i in range(len(els))],\n", - " node_size=1, axes=ax, alpha=0.2, edge_color=\"grey\", edgelist=[])\n", - "plt.axis('on')\n", - "plt.xlabel(\"$\\eta$\")\n", - "plt.ylabel(\"$\\phi$\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "ax = plt.axes()\n", - "networkx.draw(\n", - " g,\n", - " pos=[(els[i, 2], els[i, 3]) for i in range(len(els))],\n", - " 
node_size=1, axes=ax, alpha=0.2, edge_color=\"grey\", edgelist=list(g.edges)[:200])\n", - "plt.axis('on')\n", - "plt.title(\"{0} elements, 200 / {1} edges\".format(len(g.edges), len(g.edges)))\n", - "plt.xlabel(\"$\\eta$\")\n", - "plt.ylabel(\"$\\phi$\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "all_elem_pairs_X = []\n", - "all_dms = []\n", - "all_blockids_true = [] \n", - "all_elem_pairs_y = []\n", - "\n", - "for i in range(500):\n", - " for j in range(6,7):\n", - " fn = \"data/TTbar/191009_155100/step3_AOD_{1}_ev{0}.npz\".format(i, j)\n", - " all_blockids_true += [np.load(open(fn, \"rb\"))[\"element_block_id\"]]\n", - " \n", - " print(\"Loading {0}\".format(fn))\n", - " elem_pairs_X, elem_pairs_y = load_element_pairs(fn)\n", - " all_elem_pairs_X += [elem_pairs_X]\n", - " all_elem_pairs_y += [elem_pairs_y]\n", - " \n", - " fi = open(fn.replace(\"ev\", \"dist\"), \"rb\")\n", - " dm = scipy.sparse.load_npz(fi).todense()\n", - " all_dms += [dm]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "elem_pairs_X = np.vstack(all_elem_pairs_X)\n", - "elem_pairs_y = np.vstack(all_elem_pairs_y)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from collections import Counter" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model = keras.models.load_model(\"clustering.h5\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pp = model.predict(elem_pairs_X, batch_size=10000)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.linear_model import LogisticRegression\n", - "t0 = 5\n", - "t1 = 1\n", - "sel = (elem_pairs_X[:, 0]==t0) & (elem_pairs_X[:, 1]==t1)\n", - "\n", - "dumdum = LogisticRegression(solver=\"lbfgs\")\n", - "dumdum.fit(elem_pairs_X[sel], elem_pairs_y[sel, 0])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "r = sklearn.metrics.roc_curve(elem_pairs_y[sel, 0], pp[sel])\n", - "r2 = sklearn.metrics.roc_curve(elem_pairs_y[sel, 0], 1.0 - elem_pairs_X[sel, 2])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "threshold = 0.5\n", - "idx = len(r2) - np.searchsorted(r[2][::-1], threshold)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.plot(r[2], r[0], label=\"edge FPR\")\n", - "plt.plot(r[2], r[1], label=\"edge TPR\")\n", - "plt.xlim(0,1)\n", - "plt.legend()\n", - "plt.ylabel(\"rate\")\n", - "plt.xlabel(\"threshold\")\n", - "plt.title(\"Element {0}<->{1} linking\".format(t0, t1))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "plt.plot(r[0], r[1], label=\"simple feedforward DNN\")\n", - "plt.plot(r2[0], r2[1], color=\"black\", ls=\"--\", label=\"logistic regression\")\n", - "plt.title(\"Element {0}<->{1} linking\".format(t0, t1))\n", - "plt.xlabel(\"edge FPR\")\n", - "plt.ylabel(\"edge TPR\")\n", - "plt.xlim(0,1)\n", - "plt.ylim(0,1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pp" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "elem_pairs_X[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numba\n", - "@numba.njit\n", - "def pred_to_adj_matrix(nelems, pred, dm, adj_matrix):\n", - " n = 0\n", - " for i in range(nelems):\n", - " for j in range(i+1, nelems):\n", - " assert(n <= len(pred))\n", - " if dm[i,j] > 0:\n", - " adj_matrix[i,j] = pred[n, 0]\n", - " n += 1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def predict_clustering(fn):\n", - " fi = open(fn, \"rb\")\n", - " data = np.load(fi)\n", - " els = data[\"elements\"]\n", - " els_blid = data[\"element_block_id\"]\n", - " nelem = len(els)\n", - "\n", - " #Load the distance matrix\n", - " fi = open(fn.replace(\"ev\", \"dist\"), \"rb\")\n", - " dm = scipy.sparse.load_npz(fi).todense()\n", - " \n", - " print(els.shape, dm.shape)\n", - " return els, dm\n", - "# elem_pairs_X, elem_pairs_y = load_element_pairs(fn)\n", - "# pp = model.predict(elem_pairs_X)\n", - " \n", - "# adj_matrix = np.zeros((nelem, nelem), dtype=np.float32)\n", - "# pred_to_adj_matrix(nelem, pp, dm, adj_matrix)\n", - "# return adj_matrix" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fn" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "els, dm = predict_clustering('data/TTbar/191009_155100/step3_AOD_7_ev0.npz')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "@numba.njit\n", - "def set_triu(i1, i2, vec, mat):\n", - " for i in range(len(i1)):\n", - " mat[i1[i], i2[i]] = vec[i]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "i1, i2 = np.triu_indices(len(els))\n", - "dmv = np.array(dm[i1, i2])[0, :]\n", - "vec = np.vstack([els[i1, 0], els[i2, 0], dmv]).T\n", - "p2 = model.predict_proba(vec, batch_size=100000)\n", - "p2[dmv==0]=0\n", - "\n", - "mat = np.zeros((len(els), len(els)))\n", - "set_triu(i1, i2, p2[:, 0], mat)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "g1 = networkx.from_numpy_matrix(dm)\n", - "g2 = networkx.from_numpy_matrix(mat>0.5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "len(list(networkx.connected_component_subgraphs(g1)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "len(list(networkx.connected_component_subgraphs(g2)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sklearn.metrics" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/old/check_regression.ipynb b/notebooks/old/check_regression.ipynb deleted file mode 100644 index 17131032c..000000000 --- a/notebooks/old/check_regression.ipynb +++ /dev/null @@ -1,336 +0,0 @@ -{ - 
"cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import matplotlib.pyplot as plt\n", - "import sys\n", - "\n", - "import numpy as np\n", - "import sklearn\n", - "import keras\n", - "import pickle\n", - "\n", - "import os\n", - "os.chdir(\"..\")\n", - "\n", - "sys.path += [\"test\"]\n", - "from train_regression import get_unique_X_y\n", - "from predict_pf import set_pred_to_zero" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from matplotlib.colors import LogNorm" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "training_info = json.load(open(\"regression.json\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.plot(training_info[\"loss\"])\n", - "plt.plot(training_info[\"val_loss\"])\n", - "plt.xlabel(\"epochs\")\n", - "plt.ylim(0,0.1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "all_Xs = []\n", - "all_ys = []\n", - "for i in range(500):\n", - " for j in range(6,7):\n", - " fn = \"data/TTbar/191009_155100/step3_AOD_{1}_ev{0}.npz\".format(i, j)\n", - " print(\"Loading {0}\".format(fn))\n", - " fi = open(fn, \"rb\")\n", - " data = np.load(fi)\n", - "\n", - " Xs, ys = get_unique_X_y(data[\"elements\"], data[\"element_block_id\"], data[\"candidates\"], data[\"candidate_block_id\"])\n", - "\n", - " all_Xs += [Xs]\n", - " all_ys += [ys]\n", - "all_Xs = np.vstack(all_Xs)\n", - "all_ys = np.vstack(all_ys)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X_types = all_Xs[:, :, 0]\n", - "X_kin = all_Xs[:, :, 1:]\n", - "X_kin = X_kin.reshape((X_kin.shape[0], X_kin.shape[1]*X_kin.shape[2]))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with open(\"preprocessing.pkl\", \"rb\") as fi:\n", - " preproc = pickle.load(fi)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "enc_X = preproc[\"enc_X\"]\n", - "scaler_X = preproc[\"scaler_X\"]\n", - "enc_y = preproc[\"enc_y\"]\n", - "scaler_y = preproc[\"scaler_y\"]\n", - "num_onehot_y = 27" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "trf = enc_X.transform(X_types)\n", - "X = np.hstack([trf, scaler_X.transform(X_kin)])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model2 = keras.models.load_model(\"regression.h5\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pred2 = model2.predict(X, batch_size=100000)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cand_types = enc_y.inverse_transform(pred2[:, :num_onehot_y])\n", - "ncand = (cand_types!=0).sum(axis=1)\n", - "ncand_true = (all_ys[:, :, 0]!=0).sum(axis=1)\n", - "\n", - "cand_momenta = scaler_y.inverse_transform(pred2[:, num_onehot_y:])\n", - "set_pred_to_zero(cand_momenta, ncand)\n", - "cand_momenta = cand_momenta.reshape((cand_momenta.shape[0], 3, 3))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def 
text_in_box(mat):\n", - " for i in range(len(mat)):\n", - " for j in range(len(mat)):\n", - " plt.text(i, j, \"{0:.3f}\".format(mat[i,j]), ha=\"center\", va=\"center\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "labels = [0,1,2,3]\n", - "confusion_matrix_ncand = sklearn.metrics.confusion_matrix(ncand_true, ncand, labels=labels)\n", - "confusion_matrix_ncand = confusion_matrix_ncand / np.sum(confusion_matrix_ncand)\n", - "plt.imshow(confusion_matrix_ncand*100, norm=LogNorm(vmin=1e-9, vmax=100))\n", - "plt.xticks(range(len(labels)), labels=[int(x) for x in labels])\n", - "plt.yticks(range(len(labels)), labels=[int(x) for x in labels])\n", - "plt.xlim(-1,4)\n", - "plt.ylim(-1,4)\n", - "plt.colorbar()\n", - "plt.xlabel(\"True ncand\")\n", - "plt.ylabel(\"Predicted ncand\")\n", - "text_in_box(confusion_matrix_ncand*100)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "confusion_matrix_ncand[2]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "labels = np.unique(all_ys[:, :, 0])\n", - "confusion_matrix = sklearn.metrics.confusion_matrix(all_ys[:, :, 0].flatten(), cand_types.flatten(), labels=labels)\n", - "confusion_matrix = confusion_matrix / np.sum(confusion_matrix)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10,10))\n", - "plt.imshow(confusion_matrix*100, norm=LogNorm(vmin=1e-9, vmax=100))\n", - "plt.xlim(-1,9)\n", - "plt.ylim(-1,9)\n", - "plt.colorbar()\n", - "plt.xlabel(\"True pdgid\")\n", - "plt.ylabel(\"Predicted pdgid\")\n", - "text_in_box(confusion_matrix*100)\n", - "plt.xticks(range(len(labels)), labels=[int(x) for x in labels])\n", - "plt.yticks(range(len(labels)), labels=[int(x) for x in labels])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#all cases where there was a true particle and it was also reconstructed\n", - "msk = (all_ys[:, :, 0].flatten()!=0) & (cand_types.flatten() != 0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "plt.scatter(all_ys[:, :, 1].flatten()[msk], cand_momenta[:, :, 0].flatten()[msk], alpha=0.2, marker=\".\")\n", - "plt.xlim(0,10)\n", - "plt.ylim(0,10)\n", - "plt.xlabel(\"True $p_T$\")\n", - "plt.ylabel(\"Reconstructed $p_T$\")\n", - "plt.title(\"Baseline PF block algo regression\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "b = np.linspace(0, 20, 40)\n", - "plt.hist(all_ys[:, :, 1].flatten()[msk], bins=b, histtype=\"step\", lw=2, label=\"true\");\n", - "plt.hist(cand_momenta[:, :, 0].flatten()[msk], bins=b, histtype=\"step\", lw=2, label=\"predicted\");\n", - "plt.yscale(\"log\")\n", - "plt.legend()\n", - "plt.xlabel(\"Candidate $p_T$ [GeV]\")\n", - "plt.title(\"Baseline PF block algo regression\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "plt.scatter(all_ys[:, :, 2].flatten()[msk], cand_momenta[:, :, 1].flatten()[msk], alpha=0.2, marker=\".\")\n", - "plt.xlim(-6,6)\n", - "plt.ylim(-6,6)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "b = 
np.linspace(-6, 6, 40)\n", - "plt.hist(all_ys[:, :, 2].flatten()[msk], bins=b, histtype=\"step\", lw=2, label=\"true\");\n", - "plt.hist(cand_momenta[:, :, 1].flatten()[msk], bins=b, histtype=\"step\", lw=2, label=\"predicted\");\n", - "plt.legend()\n", - "plt.xlabel(\"Candidate $p_T$ [GeV]\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "plt.scatter(all_ys[:, :, 3].flatten()[msk], cand_momenta[:, :, 2].flatten()[msk], alpha=0.2, marker=\".\")\n", - "plt.xlim(-4,4)\n", - "plt.ylim(-4,4)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "b = np.linspace(-4, 4, 40)\n", - "plt.hist(all_ys[:, :, 3].flatten()[msk], bins=b, histtype=\"step\", lw=2);\n", - "plt.hist(cand_momenta[:, :, 2].flatten()[msk], bins=b, histtype=\"step\", lw=2);" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/old/data.ipynb b/notebooks/old/data.ipynb deleted file mode 100644 index 6447e6ca0..000000000 --- a/notebooks/old/data.ipynb +++ /dev/null @@ -1,234 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pickle\n", - "import matplotlib.pyplot as plt\n", - "%matplotlib inline\n", - "\n", - "import networkx as nx\n", - "import pandas as pd\n", - "import numpy as np" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Run the following command within a CMSSW-aware environment to load the ROOT ntuple produced using `Validation/RecoParticleFlow/plugins/PFAnalysis.cc` and produce python-friendly data files:\n", - "\n", - "```bash\n", - "python test/postprocessing2.py --input data/TTbar_14TeV_TuneCUETP8M1_cfi/pfntuple_1.root \\\n", - " --events-per-file 1 \\\n", - " --save-full-graph \\\n", - " --save-normalized-table \\\n", - " --save-images\n", - "```\n", - "\n", - "The output will be stored in `data/TTbar_14TeV_TuneCUETP8M1_cfi/raw/pfntuple_1_*.pkl`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!wget http://login-1.hep.caltech.edu/~jpata/particleflow/2020-04/TTbar_14TeV_TuneCUETP8M1_cfi/pickle/pfntuple_1_0.pkl" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "all_data = pickle.load(open(\"pfntuple_1_0.pkl\", \"rb\"), encoding='iso-8859-1')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "one_event = all_data[0]\n", - "one_event.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "graph = one_event[\"full_graph\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "graph" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "all_elements = [n for n in graph.nodes if n[0] == \"elem\"]\n", - "len(all_elements)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "all_gen = [n for n in graph.nodes if n[0] == \"tp\" or n[0] == \"sc\"]\n", - "len(all_gen)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "all_pfcand = [n for n in graph.nodes if n[0] == \"pfcand\"]\n", - "len(all_pfcand)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def draw_event(g):\n", - " pos = {}\n", - " for node in g.nodes:\n", - " pos[node] = (g.nodes[node][\"eta\"], g.nodes[node][\"phi\"])\n", - "\n", - " fig = plt.figure(figsize=(5,5))\n", - "\n", - " nodes_to_draw = [n for n in g.nodes if n[0]==\"elem\"]\n", - " nx.draw_networkx(g, pos=pos, with_labels=False, node_size=5, nodelist=nodes_to_draw, edgelist=[], node_color=\"red\", node_shape=\"s\", alpha=0.5)\n", - "\n", - " nodes_to_draw = [n for n in g.nodes if n[0]==\"pfcand\"]\n", - " nx.draw_networkx(g, pos=pos, with_labels=False, node_size=10, nodelist=nodes_to_draw, edgelist=[], node_color=\"green\", node_shape=\"x\", alpha=0.5)\n", - "\n", - " nodes_to_draw = [n for n in g.nodes if (n[0]==\"sc\" or n[0]==\"tp\")]\n", - " nx.draw_networkx(g, pos=pos, with_labels=False, node_size=1, nodelist=nodes_to_draw, edgelist=[], node_color=\"blue\", node_shape=\".\", alpha=0.5)\n", - "\n", - " #draw edges between genparticles and elements\n", - " edges_to_draw = [e for e in g.edges if e[0] in nodes_to_draw]\n", - " nx.draw_networkx_edges(g, pos, edgelist=edges_to_draw, arrows=False, alpha=0.1)\n", - "\n", - " plt.xlim(-6,6)\n", - " plt.ylim(-4,4)\n", - " plt.tight_layout()\n", - " plt.axis(\"on\")\n", - " plt.xlabel(\"$\\eta$\")\n", - " plt.xlabel(\"$\\phi$\")\n", - " return fig" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "draw_event(one_event[\"full_graph\"]);" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Normalized table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.DataFrame(np.concatenate([one_event[\"Xelem\"], one_event[\"ygen\"], one_event[\"ycand\"]], axis=-1))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#show some of the more important columns\n", - 
"df[[0,2,3,4,13,15,16,17,21,23,24,25]]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10,10))\n", - "#Element to element input distance matrix - graph adjacency\n", - "plt.imshow(one_event[\"dm\"].todense()>0, cmap=\"Greys\", interpolation=\"none\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10,10))\n", - "plt.imshow(one_event[\"dm_elem_cand\"].todense()>0, cmap=\"Greys\", interpolation=\"none\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10,10))\n", - "plt.imshow(one_event[\"dm_elem_gen\"].todense()>0, cmap=\"Greys\", interpolation=\"none\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/old/endtoend_gnn.ipynb b/notebooks/old/endtoend_gnn.ipynb deleted file mode 100644 index d87ecd338..000000000 --- a/notebooks/old/endtoend_gnn.ipynb +++ /dev/null @@ -1,718 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "import torch_geometric\n", - "\n", - "import torch.nn as nn\n", - "import torch.nn.functional as F\n", - "import torch_geometric.transforms as T\n", - "from torch_geometric.nn import EdgeConv, MessagePassing, EdgePooling\n", - "from torch.nn import Sequential as Seq, Linear as Lin, ReLU\n", - "from torch_scatter import scatter_mean\n", - "from torch_geometric.nn.inits import reset\n", - "from torch_geometric.data import Dataset, Data, DataLoader\n", - "\n", - "from glob import glob\n", - "import numpy as np\n", - "import os.path as osp\n", - "\n", - "import math\n", - "import time" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "device = torch.device('cuda')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numba\n", - "\n", - "@numba.njit\n", - "def regularize_X_y(X_elements, y_candidates, X_element_block_id, y_candidate_block_id):\n", - " ret_x = np.zeros_like(X_elements)\n", - " ret_x_id = np.zeros_like(X_element_block_id)\n", - " ret_y = np.zeros((X_elements.shape[0], y_candidates.shape[1]))\n", - " ret_y_id = np.zeros((X_elements.shape[0]), dtype=np.int64)\n", - " \n", - " idx = 0\n", - " for cl in np.unique(X_element_block_id):\n", - " m1 = X_element_block_id == cl\n", - " m2 = y_candidate_block_id == cl\n", - "\n", - " x = X_elements[m1]\n", - " y = y_candidates[m2]\n", - "\n", - " n = x.shape[0]\n", - " ret_x[idx:idx+n] = x[:]\n", - " ret_x_id[idx:idx+n] = cl\n", - " ret_y[idx:idx+y.shape[0]] = y[:]\n", - " ret_y_id[idx:idx+n] = cl\n", - " \n", - " idx += n\n", - " \n", - " return ret_x, ret_y, ret_x_id, ret_y_id" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class PFGraphDataset(Dataset):\n", - " def __init__(self, root, transform=None, pre_transform=None, connect_all=False, max_elements=None, 
max_candidates=None):\n", - " self._connect_all = connect_all\n", - " self._max_elements = max_elements\n", - " self._max_candidates = max_candidates\n", - " super(PFGraphDataset, self).__init__(root, transform, pre_transform)\n", - " self.raw_dir = root\n", - "\n", - " @property\n", - " def raw_file_names(self):\n", - " raw_list = glob(self.raw_dir+'/*ev*.npz')\n", - " return sorted([l.replace(self.raw_dir,'.') for l in raw_list])\n", - "\n", - " @property\n", - " def processed_file_names(self):\n", - " return ['data_{}.pt'.format(i) for i in range(len(self.raw_file_names))]\n", - "\n", - " def __len__(self):\n", - " return len(self.processed_file_names)\n", - "\n", - " def download(self):\n", - " # Download to `self.raw_dir`.\n", - " pass\n", - "\n", - " def process(self):\n", - " feature_scale = np.array([1., 1., 1., 1., 1., 1., 1., 1.])\n", - " i = 0\n", - " \n", - " for raw_file_name in self.raw_file_names:\n", - " if i%100 == 0:\n", - " print(i, len(self.raw_file_names))\n", - " \n", - " dist_file_name = raw_file_name.replace('ev','dist')\n", - " #print(\"loading data from files: {0}, {1}\".format(osp.join(self.raw_dir, raw_file_name), osp.join(self.raw_dir, dist_file_name)))\n", - " try:\n", - " fi = np.load(osp.join(self.raw_dir, raw_file_name))\n", - " fi_dist = np.load(osp.join(self.raw_dir, dist_file_name))\n", - " except Exception as e:\n", - " print(\"Could not open files: {0}, {1}\".format(osp.join(self.raw_dir, raw_file_name), osp.join(self.raw_dir, dist_file_name)))\n", - " continue\n", - " \n", - " X_elements = fi['elements'][:self._max_elements]\n", - " X_element_block_id = fi['element_block_id'][:self._max_elements]\n", - " y_candidates = fi['candidates'][:self._max_candidates, 1:]\n", - " y_candidate_block_id = fi['candidate_block_id'][:self._max_candidates]\n", - " \n", - " X_elements, y_candidates, X_element_block_id, y_candidate_block_id = regularize_X_y(\n", - " X_elements, y_candidates, X_element_block_id, y_candidate_block_id)\n", - " \n", - " num_elements = X_elements.shape[0]\n", - "\n", - " row_index = fi_dist['row']\n", - " col_index = fi_dist['col']\n", - " num_edges = row_index.shape[0]\n", - "\n", - " edge_index = np.zeros((2, 2*num_edges))\n", - " edge_index[0,:num_edges] = row_index\n", - " edge_index[1,:num_edges] = col_index\n", - " edge_index[0,num_edges:] = col_index\n", - " edge_index[1,num_edges:] = row_index\n", - " edge_index = torch.tensor(edge_index, dtype=torch.long)\n", - "\n", - " edge_data = fi_dist['data']\n", - " edge_attr = np.zeros((2*num_edges,1))\n", - " edge_attr[:num_edges,0] = edge_data\n", - " edge_attr[num_edges:,0] = edge_data\n", - " edge_attr = torch.tensor(edge_attr, dtype=torch.float)\n", - "\n", - " x = torch.tensor(X_elements/feature_scale, dtype=torch.float)\n", - "\n", - " #y = [X_element_block_id[i]==X_element_block_id[j] for (i,j) in edge_index.t().contiguous()]\n", - " y = torch.tensor(y_candidates, dtype=torch.float)\n", - " \n", - " data = Data(x=x, edge_index=edge_index, y=y, edge_attr=edge_attr)\n", - " data.x_cluster_labels = torch.tensor(X_element_block_id, dtype=torch.float)\n", - " data.y_cluster_labels = torch.tensor(y_candidate_block_id, dtype=torch.float)\n", - "# data.y_cluster_labels = torch.nn.functional.pad(\n", - "# data.y_cluster_labels, (0, x.shape[0] - data.y_cluster_labels.shape[0]),\n", - "# value=-1)\n", - "\n", - " if self.pre_filter is not None and not self.pre_filter(data):\n", - " continue\n", - " if self.pre_transform is not None:\n", - " data = self.pre_transform(data)\n", - "\n", - " 
torch.save(data, osp.join(self.processed_dir, 'data_{}.pt'.format(i)))\n", - " i += 1\n", - "\n", - " def get(self, idx):\n", - " data = torch.load(osp.join(self.processed_dir, 'data_{}.pt'.format(idx)))\n", - " return data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#!mkdir ../data/TTBar_run3/processed" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "full_dataset = PFGraphDataset(root='../data/TTbar_run3')\n", - "#full_dataset.process()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data = full_dataset.get(0)\n", - "input_dim = data.x.shape[1]\n", - "edge_dim = data.edge_attr.shape[1]\n", - "\n", - "batch_size = 4\n", - "n_epochs = 50\n", - "lr = 1e-5\n", - "hidden_dim = 128\n", - "n_iters = 1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class EdgeConvWithEdgeAttr(MessagePassing):\n", - " def __init__(self, nn, aggr='max', **kwargs):\n", - " super(EdgeConvWithEdgeAttr, self).__init__(aggr=aggr, **kwargs)\n", - " self.nn = nn\n", - " self.reset_parameters()\n", - "\n", - " def reset_parameters(self):\n", - " reset(self.nn)\n", - "\n", - " def forward(self, x, edge_index, edge_attr):\n", - " \"\"\"\"\"\"\n", - " x = x.unsqueeze(-1) if x.dim() == 1 else x\n", - " pseudo = edge_attr.unsqueeze(-1) if edge_attr.dim() == 1 else edge_attr\n", - " return self.propagate(edge_index, x=x, pseudo=pseudo)\n", - "\n", - " def message(self, x_i, x_j, pseudo):\n", - " return self.nn(torch.cat([x_i, x_j - x_i, pseudo], dim=1))\n", - "\n", - " def __repr__(self):\n", - " return '{}(nn={})'.format(self.__class__.__name__, self.nn)\n", - "\n", - "class PFNet1(nn.Module):\n", - " def __init__(self, input_dim=3, hidden_dim=32, edge_dim=1, output_dim=1, n_iters=1, aggr='add'):\n", - " super(PFNet1, self).__init__()\n", - " \n", - " convnn = nn.Sequential(nn.Linear(2*(hidden_dim + input_dim)+edge_dim, 2*hidden_dim),\n", - " nn.ReLU(),\n", - " nn.Linear(2*hidden_dim, hidden_dim),\n", - " nn.ReLU(),\n", - " nn.Linear(hidden_dim, hidden_dim),\n", - " nn.ReLU(),\n", - " nn.Linear(hidden_dim, hidden_dim),\n", - " nn.ReLU()\n", - " )\n", - " self.n_iters = n_iters\n", - " \n", - " self.batchnorm1 = nn.BatchNorm1d(input_dim)\n", - " self.batchnorm2 = nn.BatchNorm1d(40)\n", - "\n", - " self.inputnet = nn.Sequential(\n", - " nn.Linear(input_dim, hidden_dim),\n", - " nn.ReLU(),\n", - " nn.Linear(hidden_dim, hidden_dim),\n", - " nn.ReLU(),\n", - " nn.Linear(hidden_dim, hidden_dim),\n", - " nn.ReLU(),\n", - " nn.Linear(hidden_dim, hidden_dim),\n", - " nn.ReLU(),\n", - " )\n", - "\n", - "# self.edgenetwork = nn.Sequential(nn.Linear(2*(hidden_dim+input_dim)+edge_dim,2*hidden_dim),\n", - "# nn.ReLU(),\n", - "# nn.Linear(2*hidden_dim, output_dim),\n", - "# nn.Sigmoid())\n", - "\n", - " self.nodenetwork = EdgeConvWithEdgeAttr(nn=convnn, aggr=aggr)\n", - " \n", - " self.pooling1 = EdgePooling(40, dropout=0.2)\n", - " self.pooling2 = EdgePooling(40, dropout=0.2)\n", - " self.pooling3 = EdgePooling(40, dropout=0.2)\n", - " \n", - " self.outnetwork = nn.Sequential(nn.Linear(40, 100),\n", - " nn.ReLU(),\n", - " nn.Linear(100, 100),\n", - " nn.ReLU(),\n", - " nn.Linear(100, 100),\n", - " nn.ReLU(),\n", - " nn.Linear(100, 100),\n", - " nn.ReLU(),\n", - " nn.Linear(100, 3),\n", - " )\n", - "\n", - " def forward(self, data): \n", - " X = self.batchnorm1(data.x)\n", 
- " H = self.inputnet(X)\n", - " x = torch.cat([H,X],dim=-1)\n", - "\n", - " for i in range(self.n_iters):\n", - " H = self.nodenetwork(x, data.edge_index, data.edge_attr)\n", - " x = torch.cat([H,X],dim=-1)\n", - "\n", - " #row,col = data.edge_index \n", - " #output = self.edgenetwork(torch.cat([x[row], x[col], data.edge_attr],dim=-1)).squeeze(-1)\n", - "\n", - " pooled, edge_index, batch1, unpool_info1 = self.pooling1(x, data.edge_index, data.batch)\n", - " pooled, edge_index, batch2, unpool_info2 = self.pooling2(pooled, edge_index, batch1)\n", - " pooled, edge_index, batch3, unpool_info3 = self.pooling3(pooled, edge_index, batch2)\n", - " \n", - " r = self.outnetwork(self.batchnorm2(pooled))\n", - " \n", - " return r, unpool_info1.cluster, unpool_info2.cluster, unpool_info3.cluster\n", - " \n", - "class PFNet2(nn.Module):\n", - " def __init__(self, input_dim=3, hidden_dim=32, edge_dim=1, output_dim=1, n_iters=1, aggr='mean'):\n", - " super(PFNet2, self).__init__()\n", - " \n", - " convnn = nn.Sequential(nn.Linear(2*(hidden_dim + input_dim)+edge_dim, 2*hidden_dim),\n", - " nn.LeakyReLU(),\n", - " nn.Linear(2*hidden_dim, hidden_dim),\n", - " nn.LeakyReLU(),\n", - " nn.Linear(hidden_dim, hidden_dim),\n", - " nn.LeakyReLU(),\n", - " )\n", - " convnn2 = nn.Sequential(nn.Linear(2*(hidden_dim + input_dim)+edge_dim, 2*hidden_dim),\n", - " nn.LeakyReLU(),\n", - " nn.Linear(2*hidden_dim, hidden_dim),\n", - " nn.LeakyReLU(),\n", - " nn.Linear(hidden_dim, 3),\n", - " )\n", - "\n", - " self.n_iters = n_iters\n", - " \n", - " self.batchnorm1 = nn.BatchNorm1d(input_dim)\n", - " self.batchnorm2 = nn.BatchNorm1d(hidden_dim + input_dim)\n", - "\n", - " self.inputnet = nn.Sequential(\n", - " nn.Linear(input_dim, hidden_dim),\n", - " nn.LeakyReLU(),\n", - " nn.Linear(hidden_dim, hidden_dim),\n", - " nn.LeakyReLU(),\n", - " )\n", - "\n", - " self.nodenetwork = EdgeConvWithEdgeAttr(nn=convnn, aggr=aggr)\n", - " self.nodenetwork2 = EdgeConvWithEdgeAttr(nn=convnn2, aggr=aggr)\n", - " \n", - "# self.outnetwork = nn.Sequential(nn.Linear(hidden_dim + input_dim, 64),\n", - "# nn.LeakyReLU(),\n", - "# nn.Linear(64, 32),\n", - "# nn.LeakyReLU(),\n", - "# nn.Linear(32, 16),\n", - "# nn.LeakyReLU(),\n", - "# nn.Linear(16, 3),\n", - "# )\n", - "\n", - " def forward(self, data): \n", - " X = self.batchnorm1(data.x)\n", - " H = self.inputnet(X)\n", - " x = torch.cat([H,X],dim=-1)\n", - "\n", - " for i in range(self.n_iters):\n", - " x = self.batchnorm2(x)\n", - " H = self.nodenetwork(x, data.edge_index, data.edge_attr)\n", - " x = torch.cat([H,X],dim=-1)\n", - " \n", - " #r = self.outnetwork(x)\n", - " r = self.nodenetwork2(x, data.edge_index, data.edge_attr)\n", - " \n", - " return r" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numba\n", - "\n", - "@numba.njit\n", - "def reorder_predicted_target(predicted_y, target_y, x_cluster_labels, y_cluster_labels, p1, p2, p3):\n", - " maxvals = max(predicted_y.shape[0], target_y.shape[0])\n", - " chosen_pred = np.zeros(maxvals, dtype=np.int32)\n", - " chosen_target = np.zeros(maxvals, dtype=np.int32)\n", - " \n", - " idx = 0\n", - " for cl in np.unique(y_cluster_labels):\n", - " m1 = y_cluster_labels == cl\n", - " m2 = x_cluster_labels == cl\n", - "\n", - " #get the predicted and target candidates that use elements from this block\n", - " pred = p3[p2[p1[m2]]]\n", - " tgt = np.where(m1)[0]\n", - " n = min(pred.shape[0], tgt.shape[0])\n", - "\n", - " chosen_pred[idx:idx+n] = pred[:n]\n", - " 
chosen_target[idx:idx+n] = tgt[:n]\n", - " \n", - " idx += n\n", - " return chosen_pred[:idx], chosen_target[:idx]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# train_loader = DataLoader(full_dataset, batch_size=1, pin_memory=True, shuffle=False)\n", - "# model = PFNet1(input_dim=input_dim, hidden_dim=hidden_dim, edge_dim=edge_dim, n_iters=n_iters).to(device)\n", - "# optimizer = torch.optim.Adam(model.parameters(), lr = lr)\n", - "# loss = torch.nn.MSELoss()\n", - "\n", - "# # print(model)\n", - "# # model_parameters = filter(lambda p: p.requires_grad, model.parameters())\n", - "# # params = sum([np.prod(p.size()) for p in model_parameters])\n", - "# # print(params)\n", - "\n", - "# model.train()\n", - "\n", - "# losses = []\n", - "# corrs = []\n", - "# t0 = time.time()\n", - "\n", - "# for j in range(200):\n", - "# losses_batch = []\n", - "# corrs_batch = []\n", - " \n", - "# num_pred = []\n", - "# num_true = []\n", - "# for i, data in enumerate(train_loader):\n", - "# if i>200:\n", - "# break\n", - "# data = data.to(device)\n", - "# optimizer.zero_grad()\n", - "# batch_target = data.y \n", - "# batch_output, pool_clusters1, pool_clusters2, pool_clusters3 = model(data)\n", - " \n", - "# #Find the correspondence between the predicted candidates and true candidates based\n", - "# #on the block id of the input elements\n", - "# chosen_pred, chosen_target = reorder_predicted_target(\n", - "# batch_output.detach().cpu().numpy(),\n", - "# data.y.detach().cpu().numpy(),\n", - "# data.x_cluster_labels.detach().cpu().numpy(),\n", - "# data.y_cluster_labels.detach().cpu().numpy(),\n", - "# pool_clusters1.detach().cpu().numpy(),\n", - "# pool_clusters2.detach().cpu().numpy(),\n", - "# pool_clusters3.detach().cpu().numpy()\n", - "# )\n", - " \n", - "# #Create arrays where each row corresponds to a matched true or predicted candidate\n", - "# preds_cleaned = batch_output[torch.tensor(chosen_pred, dtype=torch.long)]\n", - "# targets_cleaned = data.y[torch.tensor(chosen_target, dtype=torch.long)]\n", - "# batch_loss = loss(\n", - "# preds_cleaned,\n", - "# targets_cleaned\n", - "# )\n", - " \n", - "# batch_loss.backward()\n", - "# batch_loss_item = batch_loss.item()\n", - "# optimizer.step()\n", - "\n", - "# corr_pt = np.corrcoef(\n", - "# preds_cleaned[:, 0].detach().cpu().numpy(),\n", - "# targets_cleaned[:, 0].detach().cpu().numpy())[0,1]\n", - "# corrs_batch += [corr_pt]\n", - "# losses_batch += [batch_loss_item]\n", - " \n", - "# l = np.mean(losses_batch)\n", - "# losses += [l]\n", - "# corrs += [np.mean(corrs_batch)]\n", - "# t1 = time.time()\n", - "# print(\"epoch={0}, dt={1:.1f}s, loss={2:.4f}, corr_pt={3:.4f}\".format(j, t1 - t0, losses[-1], corrs[-1]))\n", - "# t0 = t1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "train_loader = DataLoader(full_dataset, batch_size=batch_size, pin_memory=True, shuffle=False)\n", - "model = PFNet2(input_dim=input_dim, hidden_dim=hidden_dim, edge_dim=edge_dim, n_iters=n_iters).to(device)\n", - "optimizer = torch.optim.Adam(model.parameters(), lr = lr)\n", - "loss = torch.nn.MSELoss()\n", - "loss2 = torch.nn.BCELoss()\n", - "\n", - "print(model)\n", - "model_parameters = filter(lambda p: p.requires_grad, model.parameters())\n", - "params = sum([np.prod(p.size()) for p in model_parameters])\n", - "print(\"params\", params)\n", - "\n", - "model.train()\n", - "\n", - "losses = []\n", - "corrs = []\n", - "t0 = time.time()\n", - 
"\n", - "for j in range(n_epochs):\n", - " losses_batch = []\n", - " corrs_batch = []\n", - " \n", - " num_pred = []\n", - " num_true = []\n", - " for i, data in enumerate(train_loader):\n", - " if i>500:\n", - " break\n", - " data = data.to(device)\n", - " optimizer.zero_grad()\n", - " output = model(data)\n", - " \n", - " batch_loss = loss(\n", - " output,\n", - " data.y\n", - " )\n", - " \n", - " if i==0 and j%10==0:\n", - " #print(is_pred.detach().cpu().numpy())\n", - " #print((data.y[:, 0]!=0).to(dtype=torch.float).detach().cpu().numpy())\n", - " print(output[:5].detach().cpu().numpy())\n", - " print(data.y[:5].detach().cpu().numpy())\n", - " \n", - " batch_loss.backward()\n", - " batch_loss_item = batch_loss.item()\n", - " optimizer.step()\n", - "\n", - " corr_pt = np.corrcoef(\n", - " output[:, 0].detach().cpu().numpy(),\n", - " data.y[:, 0].detach().cpu().numpy())[0,1]\n", - " corrs_batch += [corr_pt]\n", - " losses_batch += [batch_loss_item]\n", - " \n", - " l = np.mean(losses_batch)\n", - " losses += [l]\n", - " corrs += [np.mean(corrs_batch)]\n", - " t1 = time.time()\n", - " print(\"epoch={0}, dt={1:.1f}s, loss={2:.4f}, corr_pt={3:.4f}\".format(j, t1 - t0, losses[-1], corrs[-1]))\n", - " t0 = t1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.plot(losses)\n", - "plt.ylim(0.8,2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.plot(corrs)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "d = data.to(device=device)\n", - "output = model(d)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cl = (data.x_cluster_labels == 0) & (data.batch == 0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data.x[cl]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data.y[cl]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output[cl]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "msk = data.y[:, 0] != 0\n", - "plt.scatter(\n", - " data.y[msk][:, 0].detach().cpu().numpy(),\n", - " output[msk][:, 0].detach().cpu().numpy(),\n", - " marker=\".\", alpha=0.5)\n", - "plt.plot([0,5],[0,5])\n", - "plt.xlim(0,5)\n", - "plt.ylim(0,5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "plt.scatter(\n", - " data.y[msk, 1].detach().cpu().numpy(),\n", - " output[msk, 1].detach().cpu().numpy(),\n", - " marker=\".\", alpha=0.5)\n", - "plt.plot([-5,5],[-5,5])\n", - "plt.xlim(-5,5)\n", - "plt.ylim(-5,5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "plt.scatter(\n", - " data.y[msk, 2].detach().cpu().numpy(),\n", - " output[msk, 2].detach().cpu().numpy(),\n", - " marker=\".\", alpha=0.5)\n", - "plt.plot([-5,5],[-5,5])\n", - "plt.xlim(-5,5)\n", - "plt.ylim(-5,5)" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "b = np.linspace(0,10,40)\n", - "plt.hist(data.y[msk, 0].detach().cpu().numpy(), bins=b, lw=2, histtype=\"step\");\n", - "plt.hist(output[msk, 0].detach().cpu().numpy(), bins=b, lw=2, histtype=\"step\");" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "b = np.linspace(-5,5,40)\n", - "plt.hist(data.y[msk, 1].detach().cpu().numpy(), bins=b, lw=2, histtype=\"step\");\n", - "plt.hist(output[msk, 1].detach().cpu().numpy(), bins=b, lw=2, histtype=\"step\");" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "b = np.linspace(-5,5,40)\n", - "plt.hist(data.y[msk, 2].detach().cpu().numpy(), bins=b, lw=2, histtype=\"step\");\n", - "plt.hist(output[msk, 2].detach().cpu().numpy(), bins=b, lw=2, histtype=\"step\");" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/old/genpf.ipynb b/notebooks/old/genpf.ipynb deleted file mode 100644 index a9f9798fb..000000000 --- a/notebooks/old/genpf.ipynb +++ /dev/null @@ -1,1359 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import numba\n", - "import hepaccelerate.backend_cpu as ha\n", - "import uproot\n", - "import sklearn.metrics\n", - "from collections import Counter" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def freqtable(arr, labels):\n", - " ret = {}\n", - " for l in labels:\n", - " ret[l] = np.sum(arr==l)\n", - " return ret" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_confusion_matrix(cm,\n", - " target_names,\n", - " title='Confusion matrix',\n", - " cmap=None,\n", - " normalize=True):\n", - " \"\"\"\n", - " given a sklearn confusion matrix (cm), make a nice plot\n", - "\n", - " Arguments\n", - " ---------\n", - " cm: confusion matrix from sklearn.metrics.confusion_matrix\n", - "\n", - " target_names: given classification classes such as [0, 1, 2]\n", - " the class names, for example: ['high', 'medium', 'low']\n", - "\n", - " title: the text to display at the top of the matrix\n", - "\n", - " cmap: the gradient of the values displayed from matplotlib.pyplot.cm\n", - " see http://matplotlib.org/examples/color/colormaps_reference.html\n", - " plt.get_cmap('jet') or plt.cm.Blues\n", - "\n", - " normalize: If False, plot the raw numbers\n", - " If True, plot the proportions\n", - "\n", - " Usage\n", - " -----\n", - " plot_confusion_matrix(cm = cm, # confusion matrix created by\n", - " # sklearn.metrics.confusion_matrix\n", - " normalize = True, # show proportions\n", - " target_names = y_labels_vals, # list of names of the classes\n", - " title = best_estimator_name) # title of graph\n", - "\n", - " Citiation\n", - " ---------\n", - " 
http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html\n", - "\n", - " \"\"\"\n", - " import matplotlib.pyplot as plt\n", - " import numpy as np\n", - " import itertools\n", - "\n", - " accuracy = np.trace(cm) / float(np.sum(cm))\n", - " misclass = 1 - accuracy\n", - "\n", - " if cmap is None:\n", - " cmap = plt.get_cmap('Blues')\n", - "\n", - " if normalize:\n", - " cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", - " cm[np.isnan(cm)] = 0.0\n", - "\n", - " plt.figure(figsize=(8, 6))\n", - " plt.imshow(cm, interpolation='nearest', cmap=cmap)\n", - " plt.title(title)\n", - " plt.colorbar()\n", - "\n", - " if target_names is not None:\n", - " tick_marks = np.arange(len(target_names))\n", - " plt.xticks(tick_marks, target_names, rotation=45)\n", - " plt.yticks(tick_marks, target_names)\n", - "\n", - " thresh = cm.max() / 1.5 if normalize else cm.max() / 2\n", - " for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n", - " if normalize:\n", - " plt.text(j, i, \"{:0.2f}\".format(cm[i, j]),\n", - " horizontalalignment=\"center\",\n", - " color=\"white\" if cm[i, j] > thresh else \"black\",\n", - " fontsize=8)\n", - " else:\n", - " plt.text(j, i, \"{:,}\".format(cm[i, j]),\n", - " horizontalalignment=\"center\",\n", - " color=\"white\" if cm[i, j] > thresh else \"black\",\n", - " fontsize=8)\n", - "\n", - "\n", - " plt.ylabel('True label')\n", - " plt.xlim(-1, len(target_names))\n", - " plt.ylim(-1, len(target_names))\n", - " plt.xlabel('Predicted label\\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))\n", - " plt.tight_layout()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "t = uproot.open(\"../pfntuple_1.root\").get(\"ana/pftree\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "iev = 0\n", - "cand_pt = t.array('pfcandidate_pt')[iev]\n", - "cand_eta = t.array('pfcandidate_eta')[iev]\n", - "cand_phi = t.array('pfcandidate_phi')[iev]\n", - "cand_pid = t.array('pfcandidate_pdgid')[iev]\n", - "m = np.ones_like(cand_pt, dtype=np.bool)\n", - "\n", - "tp_pt = t.array('trackingparticle_pt')[iev]\n", - "tp_eta = t.array('trackingparticle_eta')[iev]\n", - "tp_phi = t.array('trackingparticle_phi')[iev]\n", - "tp_pid = t.array('trackingparticle_pid')[iev]\n", - "tp_bx = t.array('trackingparticle_bx')[iev]\n", - "\n", - "sc_pt = t.array('simcluster_pt')[iev]\n", - "sc_eta = t.array('simcluster_eta')[iev]\n", - "sc_phi = t.array('simcluster_phi')[iev]\n", - "sc_pid = t.array('simcluster_pid')[iev]\n", - "sc_bx = t.array('simcluster_bx')[iev]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "t.array(\"trackingparticle_to_element\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "t.array(\"simcluster_to_element\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "st_x = t.array(\"simtrack_x\")[iev]\n", - "st_y = t.array(\"simtrack_y\")[iev]\n", - "st_z = t.array(\"simtrack_z\")[iev]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "rh_x = t.array(\"rechit_x\")[iev]\n", - "rh_y = t.array(\"rechit_y\")[iev]\n", - "rh_z = t.array(\"rechit_z\")[iev]\n", - "rh_eta = t.array(\"rechit_eta\")[iev]\n", - "rh_phi = t.array(\"rechit_phi\")[iev]\n", 
- "rh_e = t.array(\"rechit_e\")[iev]\n", - "rh_detid = t.array(\"rechit_detid\")[iev]\n", - "rh_det = t.array(\"rechit_det\")[iev]\n", - "rh_subdet = t.array(\"rechit_subdet\")[iev]\n", - "rh_idx_cluster = t.array(\"rechit_idx_cluster\")[iev]\n", - "\n", - "sh_x = t.array(\"simhit_x\")[iev]\n", - "sh_y = t.array(\"simhit_y\")[iev]\n", - "sh_z = t.array(\"simhit_z\")[iev]\n", - "sh_e = t.array(\"simhit_e\")[iev]\n", - "sh_eta = t.array(\"simhit_eta\")[iev]\n", - "sh_phi = t.array(\"simhit_phi\")[iev]\n", - "sh_det = t.array(\"simhit_det\")[iev]\n", - "sh_detid = t.array(\"simhit_detid\")[iev]\n", - "sh_subdet = t.array(\"simhit_subdet\")[iev]\n", - "sh_idx_cluster = t.array(\"simhit_idx_cluster\")[iev]\n", - "sh_idx_caloparticle = t.array(\"simhit_idx_caloparticle\")[iev]\n", - "\n", - "st_x = t.array(\"simtrack_x\")[iev]\n", - "st_y = t.array(\"simtrack_y\")[iev]\n", - "st_z = t.array(\"simtrack_z\")[iev]\n", - "st_idx_cluster = t.array(\"simtrack_idx_cluster\")[iev]\n", - "st_idx_caloparticle = t.array(\"simtrack_idx_caloparticle\")[iev]\n", - "st_pid = t.array(\"simtrack_pid\")[iev]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "candidx = np.nonzero(cand_pid == 130)[0][0]\n", - "icluster = np.nonzero(cl_idx_cand==candidx)[0][0]\n", - "\n", - "detids_r = rh_detid[rh_idx_cluster==icluster]\n", - "shids = []\n", - "for i in range(len(sh_detid)):\n", - " s = sh_detid[i]\n", - " if s in detids_r:\n", - " #print(\"rh\", s)\n", - " shids += [i]\n", - "shids = np.array(shids)\n", - "\n", - "plt.scatter(rh_eta[rh_idx_cluster == icluster], rh_phi[rh_idx_cluster == icluster], s=rh_e[rh_idx_cluster==icluster], alpha=0.2)\n", - "plt.scatter(sh_eta[shids], sh_phi[shids], alpha=0.2)\n", - "\n", - "plt.xlim(-8,8)\n", - "plt.ylim(-4,4)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cl_idx_cp[icluster]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cand_pid[:100]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Counter(sh_subdet[sh_det==4])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.scatter(sh_eta[sh_det==4], sh_subdet[sh_det==4])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Counter(sh_det)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.hist(sh_e[sh_e!=0], bins=np.linspace(0,2,100), density=1.0, histtype=\"step\", lw=2);\n", - "plt.hist(rh_e[rh_e!=0], bins=np.linspace(0,2,100), density=1.0, histtype=\"step\", lw=2);\n", - "plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# import plotly.graph_objects as go\n", - "\n", - "# fig = go.Figure()\n", - "\n", - "# m1 = (np.abs(sh_eta) < 1.0) & (np.abs(sh_phi) < 1.0) & (sh_e>0.2)\n", - "# m2 = (np.abs(rh_eta) < 1.0) & (np.abs(rh_phi) < 1.0) & (rh_e>0.2)\n", - "\n", - "# # Add traces\n", - "# fig.add_trace(go.Scatter(\n", - "# x=sh_eta[m1], y=sh_phi[m1],\n", - "# mode='markers',\n", - "# name='SimHit',\n", - "# hovertemplate='%{text}',\n", - "# marker={\"size\": 0.1*sh_e[m1], \"symbol\": \"circle\"},\n", - "# text = ['CaloParticle {}, cluster {}'.format(sh_idx_caloparticle[m1][i], 
sh_idx_cluster[m1][i]) for i in range(len(sh_eta[m1]))],\n", - "# ))\n", - "\n", - "# fig.add_trace(go.Scatter(\n", - "# x=rh_eta[m2], y=rh_phi[m2],\n", - "# mode='markers',\n", - "# name='RecHit',\n", - "# hovertemplate='%{text}',\n", - "# text = ['PFcluster {}'.format(rh_idx_cluster[m2][i]) for i in range(len(rh_eta[m2]))],\n", - "\n", - "# ))\n", - "\n", - "# fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.scatter(sh_eta, sh_e, marker=\".\", alpha=0.5)\n", - "plt.scatter(rh_eta, rh_e, marker=\".\", alpha=0.5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(-500, 500, 100)\n", - "plt.hist(sh_x, bins=bins, histtype=\"step\", lw=2, density=1.0);\n", - "plt.hist(rh_x, bins=bins, histtype=\"step\", lw=2, density=1.0);\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(-500, 500, 100)\n", - "plt.hist(sh_y, bins=bins, histtype=\"step\", lw=2, density=1.0);\n", - "plt.hist(rh_y, bins=bins, histtype=\"step\", lw=2, density=1.0);" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(-1000, 1000, 100)\n", - "plt.hist(sh_z, bins=bins, histtype=\"step\", lw=2, density=1.0);\n", - "plt.hist(rh_z, bins=bins, histtype=\"step\", lw=2, density=1.0);" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(0,100,100)\n", - "plt.hist(sh_e, bins=bins, histtype=\"step\", lw=2, density=1.0);\n", - "plt.hist(rh_e, bins=bins, histtype=\"step\", lw=2, density=1.0);\n", - "plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(0,10,100)\n", - "plt.hist(sh_e, bins=bins, histtype=\"step\", lw=2, density=1.0);\n", - "plt.hist(rh_e, bins=bins, histtype=\"step\", lw=2, density=1.0);\n", - "plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(20, 20))\n", - "m = (sh_e>0.5) & (sh_det == 3)\n", - "m2 = rh_det==3\n", - "plt.scatter(sh_eta[m], sh_phi[m], marker=\"o\", alpha=0.5, s=sh_e, color=\"blue\", label=\"simhits\")\n", - "plt.scatter(rh_eta[m2], rh_phi[m2], marker=\"x\", alpha=0.5, s=5*rh_e, color=\"red\", label=\"rechits\")\n", - "plt.xlim(-6,6)\n", - "plt.ylim(-4,4)\n", - "plt.xlabel(\"eta\")\n", - "plt.ylabel(\"phi\")\n", - "plt.legend()\n", - "plt.savefig(\"hits_ecal.png\", bbox_inches=\"tight\", dpi=100)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(20, 20))\n", - "m = (sh_e>0.5) & (sh_det==4)\n", - "m2 = rh_det==4\n", - "plt.scatter(sh_eta[m], sh_phi[m], marker=\"o\", alpha=0.5, s=sh_e, color=\"blue\", label=\"simhits\")\n", - "plt.scatter(rh_eta[m2], rh_phi[m2], marker=\"x\", alpha=0.5, s=5*rh_e, color=\"red\", label=\"rechits\")\n", - "plt.xlim(-6,6)\n", - "plt.ylim(-4,4)\n", - "plt.xlabel(\"eta\")\n", - "plt.ylabel(\"phi\")\n", - "plt.legend()\n", - "plt.savefig(\"hits_hcal.png\", bbox_inches=\"tight\", dpi=100)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "icaloparticle = 31\n", - "i = 1\n", - "\n", - "plt.figure(figsize=(20, 20))\n", - "\n", - 
"for icaloparticle in np.random.permutation(np.nonzero(cp_idx_cluster!=-1)[0])[:100]:\n", - " ax = plt.subplot(10, 10, i)\n", - " plt.title(str(cp_pid[icaloparticle]), y=0.94, va=\"top\")\n", - " mask_simhit = (sh_idx_caloparticle==icaloparticle) & (sh_e > 0.5)\n", - " mask_rechit = rh_idx_cluster==cp_idx_cluster[icaloparticle]\n", - "# print(cp_idx_cluster[icaloparticle], cp_pid[icaloparticle])\n", - "# print(st_pid[st_idx_caloparticle==icaloparticle])\n", - "# print(sh_det[mask_simhit])\n", - "# print(rh_det[mask_rechit])\n", - "# print(np.mean(sh_e[mask_simhit]))\n", - "# print(np.mean(rh_e[mask_rechit]))\n", - "\n", - " mask_sh_ecal = sh_det == 3\n", - " mask_sh_hcal = sh_det == 4\n", - " mask_rh_ecal = rh_det == 3\n", - " mask_rh_hcal = rh_det == 4\n", - " plt.scatter(sh_eta[mask_simhit & mask_sh_ecal], sh_phi[mask_simhit & mask_sh_ecal], s=10, marker=\"s\", label=\"SH ECAL\", alpha=0.5, color=\"lightblue\")\n", - " plt.scatter(sh_eta[mask_simhit & mask_sh_hcal], sh_phi[mask_simhit & mask_sh_hcal], s=10, marker=\"o\", label=\"SH HCAL\", alpha=0.5, color=\"red\")\n", - " plt.scatter(rh_eta[mask_rechit & mask_rh_ecal], rh_phi[mask_rechit & mask_rh_ecal], s=20, marker=\"x\", label=\"RH ECAL\", alpha=0.5, color=\"purple\");\n", - " plt.scatter(rh_eta[mask_rechit & mask_rh_hcal], rh_phi[mask_rechit & mask_rh_hcal], s=20, marker=\"+\", label=\"RH HCAL\", alpha=0.5, color=\"green\");\n", - " if i==1:\n", - " plt.legend(frameon=False, fontsize=8)\n", - " plt.xlim(-8,8)\n", - " plt.ylim(-4,4)\n", - " #plt.xlabel(\"$\\eta$\")\n", - " #plt.ylabel(\"$\\phi$\")\n", - " plt.xticks()\n", - " plt.yticks()\n", - " #ax.set_yticklabels([])\n", - " #ax.set_xticklabels([])\n", - "\n", - " i += 1\n", - "plt.tight_layout()\n", - "plt.savefig(\"hits.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins=np.linspace(-1500, 1500, 100)\n", - "plt.hist(rh_z, bins=bins, density=1.0, lw=2, histtype=\"step\");\n", - "plt.hist(sh_z[sh_x!=0], bins=bins, density=1.0, lw=2, histtype=\"step\");" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.log(rh_e[:100]+1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Counter(rh_det)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import plotly.express as px\n", - "import plotly.graph_objects as go\n", - "\n", - "points_rechit_ecal = go.Scatter3d(\n", - " x=rh_x[rh_det==3],\n", - " z=rh_y[rh_det==3],\n", - " y=rh_z[rh_det==3],\n", - " mode=\"markers\",\n", - " marker={\n", - " \"symbol\": \"square\",\n", - " \"opacity\": 0.5,\n", - " \"size\": 3,\n", - " \"color\": \"red\"\n", - "\n", - " },\n", - " name=\"ECAL rechits\"\n", - ")\n", - "\n", - "points_rechit_hcal = go.Scatter3d(\n", - " x=rh_x[rh_det==4],\n", - " z=rh_y[rh_det==4],\n", - " y=rh_z[rh_det==4],\n", - " mode=\"markers\",\n", - " marker={\n", - " \"symbol\": \"circle\",\n", - " \"opacity\": 0.5,\n", - " \"size\": 3,\n", - " \"color\": \"orange\"\n", - "\n", - " },\n", - " name=\"HCAL rechits\"\n", - ")\n", - "\n", - "points_simtracks = go.Scatter3d(\n", - " x=st_x[st_idx_caloparticle==icaloparticle],\n", - " z=st_y[st_idx_caloparticle==icaloparticle],\n", - " y=st_z[st_idx_caloparticle==icaloparticle],\n", - " mode=\"markers\",\n", - " marker={\n", - " \"symbol\": \"x\",\n", - " \"opacity\": 1.0,\n", - " \"size\": 5,\n", - 
" \"color\": \"green\"\n", - "\n", - " },\n", - " name=\"simtracks\"\n", - ")\n", - "\n", - "points_simhit_ecal = go.Scatter3d(\n", - " x=sh_x[sh_det==3],\n", - " z=sh_y[sh_det==3],\n", - " y=sh_z[sh_det==3],\n", - " mode=\"markers\",\n", - " marker={\n", - " \"symbol\": \"circle\",\n", - " \"opacity\": 0.5,\n", - " \"size\": 0.1*sh_e[sh_det==3],\n", - " #\"size\": 1.0,\n", - " \"color\": \"blue\"\n", - " },\n", - " name=\"ECAL simhits\",\n", - " #hovertemplate='%{text}',\n", - " #text = ['CaloParticle {}, cluster {}'.format(sh_idx_caloparticle[m1][i], sh_idx_cluster[m1][i]) for i in range(len(sh_eta[m1]))],\n", - "\n", - ")\n", - "\n", - "points_simhit_hcal = go.Scatter3d(\n", - " x=sh_x[(sh_det==4) & (sh_x != 0)],\n", - " z=sh_y[(sh_det==4) & (sh_x != 0)],\n", - " y=sh_z[(sh_det==4) & (sh_x != 0)],\n", - " mode=\"markers\",\n", - " marker={\n", - " \"symbol\": \"circle\",\n", - " \"opacity\": 0.5,\n", - " \"size\": 0.1*sh_e[(sh_det==4) & (sh_x != 0)],\n", - " #\"size\": 1.0,\n", - " \"color\": \"purple\"\n", - " },\n", - " name=\"HCAL simhits\",\n", - " #hovertemplate='%{text}',\n", - " #text = ['CaloParticle {}, cluster {}'.format(sh_idx_caloparticle[m1][i], sh_idx_cluster[m1][i]) for i in range(len(sh_eta[m1]))],\n", - "\n", - ")\n", - "\n", - "data=[\n", - " points_rechit_ecal,\n", - " points_rechit_hcal,\n", - " points_simhit_ecal,\n", - " points_simhit_hcal,\n", - "]\n", - "\n", - "fig = go.Figure(data=data)\n", - "\n", - "fig.update_layout(\n", - " autosize=False,\n", - " width=1000,\n", - " height=1000,\n", - " margin=go.layout.Margin(\n", - " l=0,\n", - " r=0,\n", - " b=0,\n", - " t=0,\n", - " ),\n", - " scene_camera={\n", - " \"eye\": dict(x=0.8, y=0.8, z=0.8)\n", - " }\n", - ")\n", - "\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import networkx as nx" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "iev = 0\n", - "g = nx.DiGraph()\n", - "\n", - "cp_cl = t.array(\"caloparticle_idx_cluster\")[iev]\n", - "cp_pt = t.array(\"caloparticle_pt\")[iev]\n", - "cp_e = t.array(\"caloparticle_energy\")[iev]\n", - "cp_eta = t.array(\"caloparticle_eta\")[iev]\n", - "cp_phi = t.array(\"caloparticle_phi\")[iev]\n", - "cp_pid = t.array(\"caloparticle_pid\")[iev]\n", - "\n", - "cl_cnd = t.array(\"cluster_idx_candidate\")[iev]\n", - "cl_cp = t.array(\"cluster_idx_caloparticle\")[iev]\n", - "cl_e = t.array(\"cluster_energy\")[iev]\n", - "cl_eta = t.array(\"cluster_eta\")[iev]\n", - "cl_phi = t.array(\"cluster_phi\")[iev]\n", - "\n", - "tp_t = t.array(\"trackingparticle_idx_track\")[iev]\n", - "tp_pt = t.array(\"trackingparticle_pt\")[iev]\n", - "tp_eta = t.array(\"trackingparticle_eta\")[iev]\n", - "tp_phi = t.array(\"trackingparticle_phi\")[iev]\n", - "tp_pid = t.array(\"trackingparticle_pid\")[iev]\n", - "\n", - "t_cnd = t.array(\"track_idx_candidate\")[iev]\n", - "t_pt = t.array(\"track_pt\")[iev]\n", - "t_eta = t.array(\"track_eta\")[iev]\n", - "t_phi = t.array(\"track_phi\")[iev]\n", - "\n", - "c_pt = t.array(\"pfcandidate_pt\")[iev]\n", - "c_eta = t.array(\"pfcandidate_eta\")[iev]\n", - "c_phi = t.array(\"pfcandidate_phi\")[iev]\n", - "c_pid = t.array(\"pfcandidate_pdgid\")[iev]\n", - "\n", - "ncaloparticle = len(cp_cl)\n", - "ncluster = len(cl_cnd)\n", - "ncandidate = len(c_pt)\n", - "ntrackingparticle = len(tp_t)\n", - "ntrack = len(t_cnd)\n", - "\n", - "for i in range(ncaloparticle):\n", - " g.add_node((\"caloparticle\", i))\n", - " 
\n", - "for i in range(ncluster):\n", - " g.add_node((\"cluster\", i))\n", - " \n", - "for i in range(ncandidate):\n", - " g.add_node((\"candidate\", i))\n", - "\n", - "for i in range(ntrackingparticle):\n", - " g.add_node((\"trackingparticle\", i))\n", - " \n", - "for i in range(ntrack):\n", - " g.add_node((\"track\", i))\n", - " \n", - "for i in range(ncaloparticle):\n", - " ind_cluster = cp_cl[i]\n", - " if ind_cluster != -1:\n", - " g.add_edge((\"caloparticle\", i), (\"cluster\", ind_cluster))\n", - " \n", - "for i in range(ncluster):\n", - " ind_cand = cl_cnd[i]\n", - " if ind_cand != -1:\n", - " g.add_edge((\"cluster\", i), (\"candidate\", ind_cand))\n", - " \n", - " ind_cp = cl_cp[i]\n", - " if ind_cp != -1:\n", - " g.nodes[(\"caloparticle\", ind_cp)][\"state\"] = \"matched\"\n", - " \n", - "for i in range(ntrackingparticle):\n", - " ind_track = tp_t[i]\n", - " if ind_track != -1:\n", - " g.add_edge((\"trackingparticle\", i), (\"track\", ind_track))\n", - "\n", - "for i in range(ntrack):\n", - " ind_cand = t_cnd[i]\n", - " if ind_cand != -1:\n", - " g.add_edge((\"track\", i), (\"candidate\", ind_cand))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sgs = [nx.subgraph(g, c) for c in nx.weakly_connected_components(g)]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sgs_d = {}\n", - "for sg in sgs:\n", - " cands = [n for n in sg.nodes if n[0] == \"candidate\"]\n", - " if len(cands) == 1:\n", - " pdgid = c_pid[cands[0][1]]\n", - " if not (pdgid in sgs_d):\n", - " sgs_d[pdgid] = []\n", - " sgs_d[pdgid] += [sg]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from networkx.drawing.nx_agraph import write_dot, graphviz_layout" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "colors = {\n", - " \"caloparticle\": \"red\",\n", - " \"cluster\": \"lightblue\",\n", - " \"candidate\": \"green\",\n", - " \"trackingparticle\": \"pink\",\n", - " \"track\": \"orange\"\n", - "}\n", - "\n", - "for pid in sgs_d.keys():\n", - " ip = 0\n", - " for sg in sgs_d[pid][:10]:\n", - " labels = {}\n", - " plt.figure(figsize=(max(3, len(sg.nodes)+1), 10))\n", - " for n in sg.nodes:\n", - " if n[0] == \"caloparticle\":\n", - " d = sg.nodes[n]\n", - " labels[n] = \"{}\\npt: {:.2f}\\ne: {:.2f}\\neta: {:.2f}\\nphi: {:.2f}\\npid: {}\".format(\n", - " \"CaloParticle\", cp_pt[n[1]], cp_e[n[1]], cp_eta[n[1]], cp_phi[n[1]], cp_pid[n[1]]\n", - " ) + \"\\n\" + d.get(\"state\", \"\")\n", - " elif n[0] == \"cluster\":\n", - " labels[n] = \"{}\\ne: {:.2f}\\neta: {:.2f}\\nphi: {:.2f}\".format(\n", - " \"PFCluster\", cl_e[n[1]], cl_eta[n[1]], cl_phi[n[1]]\n", - " )\n", - " elif n[0] == \"trackingparticle\":\n", - " labels[n] = \"{}\\npt: {:.2f}\\neta: {:.2f}\\nphi: {:.2f}\\npid: {}\".format(\n", - " \"TrackingParticle\", tp_pt[n[1]], tp_eta[n[1]], tp_phi[n[1]], tp_pid[n[1]]\n", - " )\n", - " elif n[0] == \"track\":\n", - " labels[n] = \"{}\\npt: {:.2f}\\neta: {:.2f}\\nphi: {:.2f}\".format(\n", - " \"Track\", t_pt[n[1]], t_eta[n[1]], t_phi[n[1]]\n", - " )\n", - " elif n[0] == \"candidate\":\n", - " labels[n] = \"{}\\npt: {:.2f}\\neta: {:.2f}\\nphi: {:.2f}\\npid: {}\".format(\n", - " \"PFCandidate\", c_pt[n[1]], c_eta[n[1]], c_phi[n[1]], c_pid[n[1]]\n", - " )\n", - " else:\n", - " labels[n] = n\n", - "\n", - " pos = graphviz_layout(sg, prog='dot')\n", - " 
nx.draw(sg,\n", - " pos=pos,\n", - " with_labels=True,\n", - " node_color=[colors[n[0]] for n in sg.nodes],\n", - " labels=labels,\n", - " edge_color=\"gray\"\n", - " )\n", - " plt.tight_layout()\n", - " plt.savefig(\"graph_ev{}_pid{}_i{}.pdf\".format(iev, pid, ip))\n", - " ip += 1\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tp_idx_track = t.array(\"trackingparticle_idx_track\")\n", - "tp_pt = t.array(\"trackingparticle_pt\")\n", - "tp_eta = t.array(\"trackingparticle_eta\")\n", - "tp_phi = t.array(\"trackingparticle_phi\")\n", - "tp_pid = t.array(\"trackingparticle_pid\")\n", - "\n", - "cp_idx_cluster = t.array(\"caloparticle_idx_cluster\")\n", - "cp_pt = t.array(\"caloparticle_pt\")\n", - "cp_e = t.array(\"caloparticle_energy\")\n", - "cp_eta = t.array(\"caloparticle_eta\")\n", - "cp_phi = t.array(\"caloparticle_phi\")\n", - "cp_pid = t.array(\"caloparticle_pid\")\n", - "\n", - "t_pt = t.array('track_pt')\n", - "t_eta = t.array('track_eta')\n", - "t_phi = t.array('track_phi')\n", - "t_idx_c = t.array('track_idx_candidate')\n", - "\n", - "cl_e = t.array('cluster_energy')\n", - "cl_eta = t.array('cluster_eta')\n", - "cl_phi = t.array('cluster_phi')\n", - "cl_idx_cp = t.array('cluster_idx_caloparticle')\n", - "cl_idx_c = t.array('cluster_idx_candidate')\n", - "\n", - "c_pt = t.array('pfcandidate_pt')\n", - "c_eta = t.array('pfcandidate_eta')\n", - "c_phi = t.array('pfcandidate_phi')\n", - "c_pid = t.array('pfcandidate_pdgid')\n", - "c_idx_tp = t.array('pfcandidate_idx_trackingparticle')\n", - "c_idx_cp = t.array('pfcandidate_idx_caloparticle')\n", - "\n", - "mc = c_idx_tp != -1\n", - "mc2 = (c_idx_tp == -1) & (c_idx_cp != -1)\n", - "mc3 = (c_idx_tp == -1) & (c_idx_cp == -1)\n", - "mtp = tp_idx_track != -1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.array([0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0,1.5,2.0,3.0,4.0,5.0,6.0,8.0,10,15,20,50])\n", - "c1, _ = np.histogram(tp_pt.flatten(), bins=bins)\n", - "c2, _ = np.histogram(tp_pt[tp_idx_track!=-1].flatten(), bins=bins)\n", - "ratio = c2/c1\n", - "errs = np.sqrt(c2)/c1\n", - "ratio[np.isnan(ratio)] = 0\n", - "plt.errorbar(bins[:-1] + np.diff(bins)/2, ratio, yerr=errs, marker=\".\", lw=1, elinewidth=1)\n", - "plt.ylabel(\"matching efficiency\\nTrackingParticle to Track\")\n", - "plt.xlabel(\"TrackingParticle $p_T$ [GeV]\")\n", - "plt.savefig(\"trackingparticle_matching_eff.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(-3,3,100)\n", - "c1, _ = np.histogram(tp_eta.flatten(), bins=bins)\n", - "c2, _ = np.histogram(tp_eta[tp_idx_track!=-1].flatten(), bins=bins)\n", - "ratio = c2/c1\n", - "errs = np.sqrt(c2)/c1\n", - "ratio[np.isnan(ratio)] = 0\n", - "plt.errorbar(bins[:-1] + np.diff(bins)/2, ratio, yerr=errs, marker=\".\", lw=1, elinewidth=1)\n", - "plt.ylabel(\"matching efficiency\\nTrackingParticle to Track\")\n", - "plt.xlabel(\"TrackingParticle $\\eta$\")\n", - "plt.savefig(\"trackingparticle_matching_eff_eta.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.hist(tp_pt.flatten(), bins=np.linspace(0,5,101));\n", - "plt.yscale(\"log\")\n", - "plt.xlabel(\"TrackingParticle $p_T$ [GeV]\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": 
[], - "source": [ - "bins = np.array([0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0,1.5,2.0,3.0,4.0,5.0,6.0,8.0,10,15,20,50])\n", - "c1, _ = np.histogram(cp_e.flatten(), bins=bins)\n", - "c2, _ = np.histogram(cp_e[cp_idx_cluster!=-1].flatten(), bins=bins)\n", - "ratio = c2/c1\n", - "errs = np.sqrt(c2)/c1\n", - "ratio[np.isnan(ratio)] = 0\n", - "plt.errorbar(bins[:-1] + np.diff(bins)/2, ratio, yerr=errs, marker=\".\", lw=1, elinewidth=1)\n", - "plt.ylabel(\"matching efficiency\\nCaloParticle to Cluster\")\n", - "plt.xlabel(\"CaloParticle energy [GeV]\")\n", - "plt.savefig(\"caloparticle_matching_eff.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(-6,6,100)\n", - "c1, _ = np.histogram(cp_eta.flatten(), bins=bins)\n", - "c2, _ = np.histogram(cp_eta[cp_idx_cluster!=-1].flatten(), bins=bins)\n", - "ratio = c2/c1\n", - "errs = np.sqrt(c2)/c1\n", - "ratio[np.isnan(ratio)] = 0\n", - "plt.errorbar(bins[:-1] + np.diff(bins)/2, ratio, yerr=errs, marker=\".\", lw=1, elinewidth=1)\n", - "plt.ylabel(\"matching efficiency\\nCaloParticle to Cluster\")\n", - "plt.xlabel(\"CaloParticle $\\eta$\")\n", - "plt.savefig(\"caloparticle_matching_eff_eta.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.hist(cp_e.flatten(), bins=np.linspace(0,5,101));\n", - "plt.yscale(\"log\")\n", - "plt.xlabel(\"CaloParticle energy [GeV]\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "a = cp_idx_cluster[cp_idx_cluster != -1]\n", - "vs = sum([\n", - " list(freqtable(_a, np.unique(_a)).values()) for _a in a\n", - "], [])\n", - "\n", - "plt.hist(vs, bins=np.linspace(0, 20, 21))\n", - "plt.xlabel(\"CaloParticles per Cluster\")\n", - "plt.ylabel(\"Number of Clusters\")\n", - "plt.yscale(\"log\")\n", - "plt.title(\"CaloParticles per Cluster: {:.2f}\".format(np.mean(vs)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "a = cl_idx_c[cl_idx_c != -1]\n", - "vs1 = sum([\n", - " list(freqtable(_a, np.unique(_a)).values()) for _a in a\n", - "], [])\n", - "\n", - "plt.hist(vs1, bins=np.linspace(0, 10, 11))\n", - "plt.xlabel(\"Cluster per Candidate\")\n", - "plt.ylabel(\"Number of Candidates\")\n", - "plt.yscale(\"log\")\n", - "plt.title(\"Cluster per Candidate: {:.2f}\".format(np.mean(vs1)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "a = t_idx_c[t_idx_c != -1]\n", - "vs2 = sum([\n", - " list(freqtable(_a, np.unique(_a)).values()) for _a in a\n", - "], [])\n", - "plt.hist(vs2, bins=np.linspace(0, 20, 11))\n", - "plt.xlabel(\"Tracks per Candidate\")\n", - "plt.ylabel(\"Number of Candidates\")\n", - "plt.yscale(\"log\")\n", - "plt.title(\"Tracks per Candidate: {:.2f}\".format(np.mean(vs2)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.mean(vs1), np.mean(vs2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Counter(vs1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Counter(vs2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(3*3, 
2*3))\n", - "\n", - "plt.suptitle(\"GenParticle kinematics\", y=1.01)\n", - "plt.subplot(2, 3, 1)\n", - "plt.hist(tp_pt.flatten(), bins=np.linspace(0,10,100), histtype=\"step\", lw=2, label=\"all TrackingParticles\", density=1);\n", - "plt.hist(tp_pt[tp_idx_track!=-1].flatten(), bins=np.linspace(0,10,100), histtype=\"step\", lw=2, label=\"matched to reco track\", density=1);\n", - "plt.ylim(1e-3, 1000)\n", - "plt.yscale(\"log\")\n", - "plt.legend(frameon=False)\n", - "plt.xlabel(\"$p_T$ [GeV]\")\n", - "\n", - "plt.subplot(2, 3, 2)\n", - "plt.hist(tp_eta.flatten(), bins=np.linspace(-6, 6, 100), histtype=\"step\", lw=2, density=1);\n", - "plt.hist(tp_eta[tp_idx_track!=-1].flatten(), bins=np.linspace(-6, 6, 100), histtype=\"step\", lw=2, density=1);\n", - "plt.yscale(\"log\")\n", - "plt.xlabel(\"$\\eta$\")\n", - "\n", - "plt.subplot(2, 3, 3)\n", - "plt.hist(tp_phi.flatten(), bins=np.linspace(-4, 4, 100), histtype=\"step\", lw=2, density=1);\n", - "plt.hist(tp_phi[tp_idx_track!=-1].flatten(), bins=np.linspace(-4, 4, 100), histtype=\"step\", lw=2, density=1);\n", - "plt.yscale(\"log\")\n", - "plt.xlabel(\"$\\phi$\")\n", - "\n", - "plt.subplot(2, 3, 4)\n", - "plt.hist(cp_pt.flatten(), bins=np.linspace(0,10,100), histtype=\"step\", lw=2, label=\"all CaloParticles\", density=1);\n", - "plt.hist(cp_pt[cp_idx_cluster!=-1].flatten(), bins=np.linspace(0,10,100), histtype=\"step\", lw=2, label=\"matched to reco cluster\", density=1);\n", - "plt.yscale(\"log\")\n", - "plt.ylim(1e-3, 100)\n", - "plt.legend(frameon=False)\n", - "plt.xlabel(\"$p_T$ [GeV]\")\n", - "\n", - "plt.subplot(2, 3, 5)\n", - "plt.hist(cp_eta.flatten(), bins=np.linspace(-6, 6, 100), histtype=\"step\", lw=2, density=1);\n", - "plt.hist(cp_eta[cp_idx_cluster!=-1].flatten(), bins=np.linspace(-6, 6, 100), histtype=\"step\", lw=2, density=1);\n", - "plt.yscale(\"log\")\n", - "plt.xlabel(\"$\\eta$\")\n", - "\n", - "plt.subplot(2, 3, 6)\n", - "plt.hist(cp_phi.flatten(), bins=np.linspace(-4, 4, 100), histtype=\"step\", lw=2, density=1);\n", - "plt.hist(cp_phi[cp_idx_cluster!=-1].flatten(), bins=np.linspace(-4, 4, 100), histtype=\"step\", lw=2, density=1);\n", - "plt.yscale(\"log\")\n", - "plt.xlabel(\"$\\phi$\")\n", - "\n", - "plt.tight_layout()\n", - "plt.savefig(\"genparticle_to_reco.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "inds = np.random.permutation(range(len(tp_pt[tp_idx_track!=-1].flatten())))[:1000]\n", - "\n", - "plt.figure(figsize=(3*3, 2*3))\n", - "plt.suptitle(\"GenParticle to Track or Cluster\", y=1.01)\n", - "plt.subplot(2,3,1)\n", - "plt.scatter(\n", - " tp_pt[tp_idx_track!=-1].flatten()[inds],\n", - " t_pt[tp_idx_track[tp_idx_track!=-1]].flatten()[inds],\n", - " alpha=0.2, marker=\".\")\n", - "plt.xscale(\"log\")\n", - "plt.yscale(\"log\")\n", - "plt.xlabel(\"TrackingParticle pT [GeV]\")\n", - "plt.ylabel(\"recoTrack pT [GeV]\")\n", - "plt.xlim(0.1, 100)\n", - "plt.ylim(0.1, 100)\n", - "\n", - "plt.subplot(2,3,2)\n", - "plt.scatter(\n", - " tp_eta[tp_idx_track!=-1].flatten()[inds],\n", - " t_eta[tp_idx_track[tp_idx_track!=-1]].flatten()[inds],\n", - " alpha=0.2, marker=\".\")\n", - "plt.xlim(-4, 4)\n", - "plt.ylim(-4, 4)\n", - "plt.xlabel(\"TrackingParticle $\\eta$\")\n", - "plt.ylabel(\"recoTrack $\\eta$\")\n", - "\n", - "plt.subplot(2,3,3)\n", - "plt.scatter(\n", - " tp_phi[tp_idx_track!=-1].flatten()[inds],\n", - " t_phi[tp_idx_track[tp_idx_track!=-1]].flatten()[inds],\n", - " alpha=0.2, marker=\".\")\n", - 
"plt.xlim(-4, 4)\n", - "plt.ylim(-4, 4)\n", - "plt.xlabel(\"TrackingParticle $\\phi$\")\n", - "plt.ylabel(\"recoTrack $\\phi$\")\n", - "\n", - "inds = np.random.permutation(range(len(cp_e[cl_idx_cp[cl_idx_cp!=-1]].flatten())))[:1000]\n", - "\n", - "plt.subplot(2,3,4)\n", - "plt.scatter(\n", - " cp_e[cl_idx_cp[cl_idx_cp!=-1]].flatten()[inds],\n", - " cl_e[cl_idx_cp!=-1].flatten()[inds], alpha=0.2, marker=\".\")\n", - "plt.xscale(\"log\")\n", - "plt.yscale(\"log\")\n", - "plt.xlabel(\"CaloParticle energy\")\n", - "plt.ylabel(\"PFCluster energy [GeV]\")\n", - "plt.xlim(0.1, 1000)\n", - "plt.ylim(0.1, 1000)\n", - "\n", - "plt.subplot(2,3,5)\n", - "plt.scatter(\n", - " cp_eta[cl_idx_cp[cl_idx_cp!=-1]].flatten()[inds],\n", - " cl_eta[cl_idx_cp!=-1].flatten()[inds], alpha=0.2, marker=\".\")\n", - "plt.xlim(-6, 6)\n", - "plt.ylim(-6, 6)\n", - "plt.xlabel(\"CaloParticle $\\eta$\")\n", - "plt.ylabel(\"PFCluster $\\eta$\")\n", - "\n", - "plt.subplot(2,3,6)\n", - "plt.scatter(\n", - " cp_phi[cl_idx_cp[cl_idx_cp!=-1]].flatten()[inds],\n", - " cl_phi[cl_idx_cp!=-1].flatten()[inds], alpha=0.2, marker=\".\")\n", - "plt.xlim(-4, 4)\n", - "plt.ylim(-4, 4)\n", - "plt.xlabel(\"CaloParticle $\\phi$\")\n", - "plt.ylabel(\"PFCluster $\\phi$\")\n", - "\n", - "plt.tight_layout()\n", - "plt.savefig(\"genparticle_to_reco_scatter.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "unmatched_pids = c_pid[mc3].flatten()\n", - "matched_pids = c_pid[~mc3].flatten()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "labels = np.unique(c_pid.flatten())\n", - "f1 = freqtable(matched_pids, labels)\n", - "f2 = freqtable(unmatched_pids, labels)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "numtot = len(c_pid.flatten())\n", - "b1 = np.array([f1[l]/numtot for l in labels])\n", - "b2 = np.array([f2[l]/numtot for l in labels])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "xs = np.arange(len(b1))\n", - "plt.bar(xs, b1, label=\"matched PFCandidates\")\n", - "plt.bar(xs, b2, bottom=b1, label=\"unmatched PFCandidates\")\n", - "plt.xticks(xs, labels);\n", - "plt.legend(frameon=False)\n", - "plt.ylabel(\"fraction of PFCandidates\")\n", - "plt.xlabel(\"PFCandidate PDGID\")\n", - "plt.title(\"PFCandidate GenParticle matching efficiency\")\n", - "plt.savefig(\"candidate_matching_efficiency.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "labels=np.unique(c_pid.flatten())\n", - "cm = sklearn.metrics.confusion_matrix(cp_pid[c_idx_cp[mc2]].flatten(), c_pid[mc2].flatten(), labels=labels)\n", - "plot_confusion_matrix(cm, labels, normalize=False)\n", - "plt.title(\"CaloParticle to PFCandidate\")\n", - "plt.ylabel(\"CaloParticle PDGID\")\n", - "plt.xlabel(\"PFCandidate PDGID\")\n", - "plt.savefig(\"caloparticle_to_candidate_confusion.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cm = sklearn.metrics.confusion_matrix(tp_pid[c_idx_tp[mc]].flatten(), c_pid[mc].flatten(), labels=labels)\n", - "plot_confusion_matrix(cm, labels, normalize=False)\n", - "plt.title(\"TrackingParticle to PFCandidate\")\n", - "plt.ylabel(\"TrackingParticle PDGID\")\n", - 
"plt.xlabel(\"PFCandidate PDGID\")\n", - "plt.savefig(\"trackingparticle_to_pfcandidate_confusion.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "inds = np.random.permutation(range(len(tp_pt[c_idx_tp[mc]].flatten())))[:1000]\n", - "\n", - "plt.figure(figsize=(3*3, 2*3))\n", - "plt.suptitle(\"GenParticle to PFCandidate\", y=1.01)\n", - "plt.subplot(2,3,1)\n", - "plt.scatter(tp_pt[c_idx_tp[mc]].flatten()[inds], c_pt[mc].flatten()[inds], marker=\".\", alpha=0.2)\n", - "plt.xscale(\"log\")\n", - "plt.yscale(\"log\")\n", - "plt.xlabel(\"TrackingParticle $p_T$ [GeV]\")\n", - "plt.ylabel(\"PFCand $p_T$ [GeV]\")\n", - "plt.xlim(0.1, 100)\n", - "plt.ylim(0.1, 100)\n", - "\n", - "plt.subplot(2,3,2)\n", - "plt.scatter(tp_eta[c_idx_tp[mc]].flatten()[inds], c_eta[mc].flatten()[inds], marker=\".\", alpha=0.2)\n", - "plt.xlabel(\"TrackingParticle $\\eta$\")\n", - "plt.ylabel(\"PFCand $\\eta$\")\n", - "plt.xlim(-5, 5)\n", - "plt.ylim(-5, 5)\n", - "\n", - "plt.subplot(2,3,3)\n", - "plt.scatter(tp_phi[c_idx_tp[mc]].flatten()[inds], c_phi[mc].flatten()[inds], marker=\".\", alpha=0.2)\n", - "plt.xlabel(\"TrackingParticle $\\phi$\")\n", - "plt.ylabel(\"PFCand $\\phi$\")\n", - "plt.xlim(-4, 4)\n", - "plt.ylim(-4, 4)\n", - "\n", - "inds = np.random.permutation(range(len(cp_pt[c_idx_cp[mc2]].flatten())))[:1000]\n", - "\n", - "plt.subplot(2,3,4)\n", - "plt.scatter(cp_pt[c_idx_cp[mc2]].flatten()[inds], c_pt[mc2].flatten()[inds], marker=\".\", alpha=0.2)\n", - "plt.xscale(\"log\")\n", - "plt.yscale(\"log\")\n", - "plt.xlabel(\"CaloParticle $p_T$ [GeV]\")\n", - "plt.ylabel(\"PFCandidate $p_T$ [GeV]\")\n", - "plt.xlim(0.1, 100)\n", - "plt.ylim(0.1, 100)\n", - "\n", - "plt.subplot(2,3,5)\n", - "plt.scatter(cp_eta[c_idx_cp[mc2]].flatten()[inds], c_eta[mc2].flatten()[inds], marker=\".\", alpha=0.2)\n", - "plt.xlabel(\"CaloParticle $\\eta$\")\n", - "plt.ylabel(\"PFCandidate $\\eta$\")\n", - "plt.xlim(-5, 5)\n", - "plt.ylim(-5, 5)\n", - "\n", - "plt.subplot(2,3,6)\n", - "plt.scatter(cp_phi[c_idx_cp[mc2]].flatten()[inds], c_phi[mc2].flatten()[inds], marker=\".\", alpha=0.2)\n", - "plt.xlabel(\"CaloParticle $\\phi$\")\n", - "plt.ylabel(\"PFCandidate $\\phi$\")\n", - "plt.xlim(-4, 4)\n", - "plt.ylim(-4, 4)\n", - "\n", - "plt.tight_layout()\n", - "plt.savefig(\"genparticle_to_candidate_scatter.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "bins = np.linspace(-0.5, 0.5, 101)\n", - "plt.subplot(2,1,1)\n", - "\n", - "plt.title(\"TrackingParticle to Track\")\n", - "plt.hist(((tp_pt[mtp] - t_pt[tp_idx_track[mtp]])/tp_pt[mtp]).flatten(), bins=bins, histtype=\"step\", lw=2, label=\"$p_T$\");\n", - "plt.hist(((tp_eta[mtp] - t_eta[tp_idx_track[mtp]])/tp_eta[mtp]).flatten(), bins=bins, histtype=\"step\", lw=2, label=\"$\\eta$\");\n", - "plt.hist(((tp_phi[mtp] - t_phi[tp_idx_track[mtp]])/tp_phi[mtp]).flatten(), bins=bins, histtype=\"step\", lw=2, label=\"$\\phi$\");\n", - "plt.yscale(\"log\")\n", - "plt.legend(frameon=False)\n", - "plt.xlabel(\"(gen - reco) / gen\")\n", - "\n", - "\n", - "plt.subplot(2,1,2)\n", - "plt.title(\"TrackingParticle to PFCandidate\")\n", - "plt.hist(((tp_pt[c_idx_tp[mc]] - c_pt[mc])/tp_eta[c_idx_tp[mc]]).flatten(), bins=bins, histtype=\"step\", lw=2, label=\"$p_T$\");\n", - "plt.hist(((tp_eta[c_idx_tp[mc]] - c_eta[mc])/tp_eta[c_idx_tp[mc]]).flatten(), bins=bins, histtype=\"step\", 
lw=2, label=\"$\\eta$\");\n", - "plt.hist(((tp_phi[c_idx_tp[mc]] - c_phi[mc])/tp_eta[c_idx_tp[mc]]).flatten(), bins=bins, histtype=\"step\", lw=2, label=\"$\\phi$\");\n", - "plt.yscale(\"log\")\n", - "plt.legend(frameon=False)\n", - "plt.xlabel(\"(gen - reco) / gen\")\n", - "\n", - "plt.tight_layout()\n", - "plt.savefig(\"reco_resolutions.pdf\", bbox_inches=\"tight\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/old/python_ntuple.ipynb b/notebooks/old/python_ntuple.ipynb deleted file mode 100644 index 98af593c9..000000000 --- a/notebooks/old/python_ntuple.ipynb +++ /dev/null @@ -1,695 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"-1\" \n", - "os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"\n", - "\n", - "import numpy as np\n", - "import glob\n", - "import matplotlib.pyplot as plt\n", - "import numba\n", - "from collections import Counter\n", - "import math\n", - "import sklearn\n", - "import sklearn.metrics\n", - "import sklearn.ensemble\n", - "import sklearn.cluster\n", - "import scipy.sparse\n", - "import keras\n", - "import sys\n", - "import pickle\n", - "import matplotlib\n", - "\n", - "sys.path += [\"../test\"]\n", - "from train_clustering import encode_triu, decode_triu\n", - "from train_regression import get_unique_X_y\n", - "from benchmark_solution import create_points\n", - "\n", - "from matplotlib.colors import LogNorm" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "@numba.njit\n", - "def get_types_in_block(X, y, blk):\n", - " return [int(x) for x in sorted(X[y==blk, 0])]\n", - "\n", - "def get_blocksize_candsize_matrix(el_bl_id, cand_bl_id):\n", - " blids = np.unique(el_bl_id)\n", - " sizes = np.zeros((len(blids), 2), dtype=np.float32)\n", - " i = 0\n", - " els_counter = Counter(el_bl_id)\n", - " cands_counter = Counter(cand_bl_id)\n", - " for bl in blids:\n", - " sizes[i, 0] = els_counter[bl]\n", - " sizes[i, 1] = cands_counter[bl]\n", - " i += 1\n", - " \n", - " b = np.linspace(0,20,21)\n", - " c, _, _ = np.histogram2d(sizes[:, 0], sizes[:, 1], bins=(b, b))\n", - " return c" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Load all elements" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "all_sgs = []\n", - "\n", - "num_clusters = []\n", - "num_tracks = []\n", - "num_cands = []\n", - "num_blocks = []\n", - "\n", - "blsize_candsize_matrices = []\n", - "\n", - "for fi in glob.glob(\"../data/TTbar_run3/*ev*.npz\"):\n", - " fi = open(fi, \"rb\")\n", - " data = np.load(fi)\n", - " \n", - " #list of PF input elements in the event\n", - " X = data[\"elements\"]\n", - " \n", - " #tracks have type=1\n", - " num_clusters += [np.sum(X[:, 0] != 1)]\n", - " num_tracks += [np.sum(X[:, 0] == 1)]\n", - " \n", - " #unique ID for each cluster/block of elements that the PFAlgo considered independently\n", - " #this can be considered as the target output of an improved PFBlockAlgo\n", - " y = 
data[\"element_block_id\"]\n", - " num_blocks += [len(np.unique(y))]\n", - "\n", - " #List of candidates produced in the event.\n", - " #This can be considered as the output of PFAlgo\n", - " cands = data[\"candidates\"]\n", - " num_cands += [len(cands)]\n", - "\n", - " #get the types of the elements for each cluster/block\n", - " sgs = [tuple(get_types_in_block(X, y, blk)) for blk in np.unique(y)]\n", - " all_sgs += sgs\n", - " \n", - " blsize_candsize_matrices += [get_blocksize_candsize_matrix(data[\"element_block_id\"], data[\"candidate_block_id\"])]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(0,20,21)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(6,6))\n", - "cmat = sum(blsize_candsize_matrices)\n", - "plt.imshow(cmat, norm=LogNorm(vmin=1, vmax=10*np.sum(cmat)), origin=\"lower\", interpolation=None)\n", - "\n", - "plt.colorbar()\n", - "plt.xticks(bins);\n", - "plt.yticks(bins);\n", - "\n", - "plt.title(\"Miniblock size to number of\\nproduced PFCandidates\")\n", - "plt.xlabel(\"number of candidates\")\n", - "plt.ylabel(\"number of elements in block\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.plot(bins[:-1], cmat.sum(axis=1).cumsum()/np.sum(cmat), marker=\"o\")\n", - "plt.xticks(bins);\n", - "plt.xlabel(\"maximum block size\")\n", - "plt.ylabel(\"fraction of candidates\")\n", - "plt.xlim(0,3)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.plot(bins[:-1], cmat.sum(axis=1).cumsum()/np.sum(cmat), marker=\"o\")\n", - "plt.xticks(bins);\n", - "plt.xlabel(\"maximum block size\")\n", - "plt.ylabel(\"fraction of candidates\")\n", - "plt.ylim(0.9, 1.0)\n", - "plt.xlim(2,20)\n", - "plt.savefig(\"cand_blocksize.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.hist(num_clusters, bins=np.linspace(0, 5000, 100), label=\"clusters\", histtype=\"step\", lw=2);\n", - "plt.hist(num_tracks, bins=np.linspace(0, 5000, 100), label=\"tracks\", histtype=\"step\", lw=2);\n", - "plt.hist(num_blocks, bins=np.linspace(0, 5000, 100), label=\"blocks\", histtype=\"step\", lw=2);\n", - "plt.hist(num_cands, bins=np.linspace(0, 5000, 100), label=\"candidates\", histtype=\"step\", lw=2);\n", - "plt.legend(frameon=False)\n", - "plt.xlabel(\"number of els/cands/blocks\")\n", - "plt.ylabel(\"number of events\")\n", - "plt.savefig(\"num_elems.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we look at the number of blocks of a certain size." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "block_sizes = Counter([len(sg) for sg in all_sgs])\n", - "print(\"block sizes\", block_sizes)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.hist([len(sg) for sg in all_sgs], bins=np.linspace(0,100,101));\n", - "plt.xlabel(\"block size\")\n", - "plt.ylabel(\"Number of blocks\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.hist([len(sg) for sg in all_sgs], bins=np.linspace(0,100,101), histtype=\"step\", lw=2);\n", - "plt.yscale(\"log\")\n", - "plt.xlabel(\"block size\")\n", - "plt.ylabel(\"number of blocks\")\n", - "plt.savefig(\"block_sizes.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's look at what the blocks f size, 1, 2, 3 and 4 are made of." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_block_nelem(blocks_nelem):\n", - " kv = list(blocks_nelem.items())\n", - " xs = np.arange(len(kv))\n", - " ys = np.array([v for k, v in kv])\n", - "\n", - " plt.bar(xs, ys)\n", - " plt.xticks(xs, [k for k, v in kv], rotation=90)\n", - " \n", - "\n", - "for blocksize in range(1,5):\n", - " sizes = [\",\".join(map(str, sg)) for sg in all_sgs if len(sg)==blocksize]\n", - " blocks_nelem = Counter(sizes)\n", - " print(\"{0}-element blocks\".format(blocksize), blocks_nelem)\n", - " plt.figure(figsize=(4,4))\n", - " plt.title(\"Blocks of size {0}: {1} ({2:.0f}%)\".format(blocksize, len(sizes), 100.0*len(sizes)/len(all_sgs)))\n", - " plot_block_nelem(blocks_nelem)\n", - " plt.xlabel(\"Block element types\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Look at the first 10 blocks." 
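The two notes above ("blocks of size 1, 2, 3 and 4", "first 10 blocks") rely on `all_sgs` holding one tuple of element types per block and on `Counter` to summarize the compositions. Below is a hedged sketch of that bookkeeping with invented block tuples; in the notebook, `all_sgs` is filled from `get_types_in_block` over every event, and the type codes follow the legend further down (1 = TRACK, 4 = ECAL, 5 = HCAL).

# Sketch with made-up block compositions, mirroring the Counter-based summary above.
from collections import Counter

all_sgs = [(1,), (4,), (1, 4), (1, 4, 5), (1, 4), (5,), (1, 4, 5, 5)]

block_sizes = Counter(len(sg) for sg in all_sgs)
print("block sizes", block_sizes)   # Counter({1: 3, 2: 2, 3: 1, 4: 1})

for blocksize in range(1, 5):
    compositions = Counter(",".join(map(str, sg)) for sg in all_sgs if len(sg) == blocksize)
    frac = 100.0 * sum(compositions.values()) / len(all_sgs)
    print("{}-element blocks ({:.0f}%):".format(blocksize, frac), compositions)

The printed fractions correspond to the "Blocks of size N: count (x%)" titles used in the bar plots above.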
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fi = open(\"../data/TTbar_run3/step3_ntuple_10_ev39.npz\", \"rb\")\n", - "data = np.load(fi)\n", - "\n", - "dm = scipy.sparse.load_npz(open(\"../data/TTbar_run3/step3_ntuple_10_dist39.npz\", \"rb\")).todense()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "largest_blocks = sorted(Counter(data[\"element_block_id\"]).items(), key=lambda x: x[1], reverse=True)\n", - "largest_blocks[:10]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "block_ids = data[\"element_block_id\"]\n", - "inds_elem = np.arange(len(data[\"elements\"]))\n", - "inds_cand = np.arange(len(data[\"candidates\"]))\n", - "for blk, blksize in largest_blocks[:10]:\n", - " candidates_from_block = data[\"candidate_block_id\"] == blk\n", - " elems_in_block = data[\"element_block_id\"] == blk\n", - " tps = get_types_in_block(data[\"elements\"], data[\"element_block_id\"], blk)\n", - " print(\"in block\", blk, \"had the following elements: {0}\".format(Counter(tps)))\n", - " for ielem in inds_elem[elems_in_block]:\n", - " print(\" elements[{0}]: type={1} energy={2:.2f}\".format(ielem, int(data[\"elements\"][ielem, 0]), data[\"elements\"][ielem, 1]))\n", - " print(\"from which the following {0} candidates were produced\".format(len(inds_cand[candidates_from_block])))\n", - " for icand in inds_cand[candidates_from_block]:\n", - " print(\" candidates[{0}]: pdgid={1} pt={2:.2f}\".format(icand, int(data[\"candidates\"][icand, 0]), data[\"candidates\"][icand, 1]))\n", - " print()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Scratchpad" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# NONE=0,\n", - "# TRACK=1, \n", - "# PS1=2, \n", - "# PS2=3, \n", - "# ECAL=4, \n", - "# HCAL=5,\n", - "# GSF=6,\n", - "# BREM=7,\n", - "# HFEM=8,\n", - "# HFHAD=9,\n", - "# SC=10,\n", - "# HO=11" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import networkx as nx\n", - "import pandas" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def make_df(points_data, points_pos, points_to_elem, elems_block_id):\n", - " df = pandas.DataFrame(points_data.copy(),\n", - " columns=[\"id\", \"type\", \"layer\"],\n", - " index=points_data[:, 0])\n", - "\n", - " df[\"block_id\"] = [elems_block_id[points_to_elem[ip]] for ip in range(len(df))]\n", - " df[\"pos_eta\"] = np.array(points_pos[:, 0])\n", - " df[\"pos_phi\"] = np.array(points_pos[:, 1])\n", - " df[\"energy\"] = np.array(points_pos[:, 2])\n", - " df[\"size\"] = 1\n", - " df[\"symbol\"] = \"dot\"\n", - " df[\"color\"] = df[\"type\"]\n", - " df[\"layer\"] = 1 + 2*df[\"layer\"]\n", - "\n", - " df[\"pos_x\"] = 2*df[\"pos_eta\"]\n", - " df[\"pos_y\"] = df[\"layer\"]*np.cos(df[\"pos_phi\"])\n", - " df[\"pos_z\"] = df[\"layer\"]*np.sin(df[\"pos_phi\"])\n", - "\n", - " df.loc[df[\"type\"]==1, \"size\"] = 0.2\n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import itertools\n", - "def color_largest_blocks(block_ids, highlight_blocks):\n", - " colors = []\n", - " cols_to_take = itertools.cycle([\"red\", \"green\", \"blue\", \"orange\", \"purple\", \"cyan\", \"yellow\", 
\"brown\"])\n", - " colmap = {t: next(cols_to_take) for t in highlight_blocks}\n", - " for i in block_ids:\n", - " if i in highlight_blocks:\n", - " colors.append(colmap[i])\n", - " else:\n", - " colors.append(\"gray\")\n", - " return colors\n", - "\n", - "\n", - "def cluster_pfblockalgo(Nelem, distance_matrix):\n", - " dm2 = distance_matrix.copy()\n", - " dm2[dm2>0] = 1\n", - " g = nx.from_numpy_matrix(dm2)\n", - "\n", - " block_id_aspf = np.zeros((Nelem, ), dtype=np.int32)\n", - " for ibl, conn in enumerate(nx.connected_components(g)):\n", - " block_id_aspf[np.array(list(conn), dtype=np.int32)] = ibl\n", - " return block_id_aspf" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import plotly.express as px\n", - "import plotly.graph_objects as go\n", - "import colorlover as cl\n", - "\n", - "def draw_plot(dfsel, highlight_blocks, point_to_point_link, title, layers_to_plot=[1,3,5,7], do_tracks=True):\n", - " \n", - " \n", - " msk_blocks = np.vstack([dfsel[\"block_id\"] == b for b in highlight_blocks]).sum(axis=0)>=1\n", - " msk_layers = np.vstack([dfsel[\"layer\"] == b for b in layers_to_plot]).sum(axis=0)>=1\n", - " \n", - " trk = (dfsel[\"type\"]==1) | (dfsel[\"type\"]==6)\n", - "\n", - " points_trk_blk = go.Scatter3d(\n", - " x=dfsel.loc[trk & msk_blocks & msk_layers, 'pos_x'].values,\n", - " y=dfsel.loc[trk&msk_blocks & msk_layers, 'pos_y'].values,\n", - " z=dfsel.loc[trk&msk_blocks & msk_layers, 'pos_z'].values,\n", - " mode=\"markers\",\n", - " marker={\n", - " \"symbol\": \"cross\",\n", - " \"opacity\": 0.8,\n", - " \"size\": 5,\n", - " \"color\": color_largest_blocks(dfsel.loc[trk&msk_blocks&msk_layers, \"block_id\"], highlight_blocks),\n", - " #\"colorscale\": cl.scales['11']['qual'][\"Set3\"]\n", - " },\n", - " name=\"track point in block\"\n", - " )\n", - "\n", - " points_trk = go.Scatter3d(\n", - " x=dfsel.loc[trk & ~msk_blocks, 'pos_x'].values,\n", - " y=dfsel.loc[trk & ~msk_blocks, 'pos_y'].values,\n", - " z=dfsel.loc[trk & ~msk_blocks, 'pos_z'].values,\n", - " mode=\"markers\",\n", - " marker={\n", - " \"symbol\": \"cross\",\n", - " \"opacity\": 0.05,\n", - " \"size\": 5,\n", - " \"color\": \"gray\"\n", - " #\"colorscale\": cl.scales['11']['qual'][\"Set3\"]\n", - " },\n", - " name=\"track point\"\n", - " )\n", - "\n", - " points_other_blk = go.Scatter3d(\n", - " x=dfsel.loc[(~trk) & msk_blocks & msk_layers, 'pos_x'].values,\n", - " y=dfsel.loc[(~trk) & msk_blocks & msk_layers, 'pos_y'].values,\n", - " z=dfsel.loc[(~trk) & msk_blocks & msk_layers, 'pos_z'].values,\n", - " mode=\"markers\",\n", - " marker={\n", - " \"symbol\": \"circle\",\n", - " \"opacity\": 0.8,\n", - " \"size\": 5,\n", - " \"color\": color_largest_blocks(dfsel.loc[~trk&msk_blocks&msk_layers, \"block_id\"], highlight_blocks),\n", - " },\n", - " name=\"calo cluster in block\"\n", - " )\n", - "\n", - "\n", - " points_other = go.Scatter3d(\n", - " x=dfsel.loc[~trk & ~msk_blocks, 'pos_x'].values,\n", - " y=dfsel.loc[~trk & ~msk_blocks, 'pos_y'].values,\n", - " z=dfsel.loc[~trk & ~msk_blocks, 'pos_z'].values,\n", - " mode=\"markers\",\n", - " marker={\n", - " \"symbol\": \"circle\",\n", - " \"opacity\": 0.05,\n", - " \"size\": 5,\n", - " \"color\": \"gray\"\n", - " },\n", - " name=\"calo cluster\"\n", - " )\n", - "\n", - " line_points_x = []\n", - " line_points_y = []\n", - " line_points_z = []\n", - " \n", - " for ip in np.array(range(len(point_to_point_link))):\n", - " p0 = point_to_point_link[ip, 0]\n", - " p1 = point_to_point_link[ip, 1]\n", - 
" if dfsel.loc[p0, \"block_id\"] in highlight_blocks or dfsel.loc[p1, \"block_id\"] in highlight_blocks:\n", - " if p0 in dfsel.index and p1 in dfsel.index:\n", - " line_points_x += [dfsel.loc[p0, \"pos_x\"], dfsel.loc[p1, \"pos_x\"], None]\n", - " line_points_y += [dfsel.loc[p0, \"pos_y\"], dfsel.loc[p1, \"pos_y\"], None]\n", - " line_points_z += [dfsel.loc[p0, \"pos_z\"], dfsel.loc[p1, \"pos_z\"], None]\n", - "\n", - "\n", - " tracks = go.Scatter3d(\n", - " x=line_points_x,\n", - " y=line_points_y,\n", - " z=line_points_z,\n", - " mode=\"lines\",\n", - " opacity=0.2,\n", - " line={\"color\": \"black\"},\n", - " name=\"track between layers\")\n", - " \n", - " data=[\n", - " points_trk,\n", - " points_other,\n", - " points_trk_blk,\n", - " points_other_blk,\n", - " ]\n", - "\n", - " if do_tracks:\n", - " data += [tracks]\n", - " fig = go.Figure(data=data)\n", - "\n", - " fig.update_layout(\n", - " autosize=False,\n", - " width=700,\n", - " height=500,\n", - " margin=go.layout.Margin(\n", - " l=50,\n", - " r=0,\n", - " b=0,\n", - " t=50,\n", - " ),\n", - " title=title,\n", - " scene_camera={\n", - " \"eye\": dict(x=0.8, y=0.8, z=0.8)\n", - " }\n", - " )\n", - "\n", - " fig.show()\n", - " return fig" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "points_data, points_pos, point_to_point_link, point_to_elem = create_points(data[\"elements\"])\n", - "df = make_df(points_data, points_pos, point_to_elem, data[\"element_block_id\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "largest_blocks = sorted(Counter(df[\"block_id\"]).items(), key=lambda x: x[1], reverse=True)\n", - "largest_blocks[:10]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = draw_plot(df, [22, 189, 229], point_to_point_link, \"PFAlgo-based true blocks\")\n", - "fig.write_image(\"blocks_true.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_pfalgo = make_df(points_data, points_pos, point_to_elem, cluster_pfblockalgo(len(data[\"elements\"]), dm))\n", - "largest_blocks = sorted(Counter(df_pfalgo[\"block_id\"][df_pfalgo[\"type\"]==1]).items(), key=lambda x: x[1], reverse=True)\n", - "largest_blocks[:10]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = draw_plot(df_pfalgo, [0, 1, 2], point_to_point_link, \"PFBlockAlgo-based blocks, tracker surface\", [1])\n", - "fig.write_image(\"blocks_pfblockalgo_tracker.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = draw_plot(df_pfalgo, [0, 1, 2], point_to_point_link, \"PFBlockAlgo-based blocks, tracker surface\", [1], do_tracks=False)\n", - "fig.write_image(\"blocks_pfblockalgo_tracker_notracks.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = draw_plot(df_pfalgo, [0, 1, 2], point_to_point_link, \"PFBlockAlgo-based blocks, ECAL surface\", [3])\n", - "fig.write_image(\"blocks_pfblockalgo_ecal.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = draw_plot(df_pfalgo, [0, 1, 2], point_to_point_link, \"PFBlockAlgo-based blocks, ECAL surface\", [3], do_tracks=False)\n", - 
"fig.write_image(\"blocks_pfblockalgo_ecal_notracks.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = draw_plot(df_pfalgo, [0, 1, 2], point_to_point_link, \"PFBlockAlgo-based blocks, HCAL surface\", [5])\n", - "fig.write_image(\"blocks_pfblockalgo_hcal.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = draw_plot(df_pfalgo, [0, 1, 2], point_to_point_link, \"PFBlockAlgo-based blocks, HCAL surface\", [5], do_tracks=False)\n", - "fig.write_image(\"blocks_pfblockalgo_hcal_notracks.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = draw_plot(df_pfalgo, [0, 1, 2], point_to_point_link, \"PFBlockAlgo-based blocks\")\n", - "fig.write_image(\"blocks_pfblockalgo.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = draw_plot(df_pfalgo, np.unique(df_pfalgo[\"block_id\"]), point_to_point_link, \"PFBlockAlgo-based blocks\")\n", - "fig.write_image(\"blocks_pfblockalgo_all.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = draw_plot(df, np.unique(df[\"block_id\"]), point_to_point_link, \"PFBAlgo-based true blocks\",)\n", - "fig.write_image(\"blocks_true_all.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/old/simrec.ipynb b/notebooks/old/simrec.ipynb deleted file mode 100644 index 322c4ac75..000000000 --- a/notebooks/old/simrec.ipynb +++ /dev/null @@ -1,463 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import pandas as pd\n", - "import mplhep\n", - "plt.style.use(mplhep.style.CMS)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import scipy\n", - "import scipy.sparse" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dm = scipy.sparse.load_npz(\"../dist_0.npz\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.imshow(dm.todense()>0.0, cmap=\"Greys\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gen = pd.read_csv(\"../gen_0.csv\", index_col=0)\n", - "reco = pd.read_csv(\"../reco_0.csv\", index_col=0)\n", - "\n", - "fi0 = np.load(\"../ev_0.npz\")\n", - "rg = fi0[\"reco_gen\"]\n", - "rc = fi0[\"reco_cand\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gen[\"num_matched\"] = (rg > 0.0).sum(axis=0)\n", - "reco[\"num_matched\"] = (rg > 0.0).sum(axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - 
"outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "gen[\"pt\"].hist(bins=np.linspace(0,10,100))\n", - "plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Xs = []\n", - "ys = []\n", - "ycs = []\n", - "for i in range(10):\n", - " fi = np.load(\"../ev_{}.npz\".format(i))\n", - " X = fi[\"X\"]\n", - " y = fi[\"ygen\"]\n", - " yc = fi[\"ycand\"]\n", - " \n", - " Xs += [X]\n", - " ys += [y]\n", - " ycs += [yc]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# plt.hist(rg[rg>0], bins=np.linspace(0,200,100));\n", - "# plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for igen in range(20):\n", - " idx_max = np.argmax(rg[:, igen])\n", - " inds_max = np.argsort(rg[:, igen])[::-1][:3]\n", - " rgs = rg[inds_max, igen]\n", - " print(igen, inds_max, rgs)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5, 5))\n", - "plt.imshow(rg>0.0, cmap=\"Greys\", interpolation='None')\n", - "plt.xlabel(\"genparticle index\")\n", - "plt.ylabel(\"recoparticle index\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5, 5))\n", - "\n", - "rgs = rg>0.0\n", - "plt.hist(rgs.sum(axis=0), bins=np.linspace(0, 20, 21), density=1.0, histtype=\"step\", lw=2, label=\"gen\");\n", - "plt.hist(rgs.sum(axis=1), bins=np.linspace(0, 20, 21), density=1.0, histtype=\"step\", lw=2, label=\"reco\");\n", - "plt.legend()\n", - "#plt.yscale(\"log\")\n", - "plt.xlabel(\"number of associations\")\n", - "plt.ylabel(\"fraction of total\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5, 5))\n", - "plt.imshow(rc>0.0, cmap=\"Greys\", interpolation='None')\n", - "plt.xlabel(\"PFCandidate index\")\n", - "plt.ylabel(\"recoparticle index\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5, 5))\n", - "rcs = rc>0.0\n", - "plt.hist(rcs.sum(axis=0), bins=np.linspace(0, 10, 11), density=1.0, histtype=\"step\", lw=2, label=\"candidate\");\n", - "plt.hist(rcs.sum(axis=1), bins=np.linspace(0, 10, 11), density=1.0, histtype=\"step\", lw=2, label=\"reco\");\n", - "plt.legend()\n", - "#plt.yscale(\"log\")\n", - "plt.xlabel(\"number of associations\")\n", - "plt.ylabel(\"fraction of total\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from collections import Counter" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_id_pairs(yc[:, 0], X[:, 0])\n", - "plt.title(\"reco to PFCandidate\", y=1.0)\n", - "plt.ylabel(\"PFCandidate PDGID\")\n", - "plt.xlabel(\"Reco object type\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_id_pairs(yc[m2, 0], y[m2, 0])\n", - "plt.title(\"gen to PFCandidate\", y=1.0)\n", - "plt.xlabel(\"GenParticle PDGID\")\n", - 
"plt.ylabel(\"PFCandidate PDGID\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "has_cand = (yc[:, 0] != 0)\n", - "has_gen = (y[:, 0] != 0)\n", - "\n", - "is_track = X[:, 0] == 1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Counter(X[has_gen, 0])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Counter(X[~has_gen, 0])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X[is_track].shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X[is_track & has_cand & has_gen].shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X[is_track & has_cand & ~has_gen].shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X[is_track & ~has_cand].shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "yc[has_cand & has_gen, 0].shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "yc[has_cand & ~has_gen, 0].shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pids = np.unique(yc[has_cand, 0])\n", - "c1 = Counter(yc[has_cand&has_gen, 0])\n", - "c2 = Counter(yc[has_cand&~has_gen, 0])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "yvals = [c1[p]/np.sum(has_cand) for p in pids]\n", - "yvals2 = [c2[p]/np.sum(has_cand) for p in pids]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(9,5))\n", - "xs = np.arange(len(pids))\n", - "plt.bar(xs, yvals, label=\"matched\")\n", - "plt.bar(xs, yvals2, bottom=yvals, label=\"not matched\")\n", - "plt.xticks(xs, [int(x) for x in pids]);\n", - "plt.ylabel(\"fraction of total candidates\")\n", - "plt.xlabel(\"PFCandidate PDGID\")\n", - "plt.title(\"PFCandidate to Gen match\")\n", - "plt.legend()\n", - "plt.ylim(0,0.4)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df2 = pd.DataFrame(np.hstack([X, y, yc]))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df2[df2[0]==4]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "randinds = np.random.permutation(range(len(df2)))[:100]\n", - "df3 = df2[[0, 4, 6, 12]]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df3.loc[randinds]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "ax = plt.axes()\n", - "\n", - "bigmask = (np.abs(X[:, 2]) < 0.2) & (np.abs(X[:, 3]) < 0.2)\n", - "plt.scatter(X[bigmask, 2], X[bigmask, 3], marker=\".\", label=\"reco\")\n", - "plt.scatter(y[bigmask & m, 2], y[bigmask & m, 3], marker=\"x\", label=\"gen\")\n", - 
"plt.scatter(yc[bigmask & m2, 2], yc[bigmask & m2, 3], marker=\".\", label=\"PF\")\n", - "\n", - "for idx in np.nonzero(bigmask)[0]:\n", - " _x1, _y1 = X[idx, 2], X[idx, 3]\n", - " _x2, _y2 = y[idx, 2], y[idx, 3]\n", - " _x3, _y3 = yc[idx, 2], yc[idx, 3]\n", - " if _x2 != 0 and abs(_x2) < 0.2 and abs(_y2) < 0.2:\n", - " plt.plot([_x1, _x2], [_y1, _y2], color=\"gray\")\n", - " if _x3 != 0 and abs(_x3) < 0.2 and abs(_y3) < 0.2:\n", - " plt.plot([_x1, _x3], [_y1, _y3], color=\"gray\")\n", - " \n", - "plt.xlim(-0.2, 0.2)\n", - "plt.ylim(-0.2, 0.2)\n", - "plt.xlabel(\"eta\")\n", - "plt.ylabel(\"phi\")\n", - "plt.legend(loc=(1.01,0.1))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.legend?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/old/tensorflow-model.ipynb b/notebooks/old/tensorflow-model.ipynb deleted file mode 100644 index a0282a6d4..000000000 --- a/notebooks/old/tensorflow-model.ipynb +++ /dev/null @@ -1,438 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"\n", - "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", - "\n", - "import pickle\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "from sklearn.metrics import confusion_matrix, accuracy_score" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import keras\n", - "import tensorflow as tf" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from keras.layers import Input, Dense\n", - "from keras.models import Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "elem_labels = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0]\n", - "class_labels = [0., -211., -13., -11., 1., 2., 11.0, 13., 22., 130., 211.]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Xs = []\n", - "ys = []\n", - "for iev in range(1, 60):\n", - " for ifile in range(10):\n", - " data = pickle.load(open(\"../data/TTbar_14TeV_TuneCUETP8M1_cfi/raw/pfntuple_{}_{}.pkl\".format(iev, ifile), \"rb\"), encoding='iso-8859-1')\n", - " Xelem = data[0][\"Xelem\"]\n", - " ygen = data[0][\"ygen\"]\n", - " Xelem[:, 0] = [int(elem_labels.index(i)) for i in Xelem[:, 0]]\n", - " ygen[:, 0] = [int(class_labels.index(i)) for i in ygen[:, 0]]\n", - " Xs += [Xelem.copy()]\n", - " ys += [ygen.copy()]\n", - " del data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Xs_training = Xs[:500]\n", - "ys_training = ys[:500]\n", - "\n", - "Xs_testing = Xs[500:]\n", - "ys_testing = ys[500:]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def dist(A,B):\n", - " na = 
tf.reduce_sum(tf.square(A), 1)\n", - " nb = tf.reduce_sum(tf.square(B), 1)\n", - "\n", - " na = tf.reshape(na, [-1, 1])\n", - " nb = tf.reshape(nb, [1, -1])\n", - " D = tf.sqrt(tf.maximum(na - 2*tf.matmul(A, B, False, True) + nb, 0.0))\n", - " return D" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class InputEncoding(tf.keras.layers.Layer):\n", - " def __init__(self, num_input_classes):\n", - " super(InputEncoding, self).__init__()\n", - " self.num_input_classes = num_input_classes\n", - " \n", - " def call(self, X):\n", - " Xid = tf.one_hot(tf.cast(X[:, 0], tf.int32), self.num_input_classes)\n", - " Xprop = X[:, 1:]\n", - " return tf.concat([Xid, Xprop], axis=-1)\n", - " \n", - "class Distance(tf.keras.layers.Layer):\n", - "\n", - " def __init__(self, *args, **kwargs):\n", - " super(Distance, self).__init__(*args, **kwargs)\n", - "\n", - " def call(self, inputs):\n", - " \n", - " #compute the pairwise distance matrix between the vectors defined by the first two components of the input array\n", - " D = dist(inputs[:, :2], inputs[:, :2])\n", - " \n", - " #closer nodes have higher weight, could also consider exp(-D) or such here\n", - " D = tf.math.divide_no_nan(1.0, D)\n", - " \n", - " #turn edges on or off based on activation with an arbitrary shift parameter\n", - " D = tf.keras.activations.sigmoid(D - 5.0)\n", - " \n", - " #keep only upper triangular matrix (unidirectional edges)\n", - " D = tf.linalg.band_part(D, 0, -1)\n", - " return D\n", - " \n", - "class GraphConv(tf.keras.layers.Dense):\n", - " def __init__(self, *args, **kwargs):\n", - " super(GraphConv, self).__init__(*args, **kwargs)\n", - " \n", - " def call(self, inputs, adj):\n", - " W = self.weights[0]\n", - " b = self.weights[1]\n", - " support = tf.matmul(inputs, W) + b\n", - " out = tf.matmul(adj, support)\n", - " return self.activation(out)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class PFNet(tf.keras.Model):\n", - " \n", - " def __init__(self, activation=tf.keras.activations.relu, hidden_dim=256):\n", - " super(PFNet, self).__init__()\n", - " self.enc = InputEncoding(len(elem_labels))\n", - " self.layer_input1 = tf.keras.layers.Dense(hidden_dim, activation=activation, name=\"input1\")\n", - " self.layer_input2 = tf.keras.layers.Dense(hidden_dim, activation=activation, name=\"input2\")\n", - " self.layer_input3 = tf.keras.layers.Dense(hidden_dim, activation=activation, name=\"input3\")\n", - " \n", - " self.layer_dist = Distance(name=\"distance\")\n", - " self.layer_conv = GraphConv(hidden_dim, activation=activation, name=\"conv\")\n", - " \n", - " self.layer_id1 = tf.keras.layers.Dense(hidden_dim, activation=activation, name=\"id1\")\n", - " self.layer_id2 = tf.keras.layers.Dense(hidden_dim, activation=activation, name=\"id2\")\n", - " self.layer_id3 = tf.keras.layers.Dense(hidden_dim, activation=activation, name=\"id3\")\n", - " self.layer_id = tf.keras.layers.Dense(len(class_labels), activation=\"linear\", name=\"out_id\")\n", - " \n", - " self.layer_momentum1 = tf.keras.layers.Dense(hidden_dim, activation=activation, name=\"momentum1\")\n", - " self.layer_momentum2 = tf.keras.layers.Dense(hidden_dim, activation=activation, name=\"momentum2\")\n", - " self.layer_momentum3 = tf.keras.layers.Dense(hidden_dim, activation=activation, name=\"momentum3\")\n", - " self.layer_momentum = tf.keras.layers.Dense(3, activation=\"linear\", name=\"out_momentum\")\n", - " \n", - " def 
call(self, inputs):\n", - " x = self.enc(inputs)\n", - " x = self.layer_input1(x)\n", - " x = self.layer_input2(x)\n", - " x = self.layer_input3(x)\n", - " \n", - " dm = self.layer_dist(x)\n", - " x = self.layer_conv(x, dm)\n", - " \n", - " a = self.layer_id1(x)\n", - " a = self.layer_id2(a)\n", - " a = self.layer_id3(a)\n", - " out_id = self.layer_id(a)\n", - " \n", - " b = self.layer_momentum1(x)\n", - " b = self.layer_momentum2(b)\n", - " b = self.layer_momentum3(b)\n", - " out_momentum = self.layer_momentum(b)\n", - " \n", - " return out_id, out_momentum, dm" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model = PFNet(hidden_dim=256)\n", - "opt = tf.keras.optimizers.Adam(lr=0.001)\n", - "ret = model(Xs[0]);" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.hist(ret[2].numpy().flatten(), bins=np.linspace(0,1,100));" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def loss(model, inputs, targets, epoch, training):\n", - " pred_id, pred_momentum, _ = model(inputs)\n", - " pred_inds = tf.argmax(pred_id, axis=-1)\n", - " #mask_correct = (pred_inds==targets[:, 0])\n", - " l1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(\n", - " tf.one_hot(tf.cast(targets[:, 0], tf.int32), depth=len(class_labels)), pred_id))\n", - " l0 = 0*tf.reduce_mean(tf.keras.losses.mse(targets[:, 1:4], pred_momentum[:]))\n", - " return l1 + l0" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def grad(model, inputs, targets, epoch):\n", - " with tf.GradientTape() as tape:\n", - " loss_value = loss(model, inputs, targets, epoch, training=True)\n", - " return loss_value, tape.gradient(loss_value, model.trainable_variables)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "accum_vars = [tf.Variable(tf.zeros_like(tv.initialized_value()), trainable=False) for tv in model.trainable_variables]\n", - "\n", - "for i in range(10):\n", - " loss_tot = 0.0\n", - " \n", - " ibatch = 0\n", - " \n", - " true_ids = []\n", - " pred_ids = []\n", - " \n", - " for Xelem, ygen in zip(Xs_training, ys_training):\n", - " loss_value, grads = grad(model, Xelem, ygen, i)\n", - " for igrad, gv in enumerate(grads):\n", - " accum_vars[igrad].assign_add(gv)\n", - " \n", - " loss_tot += loss_value.numpy()\n", - " if ibatch == 5:\n", - " opt.apply_gradients([(accum_vars[igrad] / 5, model.trainable_variables[igrad]) for igrad in range(len(accum_vars))])\n", - " ibatch = 0\n", - " for igrad in range(len(accum_vars)):\n", - " accum_vars[igrad].assign(tf.zeros_like(accum_vars[igrad]))\n", - "\n", - " pred_id, pred_momentum, dm = model(Xelem)\n", - " pred_ids += [tf.argmax(pred_id, axis=-1).numpy()]\n", - " true_ids += [ygen[:, 0]]\n", - " ibatch += 1\n", - " true_ids = np.concatenate(true_ids)\n", - " pred_ids = np.concatenate(pred_ids)\n", - " \n", - " true_ids_testing = []\n", - " pred_ids_testing = []\n", - " loss_tot_testing = 0.0\n", - " for Xelem, ygen in zip(Xs_testing, ys_testing):\n", - " pred_id, pred_momentum, _ = model(Xelem)\n", - " true_ids_testing += [ygen[:, 0]]\n", - " pred_ids_testing += [tf.argmax(pred_id, axis=-1).numpy()]\n", - " true_ids_testing = np.concatenate(true_ids_testing)\n", - " pred_ids_testing = np.concatenate(pred_ids_testing)\n", - "\n", - " acc = accuracy_score(true_ids, 
pred_ids)\n", - " acc_testing = accuracy_score(true_ids_testing, pred_ids_testing)\n", - " print(\"epoch={epoch} loss={loss:.2f} acc={acc:.4f}/{acc_testing:.4f}\".format(epoch=i, loss=loss_tot, acc=acc, acc_testing=acc_testing))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_confusion_matrix(cm,\n", - " target_names,\n", - " title='Confusion matrix',\n", - " cmap=None,\n", - " normalize=True):\n", - " \"\"\"\n", - " given a sklearn confusion matrix (cm), make a nice plot\n", - "\n", - " Arguments\n", - " ---------\n", - " cm: confusion matrix from sklearn.metrics.confusion_matrix\n", - "\n", - " target_names: given classification classes such as [0, 1, 2]\n", - " the class names, for example: ['high', 'medium', 'low']\n", - "\n", - " title: the text to display at the top of the matrix\n", - "\n", - " cmap: the gradient of the values displayed from matplotlib.pyplot.cm\n", - " see http://matplotlib.org/examples/color/colormaps_reference.html\n", - " plt.get_cmap('jet') or plt.cm.Blues\n", - "\n", - " normalize: If False, plot the raw numbers\n", - " If True, plot the proportions\n", - "\n", - " Usage\n", - " -----\n", - " plot_confusion_matrix(cm = cm, # confusion matrix created by\n", - " # sklearn.metrics.confusion_matrix\n", - " normalize = True, # show proportions\n", - " target_names = y_labels_vals, # list of names of the classes\n", - " title = best_estimator_name) # title of graph\n", - "\n", - " Citiation\n", - " ---------\n", - " http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html\n", - "\n", - " \"\"\"\n", - " import matplotlib.pyplot as plt\n", - " import numpy as np\n", - " import itertools\n", - "\n", - " accuracy = np.trace(cm) / float(np.sum(cm))\n", - " misclass = 1 - accuracy\n", - "\n", - " if cmap is None:\n", - " cmap = plt.get_cmap('Blues')\n", - "\n", - " if normalize:\n", - " cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", - " cm[np.isnan(cm)] = 0.0\n", - "\n", - " plt.figure(figsize=(8, 6))\n", - " plt.imshow(cm, interpolation='nearest', cmap=cmap)\n", - " plt.title(title)\n", - " plt.colorbar()\n", - "\n", - " if target_names is not None:\n", - " tick_marks = np.arange(len(target_names))\n", - " plt.xticks(tick_marks, target_names, rotation=45)\n", - " plt.yticks(tick_marks, target_names)\n", - "\n", - " thresh = cm.max() / 1.5 if normalize else cm.max() / 2\n", - " for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n", - " if normalize:\n", - " plt.text(j, i, \"{:0.2f}\".format(cm[i, j]),\n", - " horizontalalignment=\"center\",\n", - " color=\"white\" if cm[i, j] > thresh else \"black\")\n", - " else:\n", - " plt.text(j, i, \"{:,}\".format(cm[i, j]),\n", - " horizontalalignment=\"center\",\n", - " color=\"white\" if cm[i, j] > thresh else \"black\")\n", - "\n", - "\n", - " plt.ylabel('True label')\n", - " plt.xlim(-1, len(target_names))\n", - " plt.ylim(-1, len(target_names))\n", - " plt.xlabel('Predicted label\\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))\n", - " plt.tight_layout()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "true_ids_testing = np.array(true_ids_testing)\n", - "pred_ids_testing = np.array(pred_ids_testing)\n", - "msk = (true_ids_testing!=0) & (pred_ids_testing!=0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cm = 
confusion_matrix(true_ids_testing[msk], pred_ids_testing[msk], range(1,len(class_labels)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_confusion_matrix(cm, class_labels[1:], normalize=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/old/test_delphes.ipynb b/notebooks/old/test_delphes.ipynb deleted file mode 100644 index 8cee079ba..000000000 --- a/notebooks/old/test_delphes.ipynb +++ /dev/null @@ -1,705 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "os.chdir(\"../delphes\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "from torch_geometric.data import Dataset, DataLoader\n", - "import train\n", - "from sklearn.metrics import confusion_matrix\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_confusion_matrix(cm,\n", - " target_names,\n", - " title='Confusion matrix',\n", - " cmap=None,\n", - " normalize=True):\n", - " \"\"\"\n", - " given a sklearn confusion matrix (cm), make a nice plot\n", - "\n", - " Arguments\n", - " ---------\n", - " cm: confusion matrix from sklearn.metrics.confusion_matrix\n", - "\n", - " target_names: given classification classes such as [0, 1, 2]\n", - " the class names, for example: ['high', 'medium', 'low']\n", - "\n", - " title: the text to display at the top of the matrix\n", - "\n", - " cmap: the gradient of the values displayed from matplotlib.pyplot.cm\n", - " see http://matplotlib.org/examples/color/colormaps_reference.html\n", - " plt.get_cmap('jet') or plt.cm.Blues\n", - "\n", - " normalize: If False, plot the raw numbers\n", - " If True, plot the proportions\n", - "\n", - " Usage\n", - " -----\n", - " plot_confusion_matrix(cm = cm, # confusion matrix created by\n", - " # sklearn.metrics.confusion_matrix\n", - " normalize = True, # show proportions\n", - " target_names = y_labels_vals, # list of names of the classes\n", - " title = best_estimator_name) # title of graph\n", - "\n", - " Citiation\n", - " ---------\n", - " http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html\n", - "\n", - " \"\"\"\n", - " import matplotlib.pyplot as plt\n", - " import numpy as np\n", - " import itertools\n", - "\n", - " accuracy = np.trace(cm) / float(np.sum(cm))\n", - " misclass = 1 - accuracy\n", - "\n", - " if cmap is None:\n", - " cmap = plt.get_cmap('Blues')\n", - "\n", - " if normalize:\n", - " cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", - " cm[np.isnan(cm)] = 0.0\n", - "\n", - " plt.figure(figsize=(8, 6))\n", - " plt.imshow(cm, interpolation='nearest', cmap=cmap)\n", - " plt.title(title)\n", - " plt.colorbar()\n", - "\n", - " if target_names is not None:\n", - " tick_marks = np.arange(len(target_names))\n", - " 
plt.xticks(tick_marks, target_names, rotation=45)\n", - " plt.yticks(tick_marks, target_names)\n", - "\n", - " thresh = cm.max() / 1.5 if normalize else cm.max() / 2\n", - " for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n", - " if normalize:\n", - " plt.text(j, i, \"{:0.2f}\".format(cm[i, j]),\n", - " horizontalalignment=\"center\",\n", - " color=\"white\" if cm[i, j] > thresh else \"black\")\n", - " else:\n", - " plt.text(j, i, \"{:,}\".format(cm[i, j]),\n", - " horizontalalignment=\"center\",\n", - " color=\"white\" if cm[i, j] > thresh else \"black\")\n", - "\n", - "\n", - " plt.ylabel('True label')\n", - " plt.xlim(-1, len(target_names))\n", - " plt.ylim(-1, len(target_names))\n", - " plt.xlabel('Predicted label\\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))\n", - " plt.tight_layout()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "device = torch.device(\"cuda\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ds = train.DelphesDataset(\".\", 5000)\n", - "ds.raw_dir = \"raw2\"\n", - "ds.processed_dir = \"processed2\"\n", - "ds = torch.utils.data.Subset(ds, np.arange(start=4000, stop=5000))\n", - "d = DataLoader(ds, batch_size=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model = train.PFNet(10, 512).to(device=device)\n", - "model.load_state_dict(torch.load(\"model_20.pth\"))\n", - "model.eval()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_parameters = filter(lambda p: p.requires_grad, model.parameters())\n", - "params = sum([np.prod(p.size()) for p in model_parameters])\n", - "print(\"model has {:.2E} parameters\".format(params))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cms = []\n", - "cms2 = []\n", - "pred_ps = []\n", - "true_ps = []\n", - "pred_ps2 = []\n", - "true_ps2 = []\n", - "ncand_true = []\n", - "ncand_pred = []\n", - "\n", - "for _d in d:\n", - " _d = _d.to(device=device)\n", - " \n", - " cands_tower_id, cands_trk_id, cands_tower_p, cands_trk_p = model(_d)\n", - " \n", - " cands_tower_p[:, 0] = torch.exp(cands_tower_p[:, 0])\n", - " cands_trk_p[:, 0] = torch.exp(cands_trk_p[:, 0])\n", - " \n", - " pred_ids_tower = model.decode_ids(cands_tower_id)\n", - " true_ids_tower = _d.y_tower[:, 0]\n", - " \n", - " pred_ids_trk = model.decode_ids(cands_trk_id)\n", - " true_ids_trk = _d.y_trk[:, 0]\n", - " \n", - "# y_id_pred = torch.argmax(_pred_id, axis=-1)\n", - "# _pred_p[y_id_pred!=0, 0] = torch.exp(_pred_p[y_id_pred!=0, 0])\n", - "\n", - " cm = confusion_matrix(true_ids_trk.flatten().cpu(), pred_ids_trk.flatten().detach().cpu(),\n", - " labels=range(len(train.map_candid_to_numid)))\n", - " cm2 = confusion_matrix(true_ids_tower.flatten().cpu(), pred_ids_tower.flatten().detach().cpu(),\n", - " labels=range(len(train.map_candid_to_numid)))\n", - " \n", - " msk = (pred_ids_tower!=0) & (true_ids_tower!=0)\n", - " pred_ps += cands_tower_p[msk].detach().cpu()\n", - " true_ps += _d.y_tower[msk][:, 1:].detach().cpu()\n", - " \n", - " msk = (pred_ids_trk!=0) & (true_ids_trk!=0)\n", - " pred_ps2 += cands_trk_p[msk].detach().cpu()\n", - " true_ps2 += _d.y_trk[msk][:, 1:].detach().cpu()\n", - " \n", - " cms += [cm]\n", - " cms2 += [cm2]\n", - " \n", - " ncand_true += [int((true_ids_tower!=0).sum() 
+ (true_ids_trk!=0).sum())]\n", - " ncand_pred += [int((pred_ids_tower!=0).sum() + (pred_ids_trk!=0).sum())]\n", - "cm = sum(cms)\n", - "cm2 = sum(cms2)\n", - "pred_p = np.stack(pred_ps)\n", - "true_p = np.stack(true_ps)\n", - "pred_p2 = np.stack(pred_ps2)\n", - "true_p2 = np.stack(true_ps2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import networkx as nx\n", - "import torch_geometric" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adj_matrix = torch_geometric.utils.to_dense_adj(_d.edge_index).cpu().numpy()[0, :, :]\n", - "x = _d.x.cpu().numpy()\n", - "# y_p = _d.y_tower.cpu().numpy()\n", - "# y_p_pred = _pred_p[(y_id_pred!=0).cpu().numpy(), :].detach().cpu().numpy()\n", - "\n", - "# y_id = _d.y_id.cpu().numpy()\n", - "\n", - "colors = {\n", - " 0: \"r\",\n", - " 1: \"b\",\n", - "}\n", - "\n", - "g = nx.from_numpy_matrix(adj_matrix)\n", - "for i in range(len(x)):\n", - " g.nodes[i][\"color\"] = colors[x[i, 0]]\n", - " g.nodes[i][\"s\"] = np.abs(x[i, 4] + x[i, 3])\n", - " if x[i, 0] == 0:\n", - " g.nodes[i][\"eta\"] = x[i, 1]\n", - " g.nodes[i][\"phi\"] = x[i, 2]\n", - " elif x[i, 0] == 1:\n", - " g.nodes[i][\"eta\"] = x[i, 5]\n", - " g.nodes[i][\"phi\"] = x[i, 6]\n", - " \n", - "pos = {i: (g.nodes[i][\"eta\"], g.nodes[i][\"phi\"]) for i in range(len(g))}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10, 10))\n", - "plt.scatter(\n", - " x[:, 1],\n", - " x[:, 2],\n", - " s=np.abs(x[:, 4] + x[:, 3]),\n", - " label=\"det\")\n", - "\n", - "m = _d.y_tower[:, 0] != 0\n", - "plt.scatter(\n", - " _d.y_tower[m][:, 2].cpu(),\n", - " _d.y_tower[m][:, 3].cpu(),\n", - " s=_d.y_tower[m][:, 1].cpu(),\n", - " label=\"gen\")\n", - "\n", - "m = _d.y_trk[:, 0] != 0\n", - "plt.scatter(\n", - " _d.y_trk[m][:, 2].cpu(),\n", - " _d.y_trk[m][:, 3].cpu(),\n", - " s=_d.y_trk[m][:, 1].cpu(),\n", - " label=\"gen-trk\")\n", - "\n", - "plt.legend()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = plt.figure(figsize=(10, 10))\n", - "plt.title(\"input graph\")\n", - "edges = np.array(list(g.edges))\n", - "edges = list(edges[np.random.permutation(len(edges))][:500])\n", - "\n", - "nx.draw_networkx(g, pos,\n", - " node_size=[g.nodes[n][\"s\"] for n in g.nodes],\n", - " node_color=[g.nodes[n][\"color\"] for n in g.nodes],\n", - " with_labels=False, alpha=0.2, edgelist=edges)\n", - "plt.tick_params(left=True, bottom=True, labelleft=True, labelbottom=True)\n", - "plt.xlim(-6, 6)\n", - "plt.ylim(-4, 4)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(3*4, 4))\n", - "\n", - "ax = plt.subplot(1,3,1)\n", - "plt.title(\"input graph\")\n", - "nx.draw_networkx(g, pos,\n", - " node_size=[g.nodes[n][\"s\"] for n in g.nodes],\n", - " node_color=[g.nodes[n][\"color\"] for n in g.nodes],\n", - " with_labels=False, alpha=0.2, ax=ax)\n", - "ax.tick_params(left=True, bottom=True, labelleft=True, labelbottom=True)\n", - "plt.xlim(-6, 6)\n", - "plt.ylim(-4, 4)\n", - "\n", - "plt.subplot(1,3,2)\n", - "m = _d.y_tower[:, 0] != 0\n", - "plt.scatter(\n", - " _d.y_tower[m][:, 2].cpu(),\n", - " _d.y_tower[m][:, 3].cpu(),\n", - " s=_d.y_tower[m][:, 1].cpu(),\n", - " label=\"gen-tower\")\n", - "\n", - "m = pred_ids_tower != 0\n", - "plt.scatter(\n", - " 
cands_tower_p[m][:, 1].detach().cpu(),\n", - " cands_tower_p[m][:, 2].detach().cpu(),\n", - " s=cands_tower_p[m][:, 0].detach().cpu(), alpha=0.5, label=\"pred\")\n", - "plt.xlim(-6, 6)\n", - "plt.ylim(-4, 4)\n", - "plt.tight_layout()\n", - "plt.legend(loc=1)\n", - "\n", - "plt.subplot(1,3,3)\n", - "m = _d.y_trk[:, 0] != 0\n", - "plt.scatter(\n", - " _d.y_trk[m][:, 2].cpu(),\n", - " _d.y_trk[m][:, 3].cpu(),\n", - " s=_d.y_trk[m][:, 1].cpu(),\n", - " label=\"gen-trk\")\n", - "\n", - "m = pred_ids_trk != 0\n", - "plt.scatter(\n", - " cands_trk_p[m][:, 1].detach().cpu(),\n", - " cands_trk_p[m][:, 2].detach().cpu(),\n", - " s=cands_trk_p[m][:, 0].detach().cpu(), alpha=0.5, label=\"pred\")\n", - "plt.xlim(-6, 6)\n", - "plt.ylim(-4, 4)\n", - "plt.tight_layout()\n", - "plt.legend(loc=1)\n", - "\n", - "plt.savefig(\"test.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "target_names = [\"none\", \"211\", \"-211\", \"130\", \"22\", \"11\", \"-11\", \"13\", \"-13\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_confusion_matrix(cm, target_names, normalize=False)\n", - "plt.xlim(-0.5, cm.shape[0]-0.5)\n", - "plt.ylim(-0.5, cm.shape[0]-0.5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_confusion_matrix(cm2, target_names, normalize=False)\n", - "plt.xlim(-0.5, cm.shape[0]-0.5)\n", - "plt.ylim(-0.5, cm.shape[0]-0.5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "plt.xlim(0, 3000)\n", - "plt.ylim(0, 3000)\n", - "plt.plot([0,5000],[0,5000], color=\"black\")\n", - "plt.scatter(ncand_true, ncand_pred, marker=\".\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(6, 4))\n", - "b = np.linspace(0, 10, 100)\n", - "plt.hist(true_p[:, 0], bins=b, histtype=\"step\", lw=2);\n", - "plt.hist(pred_p[:, 0], bins=b, histtype=\"step\", lw=2);\n", - "plt.yscale(\"log\")\n", - "#plt.ylim(100,1e4)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(6, 4))\n", - "b = np.linspace(0, 10, 100)\n", - "plt.hist(true_p2[:, 0], bins=b, histtype=\"step\", lw=2);\n", - "plt.hist(pred_p2[:, 0], bins=b, histtype=\"step\", lw=2);\n", - "plt.yscale(\"log\")\n", - "#plt.ylim(100,1e4)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(6, 4))\n", - "b = np.linspace(0, 100, 100)\n", - "plt.hist(true_p[:, 0], bins=b, histtype=\"step\", lw=2);\n", - "plt.hist(pred_p[:, 0], bins=b, histtype=\"step\", lw=2);\n", - "plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(6, 4))\n", - "b = np.linspace(0, 100, 100)\n", - "plt.hist(true_p2[:, 0], bins=b, histtype=\"step\", lw=2);\n", - "plt.hist(pred_p2[:, 0], bins=b, histtype=\"step\", lw=2);\n", - "plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(6, 4))\n", - "b = np.linspace(-6, 6, 100)\n", - "plt.hist(true_p[:, 1], bins=b, histtype=\"step\", lw=2);\n", - "plt.hist(pred_p[:, 1], bins=b, histtype=\"step\", lw=2);" - ] - }, - 
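Note on the deleted test_delphes notebook above and below: it repeats a single comparison pattern once per kinematic component (momentum, eta, phi; separately for towers and tracks): overlaid step histograms of the matched true and predicted values, sometimes with a log y-axis. A minimal, self-contained sketch of that pattern is given here, with toy arrays standing in for the notebook's matched true_p / pred_p columns; the helper name compare_true_pred is illustrative and not from the repository.

import numpy as np
import matplotlib.pyplot as plt

def compare_true_pred(true_vals, pred_vals, bins, xlabel, log=False):
    # Overlay the matched true and predicted distributions as step histograms,
    # mirroring the per-component comparison cells in the deleted notebook.
    plt.figure(figsize=(6, 4))
    plt.hist(true_vals, bins=bins, histtype="step", lw=2, label="true")
    plt.hist(pred_vals, bins=bins, histtype="step", lw=2, label="predicted")
    if log:
        plt.yscale("log")
    plt.xlabel(xlabel)
    plt.ylabel("matched particles")
    plt.legend()

# Toy stand-ins for one matched (true, predicted) column pair.
rng = np.random.default_rng(0)
true_vals = rng.normal(0.0, 2.0, 10000)
pred_vals = true_vals + rng.normal(0.0, 0.3, 10000)
compare_true_pred(true_vals, pred_vals, np.linspace(-6, 6, 100), r"$\eta$")

Collapsing the repeated cells into one helper like this keeps the binning and labels consistent across components, which is the main readability cost of the cell-per-plot style being removed in this diff.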
{ - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(6, 4))\n", - "b = np.linspace(-6, 6, 100)\n", - "plt.hist(true_p2[:, 1], bins=b, histtype=\"step\", lw=2);\n", - "plt.hist(pred_p2[:, 1], bins=b, histtype=\"step\", lw=2);" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(6, 4))\n", - "b = np.linspace(-4, 4, 100)\n", - "plt.hist(true_p[:, 2], bins=b, histtype=\"step\", lw=2);\n", - "plt.hist(pred_p[:, 2], bins=b, histtype=\"step\", lw=2);" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(6, 4))\n", - "b = np.linspace(-4, 4, 100)\n", - "plt.hist(true_p2[:, 2], bins=b, histtype=\"step\", lw=2);\n", - "plt.hist(pred_p2[:, 2], bins=b, histtype=\"step\", lw=2);" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "rp = np.random.permutation(len(pred_p))[:5000]\n", - "rp2 = np.random.permutation(len(pred_p2))[:5000]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.corrcoef(true_p[:, 0], pred_p[:, 0])[0,1]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.corrcoef(true_p2[:, 0], pred_p2[:, 0])[0,1]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10,10))\n", - "plt.scatter(true_p[rp, 0], pred_p[rp, 0], marker=\".\", alpha=0.5)\n", - "plt.xlim(0, 10)\n", - "plt.ylim(0, 10)\n", - "plt.plot([0, 10], [0, 10], color=\"black\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10,10))\n", - "plt.scatter(true_p[rp, 0], pred_p[rp, 0], marker=\".\", alpha=0.5)\n", - "plt.xlim(0.1, 1000)\n", - "plt.ylim(0.1, 1000)\n", - "plt.plot([0.1, 1000], [0.1, 1000], color=\"black\")\n", - "plt.xscale(\"log\")\n", - "plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10,10))\n", - "plt.scatter(true_p2[rp2, 0], pred_p2[rp2, 0], marker=\".\", alpha=0.5)\n", - "plt.xlim(0.1, 1000)\n", - "plt.ylim(0.1, 1000)\n", - "plt.plot([0.1, 1000], [0.1, 1000], color=\"black\")\n", - "plt.xscale(\"log\")\n", - "plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10,10))\n", - "plt.scatter(true_p[rp, 1], pred_p[rp, 1], marker=\".\", alpha=0.5)\n", - "plt.plot([-7, 7], [-7, 7], color=\"black\")\n", - "plt.xlim(-7,7)\n", - "plt.ylim(-7,7)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10,10))\n", - "plt.scatter(true_p[rp, 2], pred_p[rp, 2], marker=\".\", alpha=0.5)\n", - "plt.plot([-4, 4], [-4, 4], color=\"black\")\n", - "plt.xlim(-4,4)\n", - "plt.ylim(-4,4)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "msk = _d.x[:, 0] == 1\n", - "plt.figure(figsize=(4,4))\n", - "plt.scatter(_d.x[msk, 1].cpu(), _d.y_trk[:, 2].cpu(), marker=\".\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - 
"plt.figure(figsize=(4,4))\n", - "plt.scatter(_d.x[msk, 2].cpu(), _d.y_trk[:, 3].cpu(), marker=\".\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "msk = _d.x[:, 0] == 0\n", - "plt.figure(figsize=(4,4))\n", - "plt.scatter(_d.x[msk, 1].cpu(), _d.y_tower[:, 2].cpu(), marker=\".\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "msk = _d.x[:, 0] == 0\n", - "plt.figure(figsize=(4,4))\n", - "plt.scatter(_d.x[msk, 2].cpu(), _d.y_tower[:, 3].cpu(), marker=\".\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "msk = _d.x[:, 0] == 0\n", - "plt.figure(figsize=(4,4))\n", - "plt.scatter(_d.x[msk, 3].cpu() + _d.x[msk, 4].cpu(), _d.y_tower[:, 1].cpu(), marker=\".\")\n", - "plt.xscale(\"log\")\n", - "plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "msk = _d.x[:, 0] == 1\n", - "plt.figure(figsize=(4,4))\n", - "plt.scatter(_d.x[msk, 4].cpu(), _d.y_trk[:, 1].cpu(), marker=\".\")\n", - "plt.xscale(\"log\")\n", - "plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/old/test_end2end.ipynb b/notebooks/old/test_end2end.ipynb deleted file mode 100644 index 8ae7950b9..000000000 --- a/notebooks/old/test_end2end.ipynb +++ /dev/null @@ -1,1257 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import sklearn\n", - "import sklearn.metrics\n", - "\n", - "import numpy as np\n", - "import matplotlib\n", - "import matplotlib.pyplot as plt\n", - "import pandas\n", - "import mplhep\n", - "\n", - "import sys\n", - "sys.path += [\"../test\"]\n", - "\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from tf_model import class_labels" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def cms_label(x0=0.12, x1=0.23, x2=0.67, y=0.90):\n", - " plt.figtext(x0, y,'CMS',fontweight='bold', wrap=True, horizontalalignment='left', fontsize=12)\n", - " plt.figtext(x1, y,'Simulation Preliminary', style='italic', wrap=True, horizontalalignment='left', fontsize=10)\n", - " plt.figtext(x2, y,'Run 3 (14 TeV)', wrap=True, horizontalalignment='left', fontsize=10)\n", - "\n", - "def sample_label(ax, y=0.98):\n", - " plt.text(0.03, y, \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\", va=\"top\", ha=\"left\", size=10, transform=ax.transAxes)\n", - " \n", - "pid_to_text = {\n", - " 211: r\"charged hadrons ($\\pi^-$, ...)\",\n", - " -211: r\"charged hadrons ($\\pi^+$, ...)\",\n", - " 130: r\"neutral hadrons (K, ...)\",\n", - " 1: r\"HF hadron (EM)\",\n", - " 2: r\"HF-HAD hadron (HAD)\",\n", - "}\n", - "def particle_label(ax, pid):\n", - " plt.text(0.03, 0.92, pid_to_text[pid], va=\"top\", ha=\"left\", size=10, 
transform=ax.transAxes)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_confusion_matrix(cm,\n", - " target_names,\n", - " title='Confusion matrix',\n", - " cmap=None,\n", - " normalize=True):\n", - " import matplotlib.pyplot as plt\n", - " import numpy as np\n", - " import itertools\n", - "\n", - " accuracy = np.trace(cm) / float(np.sum(cm))\n", - " misclass = 1 - accuracy\n", - "\n", - " if cmap is None:\n", - " cmap = plt.get_cmap('Blues')\n", - "\n", - " if normalize:\n", - " cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", - " cm[np.isnan(cm)] = 0.0\n", - "\n", - " fig = plt.figure(figsize=(8, 6))\n", - " ax = plt.axes()\n", - " plt.imshow(cm, interpolation='nearest', cmap=cmap)\n", - " plt.title(title)\n", - " plt.colorbar()\n", - "\n", - " if target_names is not None:\n", - " tick_marks = np.arange(len(target_names))\n", - " plt.xticks(tick_marks, target_names, rotation=45)\n", - " plt.yticks(tick_marks, target_names)\n", - "\n", - " thresh = cm.max() / 1.5 if normalize else cm.max() / 2\n", - " for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n", - " if normalize:\n", - " plt.text(j, i, \"{:0.2f}\".format(cm[i, j]),\n", - " horizontalalignment=\"center\",\n", - " color=\"white\" if cm[i, j] > thresh else \"black\")\n", - " else:\n", - " plt.text(j, i, \"{:0.1f}\".format(cm[i, j]),\n", - " horizontalalignment=\"center\",\n", - " color=\"white\" if cm[i, j] > thresh else \"black\")\n", - "\n", - "\n", - " plt.ylabel('True label')\n", - " plt.xlim(-1, len(target_names))\n", - " plt.ylim(-1, len(target_names))\n", - " plt.xlabel('Predicted label\\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))\n", - " plt.tight_layout()\n", - " \n", - " return fig, ax" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!rm *.pdf" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def deltaphi(phi1, phi2):\n", - " return np.fmod(phi1 - phi2 + np.pi, 2*np.pi) - np.pi" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# model = \"PFNet7_TTbar_14TeV_TuneCUETP8M1_cfi_gen__npar_3440145__cfg_4ff944b45d__user_jpata__ntrain_3600__lr_1e-05__1587059029\"\n", - "# epoch = 45\n", - "# big_df = pandas.read_pickle(\"../data/{}/epoch_{}/df.pkl.bz2\".format(model, epoch))\n", - "# #big_df = pandas.read_pickle(\"../test/TTbar_14TeV_TuneCUETP8M1_cfi.pkl.bz2\")\n", - "\n", - "big_df = pandas.read_pickle(\"/storage/user/jpata/particleflow/experiments/run_1/df_1.pkl.bz2\")\n", - "big_df[\"pred_phi\"] = np.arctan2(np.sin(big_df[\"pred_phi\"]), np.cos(big_df[\"pred_phi\"]))\n", - "\n", - "# big_df2 = pandas.read_pickle(\"../experiments/run_2/df_1.pkl.bz2\")\n", - "# big_df2[\"pred_phi\"] = np.arctan2(np.sin(big_df2[\"pred_phi\"]), np.cos(big_df2[\"pred_phi\"]))\n", - "\n", - "targettype = \"cand\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "big_df.loc[big_df[\"pred_pid\"]==211, [\"target_e\", \"pred_e\"]]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#msk = (big_df[\"target_pid\"] != 0) & ((big_df[\"pred_pid\"] != 0))\n", - "msk = np.ones(len(big_df), dtype=np.bool)\n", - "confusion2 = sklearn.metrics.confusion_matrix(\n", - " big_df[\"target_pid\"][msk], 
big_df[\"pred_pid\"][msk],\n", - " labels=class_labels\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig, ax = plot_confusion_matrix(\n", - " cm=100.0*confusion2/np.sum(confusion2), target_names=[int(x) for x in class_labels], normalize=False\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig, ax = plot_confusion_matrix(\n", - " cm=confusion2, target_names=[int(x) for x in class_labels], normalize=True\n", - ")\n", - "\n", - "acc = sklearn.metrics.accuracy_score(big_df[\"target_pid\"][msk], big_df[\"pred_pid\"][msk])\n", - "plt.title(\"\")\n", - "#plt.title(\"ML-PF, accuracy={:.2f}\".format(acc))\n", - "plt.ylabel(\"reco PF candidate PID\\nassociated to input PFElement\")\n", - "plt.xlabel(\"predicted PID\\nML-PF candidate,\\naccuracy: {:.2f}\".format(acc))\n", - "cms_label(x0=0.20, x1=0.26, y=0.95)\n", - "sample_label(ax, y=0.995)\n", - "plt.savefig(\"confusion_mlpf.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins, counts = np.unique(big_df.loc[big_df[\"target_pid\"]!=0, \"target_pid\"], return_counts=True)\n", - "bins, counts2 = np.unique(big_df.loc[big_df[\"pred_pid\"]!=0, \"pred_pid\"], return_counts=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(4,4))\n", - "ax = plt.axes()\n", - "xs = np.arange(len(counts))\n", - "plt.bar(xs, counts/500.0, width=0.4, label=\"offline PF\")\n", - "plt.bar(xs+0.4, counts2/500.0, width=0.4, label=\"ML-PF\")\n", - "plt.xticks(xs+0.2, bins);\n", - "plt.ylabel(\"average number of particles per event\")\n", - "plt.xlabel(\"particle PID\")\n", - "plt.ylim(0,2000)\n", - "plt.legend(loc=\"best\", frameon=False)\n", - "cms_label()\n", - "sample_label(ax)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "num_cands = []\n", - "num_preds = []\n", - "\n", - "sum_e_true = []\n", - "sum_e_pred = []\n", - "\n", - "for k, v in big_df.groupby(\"iev\"):\n", - " \n", - " num_cand = 0\n", - " num_pred = 0\n", - " for pid in [211, -211, 130, 22, 11, -11, 13, -13, 1, 2]:\n", - " if pid == 0:\n", - " continue\n", - " num_cand += np.sum(v[\"target_pid\"] == pid)\n", - " num_pred += np.sum(v[\"pred_pid\"] == pid)\n", - " num_cands += [num_cand]\n", - " num_preds += [num_pred]\n", - " \n", - " sum_e_true += [np.sum(v[\"target_e\"])/len(v)]\n", - " sum_e_pred += [np.sum(v[\"pred_e\"])/len(v)]\n", - " \n", - "sum_e_true = np.array(sum_e_true)\n", - "sum_e_pred = np.array(sum_e_pred)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(4,4))\n", - "min_num = 1000\n", - "max_num = 6000\n", - "hist = np.histogram2d(num_cands, num_preds, bins=(np.linspace(min_num, max_num,100), np.linspace(min_num, max_num,100)))\n", - "mplhep.hist2dplot(hist[0], hist[1], hist[2], cbar=False, cmap=\"Blues\")\n", - "plt.plot([min_num, max_num], [min_num, max_num], ls=\"--\", lw=0.5, color=\"black\")\n", - "\n", - "#plt.xlabel(\"True hadron multiplicity $|\\eta| < 3.0$\")\n", - "#plt.ylabel(\"Predicted hadron multiplicity $|\\eta| < 3.0$\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def get_eff(df, pid):\n", - " v0 = np.sum(df==pid)\n", 
- " return v0 / len(df), np.sqrt(v0)/len(df)\n", - "\n", - "def get_fake(df, pid):\n", - " v0 = np.sum(df!=pid)\n", - " return v0 / len(df), np.sqrt(v0)/len(df)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_E_reso(pid, v0, msk_true, msk_pred, msk_both, bins):\n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " hist = np.histogram2d(v0[msk_both, 0], v0[msk_both, 1], bins=(bins[\"E_val\"], bins[\"E_val\"]))\n", - " mplhep.hist2dplot(hist[0], hist[1], hist[2], cmap=\"Blues\", cbar=False);\n", - " plt.xlabel(bins[\"true_val\"] + \" \" + bins[\"E_xlabel\"])\n", - " plt.ylabel(bins[\"pred_val\"]+ \" \" + bins[\"E_xlabel\"])\n", - " cms_label()\n", - " sample_label(ax)\n", - " particle_label(ax, pid)\n", - " plt.plot(\n", - " [bins[\"E_val\"][0], bins[\"E_val\"][-1]],\n", - " [bins[\"E_val\"][0], bins[\"E_val\"][-1]],\n", - " color=\"black\", ls=\"--\", lw=0.5)\n", - " plt.savefig(\"energy_2d_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n", - " \n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " plt.hist(v0[msk_true, 0], bins=bins[\"E_val\"], density=1.0, histtype=\"step\", lw=2, label=bins[\"true_val\"]);\n", - " plt.hist(v0[msk_pred, 1], bins=bins[\"E_val\"], density=1.0, histtype=\"step\", lw=2, label=bins[\"pred_val\"]);\n", - " plt.xlabel(bins[\"E_xlabel\"])\n", - " plt.ylabel(\"number of particles\\n(normalized, a.u.)\")\n", - " plt.legend(frameon=False)\n", - " cms_label()\n", - " sample_label(ax)\n", - " particle_label(ax, pid)\n", - " ax.set_ylim(ax.get_ylim()[0], 1.5*ax.get_ylim()[1])\n", - " plt.savefig(\"energy_hist_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n", - " \n", - " ax.set_ylim(ax.get_ylim()[0], 1.2*ax.get_ylim()[1])\n", - "\n", - " res = (v0[msk_both, 1] - v0[msk_both, 0])/v0[msk_both, 0]\n", - " res[np.isnan(res)] = -1\n", - "\n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " ax.text(0.98, 0.98, \"avg. 
$\\Delta E / E$\\n$%.2f \\pm %.2f$\"%(np.mean(res), np.std(res)), transform=ax.transAxes, ha=\"right\", va=\"top\")\n", - " plt.hist(res, bins=bins[\"E_res\"], density=1.0);\n", - " plt.xlabel(\"$\\Delta E / E$\")\n", - " plt.ylabel(\"number of particles\\n(normalized, a.u.)\")\n", - " cms_label()\n", - " sample_label(ax)\n", - " particle_label(ax, pid)\n", - " plt.savefig(\"energy_ratio_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n", - " \n", - " #efficiency vs fake rate\n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " big_df[\"bins_target_e\"] = np.searchsorted(bins[\"E_val\"], big_df[\"target_e\"])\n", - " big_df[\"bins_pred_e\"] = np.searchsorted(bins[\"E_val\"], big_df[\"pred_e\"])\n", - "\n", - " vals_eff = big_df[(big_df[\"target_pid\"]==pid)].groupby(\"bins_target_e\")[\"pred_pid\"].apply(get_eff, pid)\n", - " vals_fake = big_df[(big_df[\"pred_pid\"]==pid)].groupby(\"bins_pred_e\")[\"target_pid\"].apply(get_fake, pid)\n", - "\n", - " out_eff = np.zeros((len(bins[\"E_val\"]), 2))\n", - " out_fake = np.zeros((len(bins[\"E_val\"]), 2))\n", - " for ib in range(len(bins[\"E_val\"])):\n", - " if ib in vals_eff.keys():\n", - " out_eff[ib, 0] = vals_eff[ib][0]\n", - " out_eff[ib, 1] = vals_eff[ib][1]\n", - " if ib in vals_fake.keys():\n", - " out_fake[ib, 0] = vals_fake[ib][0]\n", - " out_fake[ib, 1] = vals_fake[ib][1]\n", - "\n", - " cms_label()\n", - " sample_label(ax)\n", - " particle_label(ax, pid)\n", - "\n", - " plt.errorbar(bins[\"E_val\"], out_eff[:, 0], out_eff[:, 1], marker=\".\", lw=0, elinewidth=1.0, color=\"green\", label=\"efficiency\")\n", - " plt.ylabel(\"efficiency\\nN(pred|true) / N(true)\")\n", - " ax.set_ylim(0, 1.5)\n", - " plt.xlabel(bins[\"E_xlabel\"])\n", - "\n", - " ax2 = ax.twinx()\n", - " col = \"red\"\n", - " plt.errorbar(bins[\"E_val\"], out_fake[:, 0], out_fake[:, 1], marker=\".\", lw=0, elinewidth=1.0, color=col, label=\"fake rate\")\n", - " plt.ylabel(\"fake rate\\nN(true|pred) / N(pred)\")\n", - " plt.xlabel(bins[\"E_xlabel\"])\n", - " ax2.set_ylim(0, 1.5)\n", - " lines, labels = ax.get_legend_handles_labels()\n", - " lines2, labels2 = ax2.get_legend_handles_labels()\n", - " ax2.legend(lines + lines2, labels + labels2, loc=0, frameon=False)\n", - " plt.savefig(\"energy_eff_fake_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_eta_reso(pid, v0, msk_true, msk_pred, msk_both, bins):\n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " hist = np.histogram2d(v0[msk_both, 0], v0[msk_both, 1], bins=(bins[\"eta_val\"], bins[\"eta_val\"]))\n", - " mplhep.hist2dplot(hist[0], hist[1], hist[2], cmap=\"Blues\", cbar=False);\n", - " plt.xlabel(bins[\"true_val\"] + \" \" + bins[\"eta_xlabel\"])\n", - " plt.ylabel(bins[\"pred_val\"]+ \" \" + bins[\"eta_xlabel\"])\n", - " cms_label()\n", - " sample_label(ax)\n", - " particle_label(ax, pid)\n", - " plt.plot(\n", - " [bins[\"eta_val\"][0], bins[\"eta_val\"][-1]],\n", - " [bins[\"eta_val\"][0], bins[\"eta_val\"][-1]],\n", - " color=\"black\", ls=\"--\", lw=0.5)\n", - " plt.savefig(\"eta_2d_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n", - " \n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " plt.hist(v0[msk_true, 0], bins=bins[\"eta_val\"], density=1.0, histtype=\"step\", lw=2, label=bins[\"true_val\"]);\n", - " plt.hist(v0[msk_pred, 1], bins=bins[\"eta_val\"], density=1.0, histtype=\"step\", lw=2, label=bins[\"pred_val\"]);\n", - " 
plt.xlabel(bins[\"eta_xlabel\"])\n", - " plt.ylabel(\"number of particles\\n(normalized, a.u.)\")\n", - " plt.legend(frameon=False)\n", - " cms_label()\n", - " sample_label(ax)\n", - " particle_label(ax, pid)\n", - " ax.set_ylim(ax.get_ylim()[0], 1.5*ax.get_ylim()[1])\n", - " plt.savefig(\"eta_hist_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n", - " \n", - " ax.set_ylim(ax.get_ylim()[0], 1.2*ax.get_ylim()[1])\n", - "\n", - " res = (v0[msk_both, 1] - v0[msk_both, 0])\n", - " res[np.isnan(res)] = -1\n", - "\n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " ax.text(0.98, 0.98, \"avg. $\\Delta \\eta$\\n$%.2f \\pm %.2f$\"%(np.mean(res), np.std(res)), transform=ax.transAxes, ha=\"right\", va=\"top\")\n", - " plt.hist(res, bins=bins[\"eta_res\"], density=1.0);\n", - " plt.xlabel(\"$\\Delta \\eta$\")\n", - " plt.ylabel(\"number of particles\\n(normalized, a.u.)\")\n", - " cms_label()\n", - " sample_label(ax)\n", - " particle_label(ax, pid)\n", - " plt.savefig(\"eta_ratio_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n", - " \n", - " #efficiency vs fake rate\n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " big_df[\"bins_target_eta\"] = np.searchsorted(bins[\"eta_val\"], big_df[\"target_eta\"])\n", - " big_df[\"bins_pred_eta\"] = np.searchsorted(bins[\"eta_val\"], big_df[\"pred_eta\"])\n", - "\n", - " vals_eff = big_df[(big_df[\"target_pid\"]==pid)].groupby(\"bins_target_eta\")[\"pred_pid\"].apply(get_eff, pid)\n", - " vals_fake = big_df[(big_df[\"pred_pid\"]==pid)].groupby(\"bins_pred_eta\")[\"target_pid\"].apply(get_fake, pid)\n", - "\n", - " out_eff = np.zeros((len(bins[\"eta_val\"]), 2))\n", - " out_fake = np.zeros((len(bins[\"eta_val\"]), 2))\n", - " for ib in range(len(bins[\"eta_val\"])):\n", - " if ib in vals_eff.keys():\n", - " out_eff[ib, 0] = vals_eff[ib][0]\n", - " out_eff[ib, 1] = vals_eff[ib][1]\n", - " if ib in vals_fake.keys():\n", - " out_fake[ib, 0] = vals_fake[ib][0]\n", - " out_fake[ib, 1] = vals_fake[ib][1]\n", - "\n", - " cms_label()\n", - " sample_label(ax)\n", - " particle_label(ax, pid)\n", - "\n", - " plt.errorbar(bins[\"eta_val\"], out_eff[:, 0], out_eff[:, 1], marker=\".\", lw=0, elinewidth=1.0, color=\"green\", label=\"efficiency\")\n", - " plt.ylabel(\"efficiency\\nN(pred|true) / N(true)\")\n", - " ax.set_ylim(0, 1.5)\n", - " plt.xlabel(bins[\"eta_xlabel\"])\n", - "\n", - " ax2 = ax.twinx()\n", - " col = \"red\"\n", - " plt.errorbar(bins[\"eta_val\"], out_fake[:, 0], out_fake[:, 1], marker=\".\", lw=0, elinewidth=1.0, color=col, label=\"fake rate\")\n", - " plt.ylabel(\"fake rate\\nN(true|pred) / N(pred)\")\n", - " plt.xlabel(bins[\"eta_xlabel\"])\n", - " ax2.set_ylim(0, 1.5)\n", - " lines, labels = ax.get_legend_handles_labels()\n", - " lines2, labels2 = ax2.get_legend_handles_labels()\n", - " ax2.legend(lines + lines2, labels + labels2, loc=0, frameon=False)\n", - " plt.savefig(\"eta_eff_fake_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_phi_reso(pid, v0, msk_true, msk_pred, msk_both, bins):\n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " hist = np.histogram2d(v0[msk_both, 0], v0[msk_both, 1], bins=(bins[\"phi_val\"], bins[\"phi_val\"]))\n", - " mplhep.hist2dplot(hist[0], hist[1], hist[2], cmap=\"Blues\", cbar=False);\n", - " plt.xlabel(bins[\"true_val\"] + \" \" + bins[\"phi_xlabel\"])\n", - " plt.ylabel(bins[\"pred_val\"]+ \" \" + bins[\"phi_xlabel\"])\n", - " cms_label()\n", - " 
sample_label(ax)\n", - " particle_label(ax, pid)\n", - " plt.plot(\n", - " [bins[\"phi_val\"][0], bins[\"phi_val\"][-1]],\n", - " [bins[\"phi_val\"][0], bins[\"phi_val\"][-1]],\n", - " color=\"black\", ls=\"--\", lw=0.5)\n", - " plt.savefig(\"phi_2d_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n", - " \n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " plt.hist(v0[msk_true, 0], bins=bins[\"phi_val\"], density=1.0, histtype=\"step\", lw=2, label=bins[\"true_val\"]);\n", - " plt.hist(v0[msk_pred, 1], bins=bins[\"phi_val\"], density=1.0, histtype=\"step\", lw=2, label=bins[\"pred_val\"]);\n", - " plt.xlabel(bins[\"phi_xlabel\"])\n", - " plt.ylabel(\"number of particles\\n(normalized, a.u.)\")\n", - " plt.legend(frameon=False)\n", - " cms_label()\n", - " sample_label(ax)\n", - " particle_label(ax, pid)\n", - " plt.savefig(\"phi_hist_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n", - " ax.set_ylim(ax.get_ylim()[0], 1.5*ax.get_ylim()[1])\n", - "\n", - " res = (v0[msk_both, 1] - v0[msk_both, 0])\n", - " res[np.isnan(res)] = -1\n", - "\n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " ax.text(0.98, 0.98, \"avg. $\\Delta \\phi$\\n$%.2f \\pm %.2f$\"%(np.mean(res), np.std(res)), transform=ax.transAxes, ha=\"right\", va=\"top\")\n", - " plt.hist(res, bins=bins[\"phi_res\"], density=1.0);\n", - " plt.xlabel(\"$\\Delta \\phi$\")\n", - " plt.ylabel(\"number of particles\\n(normalized, a.u.)\")\n", - " cms_label()\n", - " sample_label(ax)\n", - " particle_label(ax, pid)\n", - " plt.savefig(\"phi_ratio_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n", - " \n", - " #efficiency vs fake rate\n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " big_df[\"bins_target_phi\"] = np.searchsorted(bins[\"phi_val\"], big_df[\"target_phi\"])\n", - " big_df[\"bins_pred_phi\"] = np.searchsorted(bins[\"phi_val\"], big_df[\"pred_phi\"])\n", - "\n", - " vals_eff = big_df[(big_df[\"target_pid\"]==pid)].groupby(\"bins_target_phi\")[\"pred_pid\"].apply(get_eff, pid)\n", - " vals_fake = big_df[(big_df[\"pred_pid\"]==pid)].groupby(\"bins_pred_phi\")[\"target_pid\"].apply(get_fake, pid)\n", - "\n", - " out_eff = np.zeros((len(bins[\"phi_val\"]), 2))\n", - " out_fake = np.zeros((len(bins[\"phi_val\"]), 2))\n", - " for ib in range(len(bins[\"phi_val\"])):\n", - " if ib in vals_eff.keys():\n", - " out_eff[ib, 0] = vals_eff[ib][0]\n", - " out_eff[ib, 1] = vals_eff[ib][1]\n", - " if ib in vals_fake.keys():\n", - " out_fake[ib, 0] = vals_fake[ib][0]\n", - " out_fake[ib, 1] = vals_fake[ib][1]\n", - "\n", - " cms_label()\n", - " sample_label(ax)\n", - " particle_label(ax, pid)\n", - "\n", - " plt.errorbar(bins[\"phi_val\"], out_eff[:, 0], out_eff[:, 1], marker=\".\", lw=0, elinewidth=1.0, color=\"green\", label=\"efficiency\")\n", - " plt.ylabel(\"efficiency\\nN(pred|true) / N(true)\")\n", - " ax.set_ylim(0, 1.5)\n", - " plt.xlabel(bins[\"phi_xlabel\"])\n", - "\n", - " ax2 = ax.twinx()\n", - " col = \"red\"\n", - " plt.errorbar(bins[\"phi_val\"], out_fake[:, 0], out_fake[:, 1], marker=\".\", lw=0, elinewidth=1.0, color=col, label=\"fake rate\")\n", - " plt.ylabel(\"fake rate\\nN(true|pred) / N(pred)\")\n", - " plt.xlabel(bins[\"phi_xlabel\"])\n", - " ax2.set_ylim(0, 1.5)\n", - " lines, labels = ax.get_legend_handles_labels()\n", - " lines2, labels2 = ax2.get_legend_handles_labels()\n", - " ax2.legend(lines + lines2, labels + labels2, loc=0, frameon=False)\n", - " plt.savefig(\"phi_eff_fake_pid{}.pdf\".format(pid), bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def prepare_resolution_plots(big_df, pid, bins):\n", - "\n", - " msk_true = (big_df[\"target_pid\"]==pid)\n", - " msk_pred = (big_df[\"pred_pid\"]==pid)\n", - " msk_both = msk_true&msk_pred\n", - " v0 = big_df[[\"target_e\", \"pred_e\"]].values\n", - " v1 = big_df[[\"target_eta\", \"pred_eta\"]].values\n", - " v2 = big_df[[\"target_phi\", \"pred_phi\"]].values\n", - " \n", - " plot_E_reso(pid, v0, msk_true, msk_pred, msk_both, bins)\n", - " plot_eta_reso(pid, v1, msk_true, msk_pred, msk_both, bins)\n", - " plot_phi_reso(pid, v2, msk_true, msk_pred, msk_both, bins)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = {\n", - " 211: {\n", - " \"E_val\": np.linspace(0, 5, 61),\n", - " \"E_res\": np.linspace(-1, 1, 61),\n", - " \"eta_val\": np.linspace(-4, 4, 61),\n", - " \"eta_res\": np.linspace(-0.5, 0.5, 61),\n", - " \"E_xlabel\": \"Energy [GeV]\",\n", - " \"eta_xlabel\": \"$\\eta$\",\n", - " \"phi_val\": np.linspace(-4, 4, 61),\n", - " \"phi_res\": np.linspace(-0.5, 0.5, 41),\n", - " \"phi_xlabel\": \"Energy [GeV]\",\n", - " \"phi_xlabel\": \"$\\phi$\",\n", - " \"true_val\": \"reco PF\",\n", - " \"pred_val\": \"ML-PF\",\n", - " },\n", - " -211: {\n", - " \"E_val\": np.linspace(0, 5, 61),\n", - " \"E_res\": np.linspace(-1, 1, 61),\n", - " \"eta_val\": np.linspace(-4, 4, 61),\n", - " \"eta_res\": np.linspace(-0.5, 0.5, 41),\n", - " \"E_xlabel\": \"Energy [GeV]\",\n", - " \"eta_xlabel\": \"$\\eta$\",\n", - " \"phi_val\": np.linspace(-4, 4, 61),\n", - " \"phi_res\": np.linspace(-0.5, 0.5, 41),\n", - " \"phi_xlabel\": \"Energy [GeV]\",\n", - " \"phi_xlabel\": \"$\\phi$\",\n", - " \"true_val\": \"reco PF\",\n", - " \"pred_val\": \"ML-PF\",\n", - " },\n", - " 130: {\n", - " \"E_val\": np.linspace(0, 10, 61),\n", - " \"E_res\": np.linspace(-1, 1, 61),\n", - " \"eta_val\": np.linspace(-4, 4, 61),\n", - " \"eta_res\": np.linspace(-0.5, 0.5, 41),\n", - " \"E_xlabel\": \"Energy [GeV]\",\n", - " \"eta_xlabel\": \"$\\eta$\",\n", - " \"phi_val\": np.linspace(-4, 4, 61),\n", - " \"phi_res\": np.linspace(-0.5, 0.5, 41),\n", - " \"phi_xlabel\": \"Energy [GeV]\",\n", - " \"phi_xlabel\": \"$\\phi$\",\n", - " \"true_val\": \"reco PF\",\n", - " \"pred_val\": \"ML-PF\",\n", - " },\n", - " 22: {\n", - " \"E_val\": np.linspace(0, 10, 61),\n", - " \"E_res\": np.linspace(-1, 1, 61),\n", - " \"eta_val\": np.linspace(-4, 4, 61),\n", - " \"eta_res\": np.linspace(-0.5, 0.5, 41),\n", - " \"E_xlabel\": \"Energy [GeV]\",\n", - " \"eta_xlabel\": \"$\\eta$\",\n", - " \"phi_val\": np.linspace(-4, 4, 61),\n", - " \"phi_res\": np.linspace(-0.5, 0.5, 41),\n", - " \"phi_xlabel\": \"Energy [GeV]\",\n", - " \"phi_xlabel\": \"$\\phi$\",\n", - " \"true_val\": \"reco PF\",\n", - " \"pred_val\": \"ML-PF\",\n", - " },\n", - " 1: {\n", - " \"E_val\": np.linspace(0, 100, 61),\n", - " \"E_res\": np.linspace(-1, 1, 61),\n", - " \"xlabel\": \"Energy [GeV]\",\n", - " \"true_val\": \"reco PF\",\n", - " \"pred_val\": \"ML-PF\",\n", - " },\n", - " 2: {\n", - " \"E_val\": np.linspace(0, 50, 61),\n", - " \"E_res\": np.linspace(-1, 1, 61),\n", - " \"xlabel\": \"Energy [GeV]\",\n", - " \"true_val\": \"reco PF\",\n", - " \"pred_val\": \"ML-PF\",\n", - " }\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "prepare_resolution_plots(big_df, 211, bins[211])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, 
- "outputs": [], - "source": [ - "prepare_resolution_plots(big_df, 130, bins[130])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "prepare_resolution_plots(big_df, 22, bins[22])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "msk = (big_df[\"target_pid\"] != 0) & ((big_df[\"pred_pid\"] != 0))\n", - "#msk = np.ones(len(big_df), dtype=np.bool)\n", - "confusion2 = sklearn.metrics.confusion_matrix(\n", - " big_df[\"target_pid\"][msk], big_df[\"pred_pid\"][msk],\n", - " labels=class_labels[1:]\n", - ")\n", - "plot_confusion_matrix(\n", - " cm=confusion2, target_names=[int(x) for x in class_labels][1:], normalize=True\n", - ")\n", - "\n", - "acc = sklearn.metrics.accuracy_score(big_df[\"target_pid\"][msk], big_df[\"pred_pid\"][msk])\n", - "plt.title(\"ML-PF, accuracy={:.2f}\".format(acc))\n", - "plt.ylabel(\"reco PF candidate PID\")\n", - "plt.xlabel(\"ML-PF candidate PID\")\n", - "plt.savefig(\"confusion_mlpf.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins_eta = np.linspace(-4, 4, 21)\n", - "bins_e = np.logspace(-1, 3, 21)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def get_eff(df, target_pid=None):\n", - " v0 = np.sum(df==target_pid)\n", - " return (v0 / len(df), np.sqrt(v0)/len(df))\n", - " vals = big_df[(big_df[\"gen_pid\"]==pid)].groupby(bs)[\"cand_pid\"].apply(get_eff, target_pid=pid)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def get_effs_cand_pred(pid, by, bins):\n", - " bs = by + \"_bins\"\n", - " big_df[bs] = np.searchsorted(bins, big_df[by])\n", - " \n", - " vals = big_df[(big_df[\"gen_pid\"]==pid)].groupby(bs)[\"cand_pid\"].apply(get_eff, target_pid=pid)\n", - " xs1 = [bins[min(k, len(bins)-1)] for k in vals.keys()][:-1]\n", - " ys1 = [v[0] for v in vals.values][:-1]\n", - " es1 = [v[1] for v in vals.values][:-1]\n", - "\n", - " vals = big_df[(big_df[\"gen_pid\"]==pid)].groupby(bs)[\"pred_pid\"].apply(get_eff, target_pid=pid)\n", - " xs2 = [bins[min(k, len(bins)-1)] for k in vals.keys()][:-1]\n", - " ys2 = [v[0] for v in vals.values][:-1]\n", - " es2 = [v[1] for v in vals.values][:-1]\n", - " \n", - " return xs1, (ys1, es1), (ys2, es2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#for pid in [211, -211, 130, 22, 1, 2, -11, 11]:\n", - "for pid in [211, -211, 130, 22, 11, -11, 1, 2]:\n", - " plt.figure(figsize=(4,4))\n", - " xs, (ys1, es1), (ys2, es2) = get_effs_cand_pred(pid, \"gen_e\", bins_e)\n", - " plt.errorbar(xs, ys1, es1, lw=1, elinewidth=1, marker=\".\", label=\"standard PF\")\n", - " plt.errorbar(xs, ys2, es2, lw=1, elinewidth=1, marker=\".\", label=\"ML-PF\")\n", - " plt.legend(frameon=False)\n", - " plt.ylim(0, 1.5)\n", - " plt.xscale(\"log\")\n", - " plt.xlabel(\"gen E\")\n", - " plt.ylabel(\"reconstruction efficiency\")\n", - " plt.title(\"pid={}\".format(pid))\n", - " plt.savefig(\"eff_pt_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n", - "\n", - " plt.figure(figsize=(4,4))\n", - " xs, (ys1, es1), (ys2, es2) = get_effs_cand_pred(pid, \"gen_eta\", bins_eta)\n", - " plt.errorbar(xs, ys1, es1, lw=1, elinewidth=1, marker=\".\", label=\"standard PF\")\n", - " plt.errorbar(xs, ys2, es2, lw=1, elinewidth=1, marker=\".\", 
label=\"ML-PF\")\n", - " plt.legend(frameon=False)\n", - " plt.ylim(0, 1.5)\n", - " plt.xlabel(\"gen eta\")\n", - " plt.ylabel(\"reconstruction efficiency\")\n", - " plt.title(\"pid={}\".format(pid))\n", - " plt.savefig(\"eff_eta_pid{}.pdf\".format(pid), bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def make_plot_reg(big_df, pid, pred_type, val, bins):\n", - "\n", - " m = big_df[(big_df[\"cand_pid\"]==pid) & (big_df[\"{}_pid\".format(pred_type)]==pid)][[\"cand_{}\".format(val), \"{}_{}\".format(pred_type, val)]].values\n", - " corr = np.corrcoef(m[:, 0], m[:, 1])[0,1]\n", - " \n", - " plt.figure(figsize=(4,4))\n", - " plt.hist(m[:, 0], bins=bins, histtype=\"step\", lw=2, label=\"true\")\n", - " plt.hist(m[:, 1], bins=bins, histtype=\"step\", lw=2, label=\"pred\")\n", - " plt.xlabel(val)\n", - " plt.legend(frameon=False)\n", - " plt.savefig(\"pred_{}_{}_{}.pdf\".format(val, pred_type, pid), bbox_inches=\"tight\")\n", - "\n", - " ngen = np.sum((big_df[\"cand_pid\"]==pid))\n", - " eff = np.sum((big_df[\"cand_pid\"]==pid) & (big_df[\"{}_pid\".format(pred_type)]==pid)) / float(np.sum((big_df[\"cand_pid\"]==pid)))\n", - " fake = np.sum((big_df[\"cand_pid\"]!=pid) & (big_df[\"{}_pid\".format(pred_type)]==pid)) / float(np.sum((big_df[\"{}_pid\".format(pred_type)]==pid)))\n", - "\n", - " plt.figure(figsize=(4,4))\n", - " plt.title(\"ngen={} corr={:.2f}\\neff={:.2f} fake={:.2f}\".format(ngen, corr, eff, fake))\n", - " h = np.histogram2d(m[:, 0], m[:, 1], bins=(bins, bins))\n", - " mplhep.hist2dplot(h[0], h[1], h[2], cmap=\"Blues\", cbar=False)\n", - " plt.xlabel(\"True {}\".format(val))\n", - " plt.ylabel(\"Predicted {}\".format(val))\n", - " plt.savefig(\"pred_corr_{}_{}_{}.pdf\".format(val, pred_type, pid), bbox_inches=\"tight\")\n", - "\n", - " plt.figure(figsize=(4,4))\n", - " var = np.abs(m[:, 1] / m[:, 0])\n", - " var[var > 100] = 100\n", - " var[var < 0] = 0\n", - " plt.hist(var, bins=np.linspace(0.5, 1.5, 101))\n", - " plt.xlabel(\"true {} / predicted {}\".format(val, val))\n", - " plt.title(\"mu={:.4f} s={:.4f}\".format(np.mean(var), np.std(var)))\n", - " plt.savefig(\"pred_ratio_{}_{}_{}.pdf\".format(val, pred_type, pid), bbox_inches=\"tight\")\n", - " #plt.axvline(1.0, color=\"black\")\n", - " ##plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(0, 5, 101)\n", - "pid = 211\n", - "val = \"e\"\n", - "\n", - "#make_plot_reg(big_df, pid, \"cand\", val, bins)\n", - "make_plot_reg(big_df, pid, \"pred\", val, bins)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(-4, 4, 61)\n", - "pid = 211\n", - "val = \"eta\"\n", - "\n", - "#make_plot_reg(big_df, pid, \"cand\", val, bins)\n", - "make_plot_reg(big_df, pid, \"pred\", val, bins)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(-4, 4, 61)\n", - "pid = 211\n", - "val = \"phi\"\n", - "\n", - "#make_plot_reg(big_df, pid, \"cand\", val, bins)\n", - "make_plot_reg(big_df, pid, \"pred\", val, bins)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# n_preds = []\n", - "# n_trues = []\n", - "# for i in range(len(pred_ids)):\n", - "# n_true = np.sum(true_ids[i]!=0)\n", - "# n_pred = np.sum(pred_ids[i]!=0)\n", - "# n_preds += 
[n_pred]\n", - "# n_trues += [n_true]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# plt.figure(figsize=(5, 5))\n", - "# ax = plt.axes()\n", - "# plt.plot([1500,5000],[1500,5000], color=\"black\", lw=0.5)\n", - "# plt.scatter(n_trues, n_preds, marker=\".\", alpha=0.5)\n", - "# plt.xlim(1500,5000)\n", - "# plt.ylim(1500,5000)\n", - "# plt.xlabel(\"Number of Target PF Candidates\",fontsize=13)\n", - "# plt.ylabel(\"Number of Predicted GNN Candidates\",fontsize=13)\n", - "# #plt.title(\"QCD Run3\")\n", - "\n", - "# plt.text(0.67, 1.05, \"Run 3 (14 TeV)\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.02, 0.98, \"CMS\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=16, fontweight='bold')\n", - "# plt.text(0.18, 0.975, \"Simulation Preliminary\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12,style='italic')\n", - "# #plt.text(0.03, 0.92, \"QCD dijet events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.03, 0.92, \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.tight_layout()\n", - "# #plt.savefig(\"num_pred.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# cms = []\n", - "# for i in range(len(pred_ids)):\n", - "# cm = sklearn.metrics.confusion_matrix(\n", - "# true_ids[i],\n", - "# pred_ids[i], labels=range(len(train_end2end.class_labels))\n", - "# )\n", - "# cms += [cm]\n", - "# cm = sum(cms)\n", - "# cm = cm / 1000.0\n", - "# cm = np.round(cm, 1)#.astype(np.int)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# train_end2end.plot_confusion_matrix(cm, [int(x) for x in train_end2end.class_labels], normalize=True)\n", - "# #plt.xlim(-0.5, 9.5)\n", - "# #plt.ylim(-0.5, 9.5)\n", - "# plt.title(\"Normalized Confusion Matrix (QCD Run3)\")\n", - "# #plt.text(0.02, 0.98, \"CMS Simulation, preliminary\", transform=ax.transAxes, va=\"top\", ha=\"left\")\n", - "# #plt.tight_layout()\n", - "# plt.savefig(\"cm.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# pm = np.concatenate(pred_momenta)\n", - "# tm = np.concatenate(true_momenta)\n", - "# ti = np.concatenate(true_ids)\n", - "# pi = np.concatenate(pred_ids)\n", - "\n", - "\n", - "# pm[:, 0] = np.power(10, pm[:, 0])\n", - "# tm[:, 0] = np.power(10, tm[:, 0])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# plt.figure(figsize=(5, 5))\n", - "\n", - "# ax = plt.axes()\n", - "# bins = np.linspace(0, 50, 100)\n", - "# h0 = plt.hist(pm[pi!=0, 0], bins=bins, histtype=\"step\", lw=1, label=\"PF\");\n", - "# h1 = plt.hist(tm[ti!=0, 0], bins=bins, histtype=\"step\", lw=1, label=\"GNN\");\n", - "# plt.yscale(\"log\")\n", - "# plt.legend(frameon=False)\n", - "# plt.ylim(10, 1e7)\n", - "\n", - "# plt.xlabel(\"Candidate $p_{\\mathrm{T}}$ (a.u.)\",fontsize=13)\n", - "# plt.ylabel(\"Number of Candidates\",fontsize=13)\n", - "# #plt.title(\"QCD Run 3\")\n", - "\n", - "# plt.text(0.67, 1.05, \"Run 3 (14 TeV)\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.02, 0.98, \"CMS\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=16, fontweight='bold')\n", - "# plt.text(0.18, 0.975, \"Simulation Preliminary\", transform=ax.transAxes, 
va=\"top\", ha=\"left\",size=12,style='italic')\n", - "# #plt.text(0.03, 0.92, \"QCD dijet events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.03, 0.92, \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.tight_layout()\n", - "# plt.savefig(\"pt_hist.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# plt.figure(figsize=(5, 5))\n", - "# ax = plt.axes()\n", - "\n", - "# bins = np.linspace(-4, 4, 100)\n", - "# plt.hist(pm[pi!=0, 1], bins=bins, histtype=\"step\", lw=1);\n", - "# plt.hist(tm[ti!=0, 1], bins=bins, histtype=\"step\", lw=1);\n", - "# plt.yscale(\"log\")\n", - "\n", - "# plt.ylim(1000, 1e6)\n", - "# plt.xlabel(\"Candidate $\\eta$ (a.u.)\",fontsize=13)\n", - "# plt.ylabel(\"Number of Candidates\",fontsize=13)\n", - "# #plt.title(\"QCD Run 3\")\n", - "# plt.text(0.67, 1.05, \"Run 3 (14 TeV)\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.02, 0.98, \"CMS\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=16, fontweight='bold')\n", - "# plt.text(0.18, 0.975, \"Simulation Preliminary\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12,style='italic')\n", - "# #plt.text(0.03, 0.92, \"QCD dijet events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.03, 0.92, \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.tight_layout()\n", - "# plt.savefig(\"eta_hist.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# plt.figure(figsize=(5, 5))\n", - "\n", - "# ax = plt.axes()\n", - "# bins = np.linspace(-3, 3, 60)\n", - "# plt.hist(pm[pi!=0, 2], bins=bins, histtype=\"step\", lw=1);\n", - "# plt.hist(tm[ti!=0, 2], bins=bins, histtype=\"step\", lw=1);\n", - "# plt.yscale(\"log\")\n", - "# plt.ylim(1000, 1e6)\n", - "\n", - "# plt.xlabel(\"Candidate $\\phi$ (a.u.)\",fontsize=13)\n", - "# plt.ylabel(\"Number of Candidates\",fontsize=13)\n", - "# #plt.title(\"QCD Run 3\")\n", - "\n", - "# plt.text(0.67, 1.05, \"Run 3 (14 TeV)\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.02, 0.98, \"CMS\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=16, fontweight='bold')\n", - "# plt.text(0.18, 0.975, \"Simulation Preliminary\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12,style='italic')\n", - "# #plt.text(0.03, 0.92, \"QCD dijet events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.03, 0.92, \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.tight_layout()\n", - "# plt.savefig(\"phi_hist.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# plt.figure(figsize=(5, 5))\n", - "# ax = plt.axes()\n", - "\n", - "# subidx = np.where((pi!=0)&(ti!=0))[0]\n", - "# rp = np.random.permutation(range(len(subidx)))[:1000]\n", - "\n", - "# plt.scatter(pm[subidx[rp], 0], tm[subidx[rp], 0], marker=\".\", alpha=0.5)\n", - "# plt.xlim(0,2)\n", - "# plt.ylim(0,2)\n", - "# plt.plot([0,2],[0,2], color=\"black\")\n", - "\n", - "# plt.xlabel(\"Target PF Candidate $p_{\\mathrm{T}}$ (a.u.)\",fontsize=13)\n", - "# plt.ylabel(\"Predicted GNN Candidate $p_{\\mathrm{T}}$ (a.u.)\", fontsize=13)\n", - "# #plt.title(\"QCD Run 3, 1000 
candidates\")\n", - "\n", - "# plt.text(0.67, 1.05, \"Run 3 (14 TeV)\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.02, 0.98, \"CMS\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=16, fontweight='bold')\n", - "# plt.text(0.18, 0.975, \"Simulation Preliminary\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12,style='italic')\n", - "# #plt.text(0.03, 0.92, \"QCD dijet events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.03, 0.92, \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.tight_layout()\n", - "# plt.savefig(\"pt_corr.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# plt.figure(figsize=(5, 5))\n", - "# ax = plt.axes()\n", - "\n", - "# plt.plot([-7, 7], [-7, 7], color=\"black\", lw=0.5)\n", - "# plt.scatter(pm[subidx[rp], 1], tm[subidx[rp], 1], marker=\".\", alpha=0.5)\n", - "# plt.xlim(-7, 7)\n", - "# plt.ylim(-7, 7)\n", - "\n", - "# plt.xlabel(\"Target PF Candidate $\\eta$ (a.u.)\",fontsize=13)\n", - "# plt.ylabel(\"Predicted GNN Candidate $\\eta$ (a.u.)\",fontsize=13)\n", - "# #plt.title(\"QCD Run 3, 1000 candidates\")\n", - "# plt.text(0.67, 1.05, \"Run 3 (14 TeV)\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.02, 0.98, \"CMS\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=16, fontweight='bold')\n", - "# plt.text(0.18, 0.975, \"Simulation Preliminary\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12,style='italic')\n", - "# #plt.text(0.03, 0.92, \"QCD dijet events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.03, 0.92, \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.tight_layout()\n", - "# plt.savefig(\"eta_corr.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# plt.figure(figsize=(5, 5))\n", - "# ax = plt.axes()\n", - "\n", - "# plt.plot([-5, 5], [-5, 5], color=\"black\", lw=0.5)\n", - "# plt.scatter(pm[subidx[rp], 2], tm[subidx[rp], 2], marker=\".\", alpha=0.5)\n", - "# plt.xlim(-3,3)\n", - "# plt.ylim(-3,3)\n", - "\n", - "\n", - "# plt.xlabel(\"Target PF Candidate $\\phi$ (a.u.)\",fontsize=13)\n", - "# plt.ylabel(\"Predicted GNN Candidate $\\phi$ (a.u.)\",fontsize=13)\n", - "# #plt.title(\"QCD Run3, 1000 candidates\")\n", - "\n", - "# plt.text(0.67, 1.05, \"Run 3 (14 TeV)\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.02, 0.98, \"CMS\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=16, fontweight='bold')\n", - "# plt.text(0.18, 0.975, \"Simulation Preliminary\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12,style='italic')\n", - "# #plt.text(0.03, 0.92, \"QCD dijet events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.03, 0.92, \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.tight_layout()\n", - "# plt.savefig(\"phi_corr.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# import pandas as pd\n", - "# import tqdm\n", - "\n", - "# import matplotlib as mpl\n", - "# mpl.rcParams['figure.figsize'] = [8.0, 6.0]\n", - "# mpl.rcParams['font.size'] = 12\n", - "# mpl.rcParams['legend.fontsize'] = 'large'\n", - "# 
mpl.rcParams['figure.titlesize'] = 'medium'\n", - "\n", - "# d = full_dataset.get(1)\n", - "# d.batch = torch.zeros((len(d.x)), dtype=torch.long)\n", - "# d = d.to(device=device)\n", - "# train_end2end.data_prep(d, device=device)\n", - "# edges, cand_id_onehot, cand_momentum = model(d)\n", - "# output = edges.detach().cpu().numpy()\n", - "# d = full_dataset.get(1)\n", - "# x_data = d.x.detach().cpu().numpy()\n", - "# mask = ((x_data[:,4]==0) & (x_data[:,5]==0) & (x_data[:,6]==0) & (x_data[:,7]==0))\n", - "# good_index = np.zeros((x_data.shape[0],1,2),dtype=int)\n", - "# good_x = x_data[:,2:4].copy() \n", - "# good_x[~mask] = x_data[~mask,2:4].copy()\n", - "# df = pd.DataFrame(good_x, columns=['eta','phi'])\n", - "# df['isTrack'] = ~mask\n", - "# row, col = d.edge_index.cpu().detach().numpy()\n", - "# y_truth = d.ycand.cpu().detach().numpy()\n", - "\n", - "# min_phi = -1.25\n", - "# max_phi = 1.25\n", - "# min_eta = -1.25\n", - "# max_eta = 1.25\n", - "# extra = 1.0\n", - "# x = 'eta'\n", - "# y = 'phi'\n", - "# for plot_type in [['input'],['truth'],['output']]: \n", - "# k = 0\n", - "# plt.figure(figsize=(8, 6)) \n", - "# for i, j in tqdm.tqdm(zip(row, col),total=len(y_truth)):\n", - "# x1 = df[x][i]\n", - "# x2 = df[x][j]\n", - "# y1 = df[y][i]\n", - "# y2 = df[y][j]\n", - "# if (x1 < min_eta-extra or x1 > max_eta+extra) or (x2 < min_eta-extra or x2 > max_eta+extra): continue\n", - "# if (y1 < min_phi-extra or y1 > max_phi+extra) or (y2 < min_phi-extra or y2 > max_phi+extra): continue\n", - "# if 'input' in plot_type:\n", - "# seg_args = dict(c='b',alpha=0.1,zorder=1)\n", - "# plt.plot([df[x][i], df[x][j]],\n", - "# [df[y][i], df[y][j]], '-', **seg_args)\n", - "# if 'truth' in plot_type and y_truth[k]:\n", - "# seg_args = dict(c='r',alpha=0.8,zorder=2)\n", - "# plt.plot([df[x][i], df[x][j]],\n", - "# [df[y][i], df[y][j]], '-', **seg_args)\n", - "# if 'output' in plot_type:\n", - "# seg_args = dict(c='g',alpha=output[k].item(),zorder=3)\n", - "# plt.plot([df[x][i], df[x][j]],\n", - "# [df[y][i], df[y][j]], '-', **seg_args)\n", - "# k+=1\n", - "# cut_mask = (df[x] > min_eta-extra) & (df[x] < max_eta+extra) & (df[y] > min_phi-extra) & (df[y] < max_phi+extra)\n", - "# cluster_mask = cut_mask & ~df['isTrack']\n", - "# track_mask = cut_mask & df['isTrack']\n", - "# plt.scatter(df[x][cluster_mask], df[y][cluster_mask],c='g',marker='o',s=50,zorder=4,alpha=1)\n", - "# plt.scatter(df[x][track_mask], df[y][track_mask],c='b',marker='p',s=50,zorder=5,alpha=1)\n", - "# plt.xlabel(\"Track or Cluster $\\eta$\",fontsize=18)\n", - "# plt.ylabel(\"Track or Cluster $\\phi$\",fontsize=18)\n", - "# plt.xlim(min_eta, max_eta)\n", - "# plt.ylim(min_phi, max_phi)\n", - "# plt.figtext(0.12, 0.90,'CMS',fontweight='bold', wrap=True, horizontalalignment='left', fontsize=20)\n", - "# plt.figtext(0.22, 0.90,'Simulation Preliminary', style='italic', wrap=True, horizontalalignment='left', fontsize=18)\n", - "# plt.figtext(0.67, 0.90,'Run 3 (14 TeV)', wrap=True, horizontalalignment='left', fontsize=18)\n", - "# plt.savefig('graph_%s_%s_%s.pdf'%(x,y,'_'.join(plot_type)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pickle" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "d = pickle.load(open(\"../raw/pfntuple_1_0.pkl\", \"rb\"))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - 
"codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/pfnet-debug.ipynb b/notebooks/pfnet-debug.ipynb index d37af9248..36dd4c91b 100644 --- a/notebooks/pfnet-debug.ipynb +++ b/notebooks/pfnet-debug.ipynb @@ -23,6 +23,7 @@ "import sys\n", "\n", "sys.path.append(\"/home/joosep/particleflow/mlpf\")\n", + "sys.path.append(\"/home/joosep/particleflow/hep_tfds/\")\n", "import tfmodel.model\n", "import tfmodel.data\n", "import tfmodel.model_setup\n", @@ -38,6 +39,23 @@ "from matplotlib import cm" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c58b7a2", + "metadata": {}, + "outputs": [], + "source": [ + "def cms_label(ax, x0=0.01, x1=0.1, x2=0.98, y=0.97):\n", + " plt.figtext(x0, y,'CMS',fontweight='bold', wrap=True, horizontalalignment='left', fontsize=12, transform=ax.transAxes)\n", + " plt.figtext(x1, y,'Simulation Preliminary', style='italic', wrap=True, horizontalalignment='left', fontsize=10, transform=ax.transAxes)\n", + " plt.figtext(x2, y,'Run 3 (14 TeV)', wrap=False, horizontalalignment='right', fontsize=10, transform=ax.transAxes)\n", + " \n", + "def sample_label(ax, x=0.01, y=0.93):\n", + " plt.text(x, y, \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\", ha=\"left\", size=10, transform=ax.transAxes)\n", + " " + ] + }, { "cell_type": "code", "execution_count": null, @@ -45,8 +63,8 @@ "metadata": {}, "outputs": [], "source": [ - "with open(\"/home/joosep/particleflow/parameters/cms.yaml\") as f:\n", - " config = yaml.load(f)\n", + "with open(\"/home/joosep/particleflow/experiments/all_data_cms-best-of-asha-scikit_20211026_042043_178263.workergpu010/config.yaml\") as f:\n", + " config = yaml.safe_load(f)\n", "config[\"setup\"][\"multi_output\"] = True\n", "config[\"parameters\"][\"debug\"] = True" ] @@ -64,10 +82,12 @@ { "cell_type": "code", "execution_count": null, - "id": "c4107771", + "id": "d9fbca7a", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "config[\"datasets\"][\"cms_pf_ttbar\"]" + ] }, { "cell_type": "code", @@ -78,6 +98,7 @@ "source": [ "cds = config[\"dataset\"]\n", "\n", + "config[\"datasets\"][\"cms_pf_ttbar\"][\"data_dir\"] = \"/home/joosep/tensorflow_datasets/\"\n", "config[\"datasets\"][\"cms_pf_ttbar\"][\"batch_per_gpu\"] = 1\n", "ds_val, ds_info = tfmodel.utils.get_heptfds_dataset(\n", " \"cms_pf_ttbar\",\n", @@ -92,19 +113,32 @@ "metadata": {}, "outputs": [], "source": [ - "ret = model.build((1, 6400, 18))\n", + "ret = model.build((1, 6400, 25))\n", "#model.set_trainable_classification()\n", - "model.load_weights(\"/home/joosep/particleflow/experiments/cms_20210917_142344_403761.gpu0.local/weights/weights-200-0.059240.hdf5\")" + "model.load_weights(\"/home/joosep/particleflow/experiments/all_data_cms-best-of-asha-scikit_20211026_042043_178263.workergpu010/weights/weights-200-0.074496.hdf5\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "aa7c2864", + "id": "18732bbe", "metadata": {}, "outputs": [], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aa7c2864", + "metadata": { + "scrolled": false + }, + "outputs": [], "source": [ "for X, y, w in ds_val:\n", + " X = tf.expand_dims(X, axis=0)\n", " X_val = X.numpy()\n", " ret = model.predict_on_batch(X)\n", " break" @@ -149,11 +183,17 @@ " colorlist = [cm.Dark2(x) for x in 
evenly_spaced_interval]\n", " bin_idx = get_bin_index(ret[layer_name][\"bins\"][0])\n", "\n", - " plt.figure(figsize=(4,4))\n", + " plt.figure(figsize=(7,7))\n", + " ax = plt.axes()\n", " plt.scatter(eta, phi, c=[colorlist[bi] for bi in bin_idx], marker=\".\", s=energy)\n", " plt.xlabel(\"PFElement $\\eta$\")\n", " plt.ylabel(\"PFElement $\\phi$\")\n", " plt.title(\"Binning in {}\".format(layer_name))\n", + " cms_label(ax)\n", + " sample_label(ax)\n", + " plt.ylim(-3.8, 3.8)\n", + " plt.text(0.5, 0.05, \"Each point corresponds to a PFElement in a simulated event.\\nUnique colors correspond to the bin assignment in this layer.\",\n", + " ha=\"center\", va=\"center\", transform=ax.transAxes)\n", " plt.savefig(\"bins_{}.pdf\".format(layer_name), bbox_inches=\"tight\")\n", " plt.savefig(\"bins_{}.png\".format(layer_name), bbox_inches=\"tight\", dpi=300)" ] @@ -178,6 +218,16 @@ "plot_binning_in_layer(\"cg_1\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "c8f0f81f", + "metadata": {}, + "outputs": [], + "source": [ + "plot_binning_in_layer(\"cg_2\")" + ] + }, { "cell_type": "code", "execution_count": null, @@ -206,9 +256,9 @@ "outputs": [], "source": [ "def plot_dms(dms):\n", - " fig = plt.figure(figsize=(4*4, 3*4))\n", - " for i in range(min(len(dms), 49)):\n", - " ax = plt.subplot(7,7,i+1)\n", + " fig = plt.figure(figsize=(10*4, 10*4))\n", + " for i in range(min(len(dms), 100)):\n", + " ax = plt.subplot(10,10,i+1)\n", " plt.axes(ax)\n", " plt.imshow(dms[i], interpolation=\"none\", norm=matplotlib.colors.Normalize(vmin=0, vmax=1), cmap=\"Blues\")\n", " #plt.colorbar()\n", @@ -295,14 +345,6 @@ "plt.savefig(\"dm_cg_energy_1.pdf\", bbox_inches=\"tight\")\n", "plt.savefig(\"dm_cg_energy_1.png\", bbox_inches=\"tight\", dpi=300)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c853c865", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/scripts/local_test_cms_pipeline.sh b/scripts/local_test_cms_pipeline.sh index cb3a67ba9..e023d3b91 100755 --- a/scripts/local_test_cms_pipeline.sh +++ b/scripts/local_test_cms_pipeline.sh @@ -34,6 +34,6 @@ python3 mlpf/pipeline.py train -c parameters/cms.yaml --nepochs 2 --customize pi ls ./experiments/cms_*/weights/ #Generate the pred.npz file of predictions -python3 mlpf/pipeline.py evaluate -c parameters/cms.yaml -t ./experiments/cms_* +python3 mlpf/pipeline.py evaluate -t ./experiments/cms_* python3 mlpf/pipeline.py train -c parameters/cms-transformer.yaml --nepochs 2 --customize pipeline_test diff --git a/scripts/local_test_delphes_pipeline.sh b/scripts/local_test_delphes_pipeline.sh index 6e2b18e4f..60c60b8e2 100755 --- a/scripts/local_test_delphes_pipeline.sh +++ b/scripts/local_test_delphes_pipeline.sh @@ -19,4 +19,4 @@ python3 mlpf/pipeline.py train -c parameters/delphes.yaml --nepochs 2 --ntrain 5 ls ./experiments/delphes_*/weights/ #Generate the pred.npz file of predictions -python3 mlpf/pipeline.py evaluate -c parameters/delphes.yaml -t ./experiments/delphes_* +python3 mlpf/pipeline.py evaluate -t ./experiments/delphes_*
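The snippet below is an illustrative, self-contained sketch of how the cms_label(ax) / sample_label(ax) helpers added to notebooks/pfnet-debug.ipynb in this patch are meant to be called. The helper bodies mirror the patch; the dummy histogram, the axis labels, and the output filename "label_demo.png" are hypothetical and serve only to show the call pattern on a standalone matplotlib axes.

import matplotlib.pyplot as plt
import numpy as np

def cms_label(ax, x0=0.01, x1=0.1, x2=0.98, y=0.97):
    # experiment and conditions labels, positioned in axes coordinates of the given axes
    plt.figtext(x0, y, 'CMS', fontweight='bold', wrap=True, horizontalalignment='left', fontsize=12, transform=ax.transAxes)
    plt.figtext(x1, y, 'Simulation Preliminary', style='italic', wrap=True, horizontalalignment='left', fontsize=10, transform=ax.transAxes)
    plt.figtext(x2, y, 'Run 3 (14 TeV)', wrap=False, horizontalalignment='right', fontsize=10, transform=ax.transAxes)

def sample_label(ax, x=0.01, y=0.93):
    # physics-sample label, placed just below the CMS label
    plt.text(x, y, r"$\mathrm{t}\overline{\mathrm{t}}$ events", ha="left", size=10, transform=ax.transAxes)

# hypothetical standalone figure: any per-particle quantity would do here
fig = plt.figure(figsize=(4, 4))
ax = plt.axes()
ax.hist(np.random.exponential(10.0, size=1000), bins=50, histtype="step")
ax.set_xlabel("Energy [GeV]")
ax.set_ylabel("number of particles")
cms_label(ax)
sample_label(ax)
plt.savefig("label_demo.png", bbox_inches="tight")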