diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d62240322..cf16ce5eb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -24,7 +24,8 @@ jobs: tensorflow-probability==0.12.2 tensorflow-addons==0.13.0 \ tqdm click tensorflow-datasets 'ray[default]' 'ray[tune]' \ tf-models-official tensorflow-text \ - tf2onnx onnxruntime zenodo_get seaborn scikit-optimize nevergrad + tf2onnx onnxruntime zenodo_get seaborn scikit-optimize nevergrad \ + tensorflow-estimator==2.6.0 keras==2.6.0 git submodule init git submodule update - name: Run delphes TF model @@ -45,7 +46,8 @@ jobs: tensorflow-probability==0.12.2 tensorflow-addons==0.13.0 \ tqdm click tensorflow-datasets 'ray[default]' 'ray[tune]' \ tf-models-official tensorflow-text \ - tf2onnx onnxruntime zenodo_get seaborn scikit-optimize nevergrad + tf2onnx onnxruntime zenodo_get seaborn scikit-optimize nevergrad \ + tensorflow-estimator==2.6.0 keras==2.6.0 git submodule init git submodule update - name: Run CMS TF model using the pipeline diff --git a/mlpf/pipeline.py b/mlpf/pipeline.py index 182071712..ee72dfb72 100644 --- a/mlpf/pipeline.py +++ b/mlpf/pipeline.py @@ -87,6 +87,7 @@ def customize_pipeline_test(config): if "physical" in config["train_test_datasets"]: config["train_test_datasets"]["physical"]["datasets"] = ["cms_pf_ttbar"] config["train_test_datasets"] = {"physical": config["train_test_datasets"]["physical"]} + config["train_test_datasets"]["physical"]["batch_per_gpu"] = 5 return config diff --git a/mlpf/plotting/cms_fwlite.py b/mlpf/plotting/cms_fwlite.py index f638fc961..81eec7d21 100644 --- a/mlpf/plotting/cms_fwlite.py +++ b/mlpf/plotting/cms_fwlite.py @@ -34,6 +34,32 @@ def get(self, event): ("energy", "[o.energy() for o in obj]"), ] )) + expressions.append(Expression( + "ak4PFJetsPuppi", + "vector", + [ + ("pt", "[o.pt() for o in obj]"), + ("eta", "[o.eta() for o in obj]"), + ("phi", "[o.phi() for o in obj]"), + ("energy", "[o.energy() for o in obj]"), + ] + )) + expressions.append(Expression( + "pfMet", + "vector", + [ + ("pt", "[o.pt() for o in obj]"), + ("phi", "[o.phi() for o in obj]"), + ] + )) + expressions.append(Expression( + "pfMetPuppi", + "vector", + [ + ("pt", "[o.pt() for o in obj]"), + ("phi", "[o.phi() for o in obj]"), + ] + )) expressions.append(Expression( "particleFlow", "vector", diff --git a/mlpf/tfmodel/timing.py b/mlpf/tfmodel/timing.py index eaebf425b..ba6cd8913 100644 --- a/mlpf/tfmodel/timing.py +++ b/mlpf/tfmodel/timing.py @@ -1,6 +1,7 @@ import numpy as np import time import pynvml +import sys #pip install only onnxruntime_gpu, not onnxruntime! 
import onnxruntime @@ -17,7 +18,7 @@ mem_initial = mem.used/1000/1000 print("mem_initial", mem_initial) - onnx_sess = onnxruntime.InferenceSession("model.onnx", providers=EP_list) + onnx_sess = onnxruntime.InferenceSession(sys.argv[1], providers=EP_list) time.sleep(5) mem = pynvml.nvmlDeviceGetMemoryInfo(handle) @@ -32,7 +33,7 @@ for i in range(100): #allocate array in system RAM - X = np.array(np.random.randn(1, num_elems, 18), np.float32) + X = np.array(np.random.randn(1, num_elems, 25), np.float32) #transfer data to GPU, run model, transfer data back t0 = time.time() diff --git a/notebooks/cms-mlpf.ipynb b/notebooks/cms-mlpf.ipynb index af2a616e9..0b49617e2 100644 --- a/notebooks/cms-mlpf.ipynb +++ b/notebooks/cms-mlpf.ipynb @@ -19,7 +19,9 @@ "import scipy\n", "import mplhep\n", "\n", - "import pandas" + "import pandas\n", + "import boost_histogram as bh\n", + "import itertools" ] }, { @@ -40,13 +42,19 @@ "metadata": {}, "outputs": [], "source": [ - "def cms_label(x0=0.12, x1=0.23, x2=0.67, y=0.90):\n", + "def cms_label(ax, x0=0.01, x1=0.1, x2=0.98, y=0.97):\n", + " plt.figtext(x0, y,'CMS',fontweight='bold', wrap=True, horizontalalignment='left', fontsize=12, transform=ax.transAxes)\n", + " plt.figtext(x1, y,'Simulation Preliminary', style='italic', wrap=True, horizontalalignment='left', fontsize=10, transform=ax.transAxes)\n", + " plt.figtext(x2, y,'Run 3 (14 TeV)', wrap=False, horizontalalignment='right', fontsize=10, transform=ax.transAxes)\n", + "\n", + "def cms_label_sample_label(x0=0.12, x1=0.23, x2=0.67, y=0.90):\n", " plt.figtext(x0, y,'CMS',fontweight='bold', wrap=True, horizontalalignment='left', fontsize=12)\n", " plt.figtext(x1, y,'Simulation Preliminary', style='italic', wrap=True, horizontalalignment='left', fontsize=10)\n", " plt.figtext(x2, y,'Run 3 (14 TeV), $\\mathrm{t}\\overline{\\mathrm{t}}$ events', wrap=False, horizontalalignment='left', fontsize=10)\n", "\n", - "def sample_label(ax, x=0.03, y=0.98):\n", - " plt.text(x, y, \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\", va=\"top\", ha=\"left\", size=10, transform=ax.transAxes)\n" + " \n", + "def sample_label(ax, x=0.01, y=0.93):\n", + " plt.text(x, y, \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\", ha=\"left\", size=10, transform=ax.transAxes)" ] }, { @@ -94,7 +102,9 @@ "ELEM_NAMES_CMS = [\"NONE\", \"TRACK\", \"PS1\", \"PS2\", \"ECAL\", \"HCAL\", \"GSF\", \"BREM\", \"HFEM\", \"HFHAD\", \"SC\", \"HO\"]\n", "\n", "CLASS_LABELS_CMS = [0, 211, 130, 1, 2, 22, 11, 13]\n", - "CLASS_NAMES_CMS = [\"none\", \"ch.had\", \"n.had\", \"HFEM\", \"HFHAD\", \"gamma\", \"ele\", \"mu\"]" + "CLASS_NAMES_CMS = [\"none\", \"ch.had\", \"n.had\", \"HFEM\", \"HFHAD\", \"$\\gamma$\", \"$e^\\pm$\", \"$\\mu^\\pm$\"]\n", + "\n", + "class_names = {k: v for k, v in zip(CLASS_LABELS_CMS, CLASS_NAMES_CMS)}" ] }, { @@ -104,7 +114,7 @@ "metadata": {}, "outputs": [], "source": [ - "path = \"../experiments/cms_20210929_223058_191573.gpu0.local/evaluation/\"" + "path = \"../experiments/all_data_cms-best-of-asha-scikit_20211026_042043_178263.workergpu010/evaluation/\"" ] }, { @@ -152,109 +162,117 @@ "metadata": {}, "outputs": [], "source": [ - "def plot_distribution(prefix, bins, var, particle_label, labels):\n", + "def get_distribution(prefix, bins, var):\n", "\n", " hists = []\n", - " for icls in range(1,8):\n", + " for pid in [13,11,22,1,2,130,211]:\n", + " icls = CLASS_LABELS_CMS.index(pid)\n", " msk_pid = (yvals_f[prefix+\"_cls_id\"]==icls)\n", - " h = np.histogram(yvals_f[prefix + \"_\" + var][msk_pid], bins=bins)\n", + " h = 
bh.Histogram(bh.axis.Variable(bins))\n", + " d = yvals_f[prefix + \"_\" + var][msk_pid]\n", + " h.fill(d.flatten())\n", " hists.append(h)\n", - " \n", - " plt.figure(figsize=(5,5))\n", - " ax = plt.axes()\n", + " return hists\n", + "\n", + "# plt.figure(figsize=(5,5))\n", + "# ax = plt.axes()\n", " \n", - " mplhep.histplot(\n", - " [h[0] for h in hists], bins=hists[0][1], ax=ax, stack=True, histtype=\"fill\",\n", - " label=labels\n", - " )\n", - " plt.legend(ncol=2, frameon=False)\n", - " plt.xlabel(var)\n", - " cms_label(x1=0.22, x2=0.55)\n", - " plt.ylabel(\"Number of particles / bin\")\n", - " plt.text(0.02, 0.95, particle_label, transform=ax.transAxes)\n", - " return ax" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d41ecf82", - "metadata": {}, - "outputs": [], - "source": [ - "plot_distribution(\"true\", np.linspace(0,2000,61), \"energy\", \"PF\", CLASS_NAMES_CMS[1:])\n", - "plt.yscale(\"log\")\n", - "plt.ylim(top=1e9)\n", - "plt.savefig(\"energy_true.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "67e4ada0", - "metadata": {}, - "outputs": [], - "source": [ - "plot_distribution(\"pred\", np.linspace(0,2000,61), \"energy\", \"MLPF\", CLASS_NAMES_CMS[1:])\n", - "plt.yscale(\"log\")\n", - "plt.ylim(top=1e9)\n", - "plt.savefig(\"energy_pred.pdf\", bbox_inches=\"tight\")" + "# mplhep.histplot(\n", + "# [h[0] for h in hists], bins=hists[0][1], ax=ax, stack=True, histtype=\"fill\",\n", + "# label=labels\n", + "# )\n", + "# plt.legend(ncol=2, frameon=False)\n", + "# plt.xlabel(var)\n", + "# cms_label(x1=0.22, x2=0.55)\n", + "# plt.ylabel(\"Number of particles / bin\")\n", + "# plt.text(0.02, 0.95, particle_label, transform=ax.transAxes)\n", + "# return ax" ] }, { "cell_type": "code", "execution_count": null, - "id": "03a0dd55", + "id": "d5b04426", "metadata": {}, "outputs": [], "source": [ - "plot_distribution(\"true\", np.linspace(0,200,61), \"pt\", \"PF\", CLASS_NAMES_CMS[1:])\n", + "hists_true = get_distribution(\"true\", np.linspace(0,200,61), \"pt\")\n", + "hists_pred = get_distribution(\"pred\", np.linspace(0,200,61), \"pt\")\n", + "\n", + "plt.figure(figsize=(7, 7))\n", + "ax = plt.axes()\n", + "v1 = mplhep.histplot([h[bh.rebin(2)] for h in hists_true], stack=True, label=[class_names[k] for k in [13,11,22,1,2,130,211]], lw=1)\n", + "v2 = mplhep.histplot([h[bh.rebin(2)] for h in hists_pred], stack=True, color=[x.stairs.get_edgecolor() for x in v1][::-1], lw=2, histtype=\"errorbar\")\n", + "\n", + "legend1 = plt.legend(v1, [x.legend_artist.get_label() for x in v1], loc=(0.60, 0.6), title=\"PF\")\n", + "legend2 = plt.legend(v2, [x.legend_artist.get_label() for x in v1], loc=(0.8, 0.6), title=\"MLPF\")\n", + "plt.gca().add_artist(legend1)\n", + "plt.ylabel(\"Total number of particles / bin\")\n", + "cms_label(ax)\n", + "sample_label(ax)\n", + "\n", "plt.yscale(\"log\")\n", "plt.ylim(top=1e9)\n", "plt.xlabel(\"PFCandidate $p_T$ [GeV]\")\n", - "plt.savefig(\"pt_true.pdf\", bbox_inches=\"tight\")" + "\n", + "plt.savefig(\"pt_true_vs_pred.pdf\", bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "851fcbd1", + "id": "d41ecf82", "metadata": {}, "outputs": [], "source": [ - "plot_distribution(\"pred\", np.linspace(0,200,61), \"pt\", \"MLPF\", CLASS_NAMES_CMS[1:])\n", + "hists_true = get_distribution(\"true\", np.linspace(0,2000,61), \"energy\")\n", + "hists_pred = get_distribution(\"pred\", np.linspace(0,2000,61), \"energy\")\n", + "\n", + "plt.figure(figsize=(7, 7))\n", + "ax = 
plt.axes()\n", + "v1 = mplhep.histplot([h[bh.rebin(2)] for h in hists_true], stack=True, label=[class_names[k] for k in [13,11,22,1,2,130,211]], lw=1)\n", + "v2 = mplhep.histplot([h[bh.rebin(2)] for h in hists_pred], stack=True, color=[x.stairs.get_edgecolor() for x in v1][::-1], lw=2, histtype=\"errorbar\")\n", + "\n", + "legend1 = plt.legend(v1, [x.legend_artist.get_label() for x in v1], loc=(0.60, 0.64), title=\"PF\")\n", + "legend2 = plt.legend(v2, [x.legend_artist.get_label() for x in v1], loc=(0.8, 0.64), title=\"MLPF\")\n", + "plt.gca().add_artist(legend1)\n", + "plt.ylabel(\"Total number of particles / bin\")\n", + "cms_label(ax)\n", + "sample_label(ax)\n", + "\n", "plt.yscale(\"log\")\n", "plt.ylim(top=1e9)\n", - "plt.xlabel(\"MLPFCandidate $p_T$ [GeV]\")\n", - "plt.savefig(\"pt_pred.pdf\", bbox_inches=\"tight\")" + "plt.xlabel(\"PFCandidate $E$ [GeV]\")\n", + "\n", + "plt.savefig(\"energy_true_vs_pred.pdf\", bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "80ab845d", + "id": "03a0dd55", "metadata": {}, "outputs": [], "source": [ - "plot_distribution(\"true\", np.linspace(-5,5,61), \"eta\", \"PF\", CLASS_NAMES_CMS[1:])\n", + "hists_true = get_distribution(\"true\", np.linspace(-6,6,61), \"eta\")\n", + "hists_pred = get_distribution(\"pred\", np.linspace(-6,6,61), \"eta\")\n", + "\n", + "plt.figure(figsize=(7, 7))\n", + "ax = plt.axes()\n", + "v1 = mplhep.histplot([h[bh.rebin(2)] for h in hists_true], stack=True, label=[class_names[k] for k in [13,11,22,1,2,130,211]], lw=1)\n", + "v2 = mplhep.histplot([h[bh.rebin(2)] for h in hists_pred], stack=True, color=[x.stairs.get_edgecolor() for x in v1][::-1], lw=2, histtype=\"errorbar\")\n", + "\n", + "legend1 = plt.legend(v1, [x.legend_artist.get_label() for x in v1], loc=(0.60, 0.6), title=\"PF\")\n", + "legend2 = plt.legend(v2, [x.legend_artist.get_label() for x in v1], loc=(0.8, 0.6), title=\"MLPF\")\n", + "plt.gca().add_artist(legend1)\n", + "plt.ylabel(\"Total number of particles / bin\")\n", + "cms_label(ax)\n", + "sample_label(ax)\n", + "\n", "plt.yscale(\"log\")\n", - "plt.ylim(bottom=1e4, top=1e6)\n", + "plt.ylim(top=2e9)\n", "plt.xlabel(\"PFCandidate $\\eta$\")\n", - "plt.savefig(\"eta_true.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6f9414f8", - "metadata": {}, - "outputs": [], - "source": [ - "plot_distribution(\"pred\", np.linspace(-5,5,61), \"eta\", \"MLPF\", CLASS_NAMES_CMS[1:])\n", - "plt.yscale(\"log\")\n", - "plt.ylim(bottom=1e4, top=1e6)\n", - "plt.xlabel(\"MLPFCandidate $\\eta$\")\n", - "plt.savefig(\"eta_pred.pdf\", bbox_inches=\"tight\")" + "plt.savefig(\"eta_true_vs_pred.pdf\", bbox_inches=\"tight\")" ] }, { @@ -309,7 +327,8 @@ "for icls in range(1,8):\n", " npred = np.sum(yvals[\"pred_cls_id\"] == icls, axis=1)\n", " ncand = np.sum(yvals[\"true_cls_id\"] == icls, axis=1)\n", - " plt.figure(figsize=(6,6))\n", + " plt.figure(figsize=(7,7))\n", + " ax = plt.axes()\n", " plt.scatter(ncand, npred, marker=\".\", alpha=0.8)\n", " a = 0.5*min(np.min(npred), np.min(ncand))\n", " b = 1.5*max(np.max(npred), np.max(ncand))\n", @@ -319,9 +338,10 @@ " plt.title(CLASS_NAMES_CMS[icls],y=1.05)\n", " plt.xlabel(\"number of PFCandidates\")\n", " plt.ylabel(\"number of MLPFCandidates\")\n", - " cms_label(x2=0.6, y=0.89)\n", + " cms_label(ax)\n", + " sample_label(ax)\n", " plt.savefig(\"num_cls{}.pdf\".format(icls), bbox_inches=\"tight\")\n", - " plt.savefig(\"num_cls{}.png\".format(icls), bbox_inches=\"tight\", dpi=300)\n" + " 
plt.savefig(\"num_cls{}.png\".format(icls), bbox_inches=\"tight\", dpi=300)" ] }, { @@ -509,9 +529,9 @@ " plt.plot(test_smooth, color=p1[0].get_color(), lw=2, label=\"test\")\n", " \n", " plt.ylim(test[-1]*(1.0-margin), test[-1]*(1.0+margin))\n", - " plt.legend(loc=\"best\", frameon=False)\n", + " plt.legend(loc=3, frameon=False)\n", " plt.xlabel(\"epoch\")\n", - " cms_label(x1=0.18)" + " cms_label(ax,y=0.95)" ] }, { @@ -586,44 +606,29 @@ "metadata": {}, "outputs": [], "source": [ - "plt.figure(figsize=(8, 8))\n", + "plt.figure(figsize=(7,7))\n", "ax = plt.axes()\n", "plt.imshow(cm_norm, cmap=\"Blues\")\n", "plt.colorbar()\n", "\n", - "cms_label(x1=0.18, x2=0.52, y=0.82)\n", + "thresh = cm_norm.max() / 1.5\n", + "for i, j in itertools.product(range(cm_norm.shape[0]), range(cm_norm.shape[1])):\n", + " plt.text(j, i, \"{:0.2f}\".format(cm_norm[i, j]),\n", + " horizontalalignment=\"center\",\n", + " color=\"white\" if cm_norm[i, j] > thresh else \"black\")\n", + "\n", + "cms_label(ax, x1=0.12)\n", + "#cms_label_sample_label(x1=0.18, x2=0.52, y=0.82)\n", "plt.xticks(range(len(CLASS_NAMES_CMS)-1), CLASS_NAMES_CMS[1:]);\n", "plt.yticks(range(len(CLASS_NAMES_CMS)-1), CLASS_NAMES_CMS[1:]);\n", - "plt.xlabel(\"Predicted PFCandidate\")\n", - "plt.ylabel(\"True PFCandidate\")\n", - "plt.title(\"MLPF trained on PF\", y=1.03)\n", + "plt.xlabel(\"MLPF candidate ID\")\n", + "plt.ylabel(\"PF candidate ID\")\n", + "plt.ylim(-0.5, 6.9)\n", + "plt.title(\"MLPF trained on PF\")\n", "plt.savefig(\"cm_normed.pdf\", bbox_inches=\"tight\")\n", "plt.savefig(\"cm_normed.png\", bbox_inches=\"tight\", dpi=300)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "prepared-fruit", - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(8, 8))\n", - "ax = plt.axes()\n", - "plt.imshow(cm/np.sum(cm), cmap=\"Blues\")\n", - "plt.colorbar()\n", - "\n", - "cms_label(x1=0.18, x2=0.52, y=0.82)\n", - "#sample_label(ax, x=0.8, y=1.0)\n", - "plt.xticks(range(len(CLASS_NAMES_CMS)-1), CLASS_NAMES_CMS[1:]);\n", - "plt.yticks(range(len(CLASS_NAMES_CMS)-1), CLASS_NAMES_CMS[1:]);\n", - "plt.xlabel(\"Predicted PFCandidate\")\n", - "plt.ylabel(\"True PFCandidate\")\n", - "plt.title(\"MLPF trained on PF\", y=1.03)\n", - "plt.savefig(\"cm.pdf\", bbox_inches=\"tight\")\n", - "plt.savefig(\"cm.png\", bbox_inches=\"tight\", dpi=300)" - ] - }, { "cell_type": "code", "execution_count": null, @@ -649,7 +654,7 @@ " plt.title(CLASS_NAMES_CMS[icls], y=1.05)\n", " plt.xlabel(ivar)\n", " plt.ylabel(\"Number of particles / bin\")\n", - " cms_label(x1=0.2, x2=0.6)\n", + " cms_label(ax)\n", " plt.savefig(\"distribution_icls{}_{}.pdf\".format(icls, ivar))\n", " plt.savefig(\"distribution_icls{}_{}.png\".format(icls, ivar), dpi=300)" ] @@ -669,7 +674,7 @@ " maxval=3,\n", " norm=matplotlib.colors.LogNorm()):\n", " \n", - " plt.figure(figsize=(6,5))\n", + " plt.figure(figsize=(8,7))\n", " ax = plt.axes()\n", " \n", " bins = np.linspace(minval, maxval, 100)\n", @@ -693,8 +698,8 @@ " plt.plot([minval, maxval], [minval, maxval], color=\"black\", ls=\"--\", lw=0.5)\n", " plt.xlim(minval, maxval)\n", " plt.ylim(minval, maxval)\n", - " cms_label(x1=0.2, x2=0.48)\n", - " plt.text(0.02, 0.95, particle_label, transform=ax.transAxes)\n", + " cms_label(ax)\n", + " plt.text(0.02, 0.94, particle_label, transform=ax.transAxes)\n", " ax.set_xticks(ax.get_yticks());" ] }, @@ -761,7 +766,7 @@ "metadata": {}, "outputs": [], "source": [ - "plot_particle_regression(ivar=\"energy\", icls=3, particle_label=\"HF\", minval=0.0, maxval=4)\n", + 
"plot_particle_regression(ivar=\"energy\", icls=3, particle_label=\"HFEM\", minval=0.0, maxval=4)\n", "plt.xlabel(\"PFCandidate $\\log_{10}$ E/GeV\")\n", "plt.ylabel(\"MLPFCandidate $\\log_{10}$ E/GeV\")\n", "plt.savefig(\"energy_corr_cls3_log.pdf\", bbox_inches=\"tight\")\n", @@ -775,7 +780,7 @@ "metadata": {}, "outputs": [], "source": [ - "plot_particle_regression(ivar=\"energy\", icls=4, particle_label=\"HF\", minval=0.0, maxval=4)\n", + "plot_particle_regression(ivar=\"energy\", icls=4, particle_label=\"HFHAD\", minval=0.0, maxval=4)\n", "plt.xlabel(\"PFCandidate $\\log_{10}$ E/GeV\")\n", "plt.ylabel(\"MLPFCandidate $\\log_{10}$ E/GeV\")\n", "plt.savefig(\"energy_corr_cls4_log.pdf\", bbox_inches=\"tight\")\n", @@ -803,12 +808,34 @@ "metadata": {}, "outputs": [], "source": [ - "plot_particle_regression(ivar=\"energy\", icls=5, particle_label=\"gamma\", minval=0.0, maxval=4)\n", + "plot_particle_regression(ivar=\"energy\", icls=6, particle_label=\"e\", minval=0.0, maxval=4)\n", "plt.xlabel(\"PFCandidate $\\log_{10}$ E/GeV\")\n", "plt.ylabel(\"MLPFCandidate $\\log_{10}$ E/GeV\")\n", - "plt.savefig(\"energy_corr_cls5_log.pdf\", bbox_inches=\"tight\")\n", - "plt.savefig(\"energy_corr_cls5_log.png\", bbox_inches=\"tight\", dpi=300)" + "plt.savefig(\"energy_corr_cls6_log.pdf\", bbox_inches=\"tight\")\n", + "plt.savefig(\"energy_corr_cls6_log.png\", bbox_inches=\"tight\", dpi=300)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51617bae", + "metadata": {}, + "outputs": [], + "source": [ + "plot_particle_regression(ivar=\"energy\", icls=7, particle_label=\"mu\", minval=0.0, maxval=4)\n", + "plt.xlabel(\"PFCandidate $\\log_{10}$ E/GeV\")\n", + "plt.ylabel(\"MLPFCandidate $\\log_{10}$ E/GeV\")\n", + "plt.savefig(\"energy_corr_cls7_log.pdf\", bbox_inches=\"tight\")\n", + "plt.savefig(\"energy_corr_cls7_log.png\", bbox_inches=\"tight\", dpi=300)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17609f40", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/notebooks/cmssw.ipynb b/notebooks/cmssw.ipynb index c52a4a9d2..eeec151df 100644 --- a/notebooks/cmssw.ipynb +++ b/notebooks/cmssw.ipynb @@ -9,56 +9,54 @@ "source": [ "import pickle\n", "import numpy as np\n", - "import mplhep\n", "import awkward\n", "import matplotlib.pyplot as plt\n", "import matplotlib.patches as mpatches\n", "\n", "import uproot\n", - "import boost_histogram as bh\n" + "import boost_histogram as bh\n", + "import mplhep\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "aa92c191", + "id": "4f940835", "metadata": {}, "outputs": [], "source": [ - "physics_process = \"qcd\"\n", + "CMS_PF_CLASS_NAMES = [\"none\" \"charged hadron\", \"neutral hadron\", \"hfem\", \"hfhad\", \"photon\", \"electron\", \"muon\"]\n", "\n", - "data_baseline = awkward.Array(pickle.load(open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11843.0/out.pkl\", \"rb\")))\n", - "data_mlpf = awkward.Array(pickle.load(open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11843.13/out.pkl\", \"rb\")))\n", + "ELEM_LABELS_CMS = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]\n", + "ELEM_NAMES_CMS = [\"NONE\", \"TRACK\", \"PS1\", \"PS2\", \"ECAL\", \"HCAL\", \"GSF\", \"BREM\", \"HFEM\", \"HFHAD\", \"SC\", \"HO\"]\n", "\n", - "fi1 = uproot.open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11843.0/DQM_V0001_R000000001__Global__CMSSW_X_Y_Z__RECO.root\")\n", - "fi2 = uproot.open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11843.13/DQM_V0001_R000000001__Global__CMSSW_X_Y_Z__RECO.root\")" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "id": "extreme-exhibit", - "metadata": {}, - "outputs": [], - "source": [ - "physics_process = \"ttbar\"\n", - "data_mlpf = awkward.Array(pickle.load(open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11834.13/out.pkl\", \"rb\")))\n", - "data_baseline = awkward.Array(pickle.load(open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11834.0/out.pkl\", \"rb\")))\n", + "CLASS_LABELS_CMS = [0, 211, 130, 1, 2, 22, 11, 13]\n", + "CLASS_NAMES_CMS = [\"none\", \"ch.had\", \"n.had\", \"HFEM\", \"HFHAD\", \"$\\gamma$\", \"$e^\\pm$\", \"$\\mu^\\pm$\"]\n", "\n", - "fi1 = uproot.open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11834.0/DQM_V0001_R000000001__Global__CMSSW_X_Y_Z__RECO.root\")\n", - "fi2 = uproot.open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11834.13/DQM_V0001_R000000001__Global__CMSSW_X_Y_Z__RECO.root\")" + "class_names = {k: v for k, v in zip(CLASS_LABELS_CMS, CLASS_NAMES_CMS)}" ] }, { "cell_type": "code", "execution_count": null, - "id": "excited-shepherd", + "id": "aa92c191", "metadata": {}, "outputs": [], "source": [ - "# physics_process = \"singlepi\"\n", - "# data_mlpf = awkward.Array(pickle.load(open(\"/home/joosep/reco/mlpf/CMSSW_11_3_0_pre2/11688.0_mlpf/out.pkl\", \"rb\")))\n", - "# data_baseline = awkward.Array(pickle.load(open(\"/home/joosep/reco/mlpf/CMSSW_11_3_0_pre2/11688.0_baseline/out.pkl\", \"rb\")))" + "physics_process = \"ttbar\" #\"ttbar\", \"qcd\"\n", + "\n", + "if physics_process == \"qcd\":\n", + " data_baseline = awkward.Array(pickle.load(open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11843.0/out.pkl\", \"rb\")))\n", + " data_mlpf = awkward.Array(pickle.load(open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11843.13/out.pkl\", \"rb\")))\n", + "\n", + " fi1 = uproot.open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11843.0/DQM_V0001_R000000001__Global__CMSSW_X_Y_Z__RECO.root\")\n", + " fi2 = uproot.open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11843.13/DQM_V0001_R000000001__Global__CMSSW_X_Y_Z__RECO.root\")\n", + "elif physics_process == \"ttbar\":\n", + " data_mlpf = awkward.Array(pickle.load(open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11834.13/out.pkl\", \"rb\")))\n", + " data_baseline = awkward.Array(pickle.load(open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11834.0/out.pkl\", \"rb\")))\n", + "\n", + " fi1 = uproot.open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11834.0/DQM_V0001_R000000001__Global__CMSSW_X_Y_Z__RECO.root\")\n", + " fi2 = uproot.open(\"/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3/11834.13/DQM_V0001_R000000001__Global__CMSSW_X_Y_Z__RECO.root\")" ] }, { @@ -68,489 +66,307 @@ "metadata": {}, "outputs": [], "source": [ - "def cms_label(x0=0.12, x1=0.23, x2=0.67, y=0.90):\n", - " plt.figtext(x0, y,'CMS',fontweight='bold', wrap=True, horizontalalignment='left', fontsize=12)\n", - " plt.figtext(x1, y,'Simulation Preliminary', style='italic', wrap=True, horizontalalignment='left', fontsize=10)\n", - " plt.figtext(x2, y,'Run 3 (14 TeV)', wrap=True, horizontalalignment='left', fontsize=10)\n", + "def cms_label(ax, x0=0.01, x1=0.1, x2=0.98, y=0.97):\n", + " plt.figtext(x0, y,'CMS',fontweight='bold', wrap=True, horizontalalignment='left', fontsize=12, transform=ax.transAxes)\n", + " plt.figtext(x1, y,'Simulation Preliminary', style='italic', wrap=True, horizontalalignment='left', fontsize=10, transform=ax.transAxes)\n", + " plt.figtext(x2, y,'Run 3 (14 TeV)', wrap=False, horizontalalignment='right', fontsize=10, transform=ax.transAxes)\n", + " \n", + "def sample_label(ax, physics_process=physics_process, x=0.01, 
y=0.93):\n", + " plt.text(x, y, physics_process_str[physics_process], ha=\"left\", size=10, transform=ax.transAxes)\n", " \n", "physics_process_str = {\n", " \"ttbar\": \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\",\n", " \"singlepi\": \"single $\\pi^{\\pm}$ events\",\n", - " \"qcd\": \"QCD\",\n", - "}\n", - "\n", - "def sample_label(ax, x=0.03, y=0.98, additional_text=\"\", physics_process=physics_process):\n", - " plt.text(x, y,\n", - " physics_process_str[physics_process]+additional_text,\n", - " va=\"top\", ha=\"left\", size=10, transform=ax.transAxes)\n" + " \"qcd\": \"QCD events\",\n", + "}" ] }, { "cell_type": "code", "execution_count": null, - "id": "speaking-contents", + "id": "ccfdab54", "metadata": {}, "outputs": [], "source": [ - "plt.figure(figsize=(5, 5))\n", - "ax = plt.axes()\n", + "def plot_candidates_pf_vs_mlpf(variable, varname, bins):\n", + " plt.figure(figsize=(12,12))\n", + " ax = plt.axes()\n", "\n", - "bins = np.linspace(0, 500, 61)\n", - "plt.hist(awkward.flatten(data_baseline[\"ak4PFJetsCHS\"][\"pt\"]), bins=bins, histtype=\"step\", lw=2, label=\"PF\");\n", - "plt.hist(awkward.flatten(data_mlpf[\"ak4PFJetsCHS\"][\"pt\"]), bins=bins, histtype=\"step\", lw=2, label=\"MLPF\");\n", - "plt.yscale(\"log\")\n", - "plt.ylim(top=1e5)\n", - "cms_label()\n", - "sample_label(ax, x=0.02)\n", - "plt.xlabel(\"ak4PFJetsCHS $p_T$ [GeV]\")\n", - "plt.ylabel(\"Number of jets\")\n", - "plt.legend(loc=\"best\")\n", + " hists_baseline = []\n", + " hists_mlpf = []\n", + " iplot = 1\n", + " for pid in [13,11,22,1,2,130,211]:\n", + " msk1 = np.abs(data_baseline[\"particleFlow\"][\"pdgId\"]) == pid\n", + " msk2 = np.abs(data_mlpf[\"particleFlow\"][\"pdgId\"]) == pid\n", "\n", - "plt.savefig(\"ak4jet_pt_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "formed-single", - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5, 5))\n", + " d1 = awkward.flatten(data_baseline[\"particleFlow\"][variable][msk1])\n", + " d2 = awkward.flatten(data_mlpf[\"particleFlow\"][variable][msk2])\n", + " \n", + " h1 = bh.Histogram(bh.axis.Variable(bins))\n", + " h1.fill(d1)\n", + " h2 = bh.Histogram(bh.axis.Variable(bins))\n", + " h2.fill(d2)\n", + " \n", + " ax = plt.subplot(3,3,iplot)\n", + " plt.sca(ax)\n", "\n", - "bins = np.linspace(0, 2500, 61)\n", - "plt.hist(awkward.flatten(data_baseline[\"ak4PFJetsCHS\"][\"energy\"]), bins=bins, histtype=\"step\", lw=2, label=\"PF\");\n", - "plt.hist(awkward.flatten(data_mlpf[\"ak4PFJetsCHS\"][\"energy\"]), bins=bins, histtype=\"step\", lw=2, label=\"MLPF\");\n", - "plt.yscale(\"log\")\n", - "plt.ylim(top=1e5)\n", - "cms_label()\n", - "sample_label(ax, x=0.02)\n", + " mplhep.histplot(h1, histtype=\"step\", lw=2, label=\"PF\");\n", + " mplhep.histplot(h2, histtype=\"step\", lw=2, label=\"MLPF\");\n", + " \n", + " if variable!=\"eta\":\n", + " plt.yscale(\"log\")\n", "\n", - "plt.xlabel(\"ak4PFJetsCHS $E$ [GeV]\")\n", - "plt.ylabel(\"Number of jets\")\n", - "plt.legend(loc=\"best\")\n", + " plt.legend(loc=\"best\", frameon=False, title=class_names[pid])\n", + " plt.xlabel(varname)\n", + " plt.ylabel(\"Number of particles / bin\")\n", + " sample_label(ax, x=0.08)\n", "\n", - "plt.savefig(\"ak4jet_energy_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "nonprofit-polish", - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5, 5))\n", + " iplot += 1\n", + " \n", + " 
hists_baseline.append(h1)\n", + " hists_mlpf.append(h2)\n", + " plt.tight_layout()\n", + " return hists_baseline, hists_mlpf\n", "\n", - "bins = np.linspace(-6, 6, 101)\n", - "plt.hist(awkward.flatten(data_baseline[\"ak4PFJetsCHS\"][\"eta\"]), bins=bins, histtype=\"step\", lw=2, label=\"PF\");\n", - "plt.hist(awkward.flatten(data_mlpf[\"ak4PFJetsCHS\"][\"eta\"]), bins=bins, histtype=\"step\", lw=2, label=\"MLPF\");\n", - "#plt.yscale(\"log\")\n", - "cms_label()\n", - "sample_label(ax)\n", - "plt.ylim(top=2000)\n", - "plt.xlabel(\"ak4PFJetsCHS $\\eta$\")\n", - "plt.ylabel(\"Number of jets\")\n", - "plt.legend(loc=\"best\")\n", + "def plot_candidates_pf_vs_mlpf_single(hists):\n", + " plt.figure(figsize=(7, 7))\n", + " ax = plt.axes()\n", + " v1 = mplhep.histplot([h[bh.rebin(2)] for h in hists[0]], stack=True, label=[class_names[k] for k in [13,11,22,1,2,130,211]], lw=1)\n", + " v2 = mplhep.histplot([h[bh.rebin(2)] for h in hists[1]], stack=True, color=[x.stairs.get_edgecolor() for x in v1][::-1], lw=2, histtype=\"errorbar\")\n", "\n", - "plt.savefig(\"ak4jet_eta_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" + " legend1 = plt.legend(v1, [x.legend_artist.get_label() for x in v1], loc=(0.60, 0.6), title=\"PF\")\n", + " legend2 = plt.legend(v2, [x.legend_artist.get_label() for x in v1], loc=(0.8, 0.6), title=\"MLPF\")\n", + " plt.gca().add_artist(legend1)\n", + " plt.ylabel(\"Total number of particles / bin\")\n", + " cms_label(ax)\n", + " sample_label(ax)" ] }, { "cell_type": "code", "execution_count": null, - "id": "center-heath", + "id": "a70c3657", "metadata": {}, "outputs": [], "source": [ - "color_map = {\n", - " 1: \"red\",\n", - " 2: \"blue\",\n", - " 11: \"orange\",\n", - " 22: \"cyan\",\n", - " 13: \"purple\",\n", - " 130: \"green\",\n", - " 211: \"black\"\n", - "}\n", - "\n", - "particle_labels = {\n", - " 1: \"HFEM\",\n", - " 2: \"HFHAD\",\n", - " 11: \"$e^\\pm$\",\n", - " 22: \"$\\gamma$\",\n", - " 13: \"$\\mu$\",\n", - " 130: \"neutral hadron\",\n", - " 211: \"charged hadron\"\n", - " \n", - "}" + "hists = plot_candidates_pf_vs_mlpf(\"pt\", \"PFCandidate $p_T$ [GeV]\", np.linspace(0,200,101))\n", + "# plt.savefig(\"candidates_pt_{}.pdf\".format(physics_process), bbox_inches=\"tight\")\n", + "# plt.savefig(\"candidates_pt_{}.png\".format(physics_process), dpi=400, bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "sufficient-medication", + "id": "1f85a942", "metadata": {}, "outputs": [], "source": [ - "def draw_event(iev):\n", - " pt_0 = data_mlpf[\"particleFlow\"][\"pt\"][iev]\n", - " energy_0 = data_mlpf[\"particleFlow\"][\"energy\"][iev]\n", - " eta_0 = data_mlpf[\"particleFlow\"][\"eta\"][iev]\n", - " phi_0 = data_mlpf[\"particleFlow\"][\"phi\"][iev]\n", - " pdgid_0 = np.abs(data_mlpf[\"particleFlow\"][\"pdgId\"][iev])\n", - " \n", - " pt_1 = data_baseline[\"particleFlow\"][\"pt\"][iev]\n", - " energy_1 = data_baseline[\"particleFlow\"][\"energy\"][iev]\n", - " eta_1 = data_baseline[\"particleFlow\"][\"eta\"][iev]\n", - " phi_1 = data_baseline[\"particleFlow\"][\"phi\"][iev]\n", - " pdgid_1 = np.abs(data_baseline[\"particleFlow\"][\"pdgId\"][iev])\n", - " \n", - " plt.figure(figsize=(5, 5))\n", - " ax = plt.axes()\n", - " plt.scatter(eta_0, phi_0, marker=\".\", s=energy_0, c=[color_map[p] for p in pdgid_0], alpha=0.6)\n", - "\n", - " pids = [211,130,1,2,22,11,13]\n", - " for p in pids:\n", - " plt.plot([], [], color=color_map[p], lw=0, marker=\"o\", label=particle_labels[p])\n", - " plt.legend(loc=8, frameon=False, ncol=3, 
fontsize=8)\n", + "plot_candidates_pf_vs_mlpf_single(hists)\n", + "plt.xlabel(\"PFCandidate $p_T$ [GeV]\")\n", + "plt.yscale(\"log\")\n", + "plt.ylim(top=1e7)\n", "\n", - " cms_label()\n", - " sample_label(ax)\n", - " plt.xlim(-6,6)\n", - " plt.ylim(-5,4)\n", - " plt.xlabel(\"PFCandidate $\\eta$\")\n", - " plt.ylabel(\"PFCandidate $\\phi$\")\n", - " plt.title(\"MLPF (trained on PF), CMSSW-ONNX inference\", y=1.05)\n", - " plt.savefig(\"event_mlpf_{}_iev{}.pdf\".format(physics_process, iev), bbox_inches=\"tight\")\n", - " plt.savefig(\"event_mlpf_{}_iev{}.png\".format(physics_process, iev), bbox_inches=\"tight\", dpi=300)\n", - " \n", - " plt.figure(figsize=(5, 5))\n", - " ax = plt.axes()\n", - " plt.scatter(eta_1, phi_1, marker=\".\", s=energy_1, c=[color_map[p] for p in pdgid_1], alpha=0.6)\n", - "# plt.scatter(\n", - "# data_baseline[\"ak4PFJetsCHS\"][\"eta\"][iev],\n", - "# data_baseline[\"ak4PFJetsCHS\"][\"phi\"][iev],\n", - "# s=data_baseline[\"ak4PFJetsCHS\"][\"energy\"][iev], color=\"gray\", alpha=0.3\n", - "# )\n", - " cms_label()\n", - " sample_label(ax)\n", - " plt.xlim(-6,6)\n", - " plt.ylim(-5,4)\n", - " plt.xlabel(\"PFCandidate $\\eta$\")\n", - " plt.ylabel(\"PFCandidate $\\phi$\")\n", - " plt.title(\"Standard PF, CMSSW\", y=1.05)\n", - " \n", - " pids = [211,130,1,2,22,11,13]\n", - " for p in pids:\n", - " plt.plot([], [], color=color_map[p], lw=0, marker=\"o\", label=particle_labels[p])\n", - " plt.legend(loc=8, frameon=False, ncol=3, fontsize=8)\n", - " \n", - " plt.savefig(\"event_pf_{}_iev{}.pdf\".format(physics_process, iev), bbox_inches=\"tight\")\n", - " plt.savefig(\"event_pf_{}_iev{}.png\".format(physics_process, iev), bbox_inches=\"tight\", dpi=300)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "comfortable-albert", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "draw_event(0)" + "plt.savefig(\"candidates_pt_single_{}.pdf\".format(physics_process), bbox_inches=\"tight\")\n", + "plt.savefig(\"candidates_pt_single_{}.png\".format(physics_process), dpi=400, bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "accompanied-wichita", + "id": "ae617dff", "metadata": {}, "outputs": [], "source": [ - "draw_event(1)" + "hists = plot_candidates_pf_vs_mlpf(\"eta\", \"PFCandidate $\\eta$\", np.linspace(-6, 6,101))\n", + "plt.savefig(\"candidates_eta_{}.pdf\".format(physics_process), bbox_inches=\"tight\")\n", + "plt.savefig(\"candidates_eta_{}.png\".format(physics_process), dpi=400, bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "curious-lover", - "metadata": { - "scrolled": false - }, + "id": "e7a574d5", + "metadata": {}, "outputs": [], "source": [ - "draw_event(2)" + "plot_candidates_pf_vs_mlpf_single(hists)\n", + "plt.xlabel(\"PFCandidate $\\eta$\")\n", + "plt.yscale(\"log\")\n", + "plt.ylim(top=1e8)\n", + "plt.savefig(\"candidates_eta_single_{}.pdf\".format(physics_process), bbox_inches=\"tight\")\n", + "plt.savefig(\"candidates_ete_single_{}.png\".format(physics_process), dpi=400, bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "3fe02a91", + "id": "d756ef80", "metadata": {}, "outputs": [], "source": [ - "def plot_dqm(key, title, rebin=None):\n", - " h1 = fi1.get(key).to_boost()\n", - " h2 = fi2.get(key).to_boost()\n", + "def plot_pf_vs_mlpf_jet(jetcoll, variable, bins):\n", + " plt.figure(figsize=(7,7))\n", + " ax = plt.axes()\n", "\n", - " fig, (ax1, ax2) = plt.subplots(2, 1)\n", - " plt.sca(ax1)\n", - " if 
rebin:\n", - " h1 = h1[bh.rebin(rebin)]\n", - " h2 = h2[bh.rebin(rebin)]\n", - " \n", - " mplhep.histplot(h1, yerr=0, label=\"PF\");\n", - " mplhep.histplot(h2, yerr=0, label=\"MLPF\");\n", - " plt.legend(frameon=False)\n", - " plt.ylabel(\"Number of particles / bin\")\n", - " sample_label(ax=ax1, additional_text=\", \"+title, physics_process=physics_process)\n", + " h1 = bh.Histogram(bh.axis.Variable(bins))\n", + " h1.fill(awkward.flatten(data_baseline[jetcoll][variable]))\n", "\n", - " plt.sca(ax2)\n", - " ratio_hist = h2/h1\n", - " vals_y = ratio_hist.values()\n", - " vals_y[np.isnan(vals_y)] = 0\n", - " plt.plot(ratio_hist.axes[0].centers, vals_y, color=\"gray\", lw=0, marker=\".\")\n", - " plt.ylim(0,2)\n", - " plt.axhline(1.0, color=\"black\", ls=\"--\")\n", - " plt.ylabel(\"MLPF / PF\")\n", - " \n", - " return ax1, ax2\n", - " \n", - "#plt.xscale(\"log\")\n", - "#plt.yscale(\"log\")\n", + " h2 = bh.Histogram(bh.axis.Variable(bins))\n", + " h2.fill(awkward.flatten(data_mlpf[jetcoll][variable]))\n", "\n", - "log10_pt = \"$\\log_{10}[p_T/\\mathrm{GeV}]$\"\n", - "eta = \"$\\eta$\"\n", - "\n", - "dqm_plots_ptcl = [\n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/chargedHadron/chargedHadronLog10Pt\",\n", - " \"ch.had.\", log10_pt, \"ch_had_logpt\"),\n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/chargedHadron/chargedHadronEta\",\n", - " \"ch.had.\", eta, \"ch_had_eta\"),\n", - " \n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/neutralHadron/neutralHadronLog10Pt\",\n", - " \"n.had.\", log10_pt, \"n_had_logpt\"),\n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/neutralHadron/neutralHadronPtLow\",\n", - " \"n.had.\", \"$p_T$ [GeV]\", \"n_had_ptlow\"),\n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/neutralHadron/neutralHadronPtMid\",\n", - " \"n.had.\", \"$p_T$ [GeV]\", \"n_had_ptmid\"),\n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/neutralHadron/neutralHadronEta\",\n", - " \"n.had.\", eta, \"n_had_eta\"),\n", - " \n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/HF_hadron/HF_hadronLog10Pt\",\n", - " \"HFHAD\", log10_pt, \"hfhad_logpt\"),\n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/HF_hadron/HF_hadronEta\",\n", - " \"HFHAD\", eta, \"hfhad_eta\"),\n", - " \n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/HF_EM_particle/HF_EM_particleLog10Pt\",\n", - " \"HFEM\", log10_pt, \"hfem_logpt\"),\n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/HF_EM_particle/HF_EM_particleEta\",\n", - " \"HFEM\", eta, \"hfem_eta\"),\n", - " \n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/photon/photonLog10Pt\",\n", - " \"photon\", log10_pt, \"photon_logpt\"),\n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/photon/photonEta\",\n", - " \"photon\", eta, \"photon_eta\"),\n", - " \n", - " \n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/electron/electronLog10Pt\",\n", - " \"electron\", log10_pt, \"electron_logpt\"),\n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/electron/electronEta\",\n", - " \"electron\", eta, \"electron_eta\"), \n", - " \n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/muon/muonLog10Pt\",\n", - " \"muon\", log10_pt, \"muon_logpt\"),\n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PackedCandidates/muon/muonEta\",\n", - " \"muon\", eta, \"muon_eta\"),\n", - "]\n", + " mplhep.histplot(h1, histtype=\"step\", lw=2, 
label=\"PF\");\n", + " mplhep.histplot(h2, histtype=\"step\", lw=2, label=\"MLPF\");\n", + " cms_label(ax)\n", + " sample_label(ax, x=0.02)\n", "\n", - "dqm_plots_jetres = [\n", - " (\"DQMData/Run 1/ParticleFlow/Run summary/PFJetValidation/CompWithGenJet/mean_delta_et_Over_et_VS_et_\",\n", - " \"jets\", \"gen-jet $E_t$\", \"$\\Delta E_t / E_t$\"),\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "348eb94c", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "for key, title, xlabel, plot_label in dqm_plots_ptcl:\n", - " rh = plot_dqm(key, title)\n", - " plt.xlabel(xlabel)\n", - " cms_label()\n", - " plt.savefig(\"dqm_{}_{}.pdf\".format(plot_label, physics_process), bbox_inches=\"tight\")\n", - " plt.savefig(\"dqm_{}_{}.png\".format(plot_label, physics_process), bbox_inches=\"tight\", dpi=300)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "79d3fdfe", - "metadata": {}, - "outputs": [], - "source": [ - "ax1, ax2 = plot_dqm(\"DQMData/Run 1/JetMET/Run summary/Jet/Cleanedak4PFJetsCHS/Pt\", \"ak4PFCHS jets\")\n", - "ax2.set_xlabel(\"jet $p_t$ [GeV]\")\n", - "ax1.set_ylabel(\"number of jets / bin\")\n", - "#plt.xscale(\"log\")\n", - "#plt.ylim(bottom=1, top=1e4)\n", - "ax1.set_yscale(\"log\")\n", - "ax1.set_ylim(bottom=1, top=1e5)\n", - "#ax2.set_ylim(0,5)\n", + " plt.ylabel(\"Number of jets\")\n", + " plt.legend(loc=(0.8, 0.85), frameon=False)\n", "\n", - "cms_label()\n", - "plt.savefig(\"dqm_jet_pt_{}.pdf\".format(physics_process), bbox_inches=\"tight\")\n", - "plt.savefig(\"dqm_jet_pt_{}.png\".format(physics_process), bbox_inches=\"tight\", dpi=300)" + " plt.savefig(\"ak4jet_puppi_energy_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "67ee87dc", + "id": "a538baca", "metadata": {}, "outputs": [], "source": [ - "ax1, ax2 = plot_dqm(\"DQMData/Run 1/JetMET/Run summary/Jet/CleanedslimmedJetsPuppi/Pt\", \"ak4PFPuppi jets\")\n", - "ax2.set_xlabel(\"jet $p_t$ [GeV]\")\n", - "ax1.set_ylabel(\"number of jets / bin\")\n", - "#plt.xscale(\"log\")\n", - "#plt.ylim(bottom=1, top=1e4)\n", - "ax1.set_yscale(\"log\")\n", - "ax1.set_ylim(bottom=1, top=1e5)\n", - "#ax2.set_ylim(0,5)\n", - "\n", - "cms_label()\n", - "plt.savefig(\"dqm_jet_pt_puppi_{}.pdf\".format(physics_process), bbox_inches=\"tight\")\n", - "plt.savefig(\"dqm_jet_pt_puppi_{}.png\".format(physics_process), bbox_inches=\"tight\", dpi=300)" + "plot_pf_vs_mlpf_jet(\"ak4PFJetsCHS\", \"pt\", np.linspace(0,500,61))\n", + "plt.yscale(\"log\")\n", + "plt.ylim(top=1e5)\n", + "plt.xlabel(\"ak4PFJetsCHS $p_T$ [GeV]\")\n", + "plt.savefig(\"ak4jet_chs_pt_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "1665549f", + "id": "23271180", "metadata": {}, "outputs": [], "source": [ - "ax1, ax2 = plot_dqm(\"DQMData/Run 1/JetMET/Run summary/Jet/Cleanedak4PFJetsCHS/Eta\", \"ak4PFCHS jets\")\n", - "ax2.set_xlabel(\"jet $\\eta$\")\n", - "ax1.set_ylabel(\"number of jets / bin\")\n", - "#plt.xscale(\"log\")\n", - "#plt.ylim(bottom=1, top=1e4)\n", - "#ax1.set_yscale(\"log\")\n", - "ax1.set_ylim(bottom=0, top=1e3)\n", - "#ax2.set_ylim(0,5)\n", - "\n", - "cms_label()\n", - "plt.savefig(\"dqm_jet_eta_{}.pdf\".format(physics_process), bbox_inches=\"tight\")\n", - "plt.savefig(\"dqm_jet_eta_{}.png\".format(physics_process), bbox_inches=\"tight\", dpi=300)" + "plot_pf_vs_mlpf_jet(\"ak4PFJetsPuppi\", \"pt\", np.linspace(0,500,61))\n", + "plt.yscale(\"log\")\n", 
+ "plt.ylim(top=1e5)\n", + "plt.xlabel(\"ak4PFJetsPuppi $p_T$ [GeV]\")\n", + "plt.savefig(\"ak4jet_puppi_pt_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "7f320f04", + "id": "e30d3355", "metadata": {}, "outputs": [], "source": [ - "ax1, ax2 = plot_dqm(\"DQMData/Run 1/JetMET/Run summary/Jet/CleanedslimmedJetsPuppi/Eta\", \"ak4PFPuppi jets\")\n", - "ax2.set_xlabel(\"jet $\\eta$\")\n", - "ax1.set_ylabel(\"number of jets / bin\")\n", - "#plt.xscale(\"log\")\n", - "#plt.ylim(bottom=1, top=1e4)\n", - "#ax1.set_yscale(\"log\")\n", - "#ax1.set_ylim(bottom=0, top=20)\n", - "#ax2.set_ylim(0,5)\n", - "\n", - "cms_label()\n", - "plt.savefig(\"dqm_jet_eta_puppi_{}.pdf\".format(physics_process), bbox_inches=\"tight\")\n", - "plt.savefig(\"dqm_jet_eta_puppi_{}.png\".format(physics_process), bbox_inches=\"tight\", dpi=300)" + "plot_pf_vs_mlpf_jet(\"ak4PFJetsCHS\", \"eta\", np.linspace(-6, 6, 61))\n", + "plt.ylim(0,10000)\n", + "plt.xlabel(\"ak4PFJetsCHS $\\eta$\")\n", + "plt.savefig(\"ak4jet_chs_eta_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "7908e453", + "id": "2e220a66", "metadata": {}, "outputs": [], "source": [ - "# plot_dqm(\"DQMData/Run 1/ParticleFlow/Run summary/PFJetValidation/CompWithGenJet/mean_delta_et_Over_et_VS_et_\", \"AK4 PF jets\")\n", - "# plt.xlabel(\"gen-jet $E_t$ [GeV]\")\n", - "# plt.ylabel(\"profiled $\\mu(\\Delta E_t / E_t$)\")\n", - "# plt.xscale(\"log\")\n", - "# plt.ylim(0,3)\n", - "# cms_label()\n", - "# plt.savefig(\"dqm_jet_mean_delta_et_Over_et_VS_et.pdf\", bbox_inches=\"tight\")" + "plot_pf_vs_mlpf_jet(\"ak4PFJetsPuppi\", \"eta\", np.linspace(-6, 6, 61))\n", + "plt.ylim(0,2000)\n", + "plt.xlabel(\"ak4PFJetsPuppi $\\eta$\")\n", + "plt.savefig(\"ak4jet_puppi_eta_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "d3bdc5b1", + "id": "10d7a3d8", "metadata": {}, "outputs": [], "source": [ - "# plot_dqm(\"DQMData/Run 1/ParticleFlow/Run summary/PFJetValidation/CompWithGenJet/sigma_delta_et_Over_et_VS_et_\", \"AK4 PF jets\")\n", - "# plt.xlabel(\"gen-jet $E_t$ [GeV]\")\n", - "# plt.ylabel(\"profiled $\\sigma(\\Delta E_t / E_t)$\")\n", - "# plt.xscale(\"log\")\n", - "# plt.ylim(0,10)\n", - "# cms_label()\n", - "# plt.savefig(\"dqm_jet_sigma_delta_et_Over_et_VS_et.pdf\", bbox_inches=\"tight\")" + "plot_pf_vs_mlpf_jet(\"ak4PFJetsCHS\", \"energy\", np.linspace(0,2500,61))\n", + "plt.yscale(\"log\")\n", + "plt.ylim(top=1e5)\n", + "plt.xlabel(\"ak4PFJetsCHS $E$ [GeV]\")\n", + "plt.savefig(\"ak4jet_chs_energy_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "545c5575", + "id": "7dfa0975", "metadata": {}, "outputs": [], "source": [ - "ax1, ax2 = plot_dqm(\"DQMData/Run 1/JetMET/Run summary/METValidation/pfMet/MET\", \"PFMET\", rebin=1)\n", - "ax2.set_xlabel(\"$\\sum E_t$ [GeV]\")\n", - "ax1.set_ylabel(\"number of events / bin\")\n", - "#ax1.set_xscale(\"log\")\n", - "ax1.set_ylim(bottom=1, top=1000)\n", - "ax1.set_yscale(\"log\")\n", - "plt.savefig(\"dqm_met_sumet_{}.pdf\".format(physics_process), bbox_inches=\"tight\")\n", - "plt.savefig(\"dqm_met_sumet_{}.png\".format(physics_process), bbox_inches=\"tight\", dpi=300)" + "plot_pf_vs_mlpf_jet(\"ak4PFJetsPuppi\", \"energy\", np.linspace(0,2500,61))\n", + "plt.yscale(\"log\")\n", + "plt.ylim(top=1e5)\n", + "plt.xlabel(\"ak4PFJetsPuppi $E$ [GeV]\")\n", + 
"plt.savefig(\"ak4jet_puppi_energy_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "cb82fc75", + "id": "674700b6", "metadata": {}, "outputs": [], "source": [ - "# plot_dqm(\"DQMData/Run 1/ParticleFlow/Run summary/PFMETValidation/CompWithGenMET/profileRMS_delta_et_Over_et_VS_et_\", \"PFMET\")\n", - "# plt.xlabel(\"gen-MET $E_t$ [GeV]\")\n", - "# plt.ylabel(\"profiled RMS $\\Delta E_t / E_t$\")\n", - "# plt.xscale(\"log\")\n", - "# plt.ylim(0,3)\n", - "# cms_label()\n", - "# plt.savefig(\"dqm_met_profileRMS_delta_et_Over_et_VS_et.pdf\", bbox_inches=\"tight\")" + "plt.figure(figsize=(7,7))\n", + "ax = plt.axes()\n", + "\n", + "bins = np.linspace(0, 500, 41)\n", + "\n", + "h1 = bh.Histogram(bh.axis.Variable(bins))\n", + "h1.fill(awkward.flatten(data_baseline[\"pfMet\"][\"pt\"]))\n", + "\n", + "h2 = bh.Histogram(bh.axis.Variable(bins))\n", + "h2.fill(awkward.flatten(data_mlpf[\"pfMet\"][\"pt\"]))\n", + "\n", + "mplhep.histplot(h1, histtype=\"step\", lw=2, label=\"PF\");\n", + "mplhep.histplot(h2, histtype=\"step\", lw=2, label=\"MLPF\");\n", + "plt.yscale(\"log\")\n", + "plt.ylim(top=1e3)\n", + "cms_label(ax)\n", + "sample_label(ax, x=0.02)\n", + "plt.xlabel(\"pfMet $p_T$ [GeV]\")\n", + "plt.ylabel(\"Number of events\")\n", + "plt.legend(loc=(0.8, 0.85), frameon=False)\n", + "\n", + "plt.savefig(\"pfmet_pt_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "74a0e050", + "id": "114f3670", "metadata": {}, "outputs": [], "source": [ - "# plot_dqm(\"DQMData/Run 1/ParticleFlow/Run summary/PFMETValidation/CompWithGenMET/profile_delta_et_VS_et_\", \"PFMET\")\n", - "# plt.xlabel(\"gen-MET $E_t$ [GeV]\")\n", - "# plt.ylabel(\"profiled $\\Delta E_t$ [GeV]\")\n", - "# plt.xscale(\"log\")\n", - "# plt.ylim(0, 80)\n", - "# cms_label()\n", - "# plt.savefig(\"dqm_met_delta_et_VS_et.pdf\", bbox_inches=\"tight\")" + "plt.figure(figsize=(7,7))\n", + "ax = plt.axes()\n", + "\n", + "bins = np.linspace(0, 500, 41)\n", + "\n", + "h1 = bh.Histogram(bh.axis.Variable(bins))\n", + "h1.fill(awkward.flatten(data_baseline[\"pfMetPuppi\"][\"pt\"]))\n", + "\n", + "h2 = bh.Histogram(bh.axis.Variable(bins))\n", + "h2.fill(awkward.flatten(data_mlpf[\"pfMetPuppi\"][\"pt\"]))\n", + "\n", + "mplhep.histplot(h1, histtype=\"step\", lw=2, label=\"PF\");\n", + "mplhep.histplot(h2, histtype=\"step\", lw=2, label=\"MLPF\");\n", + "plt.yscale(\"log\")\n", + "plt.ylim(top=1e3)\n", + "cms_label(ax)\n", + "sample_label(ax, x=0.02)\n", + "plt.xlabel(\"pfMet PUPPI $p_T$ [GeV]\")\n", + "plt.ylabel(\"Number of events\")\n", + "plt.legend(loc=(0.8, 0.85), frameon=False)\n", + "\n", + "plt.savefig(\"pfmet_puppi_pt_{}.pdf\".format(physics_process), bbox_inches=\"tight\")" ] }, { @@ -561,81 +377,81 @@ "outputs": [], "source": [ "timing_output = \"\"\"\n", - "Nelem=1600 mean_time=5.92 ms stddev_time=5.03 ms mem_used=1018 MB\n", - "Nelem=1920 mean_time=6.57 ms stddev_time=1.01 ms mem_used=1110 MB\n", - "Nelem=2240 mean_time=6.92 ms stddev_time=0.81 ms mem_used=1127 MB\n", - "Nelem=2560 mean_time=7.37 ms stddev_time=0.66 ms mem_used=1136 MB\n", - "Nelem=2880 mean_time=8.17 ms stddev_time=0.56 ms mem_used=1123 MB\n", - "Nelem=3200 mean_time=8.88 ms stddev_time=1.09 ms mem_used=1121 MB\n", - "Nelem=3520 mean_time=9.51 ms stddev_time=0.65 ms mem_used=1121 MB\n", - "Nelem=3840 mean_time=10.48 ms stddev_time=0.93 ms mem_used=1255 MB\n", - "Nelem=4160 mean_time=11.05 ms stddev_time=0.87 ms mem_used=1255 MB\n", - 
"Nelem=4480 mean_time=12.07 ms stddev_time=0.81 ms mem_used=1230 MB\n", - "Nelem=4800 mean_time=12.92 ms stddev_time=0.89 ms mem_used=1230 MB\n", - "Nelem=5120 mean_time=13.44 ms stddev_time=0.75 ms mem_used=1230 MB\n", - "Nelem=5440 mean_time=14.07 ms stddev_time=0.78 ms mem_used=1230 MB\n", - "Nelem=5760 mean_time=15.00 ms stddev_time=0.84 ms mem_used=1230 MB\n", - "Nelem=6080 mean_time=15.74 ms stddev_time=1.05 ms mem_used=1230 MB\n", - "Nelem=6400 mean_time=16.32 ms stddev_time=1.30 ms mem_used=1230 MB\n", - "Nelem=6720 mean_time=17.24 ms stddev_time=0.99 ms mem_used=1230 MB\n", - "Nelem=7040 mean_time=17.74 ms stddev_time=0.85 ms mem_used=1230 MB\n", - "Nelem=7360 mean_time=18.59 ms stddev_time=1.04 ms mem_used=1230 MB\n", - "Nelem=7680 mean_time=19.33 ms stddev_time=0.93 ms mem_used=1499 MB\n", - "Nelem=8000 mean_time=20.00 ms stddev_time=1.06 ms mem_used=1499 MB\n", - "Nelem=8320 mean_time=20.55 ms stddev_time=1.13 ms mem_used=1499 MB\n", - "Nelem=8640 mean_time=21.10 ms stddev_time=0.90 ms mem_used=1499 MB\n", - "Nelem=8960 mean_time=22.88 ms stddev_time=1.24 ms mem_used=1499 MB\n", - "Nelem=9280 mean_time=23.44 ms stddev_time=1.14 ms mem_used=1499 MB\n", - "Nelem=9600 mean_time=23.93 ms stddev_time=1.04 ms mem_used=1499 MB\n", - "Nelem=9920 mean_time=24.75 ms stddev_time=0.91 ms mem_used=1499 MB\n", - "Nelem=10240 mean_time=25.47 ms stddev_time=1.33 ms mem_used=1499 MB\n", - "Nelem=10560 mean_time=26.29 ms stddev_time=1.33 ms mem_used=1499 MB\n", - "Nelem=10880 mean_time=26.72 ms stddev_time=1.18 ms mem_used=1490 MB\n", - "Nelem=11200 mean_time=29.50 ms stddev_time=2.60 ms mem_used=1502 MB\n", - "Nelem=11520 mean_time=28.50 ms stddev_time=0.91 ms mem_used=1491 MB\n", - "Nelem=11840 mean_time=29.11 ms stddev_time=1.14 ms mem_used=1491 MB\n", - "Nelem=12160 mean_time=30.01 ms stddev_time=1.15 ms mem_used=1499 MB\n", - "Nelem=12480 mean_time=30.55 ms stddev_time=0.94 ms mem_used=1499 MB\n", - "Nelem=12800 mean_time=31.31 ms stddev_time=1.08 ms mem_used=1499 MB\n", - "Nelem=13120 mean_time=32.61 ms stddev_time=1.19 ms mem_used=1499 MB\n", - "Nelem=13440 mean_time=33.37 ms stddev_time=1.01 ms mem_used=1499 MB\n", - "Nelem=13760 mean_time=34.13 ms stddev_time=1.18 ms mem_used=1499 MB\n", - "Nelem=14080 mean_time=34.73 ms stddev_time=1.40 ms mem_used=1499 MB\n", - "Nelem=14400 mean_time=35.79 ms stddev_time=1.70 ms mem_used=2036 MB\n", - "Nelem=14720 mean_time=36.68 ms stddev_time=1.37 ms mem_used=2036 MB\n", - "Nelem=15040 mean_time=37.17 ms stddev_time=0.97 ms mem_used=2036 MB\n", - "Nelem=15360 mean_time=38.73 ms stddev_time=1.19 ms mem_used=2036 MB\n", - "Nelem=15680 mean_time=39.80 ms stddev_time=1.04 ms mem_used=2036 MB\n", - "Nelem=16000 mean_time=40.87 ms stddev_time=1.46 ms mem_used=1996 MB\n", - "Nelem=16320 mean_time=41.89 ms stddev_time=1.01 ms mem_used=1996 MB\n", - "Nelem=16640 mean_time=43.36 ms stddev_time=1.08 ms mem_used=1996 MB\n", - "Nelem=16960 mean_time=44.87 ms stddev_time=1.35 ms mem_used=1996 MB\n", - "Nelem=17280 mean_time=46.04 ms stddev_time=0.96 ms mem_used=1996 MB\n", - "Nelem=17600 mean_time=47.96 ms stddev_time=1.47 ms mem_used=1996 MB\n", - "Nelem=17920 mean_time=49.01 ms stddev_time=1.35 ms mem_used=1996 MB\n", - "Nelem=18240 mean_time=50.04 ms stddev_time=1.34 ms mem_used=1956 MB\n", - "Nelem=18560 mean_time=51.34 ms stddev_time=1.49 ms mem_used=1956 MB\n", - "Nelem=18880 mean_time=52.16 ms stddev_time=1.20 ms mem_used=1956 MB\n", - "Nelem=19200 mean_time=53.19 ms stddev_time=1.20 ms mem_used=1956 MB\n", - "Nelem=19520 mean_time=54.03 ms 
stddev_time=0.96 ms mem_used=1956 MB\n", - "Nelem=19840 mean_time=55.68 ms stddev_time=1.05 ms mem_used=1956 MB\n", - "Nelem=20160 mean_time=56.88 ms stddev_time=1.12 ms mem_used=1956 MB\n", - "Nelem=20480 mean_time=57.49 ms stddev_time=1.50 ms mem_used=1956 MB\n", - "Nelem=20800 mean_time=60.40 ms stddev_time=3.51 ms mem_used=1959 MB\n", - "Nelem=21120 mean_time=61.30 ms stddev_time=3.90 ms mem_used=1959 MB\n", - "Nelem=21440 mean_time=60.74 ms stddev_time=1.05 ms mem_used=1948 MB\n", - "Nelem=21760 mean_time=61.66 ms stddev_time=1.29 ms mem_used=1948 MB\n", - "Nelem=22080 mean_time=63.35 ms stddev_time=1.11 ms mem_used=1948 MB\n", - "Nelem=22400 mean_time=64.70 ms stddev_time=1.16 ms mem_used=1948 MB\n", - "Nelem=22720 mean_time=65.63 ms stddev_time=0.95 ms mem_used=1948 MB\n", - "Nelem=23040 mean_time=67.09 ms stddev_time=1.02 ms mem_used=1948 MB\n", - "Nelem=23360 mean_time=68.40 ms stddev_time=1.15 ms mem_used=1948 MB\n", - "Nelem=23680 mean_time=69.76 ms stddev_time=0.88 ms mem_used=1948 MB\n", - "Nelem=24000 mean_time=71.55 ms stddev_time=0.94 ms mem_used=1948 MB\n", - "Nelem=24320 mean_time=73.04 ms stddev_time=1.46 ms mem_used=1948 MB\n", - "Nelem=24640 mean_time=74.53 ms stddev_time=1.28 ms mem_used=1948 MB\n", - "Nelem=24960 mean_time=76.03 ms stddev_time=1.07 ms mem_used=1948 MB\n", - "Nelem=25280 mean_time=77.59 ms stddev_time=0.88 ms mem_used=1948 MB\n", + "Nelem=1600 mean_time=4.66 ms stddev_time=2.55 ms mem_used=711 MB\n", + "Nelem=1920 mean_time=4.74 ms stddev_time=0.52 ms mem_used=711 MB\n", + "Nelem=2240 mean_time=5.53 ms stddev_time=0.63 ms mem_used=711 MB\n", + "Nelem=2560 mean_time=5.88 ms stddev_time=0.52 ms mem_used=711 MB\n", + "Nelem=2880 mean_time=6.22 ms stddev_time=0.63 ms mem_used=745 MB\n", + "Nelem=3200 mean_time=6.50 ms stddev_time=0.64 ms mem_used=745 MB\n", + "Nelem=3520 mean_time=7.07 ms stddev_time=0.61 ms mem_used=745 MB\n", + "Nelem=3840 mean_time=7.53 ms stddev_time=0.68 ms mem_used=745 MB\n", + "Nelem=4160 mean_time=7.76 ms stddev_time=0.69 ms mem_used=745 MB\n", + "Nelem=4480 mean_time=8.66 ms stddev_time=0.72 ms mem_used=745 MB\n", + "Nelem=4800 mean_time=9.00 ms stddev_time=0.57 ms mem_used=745 MB\n", + "Nelem=5120 mean_time=9.22 ms stddev_time=0.84 ms mem_used=745 MB\n", + "Nelem=5440 mean_time=9.64 ms stddev_time=0.73 ms mem_used=812 MB\n", + "Nelem=5760 mean_time=10.39 ms stddev_time=1.06 ms mem_used=812 MB\n", + "Nelem=6080 mean_time=10.77 ms stddev_time=0.69 ms mem_used=812 MB\n", + "Nelem=6400 mean_time=11.33 ms stddev_time=0.75 ms mem_used=812 MB\n", + "Nelem=6720 mean_time=12.19 ms stddev_time=0.77 ms mem_used=812 MB\n", + "Nelem=7040 mean_time=12.54 ms stddev_time=0.72 ms mem_used=812 MB\n", + "Nelem=7360 mean_time=13.08 ms stddev_time=0.78 ms mem_used=812 MB\n", + "Nelem=7680 mean_time=13.71 ms stddev_time=0.81 ms mem_used=812 MB\n", + "Nelem=8000 mean_time=14.11 ms stddev_time=0.74 ms mem_used=812 MB\n", + "Nelem=8320 mean_time=14.85 ms stddev_time=0.86 ms mem_used=812 MB\n", + "Nelem=8640 mean_time=15.36 ms stddev_time=0.79 ms mem_used=812 MB\n", + "Nelem=8960 mean_time=16.76 ms stddev_time=1.06 ms mem_used=812 MB\n", + "Nelem=9280 mean_time=17.27 ms stddev_time=0.71 ms mem_used=812 MB\n", + "Nelem=9600 mean_time=17.97 ms stddev_time=0.85 ms mem_used=812 MB\n", + "Nelem=9920 mean_time=18.73 ms stddev_time=0.94 ms mem_used=812 MB\n", + "Nelem=10240 mean_time=19.26 ms stddev_time=0.89 ms mem_used=812 MB\n", + "Nelem=10560 mean_time=19.91 ms stddev_time=0.90 ms mem_used=946 MB\n", + "Nelem=10880 mean_time=20.55 ms stddev_time=0.87 ms 
mem_used=946 MB\n", + "Nelem=11200 mean_time=21.82 ms stddev_time=0.78 ms mem_used=940 MB\n", + "Nelem=11520 mean_time=22.48 ms stddev_time=0.75 ms mem_used=940 MB\n", + "Nelem=11840 mean_time=23.33 ms stddev_time=0.98 ms mem_used=940 MB\n", + "Nelem=12160 mean_time=24.28 ms stddev_time=0.85 ms mem_used=940 MB\n", + "Nelem=12480 mean_time=24.85 ms stddev_time=0.67 ms mem_used=940 MB\n", + "Nelem=12800 mean_time=25.58 ms stddev_time=0.68 ms mem_used=940 MB\n", + "Nelem=13120 mean_time=26.58 ms stddev_time=0.78 ms mem_used=940 MB\n", + "Nelem=13440 mean_time=27.15 ms stddev_time=0.63 ms mem_used=940 MB\n", + "Nelem=13760 mean_time=27.72 ms stddev_time=0.85 ms mem_used=940 MB\n", + "Nelem=14080 mean_time=28.08 ms stddev_time=0.66 ms mem_used=940 MB\n", + "Nelem=14400 mean_time=28.70 ms stddev_time=0.73 ms mem_used=940 MB\n", + "Nelem=14720 mean_time=29.22 ms stddev_time=0.66 ms mem_used=940 MB\n", + "Nelem=15040 mean_time=29.73 ms stddev_time=0.80 ms mem_used=940 MB\n", + "Nelem=15360 mean_time=30.71 ms stddev_time=0.85 ms mem_used=940 MB\n", + "Nelem=15680 mean_time=31.15 ms stddev_time=0.74 ms mem_used=940 MB\n", + "Nelem=16000 mean_time=31.74 ms stddev_time=0.80 ms mem_used=940 MB\n", + "Nelem=16320 mean_time=32.27 ms stddev_time=0.77 ms mem_used=940 MB\n", + "Nelem=16640 mean_time=33.07 ms stddev_time=1.08 ms mem_used=940 MB\n", + "Nelem=16960 mean_time=33.60 ms stddev_time=0.69 ms mem_used=940 MB\n", + "Nelem=17280 mean_time=34.43 ms stddev_time=0.64 ms mem_used=940 MB\n", + "Nelem=17600 mean_time=35.34 ms stddev_time=0.75 ms mem_used=940 MB\n", + "Nelem=17920 mean_time=35.84 ms stddev_time=0.68 ms mem_used=940 MB\n", + "Nelem=18240 mean_time=36.51 ms stddev_time=0.85 ms mem_used=940 MB\n", + "Nelem=18560 mean_time=37.23 ms stddev_time=0.87 ms mem_used=940 MB\n", + "Nelem=18880 mean_time=37.72 ms stddev_time=0.78 ms mem_used=940 MB\n", + "Nelem=19200 mean_time=38.33 ms stddev_time=0.87 ms mem_used=940 MB\n", + "Nelem=19520 mean_time=38.95 ms stddev_time=0.87 ms mem_used=940 MB\n", + "Nelem=19840 mean_time=39.73 ms stddev_time=0.74 ms mem_used=940 MB\n", + "Nelem=20160 mean_time=40.27 ms stddev_time=0.81 ms mem_used=940 MB\n", + "Nelem=20480 mean_time=40.86 ms stddev_time=0.74 ms mem_used=940 MB\n", + "Nelem=20800 mean_time=41.71 ms stddev_time=0.94 ms mem_used=940 MB\n", + "Nelem=21120 mean_time=42.35 ms stddev_time=1.38 ms mem_used=1209 MB\n", + "Nelem=21440 mean_time=42.91 ms stddev_time=1.18 ms mem_used=1209 MB\n", + "Nelem=21760 mean_time=43.40 ms stddev_time=0.98 ms mem_used=1184 MB\n", + "Nelem=22080 mean_time=44.43 ms stddev_time=1.04 ms mem_used=1184 MB\n", + "Nelem=22400 mean_time=45.22 ms stddev_time=1.02 ms mem_used=1184 MB\n", + "Nelem=22720 mean_time=45.57 ms stddev_time=0.94 ms mem_used=1184 MB\n", + "Nelem=23040 mean_time=46.21 ms stddev_time=0.86 ms mem_used=1184 MB\n", + "Nelem=23360 mean_time=46.85 ms stddev_time=0.95 ms mem_used=1184 MB\n", + "Nelem=23680 mean_time=47.52 ms stddev_time=1.57 ms mem_used=1184 MB\n", + "Nelem=24000 mean_time=48.31 ms stddev_time=0.74 ms mem_used=1184 MB\n", + "Nelem=24320 mean_time=48.92 ms stddev_time=0.75 ms mem_used=1184 MB\n", + "Nelem=24640 mean_time=49.70 ms stddev_time=0.92 ms mem_used=1184 MB\n", + "Nelem=24960 mean_time=50.26 ms stddev_time=0.93 ms mem_used=1184 MB\n", + "Nelem=25280 mean_time=50.98 ms stddev_time=0.89 ms mem_used=1184 MB\n", "\"\"\"" ] }, @@ -682,12 +498,12 @@ "metadata": {}, "outputs": [], "source": [ - "plt.figure(figsize=(5,5))\n", + "plt.figure(figsize=(7, 7))\n", "ax = plt.axes()\n", "plt.hist(nelem, 
bins=np.linspace(2000,6000,100));\n", "plt.ylabel(\"Number of events / bin\")\n", "plt.xlabel(\"PFElements per event\")\n", - "cms_label()\n", + "cms_label(ax)\n", "sample_label(ax, physics_process=\"ttbar\")" ] }, @@ -699,6 +515,7 @@ "outputs": [], "source": [ "plt.figure(figsize=(10, 3))\n", + "ax = plt.axes()\n", "plt.errorbar(time_x, time_y, yerr=time_y_err, marker=\".\", label=\"MLPF\")\n", "plt.axvline(np.mean(nelem)-np.std(nelem), color=\"black\", ls=\"--\", lw=1.0, label=r\"$t\\bar{t}$+PU Run 3\")\n", "plt.axvline(np.mean(nelem)+np.std(nelem), color=\"black\", ls=\"--\", lw=1.0)\n", @@ -707,8 +524,10 @@ "plt.ylim(0,100)\n", "plt.ylabel(\"Average runtime per event [ms]\")\n", "plt.xlabel(\"PFElements per event\")\n", - "plt.legend(frameon=False)\n", - "cms_label(x1=0.17, x2=0.8)\n", + "plt.legend(loc=4, frameon=False)\n", + "cms_label(ax, y=0.93, x1=0.07, x2=0.99)\n", + "plt.text(4000, 20, \"typical Run3 range\", rotation=90)\n", + "plt.text(6000, 70, \"Inference with ONNXRuntime in a single CPU thread,\\nsingle GPU stream on NVIDIA RTX2060S 8GB.\\nNot a production-like setup. Synthetic inputs.\")\n", "plt.savefig(\"runtime_scaling.pdf\", bbox_inches=\"tight\")\n", "plt.savefig(\"runtime_scaling.png\", bbox_inches=\"tight\", dpi=300)" ] @@ -721,6 +540,7 @@ "outputs": [], "source": [ "plt.figure(figsize=(10, 3))\n", + "ax = plt.axes()\n", "plt.plot(time_x, gpu_mem_use, marker=\".\", label=\"MLPF\")\n", "plt.axvline(np.mean(nelem)-np.std(nelem), color=\"black\", ls=\"--\", lw=1.0, label=r\"$t\\bar{t}$+PU Run 3\")\n", "plt.axvline(np.mean(nelem)+np.std(nelem), color=\"black\", ls=\"--\", lw=1.0)\n", @@ -729,8 +549,10 @@ "plt.ylim(0,3000)\n", "plt.ylabel(\"Maximum GPU memory used [MB]\")\n", "plt.xlabel(\"PFElements per event\")\n", - "plt.legend(frameon=False, loc=4)\n", - "cms_label(x1=0.17, x2=0.8)\n", + "plt.legend(loc=4, frameon=False)\n", + "cms_label(ax, y=0.93, x1=0.07, x2=0.99)\n", + "plt.text(4000, 500, \"typical Run3 range\", rotation=90)\n", + "plt.text(6000, 2100, \"Inference with ONNXRuntime in a single CPU thread,\\nsingle GPU stream on NVIDIA RTX2060S 8GB.\\nNot a production-like setup. 
Synthetic inputs.\")\n", "plt.savefig(\"memory_scaling.pdf\", bbox_inches=\"tight\")\n", "plt.savefig(\"memory_scaling.png\", bbox_inches=\"tight\", dpi=300)" ] @@ -738,7 +560,7 @@ { "cell_type": "code", "execution_count": null, - "id": "777ba9f3", + "id": "83f1f32d", "metadata": {}, "outputs": [], "source": [] diff --git a/notebooks/old/benchmarks.ipynb b/notebooks/old/benchmarks.ipynb deleted file mode 100644 index af935721b..000000000 --- a/notebooks/old/benchmarks.ipynb +++ /dev/null @@ -1,443 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import numba\n", - "import pickle\n", - "import glob\n", - "import pandas as pd\n", - "from matplotlib.colors import LogNorm" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def get_df(fl, name):\n", - " bls = []\n", - " for fi in fl:\n", - " d = pickle.load(open(fi, \"rb\"))\n", - " bls += [d[name]]\n", - " return pd.DataFrame(bls)\n", - "\n", - "def text_in_box(mat, thresh):\n", - " for i in range(len(mat)):\n", - " for j in range(len(mat)):\n", - " plt.text(i, j, \"{0:.3f}\".format(mat[i,j]), ha=\"center\", va=\"center\", color=\"white\" if mat[i, j] > thresh else \"black\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_num_blocks(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, sample):\n", - " plt.figure(figsize=(5,5))\n", - " plt.scatter(df_blocks[\"num_blocks_true\"], df_blocks[\"num_blocks_pred\"], marker=\".\", label=\"Edge classifier\", alpha=0.5)\n", - " plt.scatter(df_blocks_dummy[\"num_blocks_true\"], df_blocks_dummy[\"num_blocks_pred\"], marker=\"x\", label=\"PFBlockAlgo\", alpha=0.5)\n", - " plt.scatter(df_blocks_clue[\"num_blocks_true\"], df_blocks_clue[\"num_blocks_pred\"], marker=\"^\", label=\"CLUE\", alpha=0.5)\n", - " plt.scatter(df_blocks_gnn[\"num_blocks_true\"], df_blocks_gnn[\"num_blocks_pred\"], marker=\"^\", label=\"GNN\", alpha=0.5)\n", - " plt.xlim(0,5000)\n", - " plt.ylim(0,5000)\n", - " plt.plot([0,5000], [0,5000], color=\"black\", lw=1, ls=\"--\")\n", - " plt.xlabel(\"number of blocks (true)\")\n", - " plt.ylabel(\"number of blocks (pred)\")\n", - " plt.title(\"Number of blocks, {0}\".format(sample))\n", - " plt.legend(frameon=False, loc=\"best\")\n", - " plt.savefig(\"num_blocks_{0}.pdf\".format(sample), bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_block_size(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, sample):\n", - " plt.figure(figsize=(5,5))\n", - " plt.scatter(df_blocks[\"max_block_size_true\"], df_blocks[\"max_block_size_pred\"], marker=\".\", label=\"Edge classifier\", alpha=0.3)\n", - " plt.scatter(df_blocks_dummy[\"max_block_size_true\"], df_blocks_dummy[\"max_block_size_pred\"], marker=\"x\", label=\"PFBlockAlgo\", alpha=0.3)\n", - " plt.scatter(df_blocks_clue[\"max_block_size_true\"], df_blocks_clue[\"max_block_size_pred\"], marker=\"^\", label=\"CLUE\", alpha=0.3)\n", - " plt.scatter(df_blocks_gnn[\"max_block_size_true\"], df_blocks_gnn[\"max_block_size_pred\"], marker=\"^\", label=\"GNN\", alpha=0.3)\n", - " plt.xlim(0,3000)\n", - " plt.ylim(0,3000)\n", - " plt.plot([0,3000], [0,3000], 
color=\"black\", lw=1, ls=\"--\")\n", - " plt.xlabel(\"maximum block size (true)\")\n", - " plt.ylabel(\"maximum block size (pred)\")\n", - " plt.title(\"Block finder model, {0}\".format(sample))\n", - " plt.legend(frameon=False, loc=\"best\")\n", - " plt.savefig(\"block_size_{0}.pdf\".format(sample), bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_precision_recall(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, sample):\n", - " plt.figure(figsize=(5,5))\n", - " plt.scatter(df_blocks[\"edge_precision\"], df_blocks[\"edge_recall\"], marker=\".\", alpha=0.5, label=\"Edge classifier\")\n", - " plt.scatter(df_blocks_dummy[\"edge_precision\"], df_blocks_dummy[\"edge_recall\"], marker=\"x\", alpha=0.5, label=\"PFBlockAlgo\")\n", - " plt.scatter(df_blocks_clue[\"edge_precision\"], df_blocks_clue[\"edge_recall\"], marker=\"^\", alpha=0.5, label=\"CLUE\")\n", - " plt.scatter(df_blocks_gnn[\"edge_precision\"], df_blocks_gnn[\"edge_recall\"], marker=\"^\", alpha=0.5, label=\"GNN\")\n", - "\n", - " plt.xlim(0,1.2)\n", - " plt.ylim(0,1.2)\n", - "\n", - " plt.xlabel(\"edge precision: TP / (TP + FP)\")\n", - " plt.ylabel(\"edge recall: TP / (TP + FN)\")\n", - " plt.title(\"Edge classification, {0}\".format(sample))\n", - " plt.legend(frameon=False)\n", - " plt.savefig(\"edge_precision_recall_{0}.pdf\".format(sample), bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_block_size_histo(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, sample):\n", - " plt.figure(figsize=(5,5))\n", - " b = np.logspace(0.1, 4, 40)\n", - " plt.hist(df_blocks[\"max_block_size_pred\"], bins=b, histtype=\"step\", lw=2, label=\"Edge classifier, m={0:.0f}\".format(np.mean(df_blocks[\"max_block_size_pred\"])));\n", - " plt.hist(df_blocks_dummy[\"max_block_size_pred\"], bins=b, histtype=\"step\", lw=2, label=\"PFBlockAlgo, m={0:.0f}\".format(np.mean(df_blocks_dummy[\"max_block_size_pred\"])));\n", - " plt.hist(df_blocks_clue[\"max_block_size_pred\"], bins=b, histtype=\"step\", lw=2, label=\"GLUE, m={0:.0f}\".format(np.mean(df_blocks_clue[\"max_block_size_pred\"])));\n", - " plt.hist(df_blocks_gnn[\"max_block_size_pred\"], bins=b, histtype=\"step\", lw=2, label=\"GNN, m={0:.0f}\".format(np.mean(df_blocks_gnn[\"max_block_size_pred\"])));\n", - " plt.hist(df_blocks[\"max_block_size_true\"], bins=b, histtype=\"step\", lw=2, label=\"True blocks, m={0:.0f}\".format(np.mean(df_blocks[\"max_block_size_true\"])));\n", - " plt.xscale(\"log\")\n", - " plt.legend(frameon=False)\n", - " plt.title(\"Maximum block size, {0}\".format(sample))\n", - " plt.savefig(\"max_block_size_{0}.pdf\".format(sample), bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fl = glob.glob(\"../data/NuGun_run3/step3*.pkl\")\n", - "df_blocks = get_df(fl, \"blocks\")\n", - "df_blocks_dummy = get_df(fl, \"blocks_dummy\")\n", - "df_blocks_clue = get_df(fl, \"blocks_clue\")\n", - "\n", - "plot_num_blocks(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, \"NuGun-Run3\")\n", - "plot_block_size(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, \"NuGun-Run3\")\n", - "plot_block_size_histo(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, \"NuGun-Run3\")\n", - "plot_precision_recall(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, 
\"NuGun-Run3\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fl = glob.glob(\"../data/QCD_run3/step3*.pkl\")\n", - "df_blocks = get_df(fl, \"blocks\")\n", - "df_blocks_dummy = get_df(fl, \"blocks_dummy\")\n", - "df_blocks_clue = get_df(fl, \"blocks_clue\")\n", - "\n", - "plot_num_blocks(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, \"QCD-Run3\")\n", - "plot_block_size(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, \"QCD-Run3\")\n", - "plot_block_size_histo(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, \"QCD-Run3\")\n", - "plot_precision_recall(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, \"QCD-Run3\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fl = glob.glob(\"../data/TTbar_run3/step3*.pkl\")\n", - "df_blocks = get_df(fl, \"blocks\")\n", - "df_blocks_dummy = get_df(fl, \"blocks_dummy\")\n", - "df_blocks_clue = get_df(fl, \"blocks_clue\")\n", - "df_blocks_gnn = get_df(fl, \"blocks_gnn\")\n", - "\n", - "plot_num_blocks(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, \"TTbar-Run3\")\n", - "plot_block_size(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, \"TTbar-Run3\")\n", - "plot_block_size_histo(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, \"TTbar-Run3\")\n", - "plot_precision_recall(df_blocks, df_blocks_dummy, df_blocks_clue, df_blocks_gnn, \"TTbar-Run3\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# b = np.linspace(0,1,100)\n", - "# plt.hist(df_blocks[\"adjusted_mutual_info_score\"], bins=b, label=\"Edge classifier\");\n", - "# plt.hist(df_blocks_dummy[\"adjusted_mutual_info_score\"], bins=b, label=\"PFBlockAlgo\");\n", - "# plt.xlabel(\"adjusted MI score\\n(higher is better)\")\n", - "# plt.legend(frameon=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_true_blocks = get_df(fl, \"cand_true_blocks\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_true_blocks.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "plt.scatter(df_true_blocks[\"num_cands_true\"], df_true_blocks[\"num_cands_pred\"], marker=\".\")\n", - "plt.xlim(0,4000)\n", - "plt.ylim(0,4000)\n", - "plt.title(\"True blocks, true vs. predicted candidates\")\n", - "plt.plot([0,4000], [0,4000], color=\"black\", lw=1, ls=\"--\")\n", - "\n", - "plt.xlabel(\"number of true candidates\")\n", - "plt.ylabel(\"number of predicted candidates\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "mat = df_true_blocks[\"ncand_confusion_matrix\"].sum()\n", - "mat = 100.0 * mat / np.sum(mat)\n", - "plt.imshow(mat, cmap=\"Blues\")\n", - "text_in_box(mat, 60)\n", - "plt.colorbar()\n", - "labels = range(4)\n", - "plt.xticks(range(len(labels)), labels=[int(x) for x in labels])\n", - "plt.yticks(range(len(labels)), labels=[int(x) for x in labels])\n", - "plt.xlim(-0.5,3.5)\n", - "plt.ylim(-0.5,3.5)\n", - "plt.title(\"True blocks, true vs. 
predicted candidates\")\n", - "plt.xlabel(\"Number of true candidates\")\n", - "plt.ylabel(\"Number of predicted candidates\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10,10))\n", - "mat = df_true_blocks[\"pdgid_confusion_matrix\"].sum()\n", - "mat = 100.0 * mat / np.sum(mat)\n", - "plt.imshow(mat, cmap=\"Blues\")\n", - "text_in_box(mat, 20)\n", - "plt.colorbar()\n", - "labels = [-211, -13, 0, 1, 2, 13, 22, 130, 211]\n", - "plt.xticks(range(len(labels)), labels=[int(x) for x in labels])\n", - "plt.yticks(range(len(labels)), labels=[int(x) for x in labels])\n", - "plt.xlim(-0.5,8.5)\n", - "plt.ylim(-0.5,8.5)\n", - "\n", - "plt.title(\"True blocks, true vs. predicted candidates (%)\")\n", - "plt.xlabel(\"pdgid of true candidates\")\n", - "plt.ylabel(\"pdgid of predicted candidates\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(0, 10, 20)\n", - "mat = df_true_blocks[\"pt_matrix\"].sum()\n", - "plt.title(\"True blocks, true vs. predicted candidates\")\n", - "plt.imshow(mat, norm=LogNorm(vmin=1, vmax=10*np.max(mat)), origin=\"lower\", cmap=\"Blues\", extent=(min(bins), max(bins), min(bins), max(bins)))\n", - "plt.colorbar()\n", - "\n", - "plt.xlabel(\"true candidate $p_T$ [GeV]\")\n", - "plt.ylabel(\"predicted candidate $p_T$ [GeV]\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(-6, 6, 20)\n", - "mat = df_true_blocks[\"eta_matrix\"].sum()\n", - "#mat = 100 * mat / np.sum(mat)\n", - "plt.imshow(mat, norm=LogNorm(vmin=1, vmax=10*np.max(mat)), origin=\"lower\", cmap=\"Blues\", extent=(min(bins), max(bins), min(bins), max(bins)))\n", - "plt.colorbar()\n", - "\n", - "plt.title(\"True blocks, true vs. predicted candidates\")\n", - "plt.xlabel(\"true candidate $\\eta$\")\n", - "plt.ylabel(\"predicted candidate $\\eta$\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mat = df_true_blocks[\"phi_matrix\"].sum()\n", - "bins = np.linspace(-4, 4, 20)\n", - "plt.imshow(mat, norm=LogNorm(vmin=1, vmax=10*np.max(mat)), origin=\"lower\", cmap=\"Blues\", extent=(min(bins), max(bins), min(bins), max(bins)))\n", - "plt.colorbar()\n", - "\n", - "plt.title(\"True blocks, true vs. predicted candidates\")\n", - "plt.xlabel(\"true candidate $\\phi$\")\n", - "plt.ylabel(\"predicted candidate $\\phi$\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_pred_blocks = get_df(fl, \"cand_pred_blocks\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_pred_blocks.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "plt.scatter(df_pred_blocks[\"num_cands_true\"], df_pred_blocks[\"num_cands_pred\"], marker=\".\")\n", - "plt.xlim(0,4000)\n", - "plt.ylim(0,4000)\n", - "plt.plot([0,4000], [0,4000], color=\"black\", lw=1, ls=\"--\")\n", - "\n", - "plt.title(\"True vs. 
predicted candidates\\nusing predicted blocks\")\n", - "\n", - "plt.xlabel(\"number of true blocks\")\n", - "plt.ylabel(\"number of predicted blocks\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "plt.scatter(df_pred_blocks[\"num_cands_true\"], df_pred_blocks[\"num_cands_matched\"], marker=\".\")\n", - "\n", - "plt.xlim(0,4000)\n", - "plt.ylim(0,4000)\n", - "plt.plot([0,4000], [0,4000], color=\"black\", lw=1, ls=\"--\")\n", - "\n", - "plt.title(\"True vs. predicted candidates\\nusing predicted blocks\")\n", - "\n", - "plt.xlabel(\"number of true candidates\")\n", - "plt.ylabel(\"number of matched candidates\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10,10))\n", - "mat = df_pred_blocks[\"pdgid_confusion_matrix\"].sum()\n", - "mat = 100.0 * mat / np.sum(mat)\n", - "plt.imshow(mat, cmap=\"Blues\")\n", - "text_in_box(mat, 20)\n", - "plt.colorbar()\n", - "labels = [-211, -13, 0, 1, 2, 13, 22, 130, 211]\n", - "plt.xticks(range(len(labels)), labels=[int(x) for x in labels])\n", - "plt.yticks(range(len(labels)), labels=[int(x) for x in labels])\n", - "plt.xlim(-0.5,8.5)\n", - "plt.ylim(-0.5,8.5)\n", - "\n", - "plt.title(\"Predicted blocks, true vs. predicted candidates (matched) (%)\")\n", - "plt.xlabel(\"pdgid of true candidates\")\n", - "plt.ylabel(\"pdgid of predicted candidates\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mat = df_pred_blocks[\"pt_matrix\"].sum()\n", - "bins = np.linspace(0, 10, 20)\n", - "\n", - "#mat = 100 * mat / np.sum(mat)\n", - "plt.title(\"Predicted blocks, true vs. 
matched candidates\")\n", - "plt.imshow(mat, norm=LogNorm(vmin=1, vmax=10*np.max(mat)), origin=\"lower\", cmap=\"Blues\", extent=(min(bins), max(bins), min(bins), max(bins)))\n", - "plt.colorbar()\n", - "\n", - "plt.xlabel(\"true candidate $p_T$ [GeV]\")\n", - "plt.ylabel(\"predicted candidate $p_T$ [GeV]\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/old/check_clustering.ipynb b/notebooks/old/check_clustering.ipynb deleted file mode 100644 index 8380899b0..000000000 --- a/notebooks/old/check_clustering.ipynb +++ /dev/null @@ -1,418 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import scipy\n", - "from matplotlib.colors import LogNorm\n", - "import networkx\n", - "import sklearn\n", - "import sklearn.cluster" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import matplotlib.pyplot as plt\n", - "import sys\n", - "\n", - "import numpy as np\n", - "import sklearn\n", - "import keras\n", - "\n", - "import os\n", - "os.chdir(\"..\")\n", - "\n", - "sys.path += [\"test\"]\n", - "from train_clustering import load_element_pairs, fill_target_matrix, fill_elem_pairs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "training_info = json.load(open(\"clustering.json\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.title(\"Edge classifier\")\n", - "plt.plot(training_info[\"loss\"])\n", - "plt.plot(training_info[\"val_loss\"])\n", - "plt.xlabel(\"epoch\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fn = \"data/TTbar/191009_155100/step3_AOD_{0}_ev{1}.npz\".format(1, 0)\n", - "data = np.load(fn)\n", - "els = data[\"elements\"]\n", - "els_blid = data[\"element_block_id\"]\n", - "\n", - "fi = open(fn.replace(\"ev\", \"dist\"), \"rb\")\n", - "dm = scipy.sparse.load_npz(fi).todense()\n", - "\n", - "#Create the matrix of elements thar are connected according to the miniblock id\n", - "target_matrix = np.zeros((len(els_blid), len(els_blid)), dtype=np.int32)\n", - "fill_target_matrix(target_matrix, els_blid)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "g = networkx.from_numpy_matrix(dm)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "ax = plt.axes()\n", - "networkx.draw(\n", - " g,\n", - " pos=[(els[i, 2], els[i, 3]) for i in range(len(els))],\n", - " node_size=1, axes=ax, alpha=0.2, edge_color=\"grey\", edgelist=[])\n", - "plt.axis('on')\n", - "plt.xlabel(\"$\\eta$\")\n", - "plt.ylabel(\"$\\phi$\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "ax = plt.axes()\n", - "networkx.draw(\n", - " g,\n", - " pos=[(els[i, 2], els[i, 3]) for i in range(len(els))],\n", - " 
node_size=1, axes=ax, alpha=0.2, edge_color=\"grey\", edgelist=list(g.edges)[:200])\n", - "plt.axis('on')\n", - "plt.title(\"{0} elements, 200 / {1} edges\".format(len(g.edges), len(g.edges)))\n", - "plt.xlabel(\"$\\eta$\")\n", - "plt.ylabel(\"$\\phi$\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "all_elem_pairs_X = []\n", - "all_dms = []\n", - "all_blockids_true = [] \n", - "all_elem_pairs_y = []\n", - "\n", - "for i in range(500):\n", - " for j in range(6,7):\n", - " fn = \"data/TTbar/191009_155100/step3_AOD_{1}_ev{0}.npz\".format(i, j)\n", - " all_blockids_true += [np.load(open(fn, \"rb\"))[\"element_block_id\"]]\n", - " \n", - " print(\"Loading {0}\".format(fn))\n", - " elem_pairs_X, elem_pairs_y = load_element_pairs(fn)\n", - " all_elem_pairs_X += [elem_pairs_X]\n", - " all_elem_pairs_y += [elem_pairs_y]\n", - " \n", - " fi = open(fn.replace(\"ev\", \"dist\"), \"rb\")\n", - " dm = scipy.sparse.load_npz(fi).todense()\n", - " all_dms += [dm]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "elem_pairs_X = np.vstack(all_elem_pairs_X)\n", - "elem_pairs_y = np.vstack(all_elem_pairs_y)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from collections import Counter" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model = keras.models.load_model(\"clustering.h5\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pp = model.predict(elem_pairs_X, batch_size=10000)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.linear_model import LogisticRegression\n", - "t0 = 5\n", - "t1 = 1\n", - "sel = (elem_pairs_X[:, 0]==t0) & (elem_pairs_X[:, 1]==t1)\n", - "\n", - "dumdum = LogisticRegression(solver=\"lbfgs\")\n", - "dumdum.fit(elem_pairs_X[sel], elem_pairs_y[sel, 0])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "r = sklearn.metrics.roc_curve(elem_pairs_y[sel, 0], pp[sel])\n", - "r2 = sklearn.metrics.roc_curve(elem_pairs_y[sel, 0], 1.0 - elem_pairs_X[sel, 2])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "threshold = 0.5\n", - "idx = len(r2) - np.searchsorted(r[2][::-1], threshold)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.plot(r[2], r[0], label=\"edge FPR\")\n", - "plt.plot(r[2], r[1], label=\"edge TPR\")\n", - "plt.xlim(0,1)\n", - "plt.legend()\n", - "plt.ylabel(\"rate\")\n", - "plt.xlabel(\"threshold\")\n", - "plt.title(\"Element {0}<->{1} linking\".format(t0, t1))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "plt.plot(r[0], r[1], label=\"simple feedforward DNN\")\n", - "plt.plot(r2[0], r2[1], color=\"black\", ls=\"--\", label=\"logistic regression\")\n", - "plt.title(\"Element {0}<->{1} linking\".format(t0, t1))\n", - "plt.xlabel(\"edge FPR\")\n", - "plt.ylabel(\"edge TPR\")\n", - "plt.xlim(0,1)\n", - "plt.ylim(0,1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pp" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "elem_pairs_X[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numba\n", - "@numba.njit\n", - "def pred_to_adj_matrix(nelems, pred, dm, adj_matrix):\n", - " n = 0\n", - " for i in range(nelems):\n", - " for j in range(i+1, nelems):\n", - " assert(n <= len(pred))\n", - " if dm[i,j] > 0:\n", - " adj_matrix[i,j] = pred[n, 0]\n", - " n += 1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def predict_clustering(fn):\n", - " fi = open(fn, \"rb\")\n", - " data = np.load(fi)\n", - " els = data[\"elements\"]\n", - " els_blid = data[\"element_block_id\"]\n", - " nelem = len(els)\n", - "\n", - " #Load the distance matrix\n", - " fi = open(fn.replace(\"ev\", \"dist\"), \"rb\")\n", - " dm = scipy.sparse.load_npz(fi).todense()\n", - " \n", - " print(els.shape, dm.shape)\n", - " return els, dm\n", - "# elem_pairs_X, elem_pairs_y = load_element_pairs(fn)\n", - "# pp = model.predict(elem_pairs_X)\n", - " \n", - "# adj_matrix = np.zeros((nelem, nelem), dtype=np.float32)\n", - "# pred_to_adj_matrix(nelem, pp, dm, adj_matrix)\n", - "# return adj_matrix" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fn" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "els, dm = predict_clustering('data/TTbar/191009_155100/step3_AOD_7_ev0.npz')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "@numba.njit\n", - "def set_triu(i1, i2, vec, mat):\n", - " for i in range(len(i1)):\n", - " mat[i1[i], i2[i]] = vec[i]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "i1, i2 = np.triu_indices(len(els))\n", - "dmv = np.array(dm[i1, i2])[0, :]\n", - "vec = np.vstack([els[i1, 0], els[i2, 0], dmv]).T\n", - "p2 = model.predict_proba(vec, batch_size=100000)\n", - "p2[dmv==0]=0\n", - "\n", - "mat = np.zeros((len(els), len(els)))\n", - "set_triu(i1, i2, p2[:, 0], mat)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "g1 = networkx.from_numpy_matrix(dm)\n", - "g2 = networkx.from_numpy_matrix(mat>0.5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "len(list(networkx.connected_component_subgraphs(g1)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "len(list(networkx.connected_component_subgraphs(g2)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sklearn.metrics" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/old/check_regression.ipynb b/notebooks/old/check_regression.ipynb deleted file mode 100644 index 17131032c..000000000 --- a/notebooks/old/check_regression.ipynb +++ /dev/null @@ -1,336 +0,0 @@ -{ - 
"cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import matplotlib.pyplot as plt\n", - "import sys\n", - "\n", - "import numpy as np\n", - "import sklearn\n", - "import keras\n", - "import pickle\n", - "\n", - "import os\n", - "os.chdir(\"..\")\n", - "\n", - "sys.path += [\"test\"]\n", - "from train_regression import get_unique_X_y\n", - "from predict_pf import set_pred_to_zero" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from matplotlib.colors import LogNorm" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "training_info = json.load(open(\"regression.json\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.plot(training_info[\"loss\"])\n", - "plt.plot(training_info[\"val_loss\"])\n", - "plt.xlabel(\"epochs\")\n", - "plt.ylim(0,0.1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "all_Xs = []\n", - "all_ys = []\n", - "for i in range(500):\n", - " for j in range(6,7):\n", - " fn = \"data/TTbar/191009_155100/step3_AOD_{1}_ev{0}.npz\".format(i, j)\n", - " print(\"Loading {0}\".format(fn))\n", - " fi = open(fn, \"rb\")\n", - " data = np.load(fi)\n", - "\n", - " Xs, ys = get_unique_X_y(data[\"elements\"], data[\"element_block_id\"], data[\"candidates\"], data[\"candidate_block_id\"])\n", - "\n", - " all_Xs += [Xs]\n", - " all_ys += [ys]\n", - "all_Xs = np.vstack(all_Xs)\n", - "all_ys = np.vstack(all_ys)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X_types = all_Xs[:, :, 0]\n", - "X_kin = all_Xs[:, :, 1:]\n", - "X_kin = X_kin.reshape((X_kin.shape[0], X_kin.shape[1]*X_kin.shape[2]))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with open(\"preprocessing.pkl\", \"rb\") as fi:\n", - " preproc = pickle.load(fi)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "enc_X = preproc[\"enc_X\"]\n", - "scaler_X = preproc[\"scaler_X\"]\n", - "enc_y = preproc[\"enc_y\"]\n", - "scaler_y = preproc[\"scaler_y\"]\n", - "num_onehot_y = 27" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "trf = enc_X.transform(X_types)\n", - "X = np.hstack([trf, scaler_X.transform(X_kin)])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model2 = keras.models.load_model(\"regression.h5\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pred2 = model2.predict(X, batch_size=100000)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cand_types = enc_y.inverse_transform(pred2[:, :num_onehot_y])\n", - "ncand = (cand_types!=0).sum(axis=1)\n", - "ncand_true = (all_ys[:, :, 0]!=0).sum(axis=1)\n", - "\n", - "cand_momenta = scaler_y.inverse_transform(pred2[:, num_onehot_y:])\n", - "set_pred_to_zero(cand_momenta, ncand)\n", - "cand_momenta = cand_momenta.reshape((cand_momenta.shape[0], 3, 3))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def 
text_in_box(mat):\n", - " for i in range(len(mat)):\n", - " for j in range(len(mat)):\n", - " plt.text(i, j, \"{0:.3f}\".format(mat[i,j]), ha=\"center\", va=\"center\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "labels = [0,1,2,3]\n", - "confusion_matrix_ncand = sklearn.metrics.confusion_matrix(ncand_true, ncand, labels=labels)\n", - "confusion_matrix_ncand = confusion_matrix_ncand / np.sum(confusion_matrix_ncand)\n", - "plt.imshow(confusion_matrix_ncand*100, norm=LogNorm(vmin=1e-9, vmax=100))\n", - "plt.xticks(range(len(labels)), labels=[int(x) for x in labels])\n", - "plt.yticks(range(len(labels)), labels=[int(x) for x in labels])\n", - "plt.xlim(-1,4)\n", - "plt.ylim(-1,4)\n", - "plt.colorbar()\n", - "plt.xlabel(\"True ncand\")\n", - "plt.ylabel(\"Predicted ncand\")\n", - "text_in_box(confusion_matrix_ncand*100)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "confusion_matrix_ncand[2]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "labels = np.unique(all_ys[:, :, 0])\n", - "confusion_matrix = sklearn.metrics.confusion_matrix(all_ys[:, :, 0].flatten(), cand_types.flatten(), labels=labels)\n", - "confusion_matrix = confusion_matrix / np.sum(confusion_matrix)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10,10))\n", - "plt.imshow(confusion_matrix*100, norm=LogNorm(vmin=1e-9, vmax=100))\n", - "plt.xlim(-1,9)\n", - "plt.ylim(-1,9)\n", - "plt.colorbar()\n", - "plt.xlabel(\"True pdgid\")\n", - "plt.ylabel(\"Predicted pdgid\")\n", - "text_in_box(confusion_matrix*100)\n", - "plt.xticks(range(len(labels)), labels=[int(x) for x in labels])\n", - "plt.yticks(range(len(labels)), labels=[int(x) for x in labels])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#all cases where there was a true particle and it was also reconstructed\n", - "msk = (all_ys[:, :, 0].flatten()!=0) & (cand_types.flatten() != 0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "plt.scatter(all_ys[:, :, 1].flatten()[msk], cand_momenta[:, :, 0].flatten()[msk], alpha=0.2, marker=\".\")\n", - "plt.xlim(0,10)\n", - "plt.ylim(0,10)\n", - "plt.xlabel(\"True $p_T$\")\n", - "plt.ylabel(\"Reconstructed $p_T$\")\n", - "plt.title(\"Baseline PF block algo regression\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "b = np.linspace(0, 20, 40)\n", - "plt.hist(all_ys[:, :, 1].flatten()[msk], bins=b, histtype=\"step\", lw=2, label=\"true\");\n", - "plt.hist(cand_momenta[:, :, 0].flatten()[msk], bins=b, histtype=\"step\", lw=2, label=\"predicted\");\n", - "plt.yscale(\"log\")\n", - "plt.legend()\n", - "plt.xlabel(\"Candidate $p_T$ [GeV]\")\n", - "plt.title(\"Baseline PF block algo regression\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "plt.scatter(all_ys[:, :, 2].flatten()[msk], cand_momenta[:, :, 1].flatten()[msk], alpha=0.2, marker=\".\")\n", - "plt.xlim(-6,6)\n", - "plt.ylim(-6,6)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "b = 
np.linspace(-6, 6, 40)\n", - "plt.hist(all_ys[:, :, 2].flatten()[msk], bins=b, histtype=\"step\", lw=2, label=\"true\");\n", - "plt.hist(cand_momenta[:, :, 1].flatten()[msk], bins=b, histtype=\"step\", lw=2, label=\"predicted\");\n", - "plt.legend()\n", - "plt.xlabel(\"Candidate $p_T$ [GeV]\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "plt.scatter(all_ys[:, :, 3].flatten()[msk], cand_momenta[:, :, 2].flatten()[msk], alpha=0.2, marker=\".\")\n", - "plt.xlim(-4,4)\n", - "plt.ylim(-4,4)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "b = np.linspace(-4, 4, 40)\n", - "plt.hist(all_ys[:, :, 3].flatten()[msk], bins=b, histtype=\"step\", lw=2);\n", - "plt.hist(cand_momenta[:, :, 2].flatten()[msk], bins=b, histtype=\"step\", lw=2);" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/old/data.ipynb b/notebooks/old/data.ipynb deleted file mode 100644 index 6447e6ca0..000000000 --- a/notebooks/old/data.ipynb +++ /dev/null @@ -1,234 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pickle\n", - "import matplotlib.pyplot as plt\n", - "%matplotlib inline\n", - "\n", - "import networkx as nx\n", - "import pandas as pd\n", - "import numpy as np" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Run the following command within a CMSSW-aware environment to load the ROOT ntuple produced using `Validation/RecoParticleFlow/plugins/PFAnalysis.cc` and produce python-friendly data files:\n", - "\n", - "```bash\n", - "python test/postprocessing2.py --input data/TTbar_14TeV_TuneCUETP8M1_cfi/pfntuple_1.root \\\n", - " --events-per-file 1 \\\n", - " --save-full-graph \\\n", - " --save-normalized-table \\\n", - " --save-images\n", - "```\n", - "\n", - "The output will be stored in `data/TTbar_14TeV_TuneCUETP8M1_cfi/raw/pfntuple_1_*.pkl`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!wget http://login-1.hep.caltech.edu/~jpata/particleflow/2020-04/TTbar_14TeV_TuneCUETP8M1_cfi/pickle/pfntuple_1_0.pkl" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "all_data = pickle.load(open(\"pfntuple_1_0.pkl\", \"rb\"), encoding='iso-8859-1')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "one_event = all_data[0]\n", - "one_event.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "graph = one_event[\"full_graph\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "graph" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "all_elements = [n for n in graph.nodes if n[0] == \"elem\"]\n", - "len(all_elements)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "all_gen = [n for n in graph.nodes if n[0] == \"tp\" or n[0] == \"sc\"]\n", - "len(all_gen)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "all_pfcand = [n for n in graph.nodes if n[0] == \"pfcand\"]\n", - "len(all_pfcand)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def draw_event(g):\n", - " pos = {}\n", - " for node in g.nodes:\n", - " pos[node] = (g.nodes[node][\"eta\"], g.nodes[node][\"phi\"])\n", - "\n", - " fig = plt.figure(figsize=(5,5))\n", - "\n", - " nodes_to_draw = [n for n in g.nodes if n[0]==\"elem\"]\n", - " nx.draw_networkx(g, pos=pos, with_labels=False, node_size=5, nodelist=nodes_to_draw, edgelist=[], node_color=\"red\", node_shape=\"s\", alpha=0.5)\n", - "\n", - " nodes_to_draw = [n for n in g.nodes if n[0]==\"pfcand\"]\n", - " nx.draw_networkx(g, pos=pos, with_labels=False, node_size=10, nodelist=nodes_to_draw, edgelist=[], node_color=\"green\", node_shape=\"x\", alpha=0.5)\n", - "\n", - " nodes_to_draw = [n for n in g.nodes if (n[0]==\"sc\" or n[0]==\"tp\")]\n", - " nx.draw_networkx(g, pos=pos, with_labels=False, node_size=1, nodelist=nodes_to_draw, edgelist=[], node_color=\"blue\", node_shape=\".\", alpha=0.5)\n", - "\n", - " #draw edges between genparticles and elements\n", - " edges_to_draw = [e for e in g.edges if e[0] in nodes_to_draw]\n", - " nx.draw_networkx_edges(g, pos, edgelist=edges_to_draw, arrows=False, alpha=0.1)\n", - "\n", - " plt.xlim(-6,6)\n", - " plt.ylim(-4,4)\n", - " plt.tight_layout()\n", - " plt.axis(\"on\")\n", - " plt.xlabel(\"$\\eta$\")\n", - " plt.xlabel(\"$\\phi$\")\n", - " return fig" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "draw_event(one_event[\"full_graph\"]);" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Normalized table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.DataFrame(np.concatenate([one_event[\"Xelem\"], one_event[\"ygen\"], one_event[\"ycand\"]], axis=-1))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#show some of the more important columns\n", - 
"df[[0,2,3,4,13,15,16,17,21,23,24,25]]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10,10))\n", - "#Element to element input distance matrix - graph adjacency\n", - "plt.imshow(one_event[\"dm\"].todense()>0, cmap=\"Greys\", interpolation=\"none\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10,10))\n", - "plt.imshow(one_event[\"dm_elem_cand\"].todense()>0, cmap=\"Greys\", interpolation=\"none\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10,10))\n", - "plt.imshow(one_event[\"dm_elem_gen\"].todense()>0, cmap=\"Greys\", interpolation=\"none\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/old/endtoend_gnn.ipynb b/notebooks/old/endtoend_gnn.ipynb deleted file mode 100644 index d87ecd338..000000000 --- a/notebooks/old/endtoend_gnn.ipynb +++ /dev/null @@ -1,718 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "import torch_geometric\n", - "\n", - "import torch.nn as nn\n", - "import torch.nn.functional as F\n", - "import torch_geometric.transforms as T\n", - "from torch_geometric.nn import EdgeConv, MessagePassing, EdgePooling\n", - "from torch.nn import Sequential as Seq, Linear as Lin, ReLU\n", - "from torch_scatter import scatter_mean\n", - "from torch_geometric.nn.inits import reset\n", - "from torch_geometric.data import Dataset, Data, DataLoader\n", - "\n", - "from glob import glob\n", - "import numpy as np\n", - "import os.path as osp\n", - "\n", - "import math\n", - "import time" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "device = torch.device('cuda')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numba\n", - "\n", - "@numba.njit\n", - "def regularize_X_y(X_elements, y_candidates, X_element_block_id, y_candidate_block_id):\n", - " ret_x = np.zeros_like(X_elements)\n", - " ret_x_id = np.zeros_like(X_element_block_id)\n", - " ret_y = np.zeros((X_elements.shape[0], y_candidates.shape[1]))\n", - " ret_y_id = np.zeros((X_elements.shape[0]), dtype=np.int64)\n", - " \n", - " idx = 0\n", - " for cl in np.unique(X_element_block_id):\n", - " m1 = X_element_block_id == cl\n", - " m2 = y_candidate_block_id == cl\n", - "\n", - " x = X_elements[m1]\n", - " y = y_candidates[m2]\n", - "\n", - " n = x.shape[0]\n", - " ret_x[idx:idx+n] = x[:]\n", - " ret_x_id[idx:idx+n] = cl\n", - " ret_y[idx:idx+y.shape[0]] = y[:]\n", - " ret_y_id[idx:idx+n] = cl\n", - " \n", - " idx += n\n", - " \n", - " return ret_x, ret_y, ret_x_id, ret_y_id" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class PFGraphDataset(Dataset):\n", - " def __init__(self, root, transform=None, pre_transform=None, connect_all=False, max_elements=None, 
max_candidates=None):\n", - " self._connect_all = connect_all\n", - " self._max_elements = max_elements\n", - " self._max_candidates = max_candidates\n", - " super(PFGraphDataset, self).__init__(root, transform, pre_transform)\n", - " self.raw_dir = root\n", - "\n", - " @property\n", - " def raw_file_names(self):\n", - " raw_list = glob(self.raw_dir+'/*ev*.npz')\n", - " return sorted([l.replace(self.raw_dir,'.') for l in raw_list])\n", - "\n", - " @property\n", - " def processed_file_names(self):\n", - " return ['data_{}.pt'.format(i) for i in range(len(self.raw_file_names))]\n", - "\n", - " def __len__(self):\n", - " return len(self.processed_file_names)\n", - "\n", - " def download(self):\n", - " # Download to `self.raw_dir`.\n", - " pass\n", - "\n", - " def process(self):\n", - " feature_scale = np.array([1., 1., 1., 1., 1., 1., 1., 1.])\n", - " i = 0\n", - " \n", - " for raw_file_name in self.raw_file_names:\n", - " if i%100 == 0:\n", - " print(i, len(self.raw_file_names))\n", - " \n", - " dist_file_name = raw_file_name.replace('ev','dist')\n", - " #print(\"loading data from files: {0}, {1}\".format(osp.join(self.raw_dir, raw_file_name), osp.join(self.raw_dir, dist_file_name)))\n", - " try:\n", - " fi = np.load(osp.join(self.raw_dir, raw_file_name))\n", - " fi_dist = np.load(osp.join(self.raw_dir, dist_file_name))\n", - " except Exception as e:\n", - " print(\"Could not open files: {0}, {1}\".format(osp.join(self.raw_dir, raw_file_name), osp.join(self.raw_dir, dist_file_name)))\n", - " continue\n", - " \n", - " X_elements = fi['elements'][:self._max_elements]\n", - " X_element_block_id = fi['element_block_id'][:self._max_elements]\n", - " y_candidates = fi['candidates'][:self._max_candidates, 1:]\n", - " y_candidate_block_id = fi['candidate_block_id'][:self._max_candidates]\n", - " \n", - " X_elements, y_candidates, X_element_block_id, y_candidate_block_id = regularize_X_y(\n", - " X_elements, y_candidates, X_element_block_id, y_candidate_block_id)\n", - " \n", - " num_elements = X_elements.shape[0]\n", - "\n", - " row_index = fi_dist['row']\n", - " col_index = fi_dist['col']\n", - " num_edges = row_index.shape[0]\n", - "\n", - " edge_index = np.zeros((2, 2*num_edges))\n", - " edge_index[0,:num_edges] = row_index\n", - " edge_index[1,:num_edges] = col_index\n", - " edge_index[0,num_edges:] = col_index\n", - " edge_index[1,num_edges:] = row_index\n", - " edge_index = torch.tensor(edge_index, dtype=torch.long)\n", - "\n", - " edge_data = fi_dist['data']\n", - " edge_attr = np.zeros((2*num_edges,1))\n", - " edge_attr[:num_edges,0] = edge_data\n", - " edge_attr[num_edges:,0] = edge_data\n", - " edge_attr = torch.tensor(edge_attr, dtype=torch.float)\n", - "\n", - " x = torch.tensor(X_elements/feature_scale, dtype=torch.float)\n", - "\n", - " #y = [X_element_block_id[i]==X_element_block_id[j] for (i,j) in edge_index.t().contiguous()]\n", - " y = torch.tensor(y_candidates, dtype=torch.float)\n", - " \n", - " data = Data(x=x, edge_index=edge_index, y=y, edge_attr=edge_attr)\n", - " data.x_cluster_labels = torch.tensor(X_element_block_id, dtype=torch.float)\n", - " data.y_cluster_labels = torch.tensor(y_candidate_block_id, dtype=torch.float)\n", - "# data.y_cluster_labels = torch.nn.functional.pad(\n", - "# data.y_cluster_labels, (0, x.shape[0] - data.y_cluster_labels.shape[0]),\n", - "# value=-1)\n", - "\n", - " if self.pre_filter is not None and not self.pre_filter(data):\n", - " continue\n", - " if self.pre_transform is not None:\n", - " data = self.pre_transform(data)\n", - "\n", - " 
torch.save(data, osp.join(self.processed_dir, 'data_{}.pt'.format(i)))\n", - " i += 1\n", - "\n", - " def get(self, idx):\n", - " data = torch.load(osp.join(self.processed_dir, 'data_{}.pt'.format(idx)))\n", - " return data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#!mkdir ../data/TTBar_run3/processed" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "full_dataset = PFGraphDataset(root='../data/TTbar_run3')\n", - "#full_dataset.process()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data = full_dataset.get(0)\n", - "input_dim = data.x.shape[1]\n", - "edge_dim = data.edge_attr.shape[1]\n", - "\n", - "batch_size = 4\n", - "n_epochs = 50\n", - "lr = 1e-5\n", - "hidden_dim = 128\n", - "n_iters = 1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class EdgeConvWithEdgeAttr(MessagePassing):\n", - " def __init__(self, nn, aggr='max', **kwargs):\n", - " super(EdgeConvWithEdgeAttr, self).__init__(aggr=aggr, **kwargs)\n", - " self.nn = nn\n", - " self.reset_parameters()\n", - "\n", - " def reset_parameters(self):\n", - " reset(self.nn)\n", - "\n", - " def forward(self, x, edge_index, edge_attr):\n", - " \"\"\"\"\"\"\n", - " x = x.unsqueeze(-1) if x.dim() == 1 else x\n", - " pseudo = edge_attr.unsqueeze(-1) if edge_attr.dim() == 1 else edge_attr\n", - " return self.propagate(edge_index, x=x, pseudo=pseudo)\n", - "\n", - " def message(self, x_i, x_j, pseudo):\n", - " return self.nn(torch.cat([x_i, x_j - x_i, pseudo], dim=1))\n", - "\n", - " def __repr__(self):\n", - " return '{}(nn={})'.format(self.__class__.__name__, self.nn)\n", - "\n", - "class PFNet1(nn.Module):\n", - " def __init__(self, input_dim=3, hidden_dim=32, edge_dim=1, output_dim=1, n_iters=1, aggr='add'):\n", - " super(PFNet1, self).__init__()\n", - " \n", - " convnn = nn.Sequential(nn.Linear(2*(hidden_dim + input_dim)+edge_dim, 2*hidden_dim),\n", - " nn.ReLU(),\n", - " nn.Linear(2*hidden_dim, hidden_dim),\n", - " nn.ReLU(),\n", - " nn.Linear(hidden_dim, hidden_dim),\n", - " nn.ReLU(),\n", - " nn.Linear(hidden_dim, hidden_dim),\n", - " nn.ReLU()\n", - " )\n", - " self.n_iters = n_iters\n", - " \n", - " self.batchnorm1 = nn.BatchNorm1d(input_dim)\n", - " self.batchnorm2 = nn.BatchNorm1d(40)\n", - "\n", - " self.inputnet = nn.Sequential(\n", - " nn.Linear(input_dim, hidden_dim),\n", - " nn.ReLU(),\n", - " nn.Linear(hidden_dim, hidden_dim),\n", - " nn.ReLU(),\n", - " nn.Linear(hidden_dim, hidden_dim),\n", - " nn.ReLU(),\n", - " nn.Linear(hidden_dim, hidden_dim),\n", - " nn.ReLU(),\n", - " )\n", - "\n", - "# self.edgenetwork = nn.Sequential(nn.Linear(2*(hidden_dim+input_dim)+edge_dim,2*hidden_dim),\n", - "# nn.ReLU(),\n", - "# nn.Linear(2*hidden_dim, output_dim),\n", - "# nn.Sigmoid())\n", - "\n", - " self.nodenetwork = EdgeConvWithEdgeAttr(nn=convnn, aggr=aggr)\n", - " \n", - " self.pooling1 = EdgePooling(40, dropout=0.2)\n", - " self.pooling2 = EdgePooling(40, dropout=0.2)\n", - " self.pooling3 = EdgePooling(40, dropout=0.2)\n", - " \n", - " self.outnetwork = nn.Sequential(nn.Linear(40, 100),\n", - " nn.ReLU(),\n", - " nn.Linear(100, 100),\n", - " nn.ReLU(),\n", - " nn.Linear(100, 100),\n", - " nn.ReLU(),\n", - " nn.Linear(100, 100),\n", - " nn.ReLU(),\n", - " nn.Linear(100, 3),\n", - " )\n", - "\n", - " def forward(self, data): \n", - " X = self.batchnorm1(data.x)\n", 
- " H = self.inputnet(X)\n", - " x = torch.cat([H,X],dim=-1)\n", - "\n", - " for i in range(self.n_iters):\n", - " H = self.nodenetwork(x, data.edge_index, data.edge_attr)\n", - " x = torch.cat([H,X],dim=-1)\n", - "\n", - " #row,col = data.edge_index \n", - " #output = self.edgenetwork(torch.cat([x[row], x[col], data.edge_attr],dim=-1)).squeeze(-1)\n", - "\n", - " pooled, edge_index, batch1, unpool_info1 = self.pooling1(x, data.edge_index, data.batch)\n", - " pooled, edge_index, batch2, unpool_info2 = self.pooling2(pooled, edge_index, batch1)\n", - " pooled, edge_index, batch3, unpool_info3 = self.pooling3(pooled, edge_index, batch2)\n", - " \n", - " r = self.outnetwork(self.batchnorm2(pooled))\n", - " \n", - " return r, unpool_info1.cluster, unpool_info2.cluster, unpool_info3.cluster\n", - " \n", - "class PFNet2(nn.Module):\n", - " def __init__(self, input_dim=3, hidden_dim=32, edge_dim=1, output_dim=1, n_iters=1, aggr='mean'):\n", - " super(PFNet2, self).__init__()\n", - " \n", - " convnn = nn.Sequential(nn.Linear(2*(hidden_dim + input_dim)+edge_dim, 2*hidden_dim),\n", - " nn.LeakyReLU(),\n", - " nn.Linear(2*hidden_dim, hidden_dim),\n", - " nn.LeakyReLU(),\n", - " nn.Linear(hidden_dim, hidden_dim),\n", - " nn.LeakyReLU(),\n", - " )\n", - " convnn2 = nn.Sequential(nn.Linear(2*(hidden_dim + input_dim)+edge_dim, 2*hidden_dim),\n", - " nn.LeakyReLU(),\n", - " nn.Linear(2*hidden_dim, hidden_dim),\n", - " nn.LeakyReLU(),\n", - " nn.Linear(hidden_dim, 3),\n", - " )\n", - "\n", - " self.n_iters = n_iters\n", - " \n", - " self.batchnorm1 = nn.BatchNorm1d(input_dim)\n", - " self.batchnorm2 = nn.BatchNorm1d(hidden_dim + input_dim)\n", - "\n", - " self.inputnet = nn.Sequential(\n", - " nn.Linear(input_dim, hidden_dim),\n", - " nn.LeakyReLU(),\n", - " nn.Linear(hidden_dim, hidden_dim),\n", - " nn.LeakyReLU(),\n", - " )\n", - "\n", - " self.nodenetwork = EdgeConvWithEdgeAttr(nn=convnn, aggr=aggr)\n", - " self.nodenetwork2 = EdgeConvWithEdgeAttr(nn=convnn2, aggr=aggr)\n", - " \n", - "# self.outnetwork = nn.Sequential(nn.Linear(hidden_dim + input_dim, 64),\n", - "# nn.LeakyReLU(),\n", - "# nn.Linear(64, 32),\n", - "# nn.LeakyReLU(),\n", - "# nn.Linear(32, 16),\n", - "# nn.LeakyReLU(),\n", - "# nn.Linear(16, 3),\n", - "# )\n", - "\n", - " def forward(self, data): \n", - " X = self.batchnorm1(data.x)\n", - " H = self.inputnet(X)\n", - " x = torch.cat([H,X],dim=-1)\n", - "\n", - " for i in range(self.n_iters):\n", - " x = self.batchnorm2(x)\n", - " H = self.nodenetwork(x, data.edge_index, data.edge_attr)\n", - " x = torch.cat([H,X],dim=-1)\n", - " \n", - " #r = self.outnetwork(x)\n", - " r = self.nodenetwork2(x, data.edge_index, data.edge_attr)\n", - " \n", - " return r" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numba\n", - "\n", - "@numba.njit\n", - "def reorder_predicted_target(predicted_y, target_y, x_cluster_labels, y_cluster_labels, p1, p2, p3):\n", - " maxvals = max(predicted_y.shape[0], target_y.shape[0])\n", - " chosen_pred = np.zeros(maxvals, dtype=np.int32)\n", - " chosen_target = np.zeros(maxvals, dtype=np.int32)\n", - " \n", - " idx = 0\n", - " for cl in np.unique(y_cluster_labels):\n", - " m1 = y_cluster_labels == cl\n", - " m2 = x_cluster_labels == cl\n", - "\n", - " #get the predicted and target candidates that use elements from this block\n", - " pred = p3[p2[p1[m2]]]\n", - " tgt = np.where(m1)[0]\n", - " n = min(pred.shape[0], tgt.shape[0])\n", - "\n", - " chosen_pred[idx:idx+n] = pred[:n]\n", - " 
chosen_target[idx:idx+n] = tgt[:n]\n", - " \n", - " idx += n\n", - " return chosen_pred[:idx], chosen_target[:idx]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# train_loader = DataLoader(full_dataset, batch_size=1, pin_memory=True, shuffle=False)\n", - "# model = PFNet1(input_dim=input_dim, hidden_dim=hidden_dim, edge_dim=edge_dim, n_iters=n_iters).to(device)\n", - "# optimizer = torch.optim.Adam(model.parameters(), lr = lr)\n", - "# loss = torch.nn.MSELoss()\n", - "\n", - "# # print(model)\n", - "# # model_parameters = filter(lambda p: p.requires_grad, model.parameters())\n", - "# # params = sum([np.prod(p.size()) for p in model_parameters])\n", - "# # print(params)\n", - "\n", - "# model.train()\n", - "\n", - "# losses = []\n", - "# corrs = []\n", - "# t0 = time.time()\n", - "\n", - "# for j in range(200):\n", - "# losses_batch = []\n", - "# corrs_batch = []\n", - " \n", - "# num_pred = []\n", - "# num_true = []\n", - "# for i, data in enumerate(train_loader):\n", - "# if i>200:\n", - "# break\n", - "# data = data.to(device)\n", - "# optimizer.zero_grad()\n", - "# batch_target = data.y \n", - "# batch_output, pool_clusters1, pool_clusters2, pool_clusters3 = model(data)\n", - " \n", - "# #Find the correspondence between the predicted candidates and true candidates based\n", - "# #on the block id of the input elements\n", - "# chosen_pred, chosen_target = reorder_predicted_target(\n", - "# batch_output.detach().cpu().numpy(),\n", - "# data.y.detach().cpu().numpy(),\n", - "# data.x_cluster_labels.detach().cpu().numpy(),\n", - "# data.y_cluster_labels.detach().cpu().numpy(),\n", - "# pool_clusters1.detach().cpu().numpy(),\n", - "# pool_clusters2.detach().cpu().numpy(),\n", - "# pool_clusters3.detach().cpu().numpy()\n", - "# )\n", - " \n", - "# #Create arrays where each row corresponds to a matched true or predicted candidate\n", - "# preds_cleaned = batch_output[torch.tensor(chosen_pred, dtype=torch.long)]\n", - "# targets_cleaned = data.y[torch.tensor(chosen_target, dtype=torch.long)]\n", - "# batch_loss = loss(\n", - "# preds_cleaned,\n", - "# targets_cleaned\n", - "# )\n", - " \n", - "# batch_loss.backward()\n", - "# batch_loss_item = batch_loss.item()\n", - "# optimizer.step()\n", - "\n", - "# corr_pt = np.corrcoef(\n", - "# preds_cleaned[:, 0].detach().cpu().numpy(),\n", - "# targets_cleaned[:, 0].detach().cpu().numpy())[0,1]\n", - "# corrs_batch += [corr_pt]\n", - "# losses_batch += [batch_loss_item]\n", - " \n", - "# l = np.mean(losses_batch)\n", - "# losses += [l]\n", - "# corrs += [np.mean(corrs_batch)]\n", - "# t1 = time.time()\n", - "# print(\"epoch={0}, dt={1:.1f}s, loss={2:.4f}, corr_pt={3:.4f}\".format(j, t1 - t0, losses[-1], corrs[-1]))\n", - "# t0 = t1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "train_loader = DataLoader(full_dataset, batch_size=batch_size, pin_memory=True, shuffle=False)\n", - "model = PFNet2(input_dim=input_dim, hidden_dim=hidden_dim, edge_dim=edge_dim, n_iters=n_iters).to(device)\n", - "optimizer = torch.optim.Adam(model.parameters(), lr = lr)\n", - "loss = torch.nn.MSELoss()\n", - "loss2 = torch.nn.BCELoss()\n", - "\n", - "print(model)\n", - "model_parameters = filter(lambda p: p.requires_grad, model.parameters())\n", - "params = sum([np.prod(p.size()) for p in model_parameters])\n", - "print(\"params\", params)\n", - "\n", - "model.train()\n", - "\n", - "losses = []\n", - "corrs = []\n", - "t0 = time.time()\n", - 
"\n", - "for j in range(n_epochs):\n", - " losses_batch = []\n", - " corrs_batch = []\n", - " \n", - " num_pred = []\n", - " num_true = []\n", - " for i, data in enumerate(train_loader):\n", - " if i>500:\n", - " break\n", - " data = data.to(device)\n", - " optimizer.zero_grad()\n", - " output = model(data)\n", - " \n", - " batch_loss = loss(\n", - " output,\n", - " data.y\n", - " )\n", - " \n", - " if i==0 and j%10==0:\n", - " #print(is_pred.detach().cpu().numpy())\n", - " #print((data.y[:, 0]!=0).to(dtype=torch.float).detach().cpu().numpy())\n", - " print(output[:5].detach().cpu().numpy())\n", - " print(data.y[:5].detach().cpu().numpy())\n", - " \n", - " batch_loss.backward()\n", - " batch_loss_item = batch_loss.item()\n", - " optimizer.step()\n", - "\n", - " corr_pt = np.corrcoef(\n", - " output[:, 0].detach().cpu().numpy(),\n", - " data.y[:, 0].detach().cpu().numpy())[0,1]\n", - " corrs_batch += [corr_pt]\n", - " losses_batch += [batch_loss_item]\n", - " \n", - " l = np.mean(losses_batch)\n", - " losses += [l]\n", - " corrs += [np.mean(corrs_batch)]\n", - " t1 = time.time()\n", - " print(\"epoch={0}, dt={1:.1f}s, loss={2:.4f}, corr_pt={3:.4f}\".format(j, t1 - t0, losses[-1], corrs[-1]))\n", - " t0 = t1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.plot(losses)\n", - "plt.ylim(0.8,2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.plot(corrs)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "d = data.to(device=device)\n", - "output = model(d)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cl = (data.x_cluster_labels == 0) & (data.batch == 0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data.x[cl]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data.y[cl]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output[cl]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "msk = data.y[:, 0] != 0\n", - "plt.scatter(\n", - " data.y[msk][:, 0].detach().cpu().numpy(),\n", - " output[msk][:, 0].detach().cpu().numpy(),\n", - " marker=\".\", alpha=0.5)\n", - "plt.plot([0,5],[0,5])\n", - "plt.xlim(0,5)\n", - "plt.ylim(0,5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "plt.scatter(\n", - " data.y[msk, 1].detach().cpu().numpy(),\n", - " output[msk, 1].detach().cpu().numpy(),\n", - " marker=\".\", alpha=0.5)\n", - "plt.plot([-5,5],[-5,5])\n", - "plt.xlim(-5,5)\n", - "plt.ylim(-5,5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "plt.scatter(\n", - " data.y[msk, 2].detach().cpu().numpy(),\n", - " output[msk, 2].detach().cpu().numpy(),\n", - " marker=\".\", alpha=0.5)\n", - "plt.plot([-5,5],[-5,5])\n", - "plt.xlim(-5,5)\n", - "plt.ylim(-5,5)" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "b = np.linspace(0,10,40)\n", - "plt.hist(data.y[msk, 0].detach().cpu().numpy(), bins=b, lw=2, histtype=\"step\");\n", - "plt.hist(output[msk, 0].detach().cpu().numpy(), bins=b, lw=2, histtype=\"step\");" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "b = np.linspace(-5,5,40)\n", - "plt.hist(data.y[msk, 1].detach().cpu().numpy(), bins=b, lw=2, histtype=\"step\");\n", - "plt.hist(output[msk, 1].detach().cpu().numpy(), bins=b, lw=2, histtype=\"step\");" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "b = np.linspace(-5,5,40)\n", - "plt.hist(data.y[msk, 2].detach().cpu().numpy(), bins=b, lw=2, histtype=\"step\");\n", - "plt.hist(output[msk, 2].detach().cpu().numpy(), bins=b, lw=2, histtype=\"step\");" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/old/genpf.ipynb b/notebooks/old/genpf.ipynb deleted file mode 100644 index a9f9798fb..000000000 --- a/notebooks/old/genpf.ipynb +++ /dev/null @@ -1,1359 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import numba\n", - "import hepaccelerate.backend_cpu as ha\n", - "import uproot\n", - "import sklearn.metrics\n", - "from collections import Counter" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def freqtable(arr, labels):\n", - " ret = {}\n", - " for l in labels:\n", - " ret[l] = np.sum(arr==l)\n", - " return ret" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_confusion_matrix(cm,\n", - " target_names,\n", - " title='Confusion matrix',\n", - " cmap=None,\n", - " normalize=True):\n", - " \"\"\"\n", - " given a sklearn confusion matrix (cm), make a nice plot\n", - "\n", - " Arguments\n", - " ---------\n", - " cm: confusion matrix from sklearn.metrics.confusion_matrix\n", - "\n", - " target_names: given classification classes such as [0, 1, 2]\n", - " the class names, for example: ['high', 'medium', 'low']\n", - "\n", - " title: the text to display at the top of the matrix\n", - "\n", - " cmap: the gradient of the values displayed from matplotlib.pyplot.cm\n", - " see http://matplotlib.org/examples/color/colormaps_reference.html\n", - " plt.get_cmap('jet') or plt.cm.Blues\n", - "\n", - " normalize: If False, plot the raw numbers\n", - " If True, plot the proportions\n", - "\n", - " Usage\n", - " -----\n", - " plot_confusion_matrix(cm = cm, # confusion matrix created by\n", - " # sklearn.metrics.confusion_matrix\n", - " normalize = True, # show proportions\n", - " target_names = y_labels_vals, # list of names of the classes\n", - " title = best_estimator_name) # title of graph\n", - "\n", - " Citiation\n", - " ---------\n", - " 
http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html\n", - "\n", - " \"\"\"\n", - " import matplotlib.pyplot as plt\n", - " import numpy as np\n", - " import itertools\n", - "\n", - " accuracy = np.trace(cm) / float(np.sum(cm))\n", - " misclass = 1 - accuracy\n", - "\n", - " if cmap is None:\n", - " cmap = plt.get_cmap('Blues')\n", - "\n", - " if normalize:\n", - " cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", - " cm[np.isnan(cm)] = 0.0\n", - "\n", - " plt.figure(figsize=(8, 6))\n", - " plt.imshow(cm, interpolation='nearest', cmap=cmap)\n", - " plt.title(title)\n", - " plt.colorbar()\n", - "\n", - " if target_names is not None:\n", - " tick_marks = np.arange(len(target_names))\n", - " plt.xticks(tick_marks, target_names, rotation=45)\n", - " plt.yticks(tick_marks, target_names)\n", - "\n", - " thresh = cm.max() / 1.5 if normalize else cm.max() / 2\n", - " for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n", - " if normalize:\n", - " plt.text(j, i, \"{:0.2f}\".format(cm[i, j]),\n", - " horizontalalignment=\"center\",\n", - " color=\"white\" if cm[i, j] > thresh else \"black\",\n", - " fontsize=8)\n", - " else:\n", - " plt.text(j, i, \"{:,}\".format(cm[i, j]),\n", - " horizontalalignment=\"center\",\n", - " color=\"white\" if cm[i, j] > thresh else \"black\",\n", - " fontsize=8)\n", - "\n", - "\n", - " plt.ylabel('True label')\n", - " plt.xlim(-1, len(target_names))\n", - " plt.ylim(-1, len(target_names))\n", - " plt.xlabel('Predicted label\\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))\n", - " plt.tight_layout()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "t = uproot.open(\"../pfntuple_1.root\").get(\"ana/pftree\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "iev = 0\n", - "cand_pt = t.array('pfcandidate_pt')[iev]\n", - "cand_eta = t.array('pfcandidate_eta')[iev]\n", - "cand_phi = t.array('pfcandidate_phi')[iev]\n", - "cand_pid = t.array('pfcandidate_pdgid')[iev]\n", - "m = np.ones_like(cand_pt, dtype=np.bool)\n", - "\n", - "tp_pt = t.array('trackingparticle_pt')[iev]\n", - "tp_eta = t.array('trackingparticle_eta')[iev]\n", - "tp_phi = t.array('trackingparticle_phi')[iev]\n", - "tp_pid = t.array('trackingparticle_pid')[iev]\n", - "tp_bx = t.array('trackingparticle_bx')[iev]\n", - "\n", - "sc_pt = t.array('simcluster_pt')[iev]\n", - "sc_eta = t.array('simcluster_eta')[iev]\n", - "sc_phi = t.array('simcluster_phi')[iev]\n", - "sc_pid = t.array('simcluster_pid')[iev]\n", - "sc_bx = t.array('simcluster_bx')[iev]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "t.array(\"trackingparticle_to_element\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "t.array(\"simcluster_to_element\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "st_x = t.array(\"simtrack_x\")[iev]\n", - "st_y = t.array(\"simtrack_y\")[iev]\n", - "st_z = t.array(\"simtrack_z\")[iev]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "rh_x = t.array(\"rechit_x\")[iev]\n", - "rh_y = t.array(\"rechit_y\")[iev]\n", - "rh_z = t.array(\"rechit_z\")[iev]\n", - "rh_eta = t.array(\"rechit_eta\")[iev]\n", - "rh_phi = t.array(\"rechit_phi\")[iev]\n", 
- "rh_e = t.array(\"rechit_e\")[iev]\n", - "rh_detid = t.array(\"rechit_detid\")[iev]\n", - "rh_det = t.array(\"rechit_det\")[iev]\n", - "rh_subdet = t.array(\"rechit_subdet\")[iev]\n", - "rh_idx_cluster = t.array(\"rechit_idx_cluster\")[iev]\n", - "\n", - "sh_x = t.array(\"simhit_x\")[iev]\n", - "sh_y = t.array(\"simhit_y\")[iev]\n", - "sh_z = t.array(\"simhit_z\")[iev]\n", - "sh_e = t.array(\"simhit_e\")[iev]\n", - "sh_eta = t.array(\"simhit_eta\")[iev]\n", - "sh_phi = t.array(\"simhit_phi\")[iev]\n", - "sh_det = t.array(\"simhit_det\")[iev]\n", - "sh_detid = t.array(\"simhit_detid\")[iev]\n", - "sh_subdet = t.array(\"simhit_subdet\")[iev]\n", - "sh_idx_cluster = t.array(\"simhit_idx_cluster\")[iev]\n", - "sh_idx_caloparticle = t.array(\"simhit_idx_caloparticle\")[iev]\n", - "\n", - "st_x = t.array(\"simtrack_x\")[iev]\n", - "st_y = t.array(\"simtrack_y\")[iev]\n", - "st_z = t.array(\"simtrack_z\")[iev]\n", - "st_idx_cluster = t.array(\"simtrack_idx_cluster\")[iev]\n", - "st_idx_caloparticle = t.array(\"simtrack_idx_caloparticle\")[iev]\n", - "st_pid = t.array(\"simtrack_pid\")[iev]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "candidx = np.nonzero(cand_pid == 130)[0][0]\n", - "icluster = np.nonzero(cl_idx_cand==candidx)[0][0]\n", - "\n", - "detids_r = rh_detid[rh_idx_cluster==icluster]\n", - "shids = []\n", - "for i in range(len(sh_detid)):\n", - " s = sh_detid[i]\n", - " if s in detids_r:\n", - " #print(\"rh\", s)\n", - " shids += [i]\n", - "shids = np.array(shids)\n", - "\n", - "plt.scatter(rh_eta[rh_idx_cluster == icluster], rh_phi[rh_idx_cluster == icluster], s=rh_e[rh_idx_cluster==icluster], alpha=0.2)\n", - "plt.scatter(sh_eta[shids], sh_phi[shids], alpha=0.2)\n", - "\n", - "plt.xlim(-8,8)\n", - "plt.ylim(-4,4)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cl_idx_cp[icluster]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cand_pid[:100]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Counter(sh_subdet[sh_det==4])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.scatter(sh_eta[sh_det==4], sh_subdet[sh_det==4])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Counter(sh_det)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.hist(sh_e[sh_e!=0], bins=np.linspace(0,2,100), density=1.0, histtype=\"step\", lw=2);\n", - "plt.hist(rh_e[rh_e!=0], bins=np.linspace(0,2,100), density=1.0, histtype=\"step\", lw=2);\n", - "plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# import plotly.graph_objects as go\n", - "\n", - "# fig = go.Figure()\n", - "\n", - "# m1 = (np.abs(sh_eta) < 1.0) & (np.abs(sh_phi) < 1.0) & (sh_e>0.2)\n", - "# m2 = (np.abs(rh_eta) < 1.0) & (np.abs(rh_phi) < 1.0) & (rh_e>0.2)\n", - "\n", - "# # Add traces\n", - "# fig.add_trace(go.Scatter(\n", - "# x=sh_eta[m1], y=sh_phi[m1],\n", - "# mode='markers',\n", - "# name='SimHit',\n", - "# hovertemplate='%{text}',\n", - "# marker={\"size\": 0.1*sh_e[m1], \"symbol\": \"circle\"},\n", - "# text = ['CaloParticle {}, cluster {}'.format(sh_idx_caloparticle[m1][i], 
sh_idx_cluster[m1][i]) for i in range(len(sh_eta[m1]))],\n", - "# ))\n", - "\n", - "# fig.add_trace(go.Scatter(\n", - "# x=rh_eta[m2], y=rh_phi[m2],\n", - "# mode='markers',\n", - "# name='RecHit',\n", - "# hovertemplate='%{text}',\n", - "# text = ['PFcluster {}'.format(rh_idx_cluster[m2][i]) for i in range(len(rh_eta[m2]))],\n", - "\n", - "# ))\n", - "\n", - "# fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.scatter(sh_eta, sh_e, marker=\".\", alpha=0.5)\n", - "plt.scatter(rh_eta, rh_e, marker=\".\", alpha=0.5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(-500, 500, 100)\n", - "plt.hist(sh_x, bins=bins, histtype=\"step\", lw=2, density=1.0);\n", - "plt.hist(rh_x, bins=bins, histtype=\"step\", lw=2, density=1.0);\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(-500, 500, 100)\n", - "plt.hist(sh_y, bins=bins, histtype=\"step\", lw=2, density=1.0);\n", - "plt.hist(rh_y, bins=bins, histtype=\"step\", lw=2, density=1.0);" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(-1000, 1000, 100)\n", - "plt.hist(sh_z, bins=bins, histtype=\"step\", lw=2, density=1.0);\n", - "plt.hist(rh_z, bins=bins, histtype=\"step\", lw=2, density=1.0);" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(0,100,100)\n", - "plt.hist(sh_e, bins=bins, histtype=\"step\", lw=2, density=1.0);\n", - "plt.hist(rh_e, bins=bins, histtype=\"step\", lw=2, density=1.0);\n", - "plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(0,10,100)\n", - "plt.hist(sh_e, bins=bins, histtype=\"step\", lw=2, density=1.0);\n", - "plt.hist(rh_e, bins=bins, histtype=\"step\", lw=2, density=1.0);\n", - "plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(20, 20))\n", - "m = (sh_e>0.5) & (sh_det == 3)\n", - "m2 = rh_det==3\n", - "plt.scatter(sh_eta[m], sh_phi[m], marker=\"o\", alpha=0.5, s=sh_e, color=\"blue\", label=\"simhits\")\n", - "plt.scatter(rh_eta[m2], rh_phi[m2], marker=\"x\", alpha=0.5, s=5*rh_e, color=\"red\", label=\"rechits\")\n", - "plt.xlim(-6,6)\n", - "plt.ylim(-4,4)\n", - "plt.xlabel(\"eta\")\n", - "plt.ylabel(\"phi\")\n", - "plt.legend()\n", - "plt.savefig(\"hits_ecal.png\", bbox_inches=\"tight\", dpi=100)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(20, 20))\n", - "m = (sh_e>0.5) & (sh_det==4)\n", - "m2 = rh_det==4\n", - "plt.scatter(sh_eta[m], sh_phi[m], marker=\"o\", alpha=0.5, s=sh_e, color=\"blue\", label=\"simhits\")\n", - "plt.scatter(rh_eta[m2], rh_phi[m2], marker=\"x\", alpha=0.5, s=5*rh_e, color=\"red\", label=\"rechits\")\n", - "plt.xlim(-6,6)\n", - "plt.ylim(-4,4)\n", - "plt.xlabel(\"eta\")\n", - "plt.ylabel(\"phi\")\n", - "plt.legend()\n", - "plt.savefig(\"hits_hcal.png\", bbox_inches=\"tight\", dpi=100)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "icaloparticle = 31\n", - "i = 1\n", - "\n", - "plt.figure(figsize=(20, 20))\n", - "\n", - 
"for icaloparticle in np.random.permutation(np.nonzero(cp_idx_cluster!=-1)[0])[:100]:\n", - " ax = plt.subplot(10, 10, i)\n", - " plt.title(str(cp_pid[icaloparticle]), y=0.94, va=\"top\")\n", - " mask_simhit = (sh_idx_caloparticle==icaloparticle) & (sh_e > 0.5)\n", - " mask_rechit = rh_idx_cluster==cp_idx_cluster[icaloparticle]\n", - "# print(cp_idx_cluster[icaloparticle], cp_pid[icaloparticle])\n", - "# print(st_pid[st_idx_caloparticle==icaloparticle])\n", - "# print(sh_det[mask_simhit])\n", - "# print(rh_det[mask_rechit])\n", - "# print(np.mean(sh_e[mask_simhit]))\n", - "# print(np.mean(rh_e[mask_rechit]))\n", - "\n", - " mask_sh_ecal = sh_det == 3\n", - " mask_sh_hcal = sh_det == 4\n", - " mask_rh_ecal = rh_det == 3\n", - " mask_rh_hcal = rh_det == 4\n", - " plt.scatter(sh_eta[mask_simhit & mask_sh_ecal], sh_phi[mask_simhit & mask_sh_ecal], s=10, marker=\"s\", label=\"SH ECAL\", alpha=0.5, color=\"lightblue\")\n", - " plt.scatter(sh_eta[mask_simhit & mask_sh_hcal], sh_phi[mask_simhit & mask_sh_hcal], s=10, marker=\"o\", label=\"SH HCAL\", alpha=0.5, color=\"red\")\n", - " plt.scatter(rh_eta[mask_rechit & mask_rh_ecal], rh_phi[mask_rechit & mask_rh_ecal], s=20, marker=\"x\", label=\"RH ECAL\", alpha=0.5, color=\"purple\");\n", - " plt.scatter(rh_eta[mask_rechit & mask_rh_hcal], rh_phi[mask_rechit & mask_rh_hcal], s=20, marker=\"+\", label=\"RH HCAL\", alpha=0.5, color=\"green\");\n", - " if i==1:\n", - " plt.legend(frameon=False, fontsize=8)\n", - " plt.xlim(-8,8)\n", - " plt.ylim(-4,4)\n", - " #plt.xlabel(\"$\\eta$\")\n", - " #plt.ylabel(\"$\\phi$\")\n", - " plt.xticks()\n", - " plt.yticks()\n", - " #ax.set_yticklabels([])\n", - " #ax.set_xticklabels([])\n", - "\n", - " i += 1\n", - "plt.tight_layout()\n", - "plt.savefig(\"hits.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins=np.linspace(-1500, 1500, 100)\n", - "plt.hist(rh_z, bins=bins, density=1.0, lw=2, histtype=\"step\");\n", - "plt.hist(sh_z[sh_x!=0], bins=bins, density=1.0, lw=2, histtype=\"step\");" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.log(rh_e[:100]+1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Counter(rh_det)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import plotly.express as px\n", - "import plotly.graph_objects as go\n", - "\n", - "points_rechit_ecal = go.Scatter3d(\n", - " x=rh_x[rh_det==3],\n", - " z=rh_y[rh_det==3],\n", - " y=rh_z[rh_det==3],\n", - " mode=\"markers\",\n", - " marker={\n", - " \"symbol\": \"square\",\n", - " \"opacity\": 0.5,\n", - " \"size\": 3,\n", - " \"color\": \"red\"\n", - "\n", - " },\n", - " name=\"ECAL rechits\"\n", - ")\n", - "\n", - "points_rechit_hcal = go.Scatter3d(\n", - " x=rh_x[rh_det==4],\n", - " z=rh_y[rh_det==4],\n", - " y=rh_z[rh_det==4],\n", - " mode=\"markers\",\n", - " marker={\n", - " \"symbol\": \"circle\",\n", - " \"opacity\": 0.5,\n", - " \"size\": 3,\n", - " \"color\": \"orange\"\n", - "\n", - " },\n", - " name=\"HCAL rechits\"\n", - ")\n", - "\n", - "points_simtracks = go.Scatter3d(\n", - " x=st_x[st_idx_caloparticle==icaloparticle],\n", - " z=st_y[st_idx_caloparticle==icaloparticle],\n", - " y=st_z[st_idx_caloparticle==icaloparticle],\n", - " mode=\"markers\",\n", - " marker={\n", - " \"symbol\": \"x\",\n", - " \"opacity\": 1.0,\n", - " \"size\": 5,\n", - 
" \"color\": \"green\"\n", - "\n", - " },\n", - " name=\"simtracks\"\n", - ")\n", - "\n", - "points_simhit_ecal = go.Scatter3d(\n", - " x=sh_x[sh_det==3],\n", - " z=sh_y[sh_det==3],\n", - " y=sh_z[sh_det==3],\n", - " mode=\"markers\",\n", - " marker={\n", - " \"symbol\": \"circle\",\n", - " \"opacity\": 0.5,\n", - " \"size\": 0.1*sh_e[sh_det==3],\n", - " #\"size\": 1.0,\n", - " \"color\": \"blue\"\n", - " },\n", - " name=\"ECAL simhits\",\n", - " #hovertemplate='%{text}',\n", - " #text = ['CaloParticle {}, cluster {}'.format(sh_idx_caloparticle[m1][i], sh_idx_cluster[m1][i]) for i in range(len(sh_eta[m1]))],\n", - "\n", - ")\n", - "\n", - "points_simhit_hcal = go.Scatter3d(\n", - " x=sh_x[(sh_det==4) & (sh_x != 0)],\n", - " z=sh_y[(sh_det==4) & (sh_x != 0)],\n", - " y=sh_z[(sh_det==4) & (sh_x != 0)],\n", - " mode=\"markers\",\n", - " marker={\n", - " \"symbol\": \"circle\",\n", - " \"opacity\": 0.5,\n", - " \"size\": 0.1*sh_e[(sh_det==4) & (sh_x != 0)],\n", - " #\"size\": 1.0,\n", - " \"color\": \"purple\"\n", - " },\n", - " name=\"HCAL simhits\",\n", - " #hovertemplate='%{text}',\n", - " #text = ['CaloParticle {}, cluster {}'.format(sh_idx_caloparticle[m1][i], sh_idx_cluster[m1][i]) for i in range(len(sh_eta[m1]))],\n", - "\n", - ")\n", - "\n", - "data=[\n", - " points_rechit_ecal,\n", - " points_rechit_hcal,\n", - " points_simhit_ecal,\n", - " points_simhit_hcal,\n", - "]\n", - "\n", - "fig = go.Figure(data=data)\n", - "\n", - "fig.update_layout(\n", - " autosize=False,\n", - " width=1000,\n", - " height=1000,\n", - " margin=go.layout.Margin(\n", - " l=0,\n", - " r=0,\n", - " b=0,\n", - " t=0,\n", - " ),\n", - " scene_camera={\n", - " \"eye\": dict(x=0.8, y=0.8, z=0.8)\n", - " }\n", - ")\n", - "\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import networkx as nx" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "iev = 0\n", - "g = nx.DiGraph()\n", - "\n", - "cp_cl = t.array(\"caloparticle_idx_cluster\")[iev]\n", - "cp_pt = t.array(\"caloparticle_pt\")[iev]\n", - "cp_e = t.array(\"caloparticle_energy\")[iev]\n", - "cp_eta = t.array(\"caloparticle_eta\")[iev]\n", - "cp_phi = t.array(\"caloparticle_phi\")[iev]\n", - "cp_pid = t.array(\"caloparticle_pid\")[iev]\n", - "\n", - "cl_cnd = t.array(\"cluster_idx_candidate\")[iev]\n", - "cl_cp = t.array(\"cluster_idx_caloparticle\")[iev]\n", - "cl_e = t.array(\"cluster_energy\")[iev]\n", - "cl_eta = t.array(\"cluster_eta\")[iev]\n", - "cl_phi = t.array(\"cluster_phi\")[iev]\n", - "\n", - "tp_t = t.array(\"trackingparticle_idx_track\")[iev]\n", - "tp_pt = t.array(\"trackingparticle_pt\")[iev]\n", - "tp_eta = t.array(\"trackingparticle_eta\")[iev]\n", - "tp_phi = t.array(\"trackingparticle_phi\")[iev]\n", - "tp_pid = t.array(\"trackingparticle_pid\")[iev]\n", - "\n", - "t_cnd = t.array(\"track_idx_candidate\")[iev]\n", - "t_pt = t.array(\"track_pt\")[iev]\n", - "t_eta = t.array(\"track_eta\")[iev]\n", - "t_phi = t.array(\"track_phi\")[iev]\n", - "\n", - "c_pt = t.array(\"pfcandidate_pt\")[iev]\n", - "c_eta = t.array(\"pfcandidate_eta\")[iev]\n", - "c_phi = t.array(\"pfcandidate_phi\")[iev]\n", - "c_pid = t.array(\"pfcandidate_pdgid\")[iev]\n", - "\n", - "ncaloparticle = len(cp_cl)\n", - "ncluster = len(cl_cnd)\n", - "ncandidate = len(c_pt)\n", - "ntrackingparticle = len(tp_t)\n", - "ntrack = len(t_cnd)\n", - "\n", - "for i in range(ncaloparticle):\n", - " g.add_node((\"caloparticle\", i))\n", - " 
\n", - "for i in range(ncluster):\n", - " g.add_node((\"cluster\", i))\n", - " \n", - "for i in range(ncandidate):\n", - " g.add_node((\"candidate\", i))\n", - "\n", - "for i in range(ntrackingparticle):\n", - " g.add_node((\"trackingparticle\", i))\n", - " \n", - "for i in range(ntrack):\n", - " g.add_node((\"track\", i))\n", - " \n", - "for i in range(ncaloparticle):\n", - " ind_cluster = cp_cl[i]\n", - " if ind_cluster != -1:\n", - " g.add_edge((\"caloparticle\", i), (\"cluster\", ind_cluster))\n", - " \n", - "for i in range(ncluster):\n", - " ind_cand = cl_cnd[i]\n", - " if ind_cand != -1:\n", - " g.add_edge((\"cluster\", i), (\"candidate\", ind_cand))\n", - " \n", - " ind_cp = cl_cp[i]\n", - " if ind_cp != -1:\n", - " g.nodes[(\"caloparticle\", ind_cp)][\"state\"] = \"matched\"\n", - " \n", - "for i in range(ntrackingparticle):\n", - " ind_track = tp_t[i]\n", - " if ind_track != -1:\n", - " g.add_edge((\"trackingparticle\", i), (\"track\", ind_track))\n", - "\n", - "for i in range(ntrack):\n", - " ind_cand = t_cnd[i]\n", - " if ind_cand != -1:\n", - " g.add_edge((\"track\", i), (\"candidate\", ind_cand))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sgs = [nx.subgraph(g, c) for c in nx.weakly_connected_components(g)]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sgs_d = {}\n", - "for sg in sgs:\n", - " cands = [n for n in sg.nodes if n[0] == \"candidate\"]\n", - " if len(cands) == 1:\n", - " pdgid = c_pid[cands[0][1]]\n", - " if not (pdgid in sgs_d):\n", - " sgs_d[pdgid] = []\n", - " sgs_d[pdgid] += [sg]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from networkx.drawing.nx_agraph import write_dot, graphviz_layout" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "colors = {\n", - " \"caloparticle\": \"red\",\n", - " \"cluster\": \"lightblue\",\n", - " \"candidate\": \"green\",\n", - " \"trackingparticle\": \"pink\",\n", - " \"track\": \"orange\"\n", - "}\n", - "\n", - "for pid in sgs_d.keys():\n", - " ip = 0\n", - " for sg in sgs_d[pid][:10]:\n", - " labels = {}\n", - " plt.figure(figsize=(max(3, len(sg.nodes)+1), 10))\n", - " for n in sg.nodes:\n", - " if n[0] == \"caloparticle\":\n", - " d = sg.nodes[n]\n", - " labels[n] = \"{}\\npt: {:.2f}\\ne: {:.2f}\\neta: {:.2f}\\nphi: {:.2f}\\npid: {}\".format(\n", - " \"CaloParticle\", cp_pt[n[1]], cp_e[n[1]], cp_eta[n[1]], cp_phi[n[1]], cp_pid[n[1]]\n", - " ) + \"\\n\" + d.get(\"state\", \"\")\n", - " elif n[0] == \"cluster\":\n", - " labels[n] = \"{}\\ne: {:.2f}\\neta: {:.2f}\\nphi: {:.2f}\".format(\n", - " \"PFCluster\", cl_e[n[1]], cl_eta[n[1]], cl_phi[n[1]]\n", - " )\n", - " elif n[0] == \"trackingparticle\":\n", - " labels[n] = \"{}\\npt: {:.2f}\\neta: {:.2f}\\nphi: {:.2f}\\npid: {}\".format(\n", - " \"TrackingParticle\", tp_pt[n[1]], tp_eta[n[1]], tp_phi[n[1]], tp_pid[n[1]]\n", - " )\n", - " elif n[0] == \"track\":\n", - " labels[n] = \"{}\\npt: {:.2f}\\neta: {:.2f}\\nphi: {:.2f}\".format(\n", - " \"Track\", t_pt[n[1]], t_eta[n[1]], t_phi[n[1]]\n", - " )\n", - " elif n[0] == \"candidate\":\n", - " labels[n] = \"{}\\npt: {:.2f}\\neta: {:.2f}\\nphi: {:.2f}\\npid: {}\".format(\n", - " \"PFCandidate\", c_pt[n[1]], c_eta[n[1]], c_phi[n[1]], c_pid[n[1]]\n", - " )\n", - " else:\n", - " labels[n] = n\n", - "\n", - " pos = graphviz_layout(sg, prog='dot')\n", - " 
nx.draw(sg,\n", - " pos=pos,\n", - " with_labels=True,\n", - " node_color=[colors[n[0]] for n in sg.nodes],\n", - " labels=labels,\n", - " edge_color=\"gray\"\n", - " )\n", - " plt.tight_layout()\n", - " plt.savefig(\"graph_ev{}_pid{}_i{}.pdf\".format(iev, pid, ip))\n", - " ip += 1\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tp_idx_track = t.array(\"trackingparticle_idx_track\")\n", - "tp_pt = t.array(\"trackingparticle_pt\")\n", - "tp_eta = t.array(\"trackingparticle_eta\")\n", - "tp_phi = t.array(\"trackingparticle_phi\")\n", - "tp_pid = t.array(\"trackingparticle_pid\")\n", - "\n", - "cp_idx_cluster = t.array(\"caloparticle_idx_cluster\")\n", - "cp_pt = t.array(\"caloparticle_pt\")\n", - "cp_e = t.array(\"caloparticle_energy\")\n", - "cp_eta = t.array(\"caloparticle_eta\")\n", - "cp_phi = t.array(\"caloparticle_phi\")\n", - "cp_pid = t.array(\"caloparticle_pid\")\n", - "\n", - "t_pt = t.array('track_pt')\n", - "t_eta = t.array('track_eta')\n", - "t_phi = t.array('track_phi')\n", - "t_idx_c = t.array('track_idx_candidate')\n", - "\n", - "cl_e = t.array('cluster_energy')\n", - "cl_eta = t.array('cluster_eta')\n", - "cl_phi = t.array('cluster_phi')\n", - "cl_idx_cp = t.array('cluster_idx_caloparticle')\n", - "cl_idx_c = t.array('cluster_idx_candidate')\n", - "\n", - "c_pt = t.array('pfcandidate_pt')\n", - "c_eta = t.array('pfcandidate_eta')\n", - "c_phi = t.array('pfcandidate_phi')\n", - "c_pid = t.array('pfcandidate_pdgid')\n", - "c_idx_tp = t.array('pfcandidate_idx_trackingparticle')\n", - "c_idx_cp = t.array('pfcandidate_idx_caloparticle')\n", - "\n", - "mc = c_idx_tp != -1\n", - "mc2 = (c_idx_tp == -1) & (c_idx_cp != -1)\n", - "mc3 = (c_idx_tp == -1) & (c_idx_cp == -1)\n", - "mtp = tp_idx_track != -1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.array([0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0,1.5,2.0,3.0,4.0,5.0,6.0,8.0,10,15,20,50])\n", - "c1, _ = np.histogram(tp_pt.flatten(), bins=bins)\n", - "c2, _ = np.histogram(tp_pt[tp_idx_track!=-1].flatten(), bins=bins)\n", - "ratio = c2/c1\n", - "errs = np.sqrt(c2)/c1\n", - "ratio[np.isnan(ratio)] = 0\n", - "plt.errorbar(bins[:-1] + np.diff(bins)/2, ratio, yerr=errs, marker=\".\", lw=1, elinewidth=1)\n", - "plt.ylabel(\"matching efficiency\\nTrackingParticle to Track\")\n", - "plt.xlabel(\"TrackingParticle $p_T$ [GeV]\")\n", - "plt.savefig(\"trackingparticle_matching_eff.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(-3,3,100)\n", - "c1, _ = np.histogram(tp_eta.flatten(), bins=bins)\n", - "c2, _ = np.histogram(tp_eta[tp_idx_track!=-1].flatten(), bins=bins)\n", - "ratio = c2/c1\n", - "errs = np.sqrt(c2)/c1\n", - "ratio[np.isnan(ratio)] = 0\n", - "plt.errorbar(bins[:-1] + np.diff(bins)/2, ratio, yerr=errs, marker=\".\", lw=1, elinewidth=1)\n", - "plt.ylabel(\"matching efficiency\\nTrackingParticle to Track\")\n", - "plt.xlabel(\"TrackingParticle $\\eta$\")\n", - "plt.savefig(\"trackingparticle_matching_eff_eta.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.hist(tp_pt.flatten(), bins=np.linspace(0,5,101));\n", - "plt.yscale(\"log\")\n", - "plt.xlabel(\"TrackingParticle $p_T$ [GeV]\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": 
[], - "source": [ - "bins = np.array([0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0,1.5,2.0,3.0,4.0,5.0,6.0,8.0,10,15,20,50])\n", - "c1, _ = np.histogram(cp_e.flatten(), bins=bins)\n", - "c2, _ = np.histogram(cp_e[cp_idx_cluster!=-1].flatten(), bins=bins)\n", - "ratio = c2/c1\n", - "errs = np.sqrt(c2)/c1\n", - "ratio[np.isnan(ratio)] = 0\n", - "plt.errorbar(bins[:-1] + np.diff(bins)/2, ratio, yerr=errs, marker=\".\", lw=1, elinewidth=1)\n", - "plt.ylabel(\"matching efficiency\\nCaloParticle to Cluster\")\n", - "plt.xlabel(\"CaloParticle energy [GeV]\")\n", - "plt.savefig(\"caloparticle_matching_eff.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(-6,6,100)\n", - "c1, _ = np.histogram(cp_eta.flatten(), bins=bins)\n", - "c2, _ = np.histogram(cp_eta[cp_idx_cluster!=-1].flatten(), bins=bins)\n", - "ratio = c2/c1\n", - "errs = np.sqrt(c2)/c1\n", - "ratio[np.isnan(ratio)] = 0\n", - "plt.errorbar(bins[:-1] + np.diff(bins)/2, ratio, yerr=errs, marker=\".\", lw=1, elinewidth=1)\n", - "plt.ylabel(\"matching efficiency\\nCaloParticle to Cluster\")\n", - "plt.xlabel(\"CaloParticle $\\eta$\")\n", - "plt.savefig(\"caloparticle_matching_eff_eta.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.hist(cp_e.flatten(), bins=np.linspace(0,5,101));\n", - "plt.yscale(\"log\")\n", - "plt.xlabel(\"CaloParticle energy [GeV]\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "a = cp_idx_cluster[cp_idx_cluster != -1]\n", - "vs = sum([\n", - " list(freqtable(_a, np.unique(_a)).values()) for _a in a\n", - "], [])\n", - "\n", - "plt.hist(vs, bins=np.linspace(0, 20, 21))\n", - "plt.xlabel(\"CaloParticles per Cluster\")\n", - "plt.ylabel(\"Number of Clusters\")\n", - "plt.yscale(\"log\")\n", - "plt.title(\"CaloParticles per Cluster: {:.2f}\".format(np.mean(vs)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "a = cl_idx_c[cl_idx_c != -1]\n", - "vs1 = sum([\n", - " list(freqtable(_a, np.unique(_a)).values()) for _a in a\n", - "], [])\n", - "\n", - "plt.hist(vs1, bins=np.linspace(0, 10, 11))\n", - "plt.xlabel(\"Cluster per Candidate\")\n", - "plt.ylabel(\"Number of Candidates\")\n", - "plt.yscale(\"log\")\n", - "plt.title(\"Cluster per Candidate: {:.2f}\".format(np.mean(vs1)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "a = t_idx_c[t_idx_c != -1]\n", - "vs2 = sum([\n", - " list(freqtable(_a, np.unique(_a)).values()) for _a in a\n", - "], [])\n", - "plt.hist(vs2, bins=np.linspace(0, 20, 11))\n", - "plt.xlabel(\"Tracks per Candidate\")\n", - "plt.ylabel(\"Number of Candidates\")\n", - "plt.yscale(\"log\")\n", - "plt.title(\"Tracks per Candidate: {:.2f}\".format(np.mean(vs2)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.mean(vs1), np.mean(vs2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Counter(vs1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Counter(vs2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(3*3, 
2*3))\n", - "\n", - "plt.suptitle(\"GenParticle kinematics\", y=1.01)\n", - "plt.subplot(2, 3, 1)\n", - "plt.hist(tp_pt.flatten(), bins=np.linspace(0,10,100), histtype=\"step\", lw=2, label=\"all TrackingParticles\", density=1);\n", - "plt.hist(tp_pt[tp_idx_track!=-1].flatten(), bins=np.linspace(0,10,100), histtype=\"step\", lw=2, label=\"matched to reco track\", density=1);\n", - "plt.ylim(1e-3, 1000)\n", - "plt.yscale(\"log\")\n", - "plt.legend(frameon=False)\n", - "plt.xlabel(\"$p_T$ [GeV]\")\n", - "\n", - "plt.subplot(2, 3, 2)\n", - "plt.hist(tp_eta.flatten(), bins=np.linspace(-6, 6, 100), histtype=\"step\", lw=2, density=1);\n", - "plt.hist(tp_eta[tp_idx_track!=-1].flatten(), bins=np.linspace(-6, 6, 100), histtype=\"step\", lw=2, density=1);\n", - "plt.yscale(\"log\")\n", - "plt.xlabel(\"$\\eta$\")\n", - "\n", - "plt.subplot(2, 3, 3)\n", - "plt.hist(tp_phi.flatten(), bins=np.linspace(-4, 4, 100), histtype=\"step\", lw=2, density=1);\n", - "plt.hist(tp_phi[tp_idx_track!=-1].flatten(), bins=np.linspace(-4, 4, 100), histtype=\"step\", lw=2, density=1);\n", - "plt.yscale(\"log\")\n", - "plt.xlabel(\"$\\phi$\")\n", - "\n", - "plt.subplot(2, 3, 4)\n", - "plt.hist(cp_pt.flatten(), bins=np.linspace(0,10,100), histtype=\"step\", lw=2, label=\"all CaloParticles\", density=1);\n", - "plt.hist(cp_pt[cp_idx_cluster!=-1].flatten(), bins=np.linspace(0,10,100), histtype=\"step\", lw=2, label=\"matched to reco cluster\", density=1);\n", - "plt.yscale(\"log\")\n", - "plt.ylim(1e-3, 100)\n", - "plt.legend(frameon=False)\n", - "plt.xlabel(\"$p_T$ [GeV]\")\n", - "\n", - "plt.subplot(2, 3, 5)\n", - "plt.hist(cp_eta.flatten(), bins=np.linspace(-6, 6, 100), histtype=\"step\", lw=2, density=1);\n", - "plt.hist(cp_eta[cp_idx_cluster!=-1].flatten(), bins=np.linspace(-6, 6, 100), histtype=\"step\", lw=2, density=1);\n", - "plt.yscale(\"log\")\n", - "plt.xlabel(\"$\\eta$\")\n", - "\n", - "plt.subplot(2, 3, 6)\n", - "plt.hist(cp_phi.flatten(), bins=np.linspace(-4, 4, 100), histtype=\"step\", lw=2, density=1);\n", - "plt.hist(cp_phi[cp_idx_cluster!=-1].flatten(), bins=np.linspace(-4, 4, 100), histtype=\"step\", lw=2, density=1);\n", - "plt.yscale(\"log\")\n", - "plt.xlabel(\"$\\phi$\")\n", - "\n", - "plt.tight_layout()\n", - "plt.savefig(\"genparticle_to_reco.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "inds = np.random.permutation(range(len(tp_pt[tp_idx_track!=-1].flatten())))[:1000]\n", - "\n", - "plt.figure(figsize=(3*3, 2*3))\n", - "plt.suptitle(\"GenParticle to Track or Cluster\", y=1.01)\n", - "plt.subplot(2,3,1)\n", - "plt.scatter(\n", - " tp_pt[tp_idx_track!=-1].flatten()[inds],\n", - " t_pt[tp_idx_track[tp_idx_track!=-1]].flatten()[inds],\n", - " alpha=0.2, marker=\".\")\n", - "plt.xscale(\"log\")\n", - "plt.yscale(\"log\")\n", - "plt.xlabel(\"TrackingParticle pT [GeV]\")\n", - "plt.ylabel(\"recoTrack pT [GeV]\")\n", - "plt.xlim(0.1, 100)\n", - "plt.ylim(0.1, 100)\n", - "\n", - "plt.subplot(2,3,2)\n", - "plt.scatter(\n", - " tp_eta[tp_idx_track!=-1].flatten()[inds],\n", - " t_eta[tp_idx_track[tp_idx_track!=-1]].flatten()[inds],\n", - " alpha=0.2, marker=\".\")\n", - "plt.xlim(-4, 4)\n", - "plt.ylim(-4, 4)\n", - "plt.xlabel(\"TrackingParticle $\\eta$\")\n", - "plt.ylabel(\"recoTrack $\\eta$\")\n", - "\n", - "plt.subplot(2,3,3)\n", - "plt.scatter(\n", - " tp_phi[tp_idx_track!=-1].flatten()[inds],\n", - " t_phi[tp_idx_track[tp_idx_track!=-1]].flatten()[inds],\n", - " alpha=0.2, marker=\".\")\n", - 
"plt.xlim(-4, 4)\n", - "plt.ylim(-4, 4)\n", - "plt.xlabel(\"TrackingParticle $\\phi$\")\n", - "plt.ylabel(\"recoTrack $\\phi$\")\n", - "\n", - "inds = np.random.permutation(range(len(cp_e[cl_idx_cp[cl_idx_cp!=-1]].flatten())))[:1000]\n", - "\n", - "plt.subplot(2,3,4)\n", - "plt.scatter(\n", - " cp_e[cl_idx_cp[cl_idx_cp!=-1]].flatten()[inds],\n", - " cl_e[cl_idx_cp!=-1].flatten()[inds], alpha=0.2, marker=\".\")\n", - "plt.xscale(\"log\")\n", - "plt.yscale(\"log\")\n", - "plt.xlabel(\"CaloParticle energy\")\n", - "plt.ylabel(\"PFCluster energy [GeV]\")\n", - "plt.xlim(0.1, 1000)\n", - "plt.ylim(0.1, 1000)\n", - "\n", - "plt.subplot(2,3,5)\n", - "plt.scatter(\n", - " cp_eta[cl_idx_cp[cl_idx_cp!=-1]].flatten()[inds],\n", - " cl_eta[cl_idx_cp!=-1].flatten()[inds], alpha=0.2, marker=\".\")\n", - "plt.xlim(-6, 6)\n", - "plt.ylim(-6, 6)\n", - "plt.xlabel(\"CaloParticle $\\eta$\")\n", - "plt.ylabel(\"PFCluster $\\eta$\")\n", - "\n", - "plt.subplot(2,3,6)\n", - "plt.scatter(\n", - " cp_phi[cl_idx_cp[cl_idx_cp!=-1]].flatten()[inds],\n", - " cl_phi[cl_idx_cp!=-1].flatten()[inds], alpha=0.2, marker=\".\")\n", - "plt.xlim(-4, 4)\n", - "plt.ylim(-4, 4)\n", - "plt.xlabel(\"CaloParticle $\\phi$\")\n", - "plt.ylabel(\"PFCluster $\\phi$\")\n", - "\n", - "plt.tight_layout()\n", - "plt.savefig(\"genparticle_to_reco_scatter.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "unmatched_pids = c_pid[mc3].flatten()\n", - "matched_pids = c_pid[~mc3].flatten()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "labels = np.unique(c_pid.flatten())\n", - "f1 = freqtable(matched_pids, labels)\n", - "f2 = freqtable(unmatched_pids, labels)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "numtot = len(c_pid.flatten())\n", - "b1 = np.array([f1[l]/numtot for l in labels])\n", - "b2 = np.array([f2[l]/numtot for l in labels])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "xs = np.arange(len(b1))\n", - "plt.bar(xs, b1, label=\"matched PFCandidates\")\n", - "plt.bar(xs, b2, bottom=b1, label=\"unmatched PFCandidates\")\n", - "plt.xticks(xs, labels);\n", - "plt.legend(frameon=False)\n", - "plt.ylabel(\"fraction of PFCandidates\")\n", - "plt.xlabel(\"PFCandidate PDGID\")\n", - "plt.title(\"PFCandidate GenParticle matching efficiency\")\n", - "plt.savefig(\"candidate_matching_efficiency.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "labels=np.unique(c_pid.flatten())\n", - "cm = sklearn.metrics.confusion_matrix(cp_pid[c_idx_cp[mc2]].flatten(), c_pid[mc2].flatten(), labels=labels)\n", - "plot_confusion_matrix(cm, labels, normalize=False)\n", - "plt.title(\"CaloParticle to PFCandidate\")\n", - "plt.ylabel(\"CaloParticle PDGID\")\n", - "plt.xlabel(\"PFCandidate PDGID\")\n", - "plt.savefig(\"caloparticle_to_candidate_confusion.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cm = sklearn.metrics.confusion_matrix(tp_pid[c_idx_tp[mc]].flatten(), c_pid[mc].flatten(), labels=labels)\n", - "plot_confusion_matrix(cm, labels, normalize=False)\n", - "plt.title(\"TrackingParticle to PFCandidate\")\n", - "plt.ylabel(\"TrackingParticle PDGID\")\n", - 
"plt.xlabel(\"PFCandidate PDGID\")\n", - "plt.savefig(\"trackingparticle_to_pfcandidate_confusion.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "inds = np.random.permutation(range(len(tp_pt[c_idx_tp[mc]].flatten())))[:1000]\n", - "\n", - "plt.figure(figsize=(3*3, 2*3))\n", - "plt.suptitle(\"GenParticle to PFCandidate\", y=1.01)\n", - "plt.subplot(2,3,1)\n", - "plt.scatter(tp_pt[c_idx_tp[mc]].flatten()[inds], c_pt[mc].flatten()[inds], marker=\".\", alpha=0.2)\n", - "plt.xscale(\"log\")\n", - "plt.yscale(\"log\")\n", - "plt.xlabel(\"TrackingParticle $p_T$ [GeV]\")\n", - "plt.ylabel(\"PFCand $p_T$ [GeV]\")\n", - "plt.xlim(0.1, 100)\n", - "plt.ylim(0.1, 100)\n", - "\n", - "plt.subplot(2,3,2)\n", - "plt.scatter(tp_eta[c_idx_tp[mc]].flatten()[inds], c_eta[mc].flatten()[inds], marker=\".\", alpha=0.2)\n", - "plt.xlabel(\"TrackingParticle $\\eta$\")\n", - "plt.ylabel(\"PFCand $\\eta$\")\n", - "plt.xlim(-5, 5)\n", - "plt.ylim(-5, 5)\n", - "\n", - "plt.subplot(2,3,3)\n", - "plt.scatter(tp_phi[c_idx_tp[mc]].flatten()[inds], c_phi[mc].flatten()[inds], marker=\".\", alpha=0.2)\n", - "plt.xlabel(\"TrackingParticle $\\phi$\")\n", - "plt.ylabel(\"PFCand $\\phi$\")\n", - "plt.xlim(-4, 4)\n", - "plt.ylim(-4, 4)\n", - "\n", - "inds = np.random.permutation(range(len(cp_pt[c_idx_cp[mc2]].flatten())))[:1000]\n", - "\n", - "plt.subplot(2,3,4)\n", - "plt.scatter(cp_pt[c_idx_cp[mc2]].flatten()[inds], c_pt[mc2].flatten()[inds], marker=\".\", alpha=0.2)\n", - "plt.xscale(\"log\")\n", - "plt.yscale(\"log\")\n", - "plt.xlabel(\"CaloParticle $p_T$ [GeV]\")\n", - "plt.ylabel(\"PFCandidate $p_T$ [GeV]\")\n", - "plt.xlim(0.1, 100)\n", - "plt.ylim(0.1, 100)\n", - "\n", - "plt.subplot(2,3,5)\n", - "plt.scatter(cp_eta[c_idx_cp[mc2]].flatten()[inds], c_eta[mc2].flatten()[inds], marker=\".\", alpha=0.2)\n", - "plt.xlabel(\"CaloParticle $\\eta$\")\n", - "plt.ylabel(\"PFCandidate $\\eta$\")\n", - "plt.xlim(-5, 5)\n", - "plt.ylim(-5, 5)\n", - "\n", - "plt.subplot(2,3,6)\n", - "plt.scatter(cp_phi[c_idx_cp[mc2]].flatten()[inds], c_phi[mc2].flatten()[inds], marker=\".\", alpha=0.2)\n", - "plt.xlabel(\"CaloParticle $\\phi$\")\n", - "plt.ylabel(\"PFCandidate $\\phi$\")\n", - "plt.xlim(-4, 4)\n", - "plt.ylim(-4, 4)\n", - "\n", - "plt.tight_layout()\n", - "plt.savefig(\"genparticle_to_candidate_scatter.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "bins = np.linspace(-0.5, 0.5, 101)\n", - "plt.subplot(2,1,1)\n", - "\n", - "plt.title(\"TrackingParticle to Track\")\n", - "plt.hist(((tp_pt[mtp] - t_pt[tp_idx_track[mtp]])/tp_pt[mtp]).flatten(), bins=bins, histtype=\"step\", lw=2, label=\"$p_T$\");\n", - "plt.hist(((tp_eta[mtp] - t_eta[tp_idx_track[mtp]])/tp_eta[mtp]).flatten(), bins=bins, histtype=\"step\", lw=2, label=\"$\\eta$\");\n", - "plt.hist(((tp_phi[mtp] - t_phi[tp_idx_track[mtp]])/tp_phi[mtp]).flatten(), bins=bins, histtype=\"step\", lw=2, label=\"$\\phi$\");\n", - "plt.yscale(\"log\")\n", - "plt.legend(frameon=False)\n", - "plt.xlabel(\"(gen - reco) / gen\")\n", - "\n", - "\n", - "plt.subplot(2,1,2)\n", - "plt.title(\"TrackingParticle to PFCandidate\")\n", - "plt.hist(((tp_pt[c_idx_tp[mc]] - c_pt[mc])/tp_eta[c_idx_tp[mc]]).flatten(), bins=bins, histtype=\"step\", lw=2, label=\"$p_T$\");\n", - "plt.hist(((tp_eta[c_idx_tp[mc]] - c_eta[mc])/tp_eta[c_idx_tp[mc]]).flatten(), bins=bins, histtype=\"step\", 
lw=2, label=\"$\\eta$\");\n", - "plt.hist(((tp_phi[c_idx_tp[mc]] - c_phi[mc])/tp_eta[c_idx_tp[mc]]).flatten(), bins=bins, histtype=\"step\", lw=2, label=\"$\\phi$\");\n", - "plt.yscale(\"log\")\n", - "plt.legend(frameon=False)\n", - "plt.xlabel(\"(gen - reco) / gen\")\n", - "\n", - "plt.tight_layout()\n", - "plt.savefig(\"reco_resolutions.pdf\", bbox_inches=\"tight\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/old/python_ntuple.ipynb b/notebooks/old/python_ntuple.ipynb deleted file mode 100644 index 98af593c9..000000000 --- a/notebooks/old/python_ntuple.ipynb +++ /dev/null @@ -1,695 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"-1\" \n", - "os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"\n", - "\n", - "import numpy as np\n", - "import glob\n", - "import matplotlib.pyplot as plt\n", - "import numba\n", - "from collections import Counter\n", - "import math\n", - "import sklearn\n", - "import sklearn.metrics\n", - "import sklearn.ensemble\n", - "import sklearn.cluster\n", - "import scipy.sparse\n", - "import keras\n", - "import sys\n", - "import pickle\n", - "import matplotlib\n", - "\n", - "sys.path += [\"../test\"]\n", - "from train_clustering import encode_triu, decode_triu\n", - "from train_regression import get_unique_X_y\n", - "from benchmark_solution import create_points\n", - "\n", - "from matplotlib.colors import LogNorm" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "@numba.njit\n", - "def get_types_in_block(X, y, blk):\n", - " return [int(x) for x in sorted(X[y==blk, 0])]\n", - "\n", - "def get_blocksize_candsize_matrix(el_bl_id, cand_bl_id):\n", - " blids = np.unique(el_bl_id)\n", - " sizes = np.zeros((len(blids), 2), dtype=np.float32)\n", - " i = 0\n", - " els_counter = Counter(el_bl_id)\n", - " cands_counter = Counter(cand_bl_id)\n", - " for bl in blids:\n", - " sizes[i, 0] = els_counter[bl]\n", - " sizes[i, 1] = cands_counter[bl]\n", - " i += 1\n", - " \n", - " b = np.linspace(0,20,21)\n", - " c, _, _ = np.histogram2d(sizes[:, 0], sizes[:, 1], bins=(b, b))\n", - " return c" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Load all elements" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "all_sgs = []\n", - "\n", - "num_clusters = []\n", - "num_tracks = []\n", - "num_cands = []\n", - "num_blocks = []\n", - "\n", - "blsize_candsize_matrices = []\n", - "\n", - "for fi in glob.glob(\"../data/TTbar_run3/*ev*.npz\"):\n", - " fi = open(fi, \"rb\")\n", - " data = np.load(fi)\n", - " \n", - " #list of PF input elements in the event\n", - " X = data[\"elements\"]\n", - " \n", - " #tracks have type=1\n", - " num_clusters += [np.sum(X[:, 0] != 1)]\n", - " num_tracks += [np.sum(X[:, 0] == 1)]\n", - " \n", - " #unique ID for each cluster/block of elements that the PFAlgo considered independently\n", - " #this can be considered as the target output of an improved PFBlockAlgo\n", - " y = 
data[\"element_block_id\"]\n", - " num_blocks += [len(np.unique(y))]\n", - "\n", - " #List of candidates produced in the event.\n", - " #This can be considered as the output of PFAlgo\n", - " cands = data[\"candidates\"]\n", - " num_cands += [len(cands)]\n", - "\n", - " #get the types of the elements for each cluster/block\n", - " sgs = [tuple(get_types_in_block(X, y, blk)) for blk in np.unique(y)]\n", - " all_sgs += sgs\n", - " \n", - " blsize_candsize_matrices += [get_blocksize_candsize_matrix(data[\"element_block_id\"], data[\"candidate_block_id\"])]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(0,20,21)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(6,6))\n", - "cmat = sum(blsize_candsize_matrices)\n", - "plt.imshow(cmat, norm=LogNorm(vmin=1, vmax=10*np.sum(cmat)), origin=\"lower\", interpolation=None)\n", - "\n", - "plt.colorbar()\n", - "plt.xticks(bins);\n", - "plt.yticks(bins);\n", - "\n", - "plt.title(\"Miniblock size to number of\\nproduced PFCandidates\")\n", - "plt.xlabel(\"number of candidates\")\n", - "plt.ylabel(\"number of elements in block\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.plot(bins[:-1], cmat.sum(axis=1).cumsum()/np.sum(cmat), marker=\"o\")\n", - "plt.xticks(bins);\n", - "plt.xlabel(\"maximum block size\")\n", - "plt.ylabel(\"fraction of candidates\")\n", - "plt.xlim(0,3)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.plot(bins[:-1], cmat.sum(axis=1).cumsum()/np.sum(cmat), marker=\"o\")\n", - "plt.xticks(bins);\n", - "plt.xlabel(\"maximum block size\")\n", - "plt.ylabel(\"fraction of candidates\")\n", - "plt.ylim(0.9, 1.0)\n", - "plt.xlim(2,20)\n", - "plt.savefig(\"cand_blocksize.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.hist(num_clusters, bins=np.linspace(0, 5000, 100), label=\"clusters\", histtype=\"step\", lw=2);\n", - "plt.hist(num_tracks, bins=np.linspace(0, 5000, 100), label=\"tracks\", histtype=\"step\", lw=2);\n", - "plt.hist(num_blocks, bins=np.linspace(0, 5000, 100), label=\"blocks\", histtype=\"step\", lw=2);\n", - "plt.hist(num_cands, bins=np.linspace(0, 5000, 100), label=\"candidates\", histtype=\"step\", lw=2);\n", - "plt.legend(frameon=False)\n", - "plt.xlabel(\"number of els/cands/blocks\")\n", - "plt.ylabel(\"number of events\")\n", - "plt.savefig(\"num_elems.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we look at the number of blocks of a certain size." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "block_sizes = Counter([len(sg) for sg in all_sgs])\n", - "print(\"block sizes\", block_sizes)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.hist([len(sg) for sg in all_sgs], bins=np.linspace(0,100,101));\n", - "plt.xlabel(\"block size\")\n", - "plt.ylabel(\"Number of blocks\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.hist([len(sg) for sg in all_sgs], bins=np.linspace(0,100,101), histtype=\"step\", lw=2);\n", - "plt.yscale(\"log\")\n", - "plt.xlabel(\"block size\")\n", - "plt.ylabel(\"number of blocks\")\n", - "plt.savefig(\"block_sizes.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's look at what the blocks f size, 1, 2, 3 and 4 are made of." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_block_nelem(blocks_nelem):\n", - " kv = list(blocks_nelem.items())\n", - " xs = np.arange(len(kv))\n", - " ys = np.array([v for k, v in kv])\n", - "\n", - " plt.bar(xs, ys)\n", - " plt.xticks(xs, [k for k, v in kv], rotation=90)\n", - " \n", - "\n", - "for blocksize in range(1,5):\n", - " sizes = [\",\".join(map(str, sg)) for sg in all_sgs if len(sg)==blocksize]\n", - " blocks_nelem = Counter(sizes)\n", - " print(\"{0}-element blocks\".format(blocksize), blocks_nelem)\n", - " plt.figure(figsize=(4,4))\n", - " plt.title(\"Blocks of size {0}: {1} ({2:.0f}%)\".format(blocksize, len(sizes), 100.0*len(sizes)/len(all_sgs)))\n", - " plot_block_nelem(blocks_nelem)\n", - " plt.xlabel(\"Block element types\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Look at the first 10 blocks." 
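The two notes above ("blocks of size 1, 2, 3 and 4", "first 10 blocks") rely on `all_sgs` holding one tuple of element types per block and on `Counter` to summarize the compositions. Below is a hedged sketch of that bookkeeping with invented block tuples; in the notebook, `all_sgs` is filled from `get_types_in_block` over every event, and the type codes follow the legend further down (1 = TRACK, 4 = ECAL, 5 = HCAL).

# Sketch with made-up block compositions, mirroring the Counter-based summary above.
from collections import Counter

all_sgs = [(1,), (4,), (1, 4), (1, 4, 5), (1, 4), (5,), (1, 4, 5, 5)]

block_sizes = Counter(len(sg) for sg in all_sgs)
print("block sizes", block_sizes)   # Counter({1: 3, 2: 2, 3: 1, 4: 1})

for blocksize in range(1, 5):
    compositions = Counter(",".join(map(str, sg)) for sg in all_sgs if len(sg) == blocksize)
    frac = 100.0 * sum(compositions.values()) / len(all_sgs)
    print("{}-element blocks ({:.0f}%):".format(blocksize, frac), compositions)

The printed fractions correspond to the "Blocks of size N: count (x%)" titles used in the bar plots above.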
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fi = open(\"../data/TTbar_run3/step3_ntuple_10_ev39.npz\", \"rb\")\n", - "data = np.load(fi)\n", - "\n", - "dm = scipy.sparse.load_npz(open(\"../data/TTbar_run3/step3_ntuple_10_dist39.npz\", \"rb\")).todense()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "largest_blocks = sorted(Counter(data[\"element_block_id\"]).items(), key=lambda x: x[1], reverse=True)\n", - "largest_blocks[:10]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "block_ids = data[\"element_block_id\"]\n", - "inds_elem = np.arange(len(data[\"elements\"]))\n", - "inds_cand = np.arange(len(data[\"candidates\"]))\n", - "for blk, blksize in largest_blocks[:10]:\n", - " candidates_from_block = data[\"candidate_block_id\"] == blk\n", - " elems_in_block = data[\"element_block_id\"] == blk\n", - " tps = get_types_in_block(data[\"elements\"], data[\"element_block_id\"], blk)\n", - " print(\"in block\", blk, \"had the following elements: {0}\".format(Counter(tps)))\n", - " for ielem in inds_elem[elems_in_block]:\n", - " print(\" elements[{0}]: type={1} energy={2:.2f}\".format(ielem, int(data[\"elements\"][ielem, 0]), data[\"elements\"][ielem, 1]))\n", - " print(\"from which the following {0} candidates were produced\".format(len(inds_cand[candidates_from_block])))\n", - " for icand in inds_cand[candidates_from_block]:\n", - " print(\" candidates[{0}]: pdgid={1} pt={2:.2f}\".format(icand, int(data[\"candidates\"][icand, 0]), data[\"candidates\"][icand, 1]))\n", - " print()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Scratchpad" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# NONE=0,\n", - "# TRACK=1, \n", - "# PS1=2, \n", - "# PS2=3, \n", - "# ECAL=4, \n", - "# HCAL=5,\n", - "# GSF=6,\n", - "# BREM=7,\n", - "# HFEM=8,\n", - "# HFHAD=9,\n", - "# SC=10,\n", - "# HO=11" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import networkx as nx\n", - "import pandas" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def make_df(points_data, points_pos, points_to_elem, elems_block_id):\n", - " df = pandas.DataFrame(points_data.copy(),\n", - " columns=[\"id\", \"type\", \"layer\"],\n", - " index=points_data[:, 0])\n", - "\n", - " df[\"block_id\"] = [elems_block_id[points_to_elem[ip]] for ip in range(len(df))]\n", - " df[\"pos_eta\"] = np.array(points_pos[:, 0])\n", - " df[\"pos_phi\"] = np.array(points_pos[:, 1])\n", - " df[\"energy\"] = np.array(points_pos[:, 2])\n", - " df[\"size\"] = 1\n", - " df[\"symbol\"] = \"dot\"\n", - " df[\"color\"] = df[\"type\"]\n", - " df[\"layer\"] = 1 + 2*df[\"layer\"]\n", - "\n", - " df[\"pos_x\"] = 2*df[\"pos_eta\"]\n", - " df[\"pos_y\"] = df[\"layer\"]*np.cos(df[\"pos_phi\"])\n", - " df[\"pos_z\"] = df[\"layer\"]*np.sin(df[\"pos_phi\"])\n", - "\n", - " df.loc[df[\"type\"]==1, \"size\"] = 0.2\n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import itertools\n", - "def color_largest_blocks(block_ids, highlight_blocks):\n", - " colors = []\n", - " cols_to_take = itertools.cycle([\"red\", \"green\", \"blue\", \"orange\", \"purple\", \"cyan\", \"yellow\", 
\"brown\"])\n", - " colmap = {t: next(cols_to_take) for t in highlight_blocks}\n", - " for i in block_ids:\n", - " if i in highlight_blocks:\n", - " colors.append(colmap[i])\n", - " else:\n", - " colors.append(\"gray\")\n", - " return colors\n", - "\n", - "\n", - "def cluster_pfblockalgo(Nelem, distance_matrix):\n", - " dm2 = distance_matrix.copy()\n", - " dm2[dm2>0] = 1\n", - " g = nx.from_numpy_matrix(dm2)\n", - "\n", - " block_id_aspf = np.zeros((Nelem, ), dtype=np.int32)\n", - " for ibl, conn in enumerate(nx.connected_components(g)):\n", - " block_id_aspf[np.array(list(conn), dtype=np.int32)] = ibl\n", - " return block_id_aspf" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import plotly.express as px\n", - "import plotly.graph_objects as go\n", - "import colorlover as cl\n", - "\n", - "def draw_plot(dfsel, highlight_blocks, point_to_point_link, title, layers_to_plot=[1,3,5,7], do_tracks=True):\n", - " \n", - " \n", - " msk_blocks = np.vstack([dfsel[\"block_id\"] == b for b in highlight_blocks]).sum(axis=0)>=1\n", - " msk_layers = np.vstack([dfsel[\"layer\"] == b for b in layers_to_plot]).sum(axis=0)>=1\n", - " \n", - " trk = (dfsel[\"type\"]==1) | (dfsel[\"type\"]==6)\n", - "\n", - " points_trk_blk = go.Scatter3d(\n", - " x=dfsel.loc[trk & msk_blocks & msk_layers, 'pos_x'].values,\n", - " y=dfsel.loc[trk&msk_blocks & msk_layers, 'pos_y'].values,\n", - " z=dfsel.loc[trk&msk_blocks & msk_layers, 'pos_z'].values,\n", - " mode=\"markers\",\n", - " marker={\n", - " \"symbol\": \"cross\",\n", - " \"opacity\": 0.8,\n", - " \"size\": 5,\n", - " \"color\": color_largest_blocks(dfsel.loc[trk&msk_blocks&msk_layers, \"block_id\"], highlight_blocks),\n", - " #\"colorscale\": cl.scales['11']['qual'][\"Set3\"]\n", - " },\n", - " name=\"track point in block\"\n", - " )\n", - "\n", - " points_trk = go.Scatter3d(\n", - " x=dfsel.loc[trk & ~msk_blocks, 'pos_x'].values,\n", - " y=dfsel.loc[trk & ~msk_blocks, 'pos_y'].values,\n", - " z=dfsel.loc[trk & ~msk_blocks, 'pos_z'].values,\n", - " mode=\"markers\",\n", - " marker={\n", - " \"symbol\": \"cross\",\n", - " \"opacity\": 0.05,\n", - " \"size\": 5,\n", - " \"color\": \"gray\"\n", - " #\"colorscale\": cl.scales['11']['qual'][\"Set3\"]\n", - " },\n", - " name=\"track point\"\n", - " )\n", - "\n", - " points_other_blk = go.Scatter3d(\n", - " x=dfsel.loc[(~trk) & msk_blocks & msk_layers, 'pos_x'].values,\n", - " y=dfsel.loc[(~trk) & msk_blocks & msk_layers, 'pos_y'].values,\n", - " z=dfsel.loc[(~trk) & msk_blocks & msk_layers, 'pos_z'].values,\n", - " mode=\"markers\",\n", - " marker={\n", - " \"symbol\": \"circle\",\n", - " \"opacity\": 0.8,\n", - " \"size\": 5,\n", - " \"color\": color_largest_blocks(dfsel.loc[~trk&msk_blocks&msk_layers, \"block_id\"], highlight_blocks),\n", - " },\n", - " name=\"calo cluster in block\"\n", - " )\n", - "\n", - "\n", - " points_other = go.Scatter3d(\n", - " x=dfsel.loc[~trk & ~msk_blocks, 'pos_x'].values,\n", - " y=dfsel.loc[~trk & ~msk_blocks, 'pos_y'].values,\n", - " z=dfsel.loc[~trk & ~msk_blocks, 'pos_z'].values,\n", - " mode=\"markers\",\n", - " marker={\n", - " \"symbol\": \"circle\",\n", - " \"opacity\": 0.05,\n", - " \"size\": 5,\n", - " \"color\": \"gray\"\n", - " },\n", - " name=\"calo cluster\"\n", - " )\n", - "\n", - " line_points_x = []\n", - " line_points_y = []\n", - " line_points_z = []\n", - " \n", - " for ip in np.array(range(len(point_to_point_link))):\n", - " p0 = point_to_point_link[ip, 0]\n", - " p1 = point_to_point_link[ip, 1]\n", - 
" if dfsel.loc[p0, \"block_id\"] in highlight_blocks or dfsel.loc[p1, \"block_id\"] in highlight_blocks:\n", - " if p0 in dfsel.index and p1 in dfsel.index:\n", - " line_points_x += [dfsel.loc[p0, \"pos_x\"], dfsel.loc[p1, \"pos_x\"], None]\n", - " line_points_y += [dfsel.loc[p0, \"pos_y\"], dfsel.loc[p1, \"pos_y\"], None]\n", - " line_points_z += [dfsel.loc[p0, \"pos_z\"], dfsel.loc[p1, \"pos_z\"], None]\n", - "\n", - "\n", - " tracks = go.Scatter3d(\n", - " x=line_points_x,\n", - " y=line_points_y,\n", - " z=line_points_z,\n", - " mode=\"lines\",\n", - " opacity=0.2,\n", - " line={\"color\": \"black\"},\n", - " name=\"track between layers\")\n", - " \n", - " data=[\n", - " points_trk,\n", - " points_other,\n", - " points_trk_blk,\n", - " points_other_blk,\n", - " ]\n", - "\n", - " if do_tracks:\n", - " data += [tracks]\n", - " fig = go.Figure(data=data)\n", - "\n", - " fig.update_layout(\n", - " autosize=False,\n", - " width=700,\n", - " height=500,\n", - " margin=go.layout.Margin(\n", - " l=50,\n", - " r=0,\n", - " b=0,\n", - " t=50,\n", - " ),\n", - " title=title,\n", - " scene_camera={\n", - " \"eye\": dict(x=0.8, y=0.8, z=0.8)\n", - " }\n", - " )\n", - "\n", - " fig.show()\n", - " return fig" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "points_data, points_pos, point_to_point_link, point_to_elem = create_points(data[\"elements\"])\n", - "df = make_df(points_data, points_pos, point_to_elem, data[\"element_block_id\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "largest_blocks = sorted(Counter(df[\"block_id\"]).items(), key=lambda x: x[1], reverse=True)\n", - "largest_blocks[:10]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = draw_plot(df, [22, 189, 229], point_to_point_link, \"PFAlgo-based true blocks\")\n", - "fig.write_image(\"blocks_true.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_pfalgo = make_df(points_data, points_pos, point_to_elem, cluster_pfblockalgo(len(data[\"elements\"]), dm))\n", - "largest_blocks = sorted(Counter(df_pfalgo[\"block_id\"][df_pfalgo[\"type\"]==1]).items(), key=lambda x: x[1], reverse=True)\n", - "largest_blocks[:10]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = draw_plot(df_pfalgo, [0, 1, 2], point_to_point_link, \"PFBlockAlgo-based blocks, tracker surface\", [1])\n", - "fig.write_image(\"blocks_pfblockalgo_tracker.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = draw_plot(df_pfalgo, [0, 1, 2], point_to_point_link, \"PFBlockAlgo-based blocks, tracker surface\", [1], do_tracks=False)\n", - "fig.write_image(\"blocks_pfblockalgo_tracker_notracks.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = draw_plot(df_pfalgo, [0, 1, 2], point_to_point_link, \"PFBlockAlgo-based blocks, ECAL surface\", [3])\n", - "fig.write_image(\"blocks_pfblockalgo_ecal.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = draw_plot(df_pfalgo, [0, 1, 2], point_to_point_link, \"PFBlockAlgo-based blocks, ECAL surface\", [3], do_tracks=False)\n", - 
"fig.write_image(\"blocks_pfblockalgo_ecal_notracks.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = draw_plot(df_pfalgo, [0, 1, 2], point_to_point_link, \"PFBlockAlgo-based blocks, HCAL surface\", [5])\n", - "fig.write_image(\"blocks_pfblockalgo_hcal.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = draw_plot(df_pfalgo, [0, 1, 2], point_to_point_link, \"PFBlockAlgo-based blocks, HCAL surface\", [5], do_tracks=False)\n", - "fig.write_image(\"blocks_pfblockalgo_hcal_notracks.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = draw_plot(df_pfalgo, [0, 1, 2], point_to_point_link, \"PFBlockAlgo-based blocks\")\n", - "fig.write_image(\"blocks_pfblockalgo.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = draw_plot(df_pfalgo, np.unique(df_pfalgo[\"block_id\"]), point_to_point_link, \"PFBlockAlgo-based blocks\")\n", - "fig.write_image(\"blocks_pfblockalgo_all.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = draw_plot(df, np.unique(df[\"block_id\"]), point_to_point_link, \"PFBAlgo-based true blocks\",)\n", - "fig.write_image(\"blocks_true_all.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/old/simrec.ipynb b/notebooks/old/simrec.ipynb deleted file mode 100644 index 322c4ac75..000000000 --- a/notebooks/old/simrec.ipynb +++ /dev/null @@ -1,463 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import pandas as pd\n", - "import mplhep\n", - "plt.style.use(mplhep.style.CMS)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import scipy\n", - "import scipy.sparse" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dm = scipy.sparse.load_npz(\"../dist_0.npz\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.imshow(dm.todense()>0.0, cmap=\"Greys\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gen = pd.read_csv(\"../gen_0.csv\", index_col=0)\n", - "reco = pd.read_csv(\"../reco_0.csv\", index_col=0)\n", - "\n", - "fi0 = np.load(\"../ev_0.npz\")\n", - "rg = fi0[\"reco_gen\"]\n", - "rc = fi0[\"reco_cand\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gen[\"num_matched\"] = (rg > 0.0).sum(axis=0)\n", - "reco[\"num_matched\"] = (rg > 0.0).sum(axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - 
"outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "gen[\"pt\"].hist(bins=np.linspace(0,10,100))\n", - "plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Xs = []\n", - "ys = []\n", - "ycs = []\n", - "for i in range(10):\n", - " fi = np.load(\"../ev_{}.npz\".format(i))\n", - " X = fi[\"X\"]\n", - " y = fi[\"ygen\"]\n", - " yc = fi[\"ycand\"]\n", - " \n", - " Xs += [X]\n", - " ys += [y]\n", - " ycs += [yc]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# plt.hist(rg[rg>0], bins=np.linspace(0,200,100));\n", - "# plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for igen in range(20):\n", - " idx_max = np.argmax(rg[:, igen])\n", - " inds_max = np.argsort(rg[:, igen])[::-1][:3]\n", - " rgs = rg[inds_max, igen]\n", - " print(igen, inds_max, rgs)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5, 5))\n", - "plt.imshow(rg>0.0, cmap=\"Greys\", interpolation='None')\n", - "plt.xlabel(\"genparticle index\")\n", - "plt.ylabel(\"recoparticle index\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5, 5))\n", - "\n", - "rgs = rg>0.0\n", - "plt.hist(rgs.sum(axis=0), bins=np.linspace(0, 20, 21), density=1.0, histtype=\"step\", lw=2, label=\"gen\");\n", - "plt.hist(rgs.sum(axis=1), bins=np.linspace(0, 20, 21), density=1.0, histtype=\"step\", lw=2, label=\"reco\");\n", - "plt.legend()\n", - "#plt.yscale(\"log\")\n", - "plt.xlabel(\"number of associations\")\n", - "plt.ylabel(\"fraction of total\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5, 5))\n", - "plt.imshow(rc>0.0, cmap=\"Greys\", interpolation='None')\n", - "plt.xlabel(\"PFCandidate index\")\n", - "plt.ylabel(\"recoparticle index\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5, 5))\n", - "rcs = rc>0.0\n", - "plt.hist(rcs.sum(axis=0), bins=np.linspace(0, 10, 11), density=1.0, histtype=\"step\", lw=2, label=\"candidate\");\n", - "plt.hist(rcs.sum(axis=1), bins=np.linspace(0, 10, 11), density=1.0, histtype=\"step\", lw=2, label=\"reco\");\n", - "plt.legend()\n", - "#plt.yscale(\"log\")\n", - "plt.xlabel(\"number of associations\")\n", - "plt.ylabel(\"fraction of total\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from collections import Counter" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_id_pairs(yc[:, 0], X[:, 0])\n", - "plt.title(\"reco to PFCandidate\", y=1.0)\n", - "plt.ylabel(\"PFCandidate PDGID\")\n", - "plt.xlabel(\"Reco object type\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_id_pairs(yc[m2, 0], y[m2, 0])\n", - "plt.title(\"gen to PFCandidate\", y=1.0)\n", - "plt.xlabel(\"GenParticle PDGID\")\n", - 
"plt.ylabel(\"PFCandidate PDGID\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "has_cand = (yc[:, 0] != 0)\n", - "has_gen = (y[:, 0] != 0)\n", - "\n", - "is_track = X[:, 0] == 1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Counter(X[has_gen, 0])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Counter(X[~has_gen, 0])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X[is_track].shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X[is_track & has_cand & has_gen].shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X[is_track & has_cand & ~has_gen].shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X[is_track & ~has_cand].shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "yc[has_cand & has_gen, 0].shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "yc[has_cand & ~has_gen, 0].shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pids = np.unique(yc[has_cand, 0])\n", - "c1 = Counter(yc[has_cand&has_gen, 0])\n", - "c2 = Counter(yc[has_cand&~has_gen, 0])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "yvals = [c1[p]/np.sum(has_cand) for p in pids]\n", - "yvals2 = [c2[p]/np.sum(has_cand) for p in pids]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(9,5))\n", - "xs = np.arange(len(pids))\n", - "plt.bar(xs, yvals, label=\"matched\")\n", - "plt.bar(xs, yvals2, bottom=yvals, label=\"not matched\")\n", - "plt.xticks(xs, [int(x) for x in pids]);\n", - "plt.ylabel(\"fraction of total candidates\")\n", - "plt.xlabel(\"PFCandidate PDGID\")\n", - "plt.title(\"PFCandidate to Gen match\")\n", - "plt.legend()\n", - "plt.ylim(0,0.4)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df2 = pd.DataFrame(np.hstack([X, y, yc]))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df2[df2[0]==4]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "randinds = np.random.permutation(range(len(df2)))[:100]\n", - "df3 = df2[[0, 4, 6, 12]]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df3.loc[randinds]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "ax = plt.axes()\n", - "\n", - "bigmask = (np.abs(X[:, 2]) < 0.2) & (np.abs(X[:, 3]) < 0.2)\n", - "plt.scatter(X[bigmask, 2], X[bigmask, 3], marker=\".\", label=\"reco\")\n", - "plt.scatter(y[bigmask & m, 2], y[bigmask & m, 3], marker=\"x\", label=\"gen\")\n", - 
"plt.scatter(yc[bigmask & m2, 2], yc[bigmask & m2, 3], marker=\".\", label=\"PF\")\n", - "\n", - "for idx in np.nonzero(bigmask)[0]:\n", - " _x1, _y1 = X[idx, 2], X[idx, 3]\n", - " _x2, _y2 = y[idx, 2], y[idx, 3]\n", - " _x3, _y3 = yc[idx, 2], yc[idx, 3]\n", - " if _x2 != 0 and abs(_x2) < 0.2 and abs(_y2) < 0.2:\n", - " plt.plot([_x1, _x2], [_y1, _y2], color=\"gray\")\n", - " if _x3 != 0 and abs(_x3) < 0.2 and abs(_y3) < 0.2:\n", - " plt.plot([_x1, _x3], [_y1, _y3], color=\"gray\")\n", - " \n", - "plt.xlim(-0.2, 0.2)\n", - "plt.ylim(-0.2, 0.2)\n", - "plt.xlabel(\"eta\")\n", - "plt.ylabel(\"phi\")\n", - "plt.legend(loc=(1.01,0.1))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.legend?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/old/tensorflow-model.ipynb b/notebooks/old/tensorflow-model.ipynb deleted file mode 100644 index a0282a6d4..000000000 --- a/notebooks/old/tensorflow-model.ipynb +++ /dev/null @@ -1,438 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"\n", - "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", - "\n", - "import pickle\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "from sklearn.metrics import confusion_matrix, accuracy_score" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import keras\n", - "import tensorflow as tf" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from keras.layers import Input, Dense\n", - "from keras.models import Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "elem_labels = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0]\n", - "class_labels = [0., -211., -13., -11., 1., 2., 11.0, 13., 22., 130., 211.]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Xs = []\n", - "ys = []\n", - "for iev in range(1, 60):\n", - " for ifile in range(10):\n", - " data = pickle.load(open(\"../data/TTbar_14TeV_TuneCUETP8M1_cfi/raw/pfntuple_{}_{}.pkl\".format(iev, ifile), \"rb\"), encoding='iso-8859-1')\n", - " Xelem = data[0][\"Xelem\"]\n", - " ygen = data[0][\"ygen\"]\n", - " Xelem[:, 0] = [int(elem_labels.index(i)) for i in Xelem[:, 0]]\n", - " ygen[:, 0] = [int(class_labels.index(i)) for i in ygen[:, 0]]\n", - " Xs += [Xelem.copy()]\n", - " ys += [ygen.copy()]\n", - " del data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Xs_training = Xs[:500]\n", - "ys_training = ys[:500]\n", - "\n", - "Xs_testing = Xs[500:]\n", - "ys_testing = ys[500:]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def dist(A,B):\n", - " na = 
tf.reduce_sum(tf.square(A), 1)\n", - " nb = tf.reduce_sum(tf.square(B), 1)\n", - "\n", - " na = tf.reshape(na, [-1, 1])\n", - " nb = tf.reshape(nb, [1, -1])\n", - " D = tf.sqrt(tf.maximum(na - 2*tf.matmul(A, B, False, True) + nb, 0.0))\n", - " return D" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class InputEncoding(tf.keras.layers.Layer):\n", - " def __init__(self, num_input_classes):\n", - " super(InputEncoding, self).__init__()\n", - " self.num_input_classes = num_input_classes\n", - " \n", - " def call(self, X):\n", - " Xid = tf.one_hot(tf.cast(X[:, 0], tf.int32), self.num_input_classes)\n", - " Xprop = X[:, 1:]\n", - " return tf.concat([Xid, Xprop], axis=-1)\n", - " \n", - "class Distance(tf.keras.layers.Layer):\n", - "\n", - " def __init__(self, *args, **kwargs):\n", - " super(Distance, self).__init__(*args, **kwargs)\n", - "\n", - " def call(self, inputs):\n", - " \n", - " #compute the pairwise distance matrix between the vectors defined by the first two components of the input array\n", - " D = dist(inputs[:, :2], inputs[:, :2])\n", - " \n", - " #closer nodes have higher weight, could also consider exp(-D) or such here\n", - " D = tf.math.divide_no_nan(1.0, D)\n", - " \n", - " #turn edges on or off based on activation with an arbitrary shift parameter\n", - " D = tf.keras.activations.sigmoid(D - 5.0)\n", - " \n", - " #keep only upper triangular matrix (unidirectional edges)\n", - " D = tf.linalg.band_part(D, 0, -1)\n", - " return D\n", - " \n", - "class GraphConv(tf.keras.layers.Dense):\n", - " def __init__(self, *args, **kwargs):\n", - " super(GraphConv, self).__init__(*args, **kwargs)\n", - " \n", - " def call(self, inputs, adj):\n", - " W = self.weights[0]\n", - " b = self.weights[1]\n", - " support = tf.matmul(inputs, W) + b\n", - " out = tf.matmul(adj, support)\n", - " return self.activation(out)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class PFNet(tf.keras.Model):\n", - " \n", - " def __init__(self, activation=tf.keras.activations.relu, hidden_dim=256):\n", - " super(PFNet, self).__init__()\n", - " self.enc = InputEncoding(len(elem_labels))\n", - " self.layer_input1 = tf.keras.layers.Dense(hidden_dim, activation=activation, name=\"input1\")\n", - " self.layer_input2 = tf.keras.layers.Dense(hidden_dim, activation=activation, name=\"input2\")\n", - " self.layer_input3 = tf.keras.layers.Dense(hidden_dim, activation=activation, name=\"input3\")\n", - " \n", - " self.layer_dist = Distance(name=\"distance\")\n", - " self.layer_conv = GraphConv(hidden_dim, activation=activation, name=\"conv\")\n", - " \n", - " self.layer_id1 = tf.keras.layers.Dense(hidden_dim, activation=activation, name=\"id1\")\n", - " self.layer_id2 = tf.keras.layers.Dense(hidden_dim, activation=activation, name=\"id2\")\n", - " self.layer_id3 = tf.keras.layers.Dense(hidden_dim, activation=activation, name=\"id3\")\n", - " self.layer_id = tf.keras.layers.Dense(len(class_labels), activation=\"linear\", name=\"out_id\")\n", - " \n", - " self.layer_momentum1 = tf.keras.layers.Dense(hidden_dim, activation=activation, name=\"momentum1\")\n", - " self.layer_momentum2 = tf.keras.layers.Dense(hidden_dim, activation=activation, name=\"momentum2\")\n", - " self.layer_momentum3 = tf.keras.layers.Dense(hidden_dim, activation=activation, name=\"momentum3\")\n", - " self.layer_momentum = tf.keras.layers.Dense(3, activation=\"linear\", name=\"out_momentum\")\n", - " \n", - " def 
call(self, inputs):\n", - " x = self.enc(inputs)\n", - " x = self.layer_input1(x)\n", - " x = self.layer_input2(x)\n", - " x = self.layer_input3(x)\n", - " \n", - " dm = self.layer_dist(x)\n", - " x = self.layer_conv(x, dm)\n", - " \n", - " a = self.layer_id1(x)\n", - " a = self.layer_id2(a)\n", - " a = self.layer_id3(a)\n", - " out_id = self.layer_id(a)\n", - " \n", - " b = self.layer_momentum1(x)\n", - " b = self.layer_momentum2(b)\n", - " b = self.layer_momentum3(b)\n", - " out_momentum = self.layer_momentum(b)\n", - " \n", - " return out_id, out_momentum, dm" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model = PFNet(hidden_dim=256)\n", - "opt = tf.keras.optimizers.Adam(lr=0.001)\n", - "ret = model(Xs[0]);" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.hist(ret[2].numpy().flatten(), bins=np.linspace(0,1,100));" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def loss(model, inputs, targets, epoch, training):\n", - " pred_id, pred_momentum, _ = model(inputs)\n", - " pred_inds = tf.argmax(pred_id, axis=-1)\n", - " #mask_correct = (pred_inds==targets[:, 0])\n", - " l1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(\n", - " tf.one_hot(tf.cast(targets[:, 0], tf.int32), depth=len(class_labels)), pred_id))\n", - " l0 = 0*tf.reduce_mean(tf.keras.losses.mse(targets[:, 1:4], pred_momentum[:]))\n", - " return l1 + l0" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def grad(model, inputs, targets, epoch):\n", - " with tf.GradientTape() as tape:\n", - " loss_value = loss(model, inputs, targets, epoch, training=True)\n", - " return loss_value, tape.gradient(loss_value, model.trainable_variables)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "accum_vars = [tf.Variable(tf.zeros_like(tv.initialized_value()), trainable=False) for tv in model.trainable_variables]\n", - "\n", - "for i in range(10):\n", - " loss_tot = 0.0\n", - " \n", - " ibatch = 0\n", - " \n", - " true_ids = []\n", - " pred_ids = []\n", - " \n", - " for Xelem, ygen in zip(Xs_training, ys_training):\n", - " loss_value, grads = grad(model, Xelem, ygen, i)\n", - " for igrad, gv in enumerate(grads):\n", - " accum_vars[igrad].assign_add(gv)\n", - " \n", - " loss_tot += loss_value.numpy()\n", - " if ibatch == 5:\n", - " opt.apply_gradients([(accum_vars[igrad] / 5, model.trainable_variables[igrad]) for igrad in range(len(accum_vars))])\n", - " ibatch = 0\n", - " for igrad in range(len(accum_vars)):\n", - " accum_vars[igrad].assign(tf.zeros_like(accum_vars[igrad]))\n", - "\n", - " pred_id, pred_momentum, dm = model(Xelem)\n", - " pred_ids += [tf.argmax(pred_id, axis=-1).numpy()]\n", - " true_ids += [ygen[:, 0]]\n", - " ibatch += 1\n", - " true_ids = np.concatenate(true_ids)\n", - " pred_ids = np.concatenate(pred_ids)\n", - " \n", - " true_ids_testing = []\n", - " pred_ids_testing = []\n", - " loss_tot_testing = 0.0\n", - " for Xelem, ygen in zip(Xs_testing, ys_testing):\n", - " pred_id, pred_momentum, _ = model(Xelem)\n", - " true_ids_testing += [ygen[:, 0]]\n", - " pred_ids_testing += [tf.argmax(pred_id, axis=-1).numpy()]\n", - " true_ids_testing = np.concatenate(true_ids_testing)\n", - " pred_ids_testing = np.concatenate(pred_ids_testing)\n", - "\n", - " acc = accuracy_score(true_ids, 
pred_ids)\n", - " acc_testing = accuracy_score(true_ids_testing, pred_ids_testing)\n", - " print(\"epoch={epoch} loss={loss:.2f} acc={acc:.4f}/{acc_testing:.4f}\".format(epoch=i, loss=loss_tot, acc=acc, acc_testing=acc_testing))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_confusion_matrix(cm,\n", - " target_names,\n", - " title='Confusion matrix',\n", - " cmap=None,\n", - " normalize=True):\n", - " \"\"\"\n", - " given a sklearn confusion matrix (cm), make a nice plot\n", - "\n", - " Arguments\n", - " ---------\n", - " cm: confusion matrix from sklearn.metrics.confusion_matrix\n", - "\n", - " target_names: given classification classes such as [0, 1, 2]\n", - " the class names, for example: ['high', 'medium', 'low']\n", - "\n", - " title: the text to display at the top of the matrix\n", - "\n", - " cmap: the gradient of the values displayed from matplotlib.pyplot.cm\n", - " see http://matplotlib.org/examples/color/colormaps_reference.html\n", - " plt.get_cmap('jet') or plt.cm.Blues\n", - "\n", - " normalize: If False, plot the raw numbers\n", - " If True, plot the proportions\n", - "\n", - " Usage\n", - " -----\n", - " plot_confusion_matrix(cm = cm, # confusion matrix created by\n", - " # sklearn.metrics.confusion_matrix\n", - " normalize = True, # show proportions\n", - " target_names = y_labels_vals, # list of names of the classes\n", - " title = best_estimator_name) # title of graph\n", - "\n", - " Citiation\n", - " ---------\n", - " http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html\n", - "\n", - " \"\"\"\n", - " import matplotlib.pyplot as plt\n", - " import numpy as np\n", - " import itertools\n", - "\n", - " accuracy = np.trace(cm) / float(np.sum(cm))\n", - " misclass = 1 - accuracy\n", - "\n", - " if cmap is None:\n", - " cmap = plt.get_cmap('Blues')\n", - "\n", - " if normalize:\n", - " cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", - " cm[np.isnan(cm)] = 0.0\n", - "\n", - " plt.figure(figsize=(8, 6))\n", - " plt.imshow(cm, interpolation='nearest', cmap=cmap)\n", - " plt.title(title)\n", - " plt.colorbar()\n", - "\n", - " if target_names is not None:\n", - " tick_marks = np.arange(len(target_names))\n", - " plt.xticks(tick_marks, target_names, rotation=45)\n", - " plt.yticks(tick_marks, target_names)\n", - "\n", - " thresh = cm.max() / 1.5 if normalize else cm.max() / 2\n", - " for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n", - " if normalize:\n", - " plt.text(j, i, \"{:0.2f}\".format(cm[i, j]),\n", - " horizontalalignment=\"center\",\n", - " color=\"white\" if cm[i, j] > thresh else \"black\")\n", - " else:\n", - " plt.text(j, i, \"{:,}\".format(cm[i, j]),\n", - " horizontalalignment=\"center\",\n", - " color=\"white\" if cm[i, j] > thresh else \"black\")\n", - "\n", - "\n", - " plt.ylabel('True label')\n", - " plt.xlim(-1, len(target_names))\n", - " plt.ylim(-1, len(target_names))\n", - " plt.xlabel('Predicted label\\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))\n", - " plt.tight_layout()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "true_ids_testing = np.array(true_ids_testing)\n", - "pred_ids_testing = np.array(pred_ids_testing)\n", - "msk = (true_ids_testing!=0) & (pred_ids_testing!=0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cm = 
confusion_matrix(true_ids_testing[msk], pred_ids_testing[msk], range(1,len(class_labels)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_confusion_matrix(cm, class_labels[1:], normalize=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/old/test_delphes.ipynb b/notebooks/old/test_delphes.ipynb deleted file mode 100644 index 8cee079ba..000000000 --- a/notebooks/old/test_delphes.ipynb +++ /dev/null @@ -1,705 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "os.chdir(\"../delphes\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "from torch_geometric.data import Dataset, DataLoader\n", - "import train\n", - "from sklearn.metrics import confusion_matrix\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_confusion_matrix(cm,\n", - " target_names,\n", - " title='Confusion matrix',\n", - " cmap=None,\n", - " normalize=True):\n", - " \"\"\"\n", - " given a sklearn confusion matrix (cm), make a nice plot\n", - "\n", - " Arguments\n", - " ---------\n", - " cm: confusion matrix from sklearn.metrics.confusion_matrix\n", - "\n", - " target_names: given classification classes such as [0, 1, 2]\n", - " the class names, for example: ['high', 'medium', 'low']\n", - "\n", - " title: the text to display at the top of the matrix\n", - "\n", - " cmap: the gradient of the values displayed from matplotlib.pyplot.cm\n", - " see http://matplotlib.org/examples/color/colormaps_reference.html\n", - " plt.get_cmap('jet') or plt.cm.Blues\n", - "\n", - " normalize: If False, plot the raw numbers\n", - " If True, plot the proportions\n", - "\n", - " Usage\n", - " -----\n", - " plot_confusion_matrix(cm = cm, # confusion matrix created by\n", - " # sklearn.metrics.confusion_matrix\n", - " normalize = True, # show proportions\n", - " target_names = y_labels_vals, # list of names of the classes\n", - " title = best_estimator_name) # title of graph\n", - "\n", - " Citiation\n", - " ---------\n", - " http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html\n", - "\n", - " \"\"\"\n", - " import matplotlib.pyplot as plt\n", - " import numpy as np\n", - " import itertools\n", - "\n", - " accuracy = np.trace(cm) / float(np.sum(cm))\n", - " misclass = 1 - accuracy\n", - "\n", - " if cmap is None:\n", - " cmap = plt.get_cmap('Blues')\n", - "\n", - " if normalize:\n", - " cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", - " cm[np.isnan(cm)] = 0.0\n", - "\n", - " plt.figure(figsize=(8, 6))\n", - " plt.imshow(cm, interpolation='nearest', cmap=cmap)\n", - " plt.title(title)\n", - " plt.colorbar()\n", - "\n", - " if target_names is not None:\n", - " tick_marks = np.arange(len(target_names))\n", - " 
plt.xticks(tick_marks, target_names, rotation=45)\n", - " plt.yticks(tick_marks, target_names)\n", - "\n", - " thresh = cm.max() / 1.5 if normalize else cm.max() / 2\n", - " for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n", - " if normalize:\n", - " plt.text(j, i, \"{:0.2f}\".format(cm[i, j]),\n", - " horizontalalignment=\"center\",\n", - " color=\"white\" if cm[i, j] > thresh else \"black\")\n", - " else:\n", - " plt.text(j, i, \"{:,}\".format(cm[i, j]),\n", - " horizontalalignment=\"center\",\n", - " color=\"white\" if cm[i, j] > thresh else \"black\")\n", - "\n", - "\n", - " plt.ylabel('True label')\n", - " plt.xlim(-1, len(target_names))\n", - " plt.ylim(-1, len(target_names))\n", - " plt.xlabel('Predicted label\\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))\n", - " plt.tight_layout()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "device = torch.device(\"cuda\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ds = train.DelphesDataset(\".\", 5000)\n", - "ds.raw_dir = \"raw2\"\n", - "ds.processed_dir = \"processed2\"\n", - "ds = torch.utils.data.Subset(ds, np.arange(start=4000, stop=5000))\n", - "d = DataLoader(ds, batch_size=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model = train.PFNet(10, 512).to(device=device)\n", - "model.load_state_dict(torch.load(\"model_20.pth\"))\n", - "model.eval()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_parameters = filter(lambda p: p.requires_grad, model.parameters())\n", - "params = sum([np.prod(p.size()) for p in model_parameters])\n", - "print(\"model has {:.2E} parameters\".format(params))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cms = []\n", - "cms2 = []\n", - "pred_ps = []\n", - "true_ps = []\n", - "pred_ps2 = []\n", - "true_ps2 = []\n", - "ncand_true = []\n", - "ncand_pred = []\n", - "\n", - "for _d in d:\n", - " _d = _d.to(device=device)\n", - " \n", - " cands_tower_id, cands_trk_id, cands_tower_p, cands_trk_p = model(_d)\n", - " \n", - " cands_tower_p[:, 0] = torch.exp(cands_tower_p[:, 0])\n", - " cands_trk_p[:, 0] = torch.exp(cands_trk_p[:, 0])\n", - " \n", - " pred_ids_tower = model.decode_ids(cands_tower_id)\n", - " true_ids_tower = _d.y_tower[:, 0]\n", - " \n", - " pred_ids_trk = model.decode_ids(cands_trk_id)\n", - " true_ids_trk = _d.y_trk[:, 0]\n", - " \n", - "# y_id_pred = torch.argmax(_pred_id, axis=-1)\n", - "# _pred_p[y_id_pred!=0, 0] = torch.exp(_pred_p[y_id_pred!=0, 0])\n", - "\n", - " cm = confusion_matrix(true_ids_trk.flatten().cpu(), pred_ids_trk.flatten().detach().cpu(),\n", - " labels=range(len(train.map_candid_to_numid)))\n", - " cm2 = confusion_matrix(true_ids_tower.flatten().cpu(), pred_ids_tower.flatten().detach().cpu(),\n", - " labels=range(len(train.map_candid_to_numid)))\n", - " \n", - " msk = (pred_ids_tower!=0) & (true_ids_tower!=0)\n", - " pred_ps += cands_tower_p[msk].detach().cpu()\n", - " true_ps += _d.y_tower[msk][:, 1:].detach().cpu()\n", - " \n", - " msk = (pred_ids_trk!=0) & (true_ids_trk!=0)\n", - " pred_ps2 += cands_trk_p[msk].detach().cpu()\n", - " true_ps2 += _d.y_trk[msk][:, 1:].detach().cpu()\n", - " \n", - " cms += [cm]\n", - " cms2 += [cm2]\n", - " \n", - " ncand_true += [int((true_ids_tower!=0).sum() 
+ (true_ids_trk!=0).sum())]\n", - " ncand_pred += [int((pred_ids_tower!=0).sum() + (pred_ids_trk!=0).sum())]\n", - "cm = sum(cms)\n", - "cm2 = sum(cms2)\n", - "pred_p = np.stack(pred_ps)\n", - "true_p = np.stack(true_ps)\n", - "pred_p2 = np.stack(pred_ps2)\n", - "true_p2 = np.stack(true_ps2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import networkx as nx\n", - "import torch_geometric" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adj_matrix = torch_geometric.utils.to_dense_adj(_d.edge_index).cpu().numpy()[0, :, :]\n", - "x = _d.x.cpu().numpy()\n", - "# y_p = _d.y_tower.cpu().numpy()\n", - "# y_p_pred = _pred_p[(y_id_pred!=0).cpu().numpy(), :].detach().cpu().numpy()\n", - "\n", - "# y_id = _d.y_id.cpu().numpy()\n", - "\n", - "colors = {\n", - " 0: \"r\",\n", - " 1: \"b\",\n", - "}\n", - "\n", - "g = nx.from_numpy_matrix(adj_matrix)\n", - "for i in range(len(x)):\n", - " g.nodes[i][\"color\"] = colors[x[i, 0]]\n", - " g.nodes[i][\"s\"] = np.abs(x[i, 4] + x[i, 3])\n", - " if x[i, 0] == 0:\n", - " g.nodes[i][\"eta\"] = x[i, 1]\n", - " g.nodes[i][\"phi\"] = x[i, 2]\n", - " elif x[i, 0] == 1:\n", - " g.nodes[i][\"eta\"] = x[i, 5]\n", - " g.nodes[i][\"phi\"] = x[i, 6]\n", - " \n", - "pos = {i: (g.nodes[i][\"eta\"], g.nodes[i][\"phi\"]) for i in range(len(g))}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10, 10))\n", - "plt.scatter(\n", - " x[:, 1],\n", - " x[:, 2],\n", - " s=np.abs(x[:, 4] + x[:, 3]),\n", - " label=\"det\")\n", - "\n", - "m = _d.y_tower[:, 0] != 0\n", - "plt.scatter(\n", - " _d.y_tower[m][:, 2].cpu(),\n", - " _d.y_tower[m][:, 3].cpu(),\n", - " s=_d.y_tower[m][:, 1].cpu(),\n", - " label=\"gen\")\n", - "\n", - "m = _d.y_trk[:, 0] != 0\n", - "plt.scatter(\n", - " _d.y_trk[m][:, 2].cpu(),\n", - " _d.y_trk[m][:, 3].cpu(),\n", - " s=_d.y_trk[m][:, 1].cpu(),\n", - " label=\"gen-trk\")\n", - "\n", - "plt.legend()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = plt.figure(figsize=(10, 10))\n", - "plt.title(\"input graph\")\n", - "edges = np.array(list(g.edges))\n", - "edges = list(edges[np.random.permutation(len(edges))][:500])\n", - "\n", - "nx.draw_networkx(g, pos,\n", - " node_size=[g.nodes[n][\"s\"] for n in g.nodes],\n", - " node_color=[g.nodes[n][\"color\"] for n in g.nodes],\n", - " with_labels=False, alpha=0.2, edgelist=edges)\n", - "plt.tick_params(left=True, bottom=True, labelleft=True, labelbottom=True)\n", - "plt.xlim(-6, 6)\n", - "plt.ylim(-4, 4)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(3*4, 4))\n", - "\n", - "ax = plt.subplot(1,3,1)\n", - "plt.title(\"input graph\")\n", - "nx.draw_networkx(g, pos,\n", - " node_size=[g.nodes[n][\"s\"] for n in g.nodes],\n", - " node_color=[g.nodes[n][\"color\"] for n in g.nodes],\n", - " with_labels=False, alpha=0.2, ax=ax)\n", - "ax.tick_params(left=True, bottom=True, labelleft=True, labelbottom=True)\n", - "plt.xlim(-6, 6)\n", - "plt.ylim(-4, 4)\n", - "\n", - "plt.subplot(1,3,2)\n", - "m = _d.y_tower[:, 0] != 0\n", - "plt.scatter(\n", - " _d.y_tower[m][:, 2].cpu(),\n", - " _d.y_tower[m][:, 3].cpu(),\n", - " s=_d.y_tower[m][:, 1].cpu(),\n", - " label=\"gen-tower\")\n", - "\n", - "m = pred_ids_tower != 0\n", - "plt.scatter(\n", - " 
cands_tower_p[m][:, 1].detach().cpu(),\n", - " cands_tower_p[m][:, 2].detach().cpu(),\n", - " s=cands_tower_p[m][:, 0].detach().cpu(), alpha=0.5, label=\"pred\")\n", - "plt.xlim(-6, 6)\n", - "plt.ylim(-4, 4)\n", - "plt.tight_layout()\n", - "plt.legend(loc=1)\n", - "\n", - "plt.subplot(1,3,3)\n", - "m = _d.y_trk[:, 0] != 0\n", - "plt.scatter(\n", - " _d.y_trk[m][:, 2].cpu(),\n", - " _d.y_trk[m][:, 3].cpu(),\n", - " s=_d.y_trk[m][:, 1].cpu(),\n", - " label=\"gen-trk\")\n", - "\n", - "m = pred_ids_trk != 0\n", - "plt.scatter(\n", - " cands_trk_p[m][:, 1].detach().cpu(),\n", - " cands_trk_p[m][:, 2].detach().cpu(),\n", - " s=cands_trk_p[m][:, 0].detach().cpu(), alpha=0.5, label=\"pred\")\n", - "plt.xlim(-6, 6)\n", - "plt.ylim(-4, 4)\n", - "plt.tight_layout()\n", - "plt.legend(loc=1)\n", - "\n", - "plt.savefig(\"test.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "target_names = [\"none\", \"211\", \"-211\", \"130\", \"22\", \"11\", \"-11\", \"13\", \"-13\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_confusion_matrix(cm, target_names, normalize=False)\n", - "plt.xlim(-0.5, cm.shape[0]-0.5)\n", - "plt.ylim(-0.5, cm.shape[0]-0.5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_confusion_matrix(cm2, target_names, normalize=False)\n", - "plt.xlim(-0.5, cm.shape[0]-0.5)\n", - "plt.ylim(-0.5, cm.shape[0]-0.5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(5,5))\n", - "plt.xlim(0, 3000)\n", - "plt.ylim(0, 3000)\n", - "plt.plot([0,5000],[0,5000], color=\"black\")\n", - "plt.scatter(ncand_true, ncand_pred, marker=\".\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(6, 4))\n", - "b = np.linspace(0, 10, 100)\n", - "plt.hist(true_p[:, 0], bins=b, histtype=\"step\", lw=2);\n", - "plt.hist(pred_p[:, 0], bins=b, histtype=\"step\", lw=2);\n", - "plt.yscale(\"log\")\n", - "#plt.ylim(100,1e4)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(6, 4))\n", - "b = np.linspace(0, 10, 100)\n", - "plt.hist(true_p2[:, 0], bins=b, histtype=\"step\", lw=2);\n", - "plt.hist(pred_p2[:, 0], bins=b, histtype=\"step\", lw=2);\n", - "plt.yscale(\"log\")\n", - "#plt.ylim(100,1e4)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(6, 4))\n", - "b = np.linspace(0, 100, 100)\n", - "plt.hist(true_p[:, 0], bins=b, histtype=\"step\", lw=2);\n", - "plt.hist(pred_p[:, 0], bins=b, histtype=\"step\", lw=2);\n", - "plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(6, 4))\n", - "b = np.linspace(0, 100, 100)\n", - "plt.hist(true_p2[:, 0], bins=b, histtype=\"step\", lw=2);\n", - "plt.hist(pred_p2[:, 0], bins=b, histtype=\"step\", lw=2);\n", - "plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(6, 4))\n", - "b = np.linspace(-6, 6, 100)\n", - "plt.hist(true_p[:, 1], bins=b, histtype=\"step\", lw=2);\n", - "plt.hist(pred_p[:, 1], bins=b, histtype=\"step\", lw=2);" - ] - }, - 
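Note on the deleted test_delphes notebook above and below: it repeats a single comparison pattern once per kinematic component (momentum, eta, phi; separately for towers and tracks): overlaid step histograms of the matched true and predicted values, sometimes with a log y-axis. A minimal, self-contained sketch of that pattern is given here, with toy arrays standing in for the notebook's matched true_p / pred_p columns; the helper name compare_true_pred is illustrative and not from the repository.

import numpy as np
import matplotlib.pyplot as plt

def compare_true_pred(true_vals, pred_vals, bins, xlabel, log=False):
    # Overlay the matched true and predicted distributions as step histograms,
    # mirroring the per-component comparison cells in the deleted notebook.
    plt.figure(figsize=(6, 4))
    plt.hist(true_vals, bins=bins, histtype="step", lw=2, label="true")
    plt.hist(pred_vals, bins=bins, histtype="step", lw=2, label="predicted")
    if log:
        plt.yscale("log")
    plt.xlabel(xlabel)
    plt.ylabel("matched particles")
    plt.legend()

# Toy stand-ins for one matched (true, predicted) column pair.
rng = np.random.default_rng(0)
true_vals = rng.normal(0.0, 2.0, 10000)
pred_vals = true_vals + rng.normal(0.0, 0.3, 10000)
compare_true_pred(true_vals, pred_vals, np.linspace(-6, 6, 100), r"$\eta$")

Collapsing the repeated cells into one helper like this keeps the binning and labels consistent across components, which is the main readability cost of the cell-per-plot style being removed in this diff.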
{ - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(6, 4))\n", - "b = np.linspace(-6, 6, 100)\n", - "plt.hist(true_p2[:, 1], bins=b, histtype=\"step\", lw=2);\n", - "plt.hist(pred_p2[:, 1], bins=b, histtype=\"step\", lw=2);" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(6, 4))\n", - "b = np.linspace(-4, 4, 100)\n", - "plt.hist(true_p[:, 2], bins=b, histtype=\"step\", lw=2);\n", - "plt.hist(pred_p[:, 2], bins=b, histtype=\"step\", lw=2);" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(6, 4))\n", - "b = np.linspace(-4, 4, 100)\n", - "plt.hist(true_p2[:, 2], bins=b, histtype=\"step\", lw=2);\n", - "plt.hist(pred_p2[:, 2], bins=b, histtype=\"step\", lw=2);" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "rp = np.random.permutation(len(pred_p))[:5000]\n", - "rp2 = np.random.permutation(len(pred_p2))[:5000]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.corrcoef(true_p[:, 0], pred_p[:, 0])[0,1]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.corrcoef(true_p2[:, 0], pred_p2[:, 0])[0,1]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10,10))\n", - "plt.scatter(true_p[rp, 0], pred_p[rp, 0], marker=\".\", alpha=0.5)\n", - "plt.xlim(0, 10)\n", - "plt.ylim(0, 10)\n", - "plt.plot([0, 10], [0, 10], color=\"black\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10,10))\n", - "plt.scatter(true_p[rp, 0], pred_p[rp, 0], marker=\".\", alpha=0.5)\n", - "plt.xlim(0.1, 1000)\n", - "plt.ylim(0.1, 1000)\n", - "plt.plot([0.1, 1000], [0.1, 1000], color=\"black\")\n", - "plt.xscale(\"log\")\n", - "plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10,10))\n", - "plt.scatter(true_p2[rp2, 0], pred_p2[rp2, 0], marker=\".\", alpha=0.5)\n", - "plt.xlim(0.1, 1000)\n", - "plt.ylim(0.1, 1000)\n", - "plt.plot([0.1, 1000], [0.1, 1000], color=\"black\")\n", - "plt.xscale(\"log\")\n", - "plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10,10))\n", - "plt.scatter(true_p[rp, 1], pred_p[rp, 1], marker=\".\", alpha=0.5)\n", - "plt.plot([-7, 7], [-7, 7], color=\"black\")\n", - "plt.xlim(-7,7)\n", - "plt.ylim(-7,7)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10,10))\n", - "plt.scatter(true_p[rp, 2], pred_p[rp, 2], marker=\".\", alpha=0.5)\n", - "plt.plot([-4, 4], [-4, 4], color=\"black\")\n", - "plt.xlim(-4,4)\n", - "plt.ylim(-4,4)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "msk = _d.x[:, 0] == 1\n", - "plt.figure(figsize=(4,4))\n", - "plt.scatter(_d.x[msk, 1].cpu(), _d.y_trk[:, 2].cpu(), marker=\".\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - 
"plt.figure(figsize=(4,4))\n", - "plt.scatter(_d.x[msk, 2].cpu(), _d.y_trk[:, 3].cpu(), marker=\".\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "msk = _d.x[:, 0] == 0\n", - "plt.figure(figsize=(4,4))\n", - "plt.scatter(_d.x[msk, 1].cpu(), _d.y_tower[:, 2].cpu(), marker=\".\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "msk = _d.x[:, 0] == 0\n", - "plt.figure(figsize=(4,4))\n", - "plt.scatter(_d.x[msk, 2].cpu(), _d.y_tower[:, 3].cpu(), marker=\".\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "msk = _d.x[:, 0] == 0\n", - "plt.figure(figsize=(4,4))\n", - "plt.scatter(_d.x[msk, 3].cpu() + _d.x[msk, 4].cpu(), _d.y_tower[:, 1].cpu(), marker=\".\")\n", - "plt.xscale(\"log\")\n", - "plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "msk = _d.x[:, 0] == 1\n", - "plt.figure(figsize=(4,4))\n", - "plt.scatter(_d.x[msk, 4].cpu(), _d.y_trk[:, 1].cpu(), marker=\".\")\n", - "plt.xscale(\"log\")\n", - "plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/old/test_end2end.ipynb b/notebooks/old/test_end2end.ipynb deleted file mode 100644 index 8ae7950b9..000000000 --- a/notebooks/old/test_end2end.ipynb +++ /dev/null @@ -1,1257 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import sklearn\n", - "import sklearn.metrics\n", - "\n", - "import numpy as np\n", - "import matplotlib\n", - "import matplotlib.pyplot as plt\n", - "import pandas\n", - "import mplhep\n", - "\n", - "import sys\n", - "sys.path += [\"../test\"]\n", - "\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from tf_model import class_labels" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def cms_label(x0=0.12, x1=0.23, x2=0.67, y=0.90):\n", - " plt.figtext(x0, y,'CMS',fontweight='bold', wrap=True, horizontalalignment='left', fontsize=12)\n", - " plt.figtext(x1, y,'Simulation Preliminary', style='italic', wrap=True, horizontalalignment='left', fontsize=10)\n", - " plt.figtext(x2, y,'Run 3 (14 TeV)', wrap=True, horizontalalignment='left', fontsize=10)\n", - "\n", - "def sample_label(ax, y=0.98):\n", - " plt.text(0.03, y, \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\", va=\"top\", ha=\"left\", size=10, transform=ax.transAxes)\n", - " \n", - "pid_to_text = {\n", - " 211: r\"charged hadrons ($\\pi^-$, ...)\",\n", - " -211: r\"charged hadrons ($\\pi^+$, ...)\",\n", - " 130: r\"neutral hadrons (K, ...)\",\n", - " 1: r\"HF hadron (EM)\",\n", - " 2: r\"HF-HAD hadron (HAD)\",\n", - "}\n", - "def particle_label(ax, pid):\n", - " plt.text(0.03, 0.92, pid_to_text[pid], va=\"top\", ha=\"left\", size=10, 
transform=ax.transAxes)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_confusion_matrix(cm,\n", - " target_names,\n", - " title='Confusion matrix',\n", - " cmap=None,\n", - " normalize=True):\n", - " import matplotlib.pyplot as plt\n", - " import numpy as np\n", - " import itertools\n", - "\n", - " accuracy = np.trace(cm) / float(np.sum(cm))\n", - " misclass = 1 - accuracy\n", - "\n", - " if cmap is None:\n", - " cmap = plt.get_cmap('Blues')\n", - "\n", - " if normalize:\n", - " cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", - " cm[np.isnan(cm)] = 0.0\n", - "\n", - " fig = plt.figure(figsize=(8, 6))\n", - " ax = plt.axes()\n", - " plt.imshow(cm, interpolation='nearest', cmap=cmap)\n", - " plt.title(title)\n", - " plt.colorbar()\n", - "\n", - " if target_names is not None:\n", - " tick_marks = np.arange(len(target_names))\n", - " plt.xticks(tick_marks, target_names, rotation=45)\n", - " plt.yticks(tick_marks, target_names)\n", - "\n", - " thresh = cm.max() / 1.5 if normalize else cm.max() / 2\n", - " for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n", - " if normalize:\n", - " plt.text(j, i, \"{:0.2f}\".format(cm[i, j]),\n", - " horizontalalignment=\"center\",\n", - " color=\"white\" if cm[i, j] > thresh else \"black\")\n", - " else:\n", - " plt.text(j, i, \"{:0.1f}\".format(cm[i, j]),\n", - " horizontalalignment=\"center\",\n", - " color=\"white\" if cm[i, j] > thresh else \"black\")\n", - "\n", - "\n", - " plt.ylabel('True label')\n", - " plt.xlim(-1, len(target_names))\n", - " plt.ylim(-1, len(target_names))\n", - " plt.xlabel('Predicted label\\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))\n", - " plt.tight_layout()\n", - " \n", - " return fig, ax" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!rm *.pdf" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def deltaphi(phi1, phi2):\n", - " return np.fmod(phi1 - phi2 + np.pi, 2*np.pi) - np.pi" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# model = \"PFNet7_TTbar_14TeV_TuneCUETP8M1_cfi_gen__npar_3440145__cfg_4ff944b45d__user_jpata__ntrain_3600__lr_1e-05__1587059029\"\n", - "# epoch = 45\n", - "# big_df = pandas.read_pickle(\"../data/{}/epoch_{}/df.pkl.bz2\".format(model, epoch))\n", - "# #big_df = pandas.read_pickle(\"../test/TTbar_14TeV_TuneCUETP8M1_cfi.pkl.bz2\")\n", - "\n", - "big_df = pandas.read_pickle(\"/storage/user/jpata/particleflow/experiments/run_1/df_1.pkl.bz2\")\n", - "big_df[\"pred_phi\"] = np.arctan2(np.sin(big_df[\"pred_phi\"]), np.cos(big_df[\"pred_phi\"]))\n", - "\n", - "# big_df2 = pandas.read_pickle(\"../experiments/run_2/df_1.pkl.bz2\")\n", - "# big_df2[\"pred_phi\"] = np.arctan2(np.sin(big_df2[\"pred_phi\"]), np.cos(big_df2[\"pred_phi\"]))\n", - "\n", - "targettype = \"cand\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "big_df.loc[big_df[\"pred_pid\"]==211, [\"target_e\", \"pred_e\"]]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#msk = (big_df[\"target_pid\"] != 0) & ((big_df[\"pred_pid\"] != 0))\n", - "msk = np.ones(len(big_df), dtype=np.bool)\n", - "confusion2 = sklearn.metrics.confusion_matrix(\n", - " big_df[\"target_pid\"][msk], 
big_df[\"pred_pid\"][msk],\n", - " labels=class_labels\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig, ax = plot_confusion_matrix(\n", - " cm=100.0*confusion2/np.sum(confusion2), target_names=[int(x) for x in class_labels], normalize=False\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig, ax = plot_confusion_matrix(\n", - " cm=confusion2, target_names=[int(x) for x in class_labels], normalize=True\n", - ")\n", - "\n", - "acc = sklearn.metrics.accuracy_score(big_df[\"target_pid\"][msk], big_df[\"pred_pid\"][msk])\n", - "plt.title(\"\")\n", - "#plt.title(\"ML-PF, accuracy={:.2f}\".format(acc))\n", - "plt.ylabel(\"reco PF candidate PID\\nassociated to input PFElement\")\n", - "plt.xlabel(\"predicted PID\\nML-PF candidate,\\naccuracy: {:.2f}\".format(acc))\n", - "cms_label(x0=0.20, x1=0.26, y=0.95)\n", - "sample_label(ax, y=0.995)\n", - "plt.savefig(\"confusion_mlpf.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins, counts = np.unique(big_df.loc[big_df[\"target_pid\"]!=0, \"target_pid\"], return_counts=True)\n", - "bins, counts2 = np.unique(big_df.loc[big_df[\"pred_pid\"]!=0, \"pred_pid\"], return_counts=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(4,4))\n", - "ax = plt.axes()\n", - "xs = np.arange(len(counts))\n", - "plt.bar(xs, counts/500.0, width=0.4, label=\"offline PF\")\n", - "plt.bar(xs+0.4, counts2/500.0, width=0.4, label=\"ML-PF\")\n", - "plt.xticks(xs+0.2, bins);\n", - "plt.ylabel(\"average number of particles per event\")\n", - "plt.xlabel(\"particle PID\")\n", - "plt.ylim(0,2000)\n", - "plt.legend(loc=\"best\", frameon=False)\n", - "cms_label()\n", - "sample_label(ax)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "num_cands = []\n", - "num_preds = []\n", - "\n", - "sum_e_true = []\n", - "sum_e_pred = []\n", - "\n", - "for k, v in big_df.groupby(\"iev\"):\n", - " \n", - " num_cand = 0\n", - " num_pred = 0\n", - " for pid in [211, -211, 130, 22, 11, -11, 13, -13, 1, 2]:\n", - " if pid == 0:\n", - " continue\n", - " num_cand += np.sum(v[\"target_pid\"] == pid)\n", - " num_pred += np.sum(v[\"pred_pid\"] == pid)\n", - " num_cands += [num_cand]\n", - " num_preds += [num_pred]\n", - " \n", - " sum_e_true += [np.sum(v[\"target_e\"])/len(v)]\n", - " sum_e_pred += [np.sum(v[\"pred_e\"])/len(v)]\n", - " \n", - "sum_e_true = np.array(sum_e_true)\n", - "sum_e_pred = np.array(sum_e_pred)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(4,4))\n", - "min_num = 1000\n", - "max_num = 6000\n", - "hist = np.histogram2d(num_cands, num_preds, bins=(np.linspace(min_num, max_num,100), np.linspace(min_num, max_num,100)))\n", - "mplhep.hist2dplot(hist[0], hist[1], hist[2], cbar=False, cmap=\"Blues\")\n", - "plt.plot([min_num, max_num], [min_num, max_num], ls=\"--\", lw=0.5, color=\"black\")\n", - "\n", - "#plt.xlabel(\"True hadron multiplicity $|\\eta| < 3.0$\")\n", - "#plt.ylabel(\"Predicted hadron multiplicity $|\\eta| < 3.0$\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def get_eff(df, pid):\n", - " v0 = np.sum(df==pid)\n", 
- " return v0 / len(df), np.sqrt(v0)/len(df)\n", - "\n", - "def get_fake(df, pid):\n", - " v0 = np.sum(df!=pid)\n", - " return v0 / len(df), np.sqrt(v0)/len(df)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_E_reso(pid, v0, msk_true, msk_pred, msk_both, bins):\n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " hist = np.histogram2d(v0[msk_both, 0], v0[msk_both, 1], bins=(bins[\"E_val\"], bins[\"E_val\"]))\n", - " mplhep.hist2dplot(hist[0], hist[1], hist[2], cmap=\"Blues\", cbar=False);\n", - " plt.xlabel(bins[\"true_val\"] + \" \" + bins[\"E_xlabel\"])\n", - " plt.ylabel(bins[\"pred_val\"]+ \" \" + bins[\"E_xlabel\"])\n", - " cms_label()\n", - " sample_label(ax)\n", - " particle_label(ax, pid)\n", - " plt.plot(\n", - " [bins[\"E_val\"][0], bins[\"E_val\"][-1]],\n", - " [bins[\"E_val\"][0], bins[\"E_val\"][-1]],\n", - " color=\"black\", ls=\"--\", lw=0.5)\n", - " plt.savefig(\"energy_2d_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n", - " \n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " plt.hist(v0[msk_true, 0], bins=bins[\"E_val\"], density=1.0, histtype=\"step\", lw=2, label=bins[\"true_val\"]);\n", - " plt.hist(v0[msk_pred, 1], bins=bins[\"E_val\"], density=1.0, histtype=\"step\", lw=2, label=bins[\"pred_val\"]);\n", - " plt.xlabel(bins[\"E_xlabel\"])\n", - " plt.ylabel(\"number of particles\\n(normalized, a.u.)\")\n", - " plt.legend(frameon=False)\n", - " cms_label()\n", - " sample_label(ax)\n", - " particle_label(ax, pid)\n", - " ax.set_ylim(ax.get_ylim()[0], 1.5*ax.get_ylim()[1])\n", - " plt.savefig(\"energy_hist_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n", - " \n", - " ax.set_ylim(ax.get_ylim()[0], 1.2*ax.get_ylim()[1])\n", - "\n", - " res = (v0[msk_both, 1] - v0[msk_both, 0])/v0[msk_both, 0]\n", - " res[np.isnan(res)] = -1\n", - "\n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " ax.text(0.98, 0.98, \"avg. 
$\\Delta E / E$\\n$%.2f \\pm %.2f$\"%(np.mean(res), np.std(res)), transform=ax.transAxes, ha=\"right\", va=\"top\")\n", - " plt.hist(res, bins=bins[\"E_res\"], density=1.0);\n", - " plt.xlabel(\"$\\Delta E / E$\")\n", - " plt.ylabel(\"number of particles\\n(normalized, a.u.)\")\n", - " cms_label()\n", - " sample_label(ax)\n", - " particle_label(ax, pid)\n", - " plt.savefig(\"energy_ratio_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n", - " \n", - " #efficiency vs fake rate\n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " big_df[\"bins_target_e\"] = np.searchsorted(bins[\"E_val\"], big_df[\"target_e\"])\n", - " big_df[\"bins_pred_e\"] = np.searchsorted(bins[\"E_val\"], big_df[\"pred_e\"])\n", - "\n", - " vals_eff = big_df[(big_df[\"target_pid\"]==pid)].groupby(\"bins_target_e\")[\"pred_pid\"].apply(get_eff, pid)\n", - " vals_fake = big_df[(big_df[\"pred_pid\"]==pid)].groupby(\"bins_pred_e\")[\"target_pid\"].apply(get_fake, pid)\n", - "\n", - " out_eff = np.zeros((len(bins[\"E_val\"]), 2))\n", - " out_fake = np.zeros((len(bins[\"E_val\"]), 2))\n", - " for ib in range(len(bins[\"E_val\"])):\n", - " if ib in vals_eff.keys():\n", - " out_eff[ib, 0] = vals_eff[ib][0]\n", - " out_eff[ib, 1] = vals_eff[ib][1]\n", - " if ib in vals_fake.keys():\n", - " out_fake[ib, 0] = vals_fake[ib][0]\n", - " out_fake[ib, 1] = vals_fake[ib][1]\n", - "\n", - " cms_label()\n", - " sample_label(ax)\n", - " particle_label(ax, pid)\n", - "\n", - " plt.errorbar(bins[\"E_val\"], out_eff[:, 0], out_eff[:, 1], marker=\".\", lw=0, elinewidth=1.0, color=\"green\", label=\"efficiency\")\n", - " plt.ylabel(\"efficiency\\nN(pred|true) / N(true)\")\n", - " ax.set_ylim(0, 1.5)\n", - " plt.xlabel(bins[\"E_xlabel\"])\n", - "\n", - " ax2 = ax.twinx()\n", - " col = \"red\"\n", - " plt.errorbar(bins[\"E_val\"], out_fake[:, 0], out_fake[:, 1], marker=\".\", lw=0, elinewidth=1.0, color=col, label=\"fake rate\")\n", - " plt.ylabel(\"fake rate\\nN(true|pred) / N(pred)\")\n", - " plt.xlabel(bins[\"E_xlabel\"])\n", - " ax2.set_ylim(0, 1.5)\n", - " lines, labels = ax.get_legend_handles_labels()\n", - " lines2, labels2 = ax2.get_legend_handles_labels()\n", - " ax2.legend(lines + lines2, labels + labels2, loc=0, frameon=False)\n", - " plt.savefig(\"energy_eff_fake_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_eta_reso(pid, v0, msk_true, msk_pred, msk_both, bins):\n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " hist = np.histogram2d(v0[msk_both, 0], v0[msk_both, 1], bins=(bins[\"eta_val\"], bins[\"eta_val\"]))\n", - " mplhep.hist2dplot(hist[0], hist[1], hist[2], cmap=\"Blues\", cbar=False);\n", - " plt.xlabel(bins[\"true_val\"] + \" \" + bins[\"eta_xlabel\"])\n", - " plt.ylabel(bins[\"pred_val\"]+ \" \" + bins[\"eta_xlabel\"])\n", - " cms_label()\n", - " sample_label(ax)\n", - " particle_label(ax, pid)\n", - " plt.plot(\n", - " [bins[\"eta_val\"][0], bins[\"eta_val\"][-1]],\n", - " [bins[\"eta_val\"][0], bins[\"eta_val\"][-1]],\n", - " color=\"black\", ls=\"--\", lw=0.5)\n", - " plt.savefig(\"eta_2d_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n", - " \n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " plt.hist(v0[msk_true, 0], bins=bins[\"eta_val\"], density=1.0, histtype=\"step\", lw=2, label=bins[\"true_val\"]);\n", - " plt.hist(v0[msk_pred, 1], bins=bins[\"eta_val\"], density=1.0, histtype=\"step\", lw=2, label=bins[\"pred_val\"]);\n", - " 
plt.xlabel(bins[\"eta_xlabel\"])\n", - " plt.ylabel(\"number of particles\\n(normalized, a.u.)\")\n", - " plt.legend(frameon=False)\n", - " cms_label()\n", - " sample_label(ax)\n", - " particle_label(ax, pid)\n", - " ax.set_ylim(ax.get_ylim()[0], 1.5*ax.get_ylim()[1])\n", - " plt.savefig(\"eta_hist_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n", - " \n", - " ax.set_ylim(ax.get_ylim()[0], 1.2*ax.get_ylim()[1])\n", - "\n", - " res = (v0[msk_both, 1] - v0[msk_both, 0])\n", - " res[np.isnan(res)] = -1\n", - "\n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " ax.text(0.98, 0.98, \"avg. $\\Delta \\eta$\\n$%.2f \\pm %.2f$\"%(np.mean(res), np.std(res)), transform=ax.transAxes, ha=\"right\", va=\"top\")\n", - " plt.hist(res, bins=bins[\"eta_res\"], density=1.0);\n", - " plt.xlabel(\"$\\Delta \\eta$\")\n", - " plt.ylabel(\"number of particles\\n(normalized, a.u.)\")\n", - " cms_label()\n", - " sample_label(ax)\n", - " particle_label(ax, pid)\n", - " plt.savefig(\"eta_ratio_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n", - " \n", - " #efficiency vs fake rate\n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " big_df[\"bins_target_eta\"] = np.searchsorted(bins[\"eta_val\"], big_df[\"target_eta\"])\n", - " big_df[\"bins_pred_eta\"] = np.searchsorted(bins[\"eta_val\"], big_df[\"pred_eta\"])\n", - "\n", - " vals_eff = big_df[(big_df[\"target_pid\"]==pid)].groupby(\"bins_target_eta\")[\"pred_pid\"].apply(get_eff, pid)\n", - " vals_fake = big_df[(big_df[\"pred_pid\"]==pid)].groupby(\"bins_pred_eta\")[\"target_pid\"].apply(get_fake, pid)\n", - "\n", - " out_eff = np.zeros((len(bins[\"eta_val\"]), 2))\n", - " out_fake = np.zeros((len(bins[\"eta_val\"]), 2))\n", - " for ib in range(len(bins[\"eta_val\"])):\n", - " if ib in vals_eff.keys():\n", - " out_eff[ib, 0] = vals_eff[ib][0]\n", - " out_eff[ib, 1] = vals_eff[ib][1]\n", - " if ib in vals_fake.keys():\n", - " out_fake[ib, 0] = vals_fake[ib][0]\n", - " out_fake[ib, 1] = vals_fake[ib][1]\n", - "\n", - " cms_label()\n", - " sample_label(ax)\n", - " particle_label(ax, pid)\n", - "\n", - " plt.errorbar(bins[\"eta_val\"], out_eff[:, 0], out_eff[:, 1], marker=\".\", lw=0, elinewidth=1.0, color=\"green\", label=\"efficiency\")\n", - " plt.ylabel(\"efficiency\\nN(pred|true) / N(true)\")\n", - " ax.set_ylim(0, 1.5)\n", - " plt.xlabel(bins[\"eta_xlabel\"])\n", - "\n", - " ax2 = ax.twinx()\n", - " col = \"red\"\n", - " plt.errorbar(bins[\"eta_val\"], out_fake[:, 0], out_fake[:, 1], marker=\".\", lw=0, elinewidth=1.0, color=col, label=\"fake rate\")\n", - " plt.ylabel(\"fake rate\\nN(true|pred) / N(pred)\")\n", - " plt.xlabel(bins[\"eta_xlabel\"])\n", - " ax2.set_ylim(0, 1.5)\n", - " lines, labels = ax.get_legend_handles_labels()\n", - " lines2, labels2 = ax2.get_legend_handles_labels()\n", - " ax2.legend(lines + lines2, labels + labels2, loc=0, frameon=False)\n", - " plt.savefig(\"eta_eff_fake_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_phi_reso(pid, v0, msk_true, msk_pred, msk_both, bins):\n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " hist = np.histogram2d(v0[msk_both, 0], v0[msk_both, 1], bins=(bins[\"phi_val\"], bins[\"phi_val\"]))\n", - " mplhep.hist2dplot(hist[0], hist[1], hist[2], cmap=\"Blues\", cbar=False);\n", - " plt.xlabel(bins[\"true_val\"] + \" \" + bins[\"phi_xlabel\"])\n", - " plt.ylabel(bins[\"pred_val\"]+ \" \" + bins[\"phi_xlabel\"])\n", - " cms_label()\n", - " 
sample_label(ax)\n", - " particle_label(ax, pid)\n", - " plt.plot(\n", - " [bins[\"phi_val\"][0], bins[\"phi_val\"][-1]],\n", - " [bins[\"phi_val\"][0], bins[\"phi_val\"][-1]],\n", - " color=\"black\", ls=\"--\", lw=0.5)\n", - " plt.savefig(\"phi_2d_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n", - " \n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " plt.hist(v0[msk_true, 0], bins=bins[\"phi_val\"], density=1.0, histtype=\"step\", lw=2, label=bins[\"true_val\"]);\n", - " plt.hist(v0[msk_pred, 1], bins=bins[\"phi_val\"], density=1.0, histtype=\"step\", lw=2, label=bins[\"pred_val\"]);\n", - " plt.xlabel(bins[\"phi_xlabel\"])\n", - " plt.ylabel(\"number of particles\\n(normalized, a.u.)\")\n", - " plt.legend(frameon=False)\n", - " cms_label()\n", - " sample_label(ax)\n", - " particle_label(ax, pid)\n", - " plt.savefig(\"phi_hist_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n", - " ax.set_ylim(ax.get_ylim()[0], 1.5*ax.get_ylim()[1])\n", - "\n", - " res = (v0[msk_both, 1] - v0[msk_both, 0])\n", - " res[np.isnan(res)] = -1\n", - "\n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " ax.text(0.98, 0.98, \"avg. $\\Delta \\phi$\\n$%.2f \\pm %.2f$\"%(np.mean(res), np.std(res)), transform=ax.transAxes, ha=\"right\", va=\"top\")\n", - " plt.hist(res, bins=bins[\"phi_res\"], density=1.0);\n", - " plt.xlabel(\"$\\Delta \\phi$\")\n", - " plt.ylabel(\"number of particles\\n(normalized, a.u.)\")\n", - " cms_label()\n", - " sample_label(ax)\n", - " particle_label(ax, pid)\n", - " plt.savefig(\"phi_ratio_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n", - " \n", - " #efficiency vs fake rate\n", - " plt.figure(figsize=(4,4))\n", - " ax = plt.axes()\n", - " big_df[\"bins_target_phi\"] = np.searchsorted(bins[\"phi_val\"], big_df[\"target_phi\"])\n", - " big_df[\"bins_pred_phi\"] = np.searchsorted(bins[\"phi_val\"], big_df[\"pred_phi\"])\n", - "\n", - " vals_eff = big_df[(big_df[\"target_pid\"]==pid)].groupby(\"bins_target_phi\")[\"pred_pid\"].apply(get_eff, pid)\n", - " vals_fake = big_df[(big_df[\"pred_pid\"]==pid)].groupby(\"bins_pred_phi\")[\"target_pid\"].apply(get_fake, pid)\n", - "\n", - " out_eff = np.zeros((len(bins[\"phi_val\"]), 2))\n", - " out_fake = np.zeros((len(bins[\"phi_val\"]), 2))\n", - " for ib in range(len(bins[\"phi_val\"])):\n", - " if ib in vals_eff.keys():\n", - " out_eff[ib, 0] = vals_eff[ib][0]\n", - " out_eff[ib, 1] = vals_eff[ib][1]\n", - " if ib in vals_fake.keys():\n", - " out_fake[ib, 0] = vals_fake[ib][0]\n", - " out_fake[ib, 1] = vals_fake[ib][1]\n", - "\n", - " cms_label()\n", - " sample_label(ax)\n", - " particle_label(ax, pid)\n", - "\n", - " plt.errorbar(bins[\"phi_val\"], out_eff[:, 0], out_eff[:, 1], marker=\".\", lw=0, elinewidth=1.0, color=\"green\", label=\"efficiency\")\n", - " plt.ylabel(\"efficiency\\nN(pred|true) / N(true)\")\n", - " ax.set_ylim(0, 1.5)\n", - " plt.xlabel(bins[\"phi_xlabel\"])\n", - "\n", - " ax2 = ax.twinx()\n", - " col = \"red\"\n", - " plt.errorbar(bins[\"phi_val\"], out_fake[:, 0], out_fake[:, 1], marker=\".\", lw=0, elinewidth=1.0, color=col, label=\"fake rate\")\n", - " plt.ylabel(\"fake rate\\nN(true|pred) / N(pred)\")\n", - " plt.xlabel(bins[\"phi_xlabel\"])\n", - " ax2.set_ylim(0, 1.5)\n", - " lines, labels = ax.get_legend_handles_labels()\n", - " lines2, labels2 = ax2.get_legend_handles_labels()\n", - " ax2.legend(lines + lines2, labels + labels2, loc=0, frameon=False)\n", - " plt.savefig(\"phi_eff_fake_pid{}.pdf\".format(pid), bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def prepare_resolution_plots(big_df, pid, bins):\n", - "\n", - " msk_true = (big_df[\"target_pid\"]==pid)\n", - " msk_pred = (big_df[\"pred_pid\"]==pid)\n", - " msk_both = msk_true&msk_pred\n", - " v0 = big_df[[\"target_e\", \"pred_e\"]].values\n", - " v1 = big_df[[\"target_eta\", \"pred_eta\"]].values\n", - " v2 = big_df[[\"target_phi\", \"pred_phi\"]].values\n", - " \n", - " plot_E_reso(pid, v0, msk_true, msk_pred, msk_both, bins)\n", - " plot_eta_reso(pid, v1, msk_true, msk_pred, msk_both, bins)\n", - " plot_phi_reso(pid, v2, msk_true, msk_pred, msk_both, bins)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = {\n", - " 211: {\n", - " \"E_val\": np.linspace(0, 5, 61),\n", - " \"E_res\": np.linspace(-1, 1, 61),\n", - " \"eta_val\": np.linspace(-4, 4, 61),\n", - " \"eta_res\": np.linspace(-0.5, 0.5, 61),\n", - " \"E_xlabel\": \"Energy [GeV]\",\n", - " \"eta_xlabel\": \"$\\eta$\",\n", - " \"phi_val\": np.linspace(-4, 4, 61),\n", - " \"phi_res\": np.linspace(-0.5, 0.5, 41),\n", - " \"phi_xlabel\": \"Energy [GeV]\",\n", - " \"phi_xlabel\": \"$\\phi$\",\n", - " \"true_val\": \"reco PF\",\n", - " \"pred_val\": \"ML-PF\",\n", - " },\n", - " -211: {\n", - " \"E_val\": np.linspace(0, 5, 61),\n", - " \"E_res\": np.linspace(-1, 1, 61),\n", - " \"eta_val\": np.linspace(-4, 4, 61),\n", - " \"eta_res\": np.linspace(-0.5, 0.5, 41),\n", - " \"E_xlabel\": \"Energy [GeV]\",\n", - " \"eta_xlabel\": \"$\\eta$\",\n", - " \"phi_val\": np.linspace(-4, 4, 61),\n", - " \"phi_res\": np.linspace(-0.5, 0.5, 41),\n", - " \"phi_xlabel\": \"Energy [GeV]\",\n", - " \"phi_xlabel\": \"$\\phi$\",\n", - " \"true_val\": \"reco PF\",\n", - " \"pred_val\": \"ML-PF\",\n", - " },\n", - " 130: {\n", - " \"E_val\": np.linspace(0, 10, 61),\n", - " \"E_res\": np.linspace(-1, 1, 61),\n", - " \"eta_val\": np.linspace(-4, 4, 61),\n", - " \"eta_res\": np.linspace(-0.5, 0.5, 41),\n", - " \"E_xlabel\": \"Energy [GeV]\",\n", - " \"eta_xlabel\": \"$\\eta$\",\n", - " \"phi_val\": np.linspace(-4, 4, 61),\n", - " \"phi_res\": np.linspace(-0.5, 0.5, 41),\n", - " \"phi_xlabel\": \"Energy [GeV]\",\n", - " \"phi_xlabel\": \"$\\phi$\",\n", - " \"true_val\": \"reco PF\",\n", - " \"pred_val\": \"ML-PF\",\n", - " },\n", - " 22: {\n", - " \"E_val\": np.linspace(0, 10, 61),\n", - " \"E_res\": np.linspace(-1, 1, 61),\n", - " \"eta_val\": np.linspace(-4, 4, 61),\n", - " \"eta_res\": np.linspace(-0.5, 0.5, 41),\n", - " \"E_xlabel\": \"Energy [GeV]\",\n", - " \"eta_xlabel\": \"$\\eta$\",\n", - " \"phi_val\": np.linspace(-4, 4, 61),\n", - " \"phi_res\": np.linspace(-0.5, 0.5, 41),\n", - " \"phi_xlabel\": \"Energy [GeV]\",\n", - " \"phi_xlabel\": \"$\\phi$\",\n", - " \"true_val\": \"reco PF\",\n", - " \"pred_val\": \"ML-PF\",\n", - " },\n", - " 1: {\n", - " \"E_val\": np.linspace(0, 100, 61),\n", - " \"E_res\": np.linspace(-1, 1, 61),\n", - " \"xlabel\": \"Energy [GeV]\",\n", - " \"true_val\": \"reco PF\",\n", - " \"pred_val\": \"ML-PF\",\n", - " },\n", - " 2: {\n", - " \"E_val\": np.linspace(0, 50, 61),\n", - " \"E_res\": np.linspace(-1, 1, 61),\n", - " \"xlabel\": \"Energy [GeV]\",\n", - " \"true_val\": \"reco PF\",\n", - " \"pred_val\": \"ML-PF\",\n", - " }\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "prepare_resolution_plots(big_df, 211, bins[211])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, 
- "outputs": [], - "source": [ - "prepare_resolution_plots(big_df, 130, bins[130])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "prepare_resolution_plots(big_df, 22, bins[22])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "msk = (big_df[\"target_pid\"] != 0) & ((big_df[\"pred_pid\"] != 0))\n", - "#msk = np.ones(len(big_df), dtype=np.bool)\n", - "confusion2 = sklearn.metrics.confusion_matrix(\n", - " big_df[\"target_pid\"][msk], big_df[\"pred_pid\"][msk],\n", - " labels=class_labels[1:]\n", - ")\n", - "plot_confusion_matrix(\n", - " cm=confusion2, target_names=[int(x) for x in class_labels][1:], normalize=True\n", - ")\n", - "\n", - "acc = sklearn.metrics.accuracy_score(big_df[\"target_pid\"][msk], big_df[\"pred_pid\"][msk])\n", - "plt.title(\"ML-PF, accuracy={:.2f}\".format(acc))\n", - "plt.ylabel(\"reco PF candidate PID\")\n", - "plt.xlabel(\"ML-PF candidate PID\")\n", - "plt.savefig(\"confusion_mlpf.pdf\", bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins_eta = np.linspace(-4, 4, 21)\n", - "bins_e = np.logspace(-1, 3, 21)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def get_eff(df, target_pid=None):\n", - " v0 = np.sum(df==target_pid)\n", - " return (v0 / len(df), np.sqrt(v0)/len(df))\n", - " vals = big_df[(big_df[\"gen_pid\"]==pid)].groupby(bs)[\"cand_pid\"].apply(get_eff, target_pid=pid)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def get_effs_cand_pred(pid, by, bins):\n", - " bs = by + \"_bins\"\n", - " big_df[bs] = np.searchsorted(bins, big_df[by])\n", - " \n", - " vals = big_df[(big_df[\"gen_pid\"]==pid)].groupby(bs)[\"cand_pid\"].apply(get_eff, target_pid=pid)\n", - " xs1 = [bins[min(k, len(bins)-1)] for k in vals.keys()][:-1]\n", - " ys1 = [v[0] for v in vals.values][:-1]\n", - " es1 = [v[1] for v in vals.values][:-1]\n", - "\n", - " vals = big_df[(big_df[\"gen_pid\"]==pid)].groupby(bs)[\"pred_pid\"].apply(get_eff, target_pid=pid)\n", - " xs2 = [bins[min(k, len(bins)-1)] for k in vals.keys()][:-1]\n", - " ys2 = [v[0] for v in vals.values][:-1]\n", - " es2 = [v[1] for v in vals.values][:-1]\n", - " \n", - " return xs1, (ys1, es1), (ys2, es2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#for pid in [211, -211, 130, 22, 1, 2, -11, 11]:\n", - "for pid in [211, -211, 130, 22, 11, -11, 1, 2]:\n", - " plt.figure(figsize=(4,4))\n", - " xs, (ys1, es1), (ys2, es2) = get_effs_cand_pred(pid, \"gen_e\", bins_e)\n", - " plt.errorbar(xs, ys1, es1, lw=1, elinewidth=1, marker=\".\", label=\"standard PF\")\n", - " plt.errorbar(xs, ys2, es2, lw=1, elinewidth=1, marker=\".\", label=\"ML-PF\")\n", - " plt.legend(frameon=False)\n", - " plt.ylim(0, 1.5)\n", - " plt.xscale(\"log\")\n", - " plt.xlabel(\"gen E\")\n", - " plt.ylabel(\"reconstruction efficiency\")\n", - " plt.title(\"pid={}\".format(pid))\n", - " plt.savefig(\"eff_pt_pid{}.pdf\".format(pid), bbox_inches=\"tight\")\n", - "\n", - " plt.figure(figsize=(4,4))\n", - " xs, (ys1, es1), (ys2, es2) = get_effs_cand_pred(pid, \"gen_eta\", bins_eta)\n", - " plt.errorbar(xs, ys1, es1, lw=1, elinewidth=1, marker=\".\", label=\"standard PF\")\n", - " plt.errorbar(xs, ys2, es2, lw=1, elinewidth=1, marker=\".\", 
label=\"ML-PF\")\n", - " plt.legend(frameon=False)\n", - " plt.ylim(0, 1.5)\n", - " plt.xlabel(\"gen eta\")\n", - " plt.ylabel(\"reconstruction efficiency\")\n", - " plt.title(\"pid={}\".format(pid))\n", - " plt.savefig(\"eff_eta_pid{}.pdf\".format(pid), bbox_inches=\"tight\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def make_plot_reg(big_df, pid, pred_type, val, bins):\n", - "\n", - " m = big_df[(big_df[\"cand_pid\"]==pid) & (big_df[\"{}_pid\".format(pred_type)]==pid)][[\"cand_{}\".format(val), \"{}_{}\".format(pred_type, val)]].values\n", - " corr = np.corrcoef(m[:, 0], m[:, 1])[0,1]\n", - " \n", - " plt.figure(figsize=(4,4))\n", - " plt.hist(m[:, 0], bins=bins, histtype=\"step\", lw=2, label=\"true\")\n", - " plt.hist(m[:, 1], bins=bins, histtype=\"step\", lw=2, label=\"pred\")\n", - " plt.xlabel(val)\n", - " plt.legend(frameon=False)\n", - " plt.savefig(\"pred_{}_{}_{}.pdf\".format(val, pred_type, pid), bbox_inches=\"tight\")\n", - "\n", - " ngen = np.sum((big_df[\"cand_pid\"]==pid))\n", - " eff = np.sum((big_df[\"cand_pid\"]==pid) & (big_df[\"{}_pid\".format(pred_type)]==pid)) / float(np.sum((big_df[\"cand_pid\"]==pid)))\n", - " fake = np.sum((big_df[\"cand_pid\"]!=pid) & (big_df[\"{}_pid\".format(pred_type)]==pid)) / float(np.sum((big_df[\"{}_pid\".format(pred_type)]==pid)))\n", - "\n", - " plt.figure(figsize=(4,4))\n", - " plt.title(\"ngen={} corr={:.2f}\\neff={:.2f} fake={:.2f}\".format(ngen, corr, eff, fake))\n", - " h = np.histogram2d(m[:, 0], m[:, 1], bins=(bins, bins))\n", - " mplhep.hist2dplot(h[0], h[1], h[2], cmap=\"Blues\", cbar=False)\n", - " plt.xlabel(\"True {}\".format(val))\n", - " plt.ylabel(\"Predicted {}\".format(val))\n", - " plt.savefig(\"pred_corr_{}_{}_{}.pdf\".format(val, pred_type, pid), bbox_inches=\"tight\")\n", - "\n", - " plt.figure(figsize=(4,4))\n", - " var = np.abs(m[:, 1] / m[:, 0])\n", - " var[var > 100] = 100\n", - " var[var < 0] = 0\n", - " plt.hist(var, bins=np.linspace(0.5, 1.5, 101))\n", - " plt.xlabel(\"true {} / predicted {}\".format(val, val))\n", - " plt.title(\"mu={:.4f} s={:.4f}\".format(np.mean(var), np.std(var)))\n", - " plt.savefig(\"pred_ratio_{}_{}_{}.pdf\".format(val, pred_type, pid), bbox_inches=\"tight\")\n", - " #plt.axvline(1.0, color=\"black\")\n", - " ##plt.yscale(\"log\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(0, 5, 101)\n", - "pid = 211\n", - "val = \"e\"\n", - "\n", - "#make_plot_reg(big_df, pid, \"cand\", val, bins)\n", - "make_plot_reg(big_df, pid, \"pred\", val, bins)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(-4, 4, 61)\n", - "pid = 211\n", - "val = \"eta\"\n", - "\n", - "#make_plot_reg(big_df, pid, \"cand\", val, bins)\n", - "make_plot_reg(big_df, pid, \"pred\", val, bins)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.linspace(-4, 4, 61)\n", - "pid = 211\n", - "val = \"phi\"\n", - "\n", - "#make_plot_reg(big_df, pid, \"cand\", val, bins)\n", - "make_plot_reg(big_df, pid, \"pred\", val, bins)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# n_preds = []\n", - "# n_trues = []\n", - "# for i in range(len(pred_ids)):\n", - "# n_true = np.sum(true_ids[i]!=0)\n", - "# n_pred = np.sum(pred_ids[i]!=0)\n", - "# n_preds += 
[n_pred]\n", - "# n_trues += [n_true]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# plt.figure(figsize=(5, 5))\n", - "# ax = plt.axes()\n", - "# plt.plot([1500,5000],[1500,5000], color=\"black\", lw=0.5)\n", - "# plt.scatter(n_trues, n_preds, marker=\".\", alpha=0.5)\n", - "# plt.xlim(1500,5000)\n", - "# plt.ylim(1500,5000)\n", - "# plt.xlabel(\"Number of Target PF Candidates\",fontsize=13)\n", - "# plt.ylabel(\"Number of Predicted GNN Candidates\",fontsize=13)\n", - "# #plt.title(\"QCD Run3\")\n", - "\n", - "# plt.text(0.67, 1.05, \"Run 3 (14 TeV)\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.02, 0.98, \"CMS\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=16, fontweight='bold')\n", - "# plt.text(0.18, 0.975, \"Simulation Preliminary\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12,style='italic')\n", - "# #plt.text(0.03, 0.92, \"QCD dijet events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.03, 0.92, \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.tight_layout()\n", - "# #plt.savefig(\"num_pred.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# cms = []\n", - "# for i in range(len(pred_ids)):\n", - "# cm = sklearn.metrics.confusion_matrix(\n", - "# true_ids[i],\n", - "# pred_ids[i], labels=range(len(train_end2end.class_labels))\n", - "# )\n", - "# cms += [cm]\n", - "# cm = sum(cms)\n", - "# cm = cm / 1000.0\n", - "# cm = np.round(cm, 1)#.astype(np.int)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# train_end2end.plot_confusion_matrix(cm, [int(x) for x in train_end2end.class_labels], normalize=True)\n", - "# #plt.xlim(-0.5, 9.5)\n", - "# #plt.ylim(-0.5, 9.5)\n", - "# plt.title(\"Normalized Confusion Matrix (QCD Run3)\")\n", - "# #plt.text(0.02, 0.98, \"CMS Simulation, preliminary\", transform=ax.transAxes, va=\"top\", ha=\"left\")\n", - "# #plt.tight_layout()\n", - "# plt.savefig(\"cm.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# pm = np.concatenate(pred_momenta)\n", - "# tm = np.concatenate(true_momenta)\n", - "# ti = np.concatenate(true_ids)\n", - "# pi = np.concatenate(pred_ids)\n", - "\n", - "\n", - "# pm[:, 0] = np.power(10, pm[:, 0])\n", - "# tm[:, 0] = np.power(10, tm[:, 0])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# plt.figure(figsize=(5, 5))\n", - "\n", - "# ax = plt.axes()\n", - "# bins = np.linspace(0, 50, 100)\n", - "# h0 = plt.hist(pm[pi!=0, 0], bins=bins, histtype=\"step\", lw=1, label=\"PF\");\n", - "# h1 = plt.hist(tm[ti!=0, 0], bins=bins, histtype=\"step\", lw=1, label=\"GNN\");\n", - "# plt.yscale(\"log\")\n", - "# plt.legend(frameon=False)\n", - "# plt.ylim(10, 1e7)\n", - "\n", - "# plt.xlabel(\"Candidate $p_{\\mathrm{T}}$ (a.u.)\",fontsize=13)\n", - "# plt.ylabel(\"Number of Candidates\",fontsize=13)\n", - "# #plt.title(\"QCD Run 3\")\n", - "\n", - "# plt.text(0.67, 1.05, \"Run 3 (14 TeV)\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.02, 0.98, \"CMS\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=16, fontweight='bold')\n", - "# plt.text(0.18, 0.975, \"Simulation Preliminary\", transform=ax.transAxes, 
va=\"top\", ha=\"left\",size=12,style='italic')\n", - "# #plt.text(0.03, 0.92, \"QCD dijet events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.03, 0.92, \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.tight_layout()\n", - "# plt.savefig(\"pt_hist.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# plt.figure(figsize=(5, 5))\n", - "# ax = plt.axes()\n", - "\n", - "# bins = np.linspace(-4, 4, 100)\n", - "# plt.hist(pm[pi!=0, 1], bins=bins, histtype=\"step\", lw=1);\n", - "# plt.hist(tm[ti!=0, 1], bins=bins, histtype=\"step\", lw=1);\n", - "# plt.yscale(\"log\")\n", - "\n", - "# plt.ylim(1000, 1e6)\n", - "# plt.xlabel(\"Candidate $\\eta$ (a.u.)\",fontsize=13)\n", - "# plt.ylabel(\"Number of Candidates\",fontsize=13)\n", - "# #plt.title(\"QCD Run 3\")\n", - "# plt.text(0.67, 1.05, \"Run 3 (14 TeV)\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.02, 0.98, \"CMS\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=16, fontweight='bold')\n", - "# plt.text(0.18, 0.975, \"Simulation Preliminary\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12,style='italic')\n", - "# #plt.text(0.03, 0.92, \"QCD dijet events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.03, 0.92, \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.tight_layout()\n", - "# plt.savefig(\"eta_hist.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# plt.figure(figsize=(5, 5))\n", - "\n", - "# ax = plt.axes()\n", - "# bins = np.linspace(-3, 3, 60)\n", - "# plt.hist(pm[pi!=0, 2], bins=bins, histtype=\"step\", lw=1);\n", - "# plt.hist(tm[ti!=0, 2], bins=bins, histtype=\"step\", lw=1);\n", - "# plt.yscale(\"log\")\n", - "# plt.ylim(1000, 1e6)\n", - "\n", - "# plt.xlabel(\"Candidate $\\phi$ (a.u.)\",fontsize=13)\n", - "# plt.ylabel(\"Number of Candidates\",fontsize=13)\n", - "# #plt.title(\"QCD Run 3\")\n", - "\n", - "# plt.text(0.67, 1.05, \"Run 3 (14 TeV)\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.02, 0.98, \"CMS\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=16, fontweight='bold')\n", - "# plt.text(0.18, 0.975, \"Simulation Preliminary\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12,style='italic')\n", - "# #plt.text(0.03, 0.92, \"QCD dijet events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.03, 0.92, \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.tight_layout()\n", - "# plt.savefig(\"phi_hist.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# plt.figure(figsize=(5, 5))\n", - "# ax = plt.axes()\n", - "\n", - "# subidx = np.where((pi!=0)&(ti!=0))[0]\n", - "# rp = np.random.permutation(range(len(subidx)))[:1000]\n", - "\n", - "# plt.scatter(pm[subidx[rp], 0], tm[subidx[rp], 0], marker=\".\", alpha=0.5)\n", - "# plt.xlim(0,2)\n", - "# plt.ylim(0,2)\n", - "# plt.plot([0,2],[0,2], color=\"black\")\n", - "\n", - "# plt.xlabel(\"Target PF Candidate $p_{\\mathrm{T}}$ (a.u.)\",fontsize=13)\n", - "# plt.ylabel(\"Predicted GNN Candidate $p_{\\mathrm{T}}$ (a.u.)\", fontsize=13)\n", - "# #plt.title(\"QCD Run 3, 1000 
candidates\")\n", - "\n", - "# plt.text(0.67, 1.05, \"Run 3 (14 TeV)\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.02, 0.98, \"CMS\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=16, fontweight='bold')\n", - "# plt.text(0.18, 0.975, \"Simulation Preliminary\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12,style='italic')\n", - "# #plt.text(0.03, 0.92, \"QCD dijet events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.03, 0.92, \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.tight_layout()\n", - "# plt.savefig(\"pt_corr.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# plt.figure(figsize=(5, 5))\n", - "# ax = plt.axes()\n", - "\n", - "# plt.plot([-7, 7], [-7, 7], color=\"black\", lw=0.5)\n", - "# plt.scatter(pm[subidx[rp], 1], tm[subidx[rp], 1], marker=\".\", alpha=0.5)\n", - "# plt.xlim(-7, 7)\n", - "# plt.ylim(-7, 7)\n", - "\n", - "# plt.xlabel(\"Target PF Candidate $\\eta$ (a.u.)\",fontsize=13)\n", - "# plt.ylabel(\"Predicted GNN Candidate $\\eta$ (a.u.)\",fontsize=13)\n", - "# #plt.title(\"QCD Run 3, 1000 candidates\")\n", - "# plt.text(0.67, 1.05, \"Run 3 (14 TeV)\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.02, 0.98, \"CMS\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=16, fontweight='bold')\n", - "# plt.text(0.18, 0.975, \"Simulation Preliminary\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12,style='italic')\n", - "# #plt.text(0.03, 0.92, \"QCD dijet events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.03, 0.92, \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.tight_layout()\n", - "# plt.savefig(\"eta_corr.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# plt.figure(figsize=(5, 5))\n", - "# ax = plt.axes()\n", - "\n", - "# plt.plot([-5, 5], [-5, 5], color=\"black\", lw=0.5)\n", - "# plt.scatter(pm[subidx[rp], 2], tm[subidx[rp], 2], marker=\".\", alpha=0.5)\n", - "# plt.xlim(-3,3)\n", - "# plt.ylim(-3,3)\n", - "\n", - "\n", - "# plt.xlabel(\"Target PF Candidate $\\phi$ (a.u.)\",fontsize=13)\n", - "# plt.ylabel(\"Predicted GNN Candidate $\\phi$ (a.u.)\",fontsize=13)\n", - "# #plt.title(\"QCD Run3, 1000 candidates\")\n", - "\n", - "# plt.text(0.67, 1.05, \"Run 3 (14 TeV)\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.02, 0.98, \"CMS\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=16, fontweight='bold')\n", - "# plt.text(0.18, 0.975, \"Simulation Preliminary\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12,style='italic')\n", - "# #plt.text(0.03, 0.92, \"QCD dijet events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.text(0.03, 0.92, \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\", transform=ax.transAxes, va=\"top\", ha=\"left\",size=12)\n", - "# plt.tight_layout()\n", - "# plt.savefig(\"phi_corr.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# import pandas as pd\n", - "# import tqdm\n", - "\n", - "# import matplotlib as mpl\n", - "# mpl.rcParams['figure.figsize'] = [8.0, 6.0]\n", - "# mpl.rcParams['font.size'] = 12\n", - "# mpl.rcParams['legend.fontsize'] = 'large'\n", - "# 
mpl.rcParams['figure.titlesize'] = 'medium'\n", - "\n", - "# d = full_dataset.get(1)\n", - "# d.batch = torch.zeros((len(d.x)), dtype=torch.long)\n", - "# d = d.to(device=device)\n", - "# train_end2end.data_prep(d, device=device)\n", - "# edges, cand_id_onehot, cand_momentum = model(d)\n", - "# output = edges.detach().cpu().numpy()\n", - "# d = full_dataset.get(1)\n", - "# x_data = d.x.detach().cpu().numpy()\n", - "# mask = ((x_data[:,4]==0) & (x_data[:,5]==0) & (x_data[:,6]==0) & (x_data[:,7]==0))\n", - "# good_index = np.zeros((x_data.shape[0],1,2),dtype=int)\n", - "# good_x = x_data[:,2:4].copy() \n", - "# good_x[~mask] = x_data[~mask,2:4].copy()\n", - "# df = pd.DataFrame(good_x, columns=['eta','phi'])\n", - "# df['isTrack'] = ~mask\n", - "# row, col = d.edge_index.cpu().detach().numpy()\n", - "# y_truth = d.ycand.cpu().detach().numpy()\n", - "\n", - "# min_phi = -1.25\n", - "# max_phi = 1.25\n", - "# min_eta = -1.25\n", - "# max_eta = 1.25\n", - "# extra = 1.0\n", - "# x = 'eta'\n", - "# y = 'phi'\n", - "# for plot_type in [['input'],['truth'],['output']]: \n", - "# k = 0\n", - "# plt.figure(figsize=(8, 6)) \n", - "# for i, j in tqdm.tqdm(zip(row, col),total=len(y_truth)):\n", - "# x1 = df[x][i]\n", - "# x2 = df[x][j]\n", - "# y1 = df[y][i]\n", - "# y2 = df[y][j]\n", - "# if (x1 < min_eta-extra or x1 > max_eta+extra) or (x2 < min_eta-extra or x2 > max_eta+extra): continue\n", - "# if (y1 < min_phi-extra or y1 > max_phi+extra) or (y2 < min_phi-extra or y2 > max_phi+extra): continue\n", - "# if 'input' in plot_type:\n", - "# seg_args = dict(c='b',alpha=0.1,zorder=1)\n", - "# plt.plot([df[x][i], df[x][j]],\n", - "# [df[y][i], df[y][j]], '-', **seg_args)\n", - "# if 'truth' in plot_type and y_truth[k]:\n", - "# seg_args = dict(c='r',alpha=0.8,zorder=2)\n", - "# plt.plot([df[x][i], df[x][j]],\n", - "# [df[y][i], df[y][j]], '-', **seg_args)\n", - "# if 'output' in plot_type:\n", - "# seg_args = dict(c='g',alpha=output[k].item(),zorder=3)\n", - "# plt.plot([df[x][i], df[x][j]],\n", - "# [df[y][i], df[y][j]], '-', **seg_args)\n", - "# k+=1\n", - "# cut_mask = (df[x] > min_eta-extra) & (df[x] < max_eta+extra) & (df[y] > min_phi-extra) & (df[y] < max_phi+extra)\n", - "# cluster_mask = cut_mask & ~df['isTrack']\n", - "# track_mask = cut_mask & df['isTrack']\n", - "# plt.scatter(df[x][cluster_mask], df[y][cluster_mask],c='g',marker='o',s=50,zorder=4,alpha=1)\n", - "# plt.scatter(df[x][track_mask], df[y][track_mask],c='b',marker='p',s=50,zorder=5,alpha=1)\n", - "# plt.xlabel(\"Track or Cluster $\\eta$\",fontsize=18)\n", - "# plt.ylabel(\"Track or Cluster $\\phi$\",fontsize=18)\n", - "# plt.xlim(min_eta, max_eta)\n", - "# plt.ylim(min_phi, max_phi)\n", - "# plt.figtext(0.12, 0.90,'CMS',fontweight='bold', wrap=True, horizontalalignment='left', fontsize=20)\n", - "# plt.figtext(0.22, 0.90,'Simulation Preliminary', style='italic', wrap=True, horizontalalignment='left', fontsize=18)\n", - "# plt.figtext(0.67, 0.90,'Run 3 (14 TeV)', wrap=True, horizontalalignment='left', fontsize=18)\n", - "# plt.savefig('graph_%s_%s_%s.pdf'%(x,y,'_'.join(plot_type)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pickle" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "d = pickle.load(open(\"../raw/pfntuple_1_0.pkl\", \"rb\"))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - 
"codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/pfnet-debug.ipynb b/notebooks/pfnet-debug.ipynb index d37af9248..36dd4c91b 100644 --- a/notebooks/pfnet-debug.ipynb +++ b/notebooks/pfnet-debug.ipynb @@ -23,6 +23,7 @@ "import sys\n", "\n", "sys.path.append(\"/home/joosep/particleflow/mlpf\")\n", + "sys.path.append(\"/home/joosep/particleflow/hep_tfds/\")\n", "import tfmodel.model\n", "import tfmodel.data\n", "import tfmodel.model_setup\n", @@ -38,6 +39,23 @@ "from matplotlib import cm" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c58b7a2", + "metadata": {}, + "outputs": [], + "source": [ + "def cms_label(ax, x0=0.01, x1=0.1, x2=0.98, y=0.97):\n", + " plt.figtext(x0, y,'CMS',fontweight='bold', wrap=True, horizontalalignment='left', fontsize=12, transform=ax.transAxes)\n", + " plt.figtext(x1, y,'Simulation Preliminary', style='italic', wrap=True, horizontalalignment='left', fontsize=10, transform=ax.transAxes)\n", + " plt.figtext(x2, y,'Run 3 (14 TeV)', wrap=False, horizontalalignment='right', fontsize=10, transform=ax.transAxes)\n", + " \n", + "def sample_label(ax, x=0.01, y=0.93):\n", + " plt.text(x, y, \"$\\mathrm{t}\\overline{\\mathrm{t}}$ events\", ha=\"left\", size=10, transform=ax.transAxes)\n", + " " + ] + }, { "cell_type": "code", "execution_count": null, @@ -45,8 +63,8 @@ "metadata": {}, "outputs": [], "source": [ - "with open(\"/home/joosep/particleflow/parameters/cms.yaml\") as f:\n", - " config = yaml.load(f)\n", + "with open(\"/home/joosep/particleflow/experiments/all_data_cms-best-of-asha-scikit_20211026_042043_178263.workergpu010/config.yaml\") as f:\n", + " config = yaml.safe_load(f)\n", "config[\"setup\"][\"multi_output\"] = True\n", "config[\"parameters\"][\"debug\"] = True" ] @@ -64,10 +82,12 @@ { "cell_type": "code", "execution_count": null, - "id": "c4107771", + "id": "d9fbca7a", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "config[\"datasets\"][\"cms_pf_ttbar\"]" + ] }, { "cell_type": "code", @@ -78,6 +98,7 @@ "source": [ "cds = config[\"dataset\"]\n", "\n", + "config[\"datasets\"][\"cms_pf_ttbar\"][\"data_dir\"] = \"/home/joosep/tensorflow_datasets/\"\n", "config[\"datasets\"][\"cms_pf_ttbar\"][\"batch_per_gpu\"] = 1\n", "ds_val, ds_info = tfmodel.utils.get_heptfds_dataset(\n", " \"cms_pf_ttbar\",\n", @@ -92,19 +113,32 @@ "metadata": {}, "outputs": [], "source": [ - "ret = model.build((1, 6400, 18))\n", + "ret = model.build((1, 6400, 25))\n", "#model.set_trainable_classification()\n", - "model.load_weights(\"/home/joosep/particleflow/experiments/cms_20210917_142344_403761.gpu0.local/weights/weights-200-0.059240.hdf5\")" + "model.load_weights(\"/home/joosep/particleflow/experiments/all_data_cms-best-of-asha-scikit_20211026_042043_178263.workergpu010/weights/weights-200-0.074496.hdf5\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "aa7c2864", + "id": "18732bbe", "metadata": {}, "outputs": [], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aa7c2864", + "metadata": { + "scrolled": false + }, + "outputs": [], "source": [ "for X, y, w in ds_val:\n", + " X = tf.expand_dims(X, axis=0)\n", " X_val = X.numpy()\n", " ret = model.predict_on_batch(X)\n", " break" @@ -149,11 +183,17 @@ " colorlist = [cm.Dark2(x) for x in 
evenly_spaced_interval]\n", " bin_idx = get_bin_index(ret[layer_name][\"bins\"][0])\n", "\n", - " plt.figure(figsize=(4,4))\n", + " plt.figure(figsize=(7,7))\n", + " ax = plt.axes()\n", " plt.scatter(eta, phi, c=[colorlist[bi] for bi in bin_idx], marker=\".\", s=energy)\n", " plt.xlabel(\"PFElement $\\eta$\")\n", " plt.ylabel(\"PFElement $\\phi$\")\n", " plt.title(\"Binning in {}\".format(layer_name))\n", + " cms_label(ax)\n", + " sample_label(ax)\n", + " plt.ylim(-3.8, 3.8)\n", + " plt.text(0.5, 0.05, \"Each point corresponds to a PFElement in a simulated event.\\nUnique colors correspond to the bin assignment in this layer.\",\n", + " ha=\"center\", va=\"center\", transform=ax.transAxes)\n", " plt.savefig(\"bins_{}.pdf\".format(layer_name), bbox_inches=\"tight\")\n", " plt.savefig(\"bins_{}.png\".format(layer_name), bbox_inches=\"tight\", dpi=300)" ] @@ -178,6 +218,16 @@ "plot_binning_in_layer(\"cg_1\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "c8f0f81f", + "metadata": {}, + "outputs": [], + "source": [ + "plot_binning_in_layer(\"cg_2\")" + ] + }, { "cell_type": "code", "execution_count": null, @@ -206,9 +256,9 @@ "outputs": [], "source": [ "def plot_dms(dms):\n", - " fig = plt.figure(figsize=(4*4, 3*4))\n", - " for i in range(min(len(dms), 49)):\n", - " ax = plt.subplot(7,7,i+1)\n", + " fig = plt.figure(figsize=(10*4, 10*4))\n", + " for i in range(min(len(dms), 100)):\n", + " ax = plt.subplot(10,10,i+1)\n", " plt.axes(ax)\n", " plt.imshow(dms[i], interpolation=\"none\", norm=matplotlib.colors.Normalize(vmin=0, vmax=1), cmap=\"Blues\")\n", " #plt.colorbar()\n", @@ -295,14 +345,6 @@ "plt.savefig(\"dm_cg_energy_1.pdf\", bbox_inches=\"tight\")\n", "plt.savefig(\"dm_cg_energy_1.png\", bbox_inches=\"tight\", dpi=300)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c853c865", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/scripts/local_test_cms_pipeline.sh b/scripts/local_test_cms_pipeline.sh index cb3a67ba9..e023d3b91 100755 --- a/scripts/local_test_cms_pipeline.sh +++ b/scripts/local_test_cms_pipeline.sh @@ -34,6 +34,6 @@ python3 mlpf/pipeline.py train -c parameters/cms.yaml --nepochs 2 --customize pi ls ./experiments/cms_*/weights/ #Generate the pred.npz file of predictions -python3 mlpf/pipeline.py evaluate -c parameters/cms.yaml -t ./experiments/cms_* +python3 mlpf/pipeline.py evaluate -t ./experiments/cms_* python3 mlpf/pipeline.py train -c parameters/cms-transformer.yaml --nepochs 2 --customize pipeline_test diff --git a/scripts/local_test_delphes_pipeline.sh b/scripts/local_test_delphes_pipeline.sh index 6e2b18e4f..60c60b8e2 100755 --- a/scripts/local_test_delphes_pipeline.sh +++ b/scripts/local_test_delphes_pipeline.sh @@ -19,4 +19,4 @@ python3 mlpf/pipeline.py train -c parameters/delphes.yaml --nepochs 2 --ntrain 5 ls ./experiments/delphes_*/weights/ #Generate the pred.npz file of predictions -python3 mlpf/pipeline.py evaluate -c parameters/delphes.yaml -t ./experiments/delphes_* +python3 mlpf/pipeline.py evaluate -t ./experiments/delphes_*
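The snippet below is an illustrative, self-contained sketch of how the cms_label(ax) / sample_label(ax) helpers added to notebooks/pfnet-debug.ipynb in this patch are meant to be called. The helper bodies mirror the patch; the dummy histogram, the axis labels, and the output filename "label_demo.png" are hypothetical and serve only to show the call pattern on a standalone matplotlib axes.

import matplotlib.pyplot as plt
import numpy as np

def cms_label(ax, x0=0.01, x1=0.1, x2=0.98, y=0.97):
    # experiment and conditions labels, positioned in axes coordinates of the given axes
    plt.figtext(x0, y, 'CMS', fontweight='bold', wrap=True, horizontalalignment='left', fontsize=12, transform=ax.transAxes)
    plt.figtext(x1, y, 'Simulation Preliminary', style='italic', wrap=True, horizontalalignment='left', fontsize=10, transform=ax.transAxes)
    plt.figtext(x2, y, 'Run 3 (14 TeV)', wrap=False, horizontalalignment='right', fontsize=10, transform=ax.transAxes)

def sample_label(ax, x=0.01, y=0.93):
    # physics-sample label, placed just below the CMS label
    plt.text(x, y, r"$\mathrm{t}\overline{\mathrm{t}}$ events", ha="left", size=10, transform=ax.transAxes)

# hypothetical standalone figure: any per-particle quantity would do here
fig = plt.figure(figsize=(4, 4))
ax = plt.axes()
ax.hist(np.random.exponential(10.0, size=1000), bins=50, histtype="step")
ax.set_xlabel("Energy [GeV]")
ax.set_ylabel("number of particles")
cms_label(ax)
sample_label(ax)
plt.savefig("label_demo.png", bbox_inches="tight")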