Skip to content

Commit

Permalink
towards v1.7: new CMS datasets, CLIC hit-based datasets, TF backward-compat optimizations (#285)
Browse files Browse the repository at this point in the history


* training with cms 1.7.0

* fix postprocessing for new uproot

* track memory usage of pandora

* readd dim decrease options

* optimizer save/restore

* hypertune

* update track feats

* add pytorch training on clic hits

---------

Co-authored-by: Joosep Pata <joosep.pata@kbfi.ee>
  • Loading branch information
jpata and Joosep Pata authored Jan 31, 2024
1 parent bae2907 commit d335cd3
Show file tree
Hide file tree
Showing 99 changed files with 2,418 additions and 1,229 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ on:
jobs:
lint:
name: Lint PR or Push to main
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
strategy:
matrix:
python-version: [3.8.10]
python-version: [3.10.12]

steps:
- name: Checkout
Expand Down
18 changes: 9 additions & 9 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: "3.10.12"
cache: "pip"
- run: pip install -r requirements.txt
deps-pyg:
Expand All @@ -25,7 +25,7 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: "3.10.12"
cache: "pip"
- run: pip install -r requirements.txt
- run: pip3 install torch==2.0.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
Expand All @@ -38,7 +38,7 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: "3.10.12"
cache: "pip"
- run: pip install -r requirements.txt
- run: PYTHONPATH=. python3 -m unittest tests/test_tf.py
Expand All @@ -50,7 +50,7 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: "3.10.12"
cache: "pip"
- run: pip install -r requirements.txt
- run: ./scripts/local_test_clic_pipeline.sh
Expand All @@ -62,7 +62,7 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: "3.10.12"
cache: "pip"
- run: pip install -r requirements.txt
- run: ./scripts/local_test_clic_hits_pipeline.sh
Expand All @@ -74,7 +74,7 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: "3.10.12"
cache: "pip"
- run: pip install -r requirements.txt
- run: ./scripts/local_test_delphes_pipeline.sh
Expand All @@ -86,7 +86,7 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: "3.10.12"
cache: "pip"
- run: pip install -r requirements.txt
- run: ./scripts/local_test_cms_pipeline.sh
Expand All @@ -98,7 +98,7 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: "3.10.12"
cache: "pip"
- run: pip install -r requirements.txt
- run: pip3 install torch==2.0.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
Expand All @@ -112,7 +112,7 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: "3.10.12"
cache: "pip"
- run: pip install -r requirements.txt
- run: pip3 install torch==2.0.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
Expand Down
4 changes: 2 additions & 2 deletions mlpf/data_cms/genjob_nopu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ pwd
ls -lrt

echo "process.RandomNumberGeneratorService.generator.initialSeed = $SEED" >> step2_phase1_new.py
cmsRun step2_phase1_new.py
cmsRun step3_phase1_new.py
cmsRun step2_phase1_new.py > /dev/null
cmsRun step3_phase1_new.py > /dev/null
#cmsRun $CMSSWDIR/src/Validation/RecoParticleFlow/test/pfanalysis_ntuple.py
mv pfntuple.root pfntuple_${SEED}.root
python3 ${MLPF_PATH}/mlpf/data_cms/postprocessing2.py --input pfntuple_${SEED}.root --outpath ./ --save-normalized-table
Expand Down
4 changes: 2 additions & 2 deletions mlpf/data_cms/genjob_pu55to75.sh
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ pwd
ls -lrt

echo "process.RandomNumberGeneratorService.generator.initialSeed = $SEED" >> step2_phase1_new.py
cmsRun step2_phase1_new.py
cmsRun step3_phase1_new.py
cmsRun step2_phase1_new.py > /dev/null
cmsRun step3_phase1_new.py > /dev/null
#cmsRun $CMSSWDIR/src/Validation/RecoParticleFlow/test/pfanalysis_ntuple.py
mv pfntuple.root pfntuple_${SEED}.root
python3 ${MLPF_PATH}/mlpf/data_cms/postprocessing2.py --input pfntuple_${SEED}.root --outpath ./ --save-normalized-table
Expand Down
18 changes: 10 additions & 8 deletions mlpf/data_cms/prepare_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,16 @@
("TTbar_14TeV_TuneCUETP8M1_cfi", 700000, 701000, "genjob_nopu.sh", outdir + "/nopu"),
("MultiParticlePFGun50_cfi", 800000, 810000, "genjob_nopu.sh", outdir + "/nopu"),

("SingleElectronFlatPt1To1000_pythia8_cfi", 900000, 901000, "genjob_nopu.sh", outdir + "/nopu"),
("SingleGammaFlatPt1To1000_pythia8_cfi", 1000000,1001000, "genjob_nopu.sh", outdir + "/nopu"),
("SingleMuFlatPt1To1000_pythia8_cfi", 1100000,1101000, "genjob_nopu.sh", outdir + "/nopu"),
("SingleNeutronFlatPt0p7To1000_cfi", 1200000,1201000, "genjob_nopu.sh", outdir + "/nopu"),
("SinglePi0Pt1To1000_pythia8_cfi", 1300000,1301000, "genjob_nopu.sh", outdir + "/nopu"),
("SinglePiMinusFlatPt0p7To1000_cfi", 1400000,1401000, "genjob_nopu.sh", outdir + "/nopu"),
("SingleProtonMinusFlatPt0p7To1000_cfi", 1500000,1501000, "genjob_nopu.sh", outdir + "/nopu"),
("SingleTauFlatPt1To1000_cfi", 1600000,1601000, "genjob_nopu.sh", outdir + "/nopu"),
("SingleElectronFlatPt1To1000_pythia8_cfi", 900000, 910000, "genjob_nopu.sh", outdir + "/nopu"),
("SingleGammaFlatPt1To1000_pythia8_cfi", 1000000,1010000, "genjob_nopu.sh", outdir + "/nopu"),
("SingleMuFlatPt1To1000_pythia8_cfi", 1100000,1110000, "genjob_nopu.sh", outdir + "/nopu"),
("SingleNeutronFlatPt0p7To1000_cfi", 1200000,1210000, "genjob_nopu.sh", outdir + "/nopu"),
("SinglePi0Pt1To1000_pythia8_cfi", 1300000,1310000, "genjob_nopu.sh", outdir + "/nopu"),
("SinglePiMinusFlatPt0p7To1000_cfi", 1400000,1410000, "genjob_nopu.sh", outdir + "/nopu"),
("SingleProtonMinusFlatPt0p7To1000_cfi", 1500000,1510000, "genjob_nopu.sh", outdir + "/nopu"),
("SingleTauFlatPt1To1000_cfi", 1600000,1610000, "genjob_nopu.sh", outdir + "/nopu"),

("VBF_TuneCP5_14TeV_pythia8_cfi", 1700000,1705010, "genjob_pu55to75.sh", outdir + "/pu55to75"),
]

if __name__ == "__main__":
Expand Down
78 changes: 78 additions & 0 deletions mlpf/heptfds/clic_pf_edm4hep/single_gamma.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from pathlib import Path

import tensorflow as tf
from utils_edm import (
X_FEATURES_CL,
X_FEATURES_TRK,
Y_FEATURES,
generate_examples,
split_sample,
)

import tensorflow_datasets as tfds
import numpy as np

_DESCRIPTION = """
CLIC EDM4HEP dataset with single gamma particle gun
- X: reconstructed tracks and clusters, variable number N per event
- ygen: stable generator particles, zero-padded to N per event
- ycand: baseline particle flow particles, zero-padded to N per event
"""

_CITATION = """
Pata, Joosep, Wulff, Eric, Duarte, Javier, Mokhtar, Farouk, Zhang, Mengke, Girone, Maria, & Southwick, David. (2023).
Simulated datasets for detector and particle flow reconstruction: CLIC detector (1.1) [Data set].
Zenodo. https://doi.org/10.5281/zenodo.8260741
"""


class ClicEdmSingleGammaPf(tfds.core.GeneratorBasedBuilder):
    """TFDS builder for the CLIC EDM4HEP single-photon (gamma) particle-gun sample.

    Each example contains:
      - ``X``: reconstructed tracks and clusters, variable number N per event
      - ``ygen``: stable generator particles, zero-padded to N
      - ``ycand``: baseline particle-flow particles, zero-padded to N
    """

    VERSION = tfds.core.Version("1.5.0")
    RELEASE_NOTES = {
        "1.5.0": "Regenerate with ARRAY_RECORD",
    }
    MANUAL_DOWNLOAD_INSTRUCTIONS = """
    For the raw input files in ROOT EDM4HEP format, please see the citation above.
    The processed tensorflow_dataset can also be downloaded from:
    rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/clic_edm4hep/ ./
    """

    def __init__(self, *args, **kwargs):
        # Force the ARRAY_RECORD file format for this dataset.
        kwargs["file_format"] = tfds.core.FileFormat.ARRAY_RECORD
        super().__init__(*args, **kwargs)

    def _info(self) -> tfds.core.DatasetInfo:
        """Returns the dataset metadata."""
        return tfds.core.DatasetInfo(
            builder=self,
            description=_DESCRIPTION,
            features=tfds.features.FeaturesDict(
                {
                    # Feature width is the larger of the track/cluster feature
                    # counts; presumably the shorter rows are zero-padded in
                    # utils_edm — confirm there.
                    # NOTE: dtype unified to np.float32 (was tf.float32 here but
                    # np.float32 for ygen/ycand); both map to the same feature
                    # dtype, and NumPy dtypes are the preferred TFDS spelling.
                    "X": tfds.features.Tensor(
                        shape=(
                            None,
                            max(len(X_FEATURES_TRK), len(X_FEATURES_CL)),
                        ),
                        dtype=np.float32,
                    ),
                    "ygen": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32),
                    "ycand": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32),
                }
            ),
            supervised_keys=None,
            homepage="",
            citation=_CITATION,
            metadata=tfds.core.MetadataDict(
                x_features_track=X_FEATURES_TRK,
                x_features_cluster=X_FEATURES_CL,
                y_features=Y_FEATURES,
            ),
        )

    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Locate the manually downloaded gamma sample and produce the splits."""
        path = dl_manager.manual_dir
        return split_sample(Path(path / "gamma/"))

    def _generate_examples(self, files):
        """Yield (key, example) pairs from the processed input files."""
        return generate_examples(files)
78 changes: 78 additions & 0 deletions mlpf/heptfds/clic_pf_edm4hep/single_kaon0L.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from pathlib import Path

import tensorflow as tf
from utils_edm import (
X_FEATURES_CL,
X_FEATURES_TRK,
Y_FEATURES,
generate_examples,
split_sample,
)

import tensorflow_datasets as tfds
import numpy as np

_DESCRIPTION = """
CLIC EDM4HEP dataset with single kaon0L particle gun
- X: reconstructed tracks and clusters, variable number N per event
- ygen: stable generator particles, zero-padded to N per event
- ycand: baseline particle flow particles, zero-padded to N per event
"""

_CITATION = """
Pata, Joosep, Wulff, Eric, Duarte, Javier, Mokhtar, Farouk, Zhang, Mengke, Girone, Maria, & Southwick, David. (2023).
Simulated datasets for detector and particle flow reconstruction: CLIC detector (1.1) [Data set].
Zenodo. https://doi.org/10.5281/zenodo.8260741
"""


class ClicEdmSingleKaon0lPf(tfds.core.GeneratorBasedBuilder):
    """TFDS builder for the CLIC EDM4HEP single-kaon0L particle-gun sample.

    Each example contains:
      - ``X``: reconstructed tracks and clusters, variable number N per event
      - ``ygen``: stable generator particles, zero-padded to N
      - ``ycand``: baseline particle-flow particles, zero-padded to N
    """

    VERSION = tfds.core.Version("1.5.0")
    RELEASE_NOTES = {
        "1.5.0": "Regenerate with ARRAY_RECORD",
    }
    MANUAL_DOWNLOAD_INSTRUCTIONS = """
    For the raw input files in ROOT EDM4HEP format, please see the citation above.
    The processed tensorflow_dataset can also be downloaded from:
    rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/clic_edm4hep/ ./
    """

    def __init__(self, *args, **kwargs):
        # Force the ARRAY_RECORD file format for this dataset.
        kwargs["file_format"] = tfds.core.FileFormat.ARRAY_RECORD
        super().__init__(*args, **kwargs)

    def _info(self) -> tfds.core.DatasetInfo:
        """Returns the dataset metadata."""
        return tfds.core.DatasetInfo(
            builder=self,
            description=_DESCRIPTION,
            features=tfds.features.FeaturesDict(
                {
                    # Feature width is the larger of the track/cluster feature
                    # counts; presumably the shorter rows are zero-padded in
                    # utils_edm — confirm there.
                    # NOTE: dtype unified to np.float32 (was tf.float32 here but
                    # np.float32 for ygen/ycand); both map to the same feature
                    # dtype, and NumPy dtypes are the preferred TFDS spelling.
                    "X": tfds.features.Tensor(
                        shape=(
                            None,
                            max(len(X_FEATURES_TRK), len(X_FEATURES_CL)),
                        ),
                        dtype=np.float32,
                    ),
                    "ygen": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32),
                    "ycand": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32),
                }
            ),
            supervised_keys=None,
            homepage="",
            citation=_CITATION,
            metadata=tfds.core.MetadataDict(
                x_features_track=X_FEATURES_TRK,
                x_features_cluster=X_FEATURES_CL,
                y_features=Y_FEATURES,
            ),
        )

    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Locate the manually downloaded kaon0L sample and produce the splits."""
        path = dl_manager.manual_dir
        return split_sample(Path(path / "kaon0L/"))

    def _generate_examples(self, files):
        """Yield (key, example) pairs from the processed input files."""
        return generate_examples(files)
78 changes: 78 additions & 0 deletions mlpf/heptfds/clic_pf_edm4hep/single_pi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from pathlib import Path

import tensorflow as tf
from utils_edm import (
X_FEATURES_CL,
X_FEATURES_TRK,
Y_FEATURES,
generate_examples,
split_sample_several,
)

import tensorflow_datasets as tfds
import numpy as np

_DESCRIPTION = """
CLIC EDM4HEP dataset with single-pion particle gun
- X: reconstructed tracks and clusters, variable number N per event
- ygen: stable generator particles, zero-padded to N per event
- ycand: baseline particle flow particles, zero-padded to N per event
"""

_CITATION = """
Pata, Joosep, Wulff, Eric, Duarte, Javier, Mokhtar, Farouk, Zhang, Mengke, Girone, Maria, & Southwick, David. (2023).
Simulated datasets for detector and particle flow reconstruction: CLIC detector (1.1) [Data set].
Zenodo. https://doi.org/10.5281/zenodo.8260741
"""


class ClicEdmSinglePiPf(tfds.core.GeneratorBasedBuilder):
    """TFDS builder for the CLIC EDM4HEP single-pion particle-gun sample.

    Combines the pi- and pi+ samples into one dataset. Each example contains:
      - ``X``: reconstructed tracks and clusters, variable number N per event
      - ``ygen``: stable generator particles, zero-padded to N
      - ``ycand``: baseline particle-flow particles, zero-padded to N
    """

    VERSION = tfds.core.Version("1.5.0")
    RELEASE_NOTES = {
        "1.5.0": "Regenerate with ARRAY_RECORD",
    }
    MANUAL_DOWNLOAD_INSTRUCTIONS = """
    For the raw input files in ROOT EDM4HEP format, please see the citation above.
    The processed tensorflow_dataset can also be downloaded from:
    rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/clic_edm4hep/ ./
    """

    def __init__(self, *args, **kwargs):
        # Force the ARRAY_RECORD file format for this dataset.
        kwargs["file_format"] = tfds.core.FileFormat.ARRAY_RECORD
        super().__init__(*args, **kwargs)

    def _info(self) -> tfds.core.DatasetInfo:
        """Returns the dataset metadata."""
        return tfds.core.DatasetInfo(
            builder=self,
            description=_DESCRIPTION,
            features=tfds.features.FeaturesDict(
                {
                    # Feature width is the larger of the track/cluster feature
                    # counts; presumably the shorter rows are zero-padded in
                    # utils_edm — confirm there.
                    # NOTE: dtype unified to np.float32 (was tf.float32 here but
                    # np.float32 for ygen/ycand); both map to the same feature
                    # dtype, and NumPy dtypes are the preferred TFDS spelling.
                    "X": tfds.features.Tensor(
                        shape=(
                            None,
                            max(len(X_FEATURES_TRK), len(X_FEATURES_CL)),
                        ),
                        dtype=np.float32,
                    ),
                    "ygen": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32),
                    "ycand": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32),
                }
            ),
            supervised_keys=None,
            homepage="",
            citation=_CITATION,
            metadata=tfds.core.MetadataDict(
                x_features_track=X_FEATURES_TRK,
                x_features_cluster=X_FEATURES_CL,
                y_features=Y_FEATURES,
            ),
        )

    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Locate the manually downloaded pi-/pi+ samples and produce the splits."""
        path = dl_manager.manual_dir
        return split_sample_several([Path(path / "pi-/"), Path(path / "pi+/")])

    def _generate_examples(self, files):
        """Yield (key, example) pairs from the processed input files."""
        return generate_examples(files)
Loading

0 comments on commit d335cd3

Please sign in to comment.