Commit
format black (#154)
* format black

Former-commit-id: c5dce3b
jpata authored Jan 11, 2023
1 parent 9f79c85 commit 34d8695
Showing 48 changed files with 2,651 additions and 454 deletions.
12 changes: 8 additions & 4 deletions .pre-commit-config.yaml
@@ -29,9 +29,13 @@ repos:
 - repo: https://github.com/psf/black
   rev: 22.12.0
   hooks:
-  - id: black-jupyter
-    language_version: python3
-    args: [--line-length=125]
+  - id: black
+    # It is recommended to specify the latest version of Python
+    # supported by your project here, or alternatively use
+    # pre-commit's default_language_version, see
+    # https://pre-commit.com/#top_level-default_language_version
+    language_version: python3
+    args: [--line-length=125]

 - repo: https://github.com/PyCQA/flake8
   rev: 6.0.0
@@ -42,4 +46,4 @@ repos:
       # E203 is not PEP8 compliant
       # E402 due to logging.basicConfig in pipeline.py
       args: ['--max-line-length=125', # github viewer width
-              '--extend-ignore=E203,W605,E402']
+              '--extend-ignore=E203,E402']
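
The black hook enforces a 125-column limit and normalizes quoting and spacing. The same formatting is available programmatically through black's Python API; a minimal sketch (the input string is a hypothetical example, not code from this repository):

    import black

    # Deliberately mis-formatted input: single quotes, uneven spacing.
    src = "x = { 'pid':pid ,'pt':lv.pt }\n"

    # black.Mode(line_length=125) mirrors the hook's --line-length=125 argument.
    print(black.format_str(src, mode=black.Mode(line_length=125)))
    # prints: x = {"pid": pid, "pt": lv.pt}
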
36 changes: 31 additions & 5 deletions delphes/ntuplizer.py
@@ -225,7 +225,13 @@ def make_triplets(g, tracks, towers, particles, pfparticles):
         # determine the GenParticle to reconstruct from this tower
         if len(lvs) > 0:
             lv = sum(lvs[1:], lvs[0])
-            gen_ptcl = {"pid": pid, "pt": lv.pt, "eta": lv.eta, "phi": lv.phi, "energy": lv.energy}
+            gen_ptcl = {
+                "pid": pid,
+                "pt": lv.pt,
+                "eta": lv.eta,
+                "phi": lv.phi,
+                "energy": lv.energy,
+            }

             # charged gen particles outside the tracker acceptance should be reconstructed as neutrals
             if gen_ptcl["pid"] == 211 and abs(gen_ptcl["eta"]) > 2.5:
@@ -250,7 +256,11 @@ def make_triplets(g, tracks, towers, particles, pfparticles):
             pf_ptcl = None

         triplets.append((t, gen_ptcl, pf_ptcl))
-    return triplets, list(remaining_particles), list(remaining_pfcandidates)
+    return (
+        triplets,
+        list(remaining_particles),
+        list(remaining_pfcandidates),
+    )


 def process_chunk(infile, ev_start, ev_stop, outfile):
@@ -380,7 +390,10 @@ def process_chunk(infile, ev_start, ev_stop, outfile):

         # write the full graph, mainly for study purposes
         if iev < 10 and save_full_graphs:
-            nx.readwrite.write_gpickle(graph, outfile.replace(".pkl.bz2", "_graph_{}.pkl".format(iev)))
+            nx.readwrite.write_gpickle(
+                graph,
+                outfile.replace(".pkl.bz2", "_graph_{}.pkl".format(iev)),
+            )

         # now clean up the graph, keeping only reconstructable genparticles
         # we also merge neutral genparticles within towers, as they are otherwise not reconstructable
@@ -390,7 +403,11 @@ def process_chunk(infile, ev_start, ev_stop, outfile):
         tracks = [n for n in graph.nodes if n[0] == "track"]
         towers = [n for n in graph.nodes if n[0] == "tower"]

-        triplets, remaining_particles, remaining_pfcandidates = make_triplets(graph, tracks, towers, particles, pfcand)
+        (
+            triplets,
+            remaining_particles,
+            remaining_pfcandidates,
+        ) = make_triplets(graph, tracks, towers, particles, pfcand)
         print("remaining PF", len(remaining_pfcandidates))
         for pf in remaining_pfcandidates:
             print(pf, graph.nodes[pf])
@@ -433,7 +450,16 @@ def process_chunk(infile, ev_start, ev_stop, outfile):
         ygen = np.stack(ygen)
         ygen_remaining = np.stack(ygen_remaining)
         ycand = np.stack(ycand)
-        print("X", X.shape, "ygen", ygen.shape, "ygen_remaining", ygen_remaining.shape, "ycand", ycand.shape)
+        print(
+            "X",
+            X.shape,
+            "ygen",
+            ygen.shape,
+            "ygen_remaining",
+            ygen_remaining.shape,
+            "ycand",
+            ycand.shape,
+        )

         X_all.append(X)
         ygen_all.append(ygen)
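
A note on the lv = sum(lvs[1:], lvs[0]) idiom in make_triplets above: Python's sum starts from the integer 0 by default, which cannot be added to a Lorentz-vector object, so the first vector is passed as the start value instead. A minimal sketch with a hypothetical vector class:

    # Minimal stand-in for the Lorentz-vector objects summed in ntuplizer.py (hypothetical class).
    class Vec:
        def __init__(self, px, py, pz, e):
            self.px, self.py, self.pz, self.e = px, py, pz, e

        def __add__(self, other):
            return Vec(self.px + other.px, self.py + other.py, self.pz + other.pz, self.e + other.e)

    lvs = [Vec(1.0, 0.0, 0.0, 1.0), Vec(0.0, 2.0, 0.0, 2.0), Vec(0.0, 0.0, 3.0, 3.0)]

    # sum(lvs) would raise TypeError: it evaluates 0 + lvs[0], and int + Vec is undefined.
    lv = sum(lvs[1:], lvs[0])
    print(lv.px, lv.py, lv.pz, lv.e)  # 1.0 2.0 3.0 6.0
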
42 changes: 33 additions & 9 deletions mlpf/adv_training.py
@@ -9,7 +9,10 @@
 # A deep sets conditional discriminator
 def make_disc_model(config, reco_features):
     input_elems = tf.keras.layers.Input(
-        shape=(config["dataset"]["padded_num_elem_size"], config["dataset"]["num_input_features"])
+        shape=(
+            config["dataset"]["padded_num_elem_size"],
+            config["dataset"]["num_input_features"],
+        )
     )
     input_reco = tf.keras.layers.Input(shape=(config["dataset"]["padded_num_elem_size"], reco_features))

@@ -79,14 +82,21 @@ def main(config):
     tb.set_model(model_pf)

     cp_callback = tf.keras.callbacks.ModelCheckpoint(
-        filepath="logs/weights-{epoch:02d}.hdf5", save_weights_only=True, verbose=0
+        filepath="logs/weights-{epoch:02d}.hdf5",
+        save_weights_only=True,
+        verbose=0,
     )
     cp_callback.set_model(model_pf)

-    x = np.random.randn(1, config["dataset"]["padded_num_elem_size"], config["dataset"]["num_input_features"])
+    x = np.random.randn(
+        1,
+        config["dataset"]["padded_num_elem_size"],
+        config["dataset"]["num_input_features"],
+    )
     ypred = concat_pf([model_pf(x), x])
     model_pf.load_weights(
-        "experiments/cms_20210909_132136_111774.gpu0.local/weights/weights-100-1.280379.hdf5", by_name=True
+        "experiments/cms_20210909_132136_111774.gpu0.local/weights/weights-100-1.280379.hdf5",
+        by_name=True,
     )
     # model_pf.load_weights("./logs/weights-02.hdf5", by_name=True)

@@ -105,18 +115,26 @@ def main(config):
     cb.set_model(model_pf)

     input_elems = tf.keras.layers.Input(
-        shape=(config["dataset"]["padded_num_elem_size"], config["dataset"]["num_input_features"]),
+        shape=(
+            config["dataset"]["padded_num_elem_size"],
+            config["dataset"]["num_input_features"],
+        ),
         batch_size=2 * batch_size,
         name="input_detector_elements",
     )
     input_reco = tf.keras.layers.Input(
-        shape=(config["dataset"]["padded_num_elem_size"], ypred.shape[-1]), name="input_reco_particles"
+        shape=(config["dataset"]["padded_num_elem_size"], ypred.shape[-1]),
+        name="input_reco_particles",
     )
     pf_out = tf.keras.layers.Lambda(concat_pf)([model_pf(input_elems), input_elems])
     disc_out1 = model_disc([input_elems, pf_out])
     disc_out2 = model_disc([input_elems, input_reco])
     m1 = tf.keras.models.Model(inputs=[input_elems], outputs=[disc_out1], name="model_mlpf_disc")
-    m2 = tf.keras.models.Model(inputs=[input_elems, input_reco], outputs=[disc_out2], name="model_reco_disc")
+    m2 = tf.keras.models.Model(
+        inputs=[input_elems, input_reco],
+        outputs=[disc_out2],
+        name="model_reco_disc",
+    )

     def loss(x, y):
         return tf.keras.losses.binary_crossentropy(x, y, from_logits=True)
@@ -159,7 +177,10 @@ def loss(x, y):

         mlpf_train_outputs = tf.concat([yb, yp], axis=0)
         mlpf_train_disc_targets = tf.concat([batch_size * [0.99], batch_size * [0.01]], axis=0)
-        loss2 = m2.train_on_batch([mlpf_train_inputs, mlpf_train_outputs], mlpf_train_disc_targets)
+        loss2 = m2.train_on_batch(
+            [mlpf_train_inputs, mlpf_train_outputs],
+            mlpf_train_disc_targets,
+        )

         # Train the MLPF reconstruction (generative) model with an inverted target
         disc_train_disc_targets = tf.concat([batch_size * [1.0]], axis=0)
@@ -189,7 +210,10 @@ def loss(x, y):
         mlpf_train_inputs = tf.concat([xb, xb], axis=0)
         mlpf_train_outputs = tf.concat([yb, yp], axis=0)
         mlpf_train_disc_targets = tf.concat([batch_size * [0.99], batch_size * [0.01]], axis=0)
-        loss2 = m2.test_on_batch([mlpf_train_inputs, mlpf_train_outputs], mlpf_train_disc_targets)
+        loss2 = m2.test_on_batch(
+            [mlpf_train_inputs, mlpf_train_outputs],
+            mlpf_train_disc_targets,
+        )

         # Train the MLPF reconstruction (generative) model with an inverted target
         disc_train_disc_targets = tf.concat([batch_size * [1.0]], axis=0)
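
The training loop above follows the standard GAN alternation: m2 updates the discriminator on smoothed real/fake targets (0.99/0.01), and m1 updates the MLPF model through a discriminator frozen at compile time, with the target inverted to 1.0. A self-contained sketch of the same pattern with generic stand-in models (not the MLPF architecture):

    import numpy as np
    import tensorflow as tf

    batch_size = 32
    latent_dim, data_dim = 8, 4

    # Generic stand-ins for the reconstruction model and the discriminator.
    gen = tf.keras.Sequential([tf.keras.layers.Dense(16, activation="elu"), tf.keras.layers.Dense(data_dim)])
    disc = tf.keras.Sequential([tf.keras.layers.Dense(16, activation="elu"), tf.keras.layers.Dense(1)])

    def loss(x, y):
        return tf.keras.losses.binary_crossentropy(x, y, from_logits=True)

    # Analogue of m2: trains the discriminator to separate real from generated samples.
    x_in = tf.keras.layers.Input(shape=(data_dim,))
    m2 = tf.keras.models.Model(inputs=[x_in], outputs=[disc(x_in)])
    m2.compile(loss=loss, optimizer="adam")

    # Analogue of m1: the discriminator is frozen before compiling, so only the generator updates.
    disc.trainable = False
    z_in = tf.keras.layers.Input(shape=(latent_dim,))
    m1 = tf.keras.models.Model(inputs=[z_in], outputs=[disc(gen(z_in))])
    m1.compile(loss=loss, optimizer="adam")

    for step in range(100):
        z = np.random.randn(batch_size, latent_dim).astype("float32")
        real = np.random.randn(batch_size, data_dim).astype("float32") + 3.0
        fake = gen.predict(z, verbose=0)

        # Discriminator step with smoothed targets (0.99 real, 0.01 fake).
        x = np.concatenate([real, fake], axis=0)
        y = np.concatenate([0.99 * np.ones((batch_size, 1)), 0.01 * np.ones((batch_size, 1))])
        loss2 = m2.train_on_batch(x, y)

        # Generator step with the inverted target 1.0: fool the frozen discriminator.
        loss1 = m1.train_on_batch(z, np.ones((batch_size, 1)))
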
42 changes: 37 additions & 5 deletions mlpf/data_clic/postprocessing.py
@@ -54,15 +54,32 @@ def track_as_array(df_tr, itr):
 def cluster_as_array(df_cl, icl):
     row = df_cl[icl]
     return np.array(
-        [2, row["x"], row["y"], row["z"], row["nhits_ecal"], row["nhits_hcal"], row["energy"]]  # clusters are type 2
+        [
+            2,
+            row["x"],
+            row["y"],
+            row["z"],
+            row["nhits_ecal"],
+            row["nhits_hcal"],
+            row["energy"],
+        ]  # clusters are type 2
     )


 # this defines the genparticle features
 def gen_as_array(df_gen, igen):
     if igen:
         row = df_gen[igen]
-        return np.array([abs(row["pdgid"]), row["charge"], row["px"], row["py"], row["pz"], row["energy"]])
+        return np.array(
+            [
+                abs(row["pdgid"]),
+                row["charge"],
+                row["px"],
+                row["py"],
+                row["pz"],
+                row["energy"],
+            ]
+        )
     else:
         return np.zeros(6)

@@ -71,7 +88,16 @@ def gen_as_array(df_gen, igen):
 def pf_as_array(df_pfs, igen):
     if igen:
         row = df_pfs[igen]
-        return np.array([abs(row["type"]), row["charge"], row["px"], row["py"], row["pz"], row["energy"]])
+        return np.array(
+            [
+                abs(row["type"]),
+                row["charge"],
+                row["px"],
+                row["py"],
+                row["pz"],
+                row["energy"],
+            ]
+        )
     else:
         return np.zeros(6)

@@ -145,9 +171,15 @@ def flatten_event(df_tr, df_cl, df_gen, df_pfs, pairs):

     # Here we pad the tracks and clusters to the same shape along the feature dimension
     if Xs_tracks.shape[1] > Xs_clusters.shape[-1]:
-        Xs_clusters = np.pad(Xs_clusters, [(0, 0), (0, Xs_tracks.shape[1] - Xs_clusters.shape[-1])])
+        Xs_clusters = np.pad(
+            Xs_clusters,
+            [(0, 0), (0, Xs_tracks.shape[1] - Xs_clusters.shape[-1])],
+        )
     elif Xs_tracks.shape[1] < Xs_clusters.shape[-1]:
-        Xs_clusters = np.pad(Xs_clusters, [(0, 0), (0, Xs_clusters.shape[-1] - Xs_tracks.shape[1])])
+        Xs_clusters = np.pad(
+            Xs_clusters,
+            [(0, 0), (0, Xs_clusters.shape[-1] - Xs_tracks.shape[1])],
+        )

     Xs = np.concatenate([Xs_tracks, Xs_clusters], axis=0)  # [Ntracks+Nclusters, max(Nfeat_cluster, Nfeat_track)]
     ys_gen = np.stack(ys_gen, axis=-1).T
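
The padding in flatten_event above uses np.pad's per-axis (before, after) widths: [(0, 0), (0, k)] leaves the object axis untouched and appends k zero-valued features, after which tracks and clusters can be concatenated along the object axis. A minimal sketch with hypothetical shapes:

    import numpy as np

    # Hypothetical feature matrices: 3 tracks x 5 features, 4 clusters x 7 features.
    tracks = np.ones((3, 5))
    clusters = np.ones((4, 7))

    # Zero-pad the narrower array along the feature axis only.
    if tracks.shape[1] < clusters.shape[1]:
        tracks = np.pad(tracks, [(0, 0), (0, clusters.shape[1] - tracks.shape[1])])

    X = np.concatenate([tracks, clusters], axis=0)
    print(X.shape)  # (7, 7)
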
25 changes: 20 additions & 5 deletions mlpf/data_cms/multicrab.py
@@ -7,15 +7,27 @@
 def submit(config):
     crabCommand("submit", config=config)
     # save crab config for the future
-    with open(config.General.workArea + "/crab_" + config.General.requestName + "/crab_config.py", "w") as fi:
+    with open(
+        config.General.workArea + "/crab_" + config.General.requestName + "/crab_config.py",
+        "w",
+    ) as fi:
         fi.write(config.pythonise_())


 # https://cmsweb.cern.ch/das/request?view=plain&limit=50&instance=prod%2Fglobal&input=%2FRelVal*%2FCMSSW_11_0_0_pre4*%2FGEN-SIM-DIGI-RAW
 samples = [
-    ("/RelValQCD_FlatPt_15_3000HS_14/CMSSW_11_0_0_pre12-PU_110X_mcRun3_2021_realistic_v5-v1/GEN-SIM-DIGI-RAW", "QCD_run3"),
-    ("/RelValNuGun/CMSSW_11_0_0_pre12-PU_110X_mcRun3_2021_realistic_v5-v1/GEN-SIM-DIGI-RAW", "NuGun_run3"),
-    ("/RelValTTbar_14TeV/CMSSW_11_0_0_pre12-PU_110X_mcRun3_2021_realistic_v5-v1/GEN-SIM-DIGI-RAW", "TTbar_run3"),
+    (
+        "/RelValQCD_FlatPt_15_3000HS_14/CMSSW_11_0_0_pre12-PU_110X_mcRun3_2021_realistic_v5-v1/GEN-SIM-DIGI-RAW",
+        "QCD_run3",
+    ),
+    (
+        "/RelValNuGun/CMSSW_11_0_0_pre12-PU_110X_mcRun3_2021_realistic_v5-v1/GEN-SIM-DIGI-RAW",
+        "NuGun_run3",
+    ),
+    (
+        "/RelValTTbar_14TeV/CMSSW_11_0_0_pre12-PU_110X_mcRun3_2021_realistic_v5-v1/GEN-SIM-DIGI-RAW",
+        "TTbar_run3",
+    ),
     # ("/RelValTTbar_14TeV/CMSSW_11_0_0_pre12-PU25ns_110X_mcRun4_realistic_v2_2026D41PU140-v1/GEN-SIM-DIGI-RAW",
     # "TTbar_run4_pu140"),
     # ("/RelValTTbar_14TeV/CMSSW_11_0_0_pre12-PU25ns_110X_mcRun4_realistic_v2_2026D41PU200-v1/GEN-SIM-DIGI-RAW",
@@ -37,7 +49,10 @@ def submit(config):
     conf.JobType.psetName = "step3_dump.py"
     conf.JobType.maxJobRuntimeMin = 8 * 60
     conf.JobType.allowUndistributedCMSSW = True
-    conf.JobType.outputFiles = ["step3_inMINIAODSIM.root", "step3_AOD.root"]
+    conf.JobType.outputFiles = [
+        "step3_inMINIAODSIM.root",
+        "step3_AOD.root",
+    ]
    conf.JobType.maxMemoryMB = 6000
    conf.JobType.numCores = 2

