Commit
format black (#154)
* format black

Former-commit-id: c5dce3b
jpata authored Jan 11, 2023
1 parent 9f79c85 commit 34d8695
Showing 48 changed files with 2,651 additions and 454 deletions.
12 changes: 8 additions & 4 deletions .pre-commit-config.yaml
@@ -29,9 +29,13 @@ repos:
 - repo: https://github.com/psf/black
   rev: 22.12.0
   hooks:
-  - id: black-jupyter
-    language_version: python3
-    args: [--line-length=125]
+  - id: black
+    # It is recommended to specify the latest version of Python
+    # supported by your project here, or alternatively use
+    # pre-commit's default_language_version, see
+    # https://pre-commit.com/#top_level-default_language_version
+    language_version: python3
+    args: [--line-length=125]

 - repo: https://github.com/PyCQA/flake8
   rev: 6.0.0
@@ -42,4 +46,4 @@ repos:
       # E203 is not PEP8 compliant
       # E402 due to logging.basicConfig in pipeline.py
       args: ['--max-line-length=125', # github viewer width
-              '--extend-ignore=E203,W605,E402']
+              '--extend-ignore=E203,E402']
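
The black hook enforces a 125-column limit and normalizes quoting and spacing. The same formatting is available programmatically through black's Python API; a minimal sketch (the input string is a hypothetical example, not code from this repository):

    import black

    # Deliberately mis-formatted input: single quotes, uneven spacing.
    src = "x = { 'pid':pid ,'pt':lv.pt }\n"

    # black.Mode(line_length=125) mirrors the hook's --line-length=125 argument.
    print(black.format_str(src, mode=black.Mode(line_length=125)))
    # prints: x = {"pid": pid, "pt": lv.pt}
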
36 changes: 31 additions & 5 deletions delphes/ntuplizer.py
@@ -225,7 +225,13 @@ def make_triplets(g, tracks, towers, particles, pfparticles):
         # determine the GenParticle to reconstruct from this tower
         if len(lvs) > 0:
             lv = sum(lvs[1:], lvs[0])
-            gen_ptcl = {"pid": pid, "pt": lv.pt, "eta": lv.eta, "phi": lv.phi, "energy": lv.energy}
+            gen_ptcl = {
+                "pid": pid,
+                "pt": lv.pt,
+                "eta": lv.eta,
+                "phi": lv.phi,
+                "energy": lv.energy,
+            }

             # charged gen particles outside the tracker acceptance should be reconstructed as neutrals
             if gen_ptcl["pid"] == 211 and abs(gen_ptcl["eta"]) > 2.5:
@@ -250,7 +256,11 @@ def make_triplets(g, tracks, towers, particles, pfparticles):
             pf_ptcl = None

         triplets.append((t, gen_ptcl, pf_ptcl))
-    return triplets, list(remaining_particles), list(remaining_pfcandidates)
+    return (
+        triplets,
+        list(remaining_particles),
+        list(remaining_pfcandidates),
+    )


 def process_chunk(infile, ev_start, ev_stop, outfile):
@@ -380,7 +390,10 @@ def process_chunk(infile, ev_start, ev_stop, outfile):

         # write the full graph, mainly for study purposes
         if iev < 10 and save_full_graphs:
-            nx.readwrite.write_gpickle(graph, outfile.replace(".pkl.bz2", "_graph_{}.pkl".format(iev)))
+            nx.readwrite.write_gpickle(
+                graph,
+                outfile.replace(".pkl.bz2", "_graph_{}.pkl".format(iev)),
+            )

         # now clean up the graph, keeping only reconstructable genparticles
         # we also merge neutral genparticles within towers, as they are otherwise not reconstructable
@@ -390,7 +403,11 @@ def process_chunk(infile, ev_start, ev_stop, outfile):
         tracks = [n for n in graph.nodes if n[0] == "track"]
         towers = [n for n in graph.nodes if n[0] == "tower"]

-        triplets, remaining_particles, remaining_pfcandidates = make_triplets(graph, tracks, towers, particles, pfcand)
+        (
+            triplets,
+            remaining_particles,
+            remaining_pfcandidates,
+        ) = make_triplets(graph, tracks, towers, particles, pfcand)
         print("remaining PF", len(remaining_pfcandidates))
         for pf in remaining_pfcandidates:
             print(pf, graph.nodes[pf])
@@ -433,7 +450,16 @@ def process_chunk(infile, ev_start, ev_stop, outfile):
         ygen = np.stack(ygen)
         ygen_remaining = np.stack(ygen_remaining)
         ycand = np.stack(ycand)
-        print("X", X.shape, "ygen", ygen.shape, "ygen_remaining", ygen_remaining.shape, "ycand", ycand.shape)
+        print(
+            "X",
+            X.shape,
+            "ygen",
+            ygen.shape,
+            "ygen_remaining",
+            ygen_remaining.shape,
+            "ycand",
+            ycand.shape,
+        )

         X_all.append(X)
         ygen_all.append(ygen)
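
A note on the lv = sum(lvs[1:], lvs[0]) idiom in make_triplets above: Python's sum starts from the integer 0 by default, which cannot be added to a Lorentz-vector object, so the first vector is passed as the start value instead. A minimal sketch with a hypothetical vector class:

    # Minimal stand-in for the Lorentz-vector objects summed in ntuplizer.py (hypothetical class).
    class Vec:
        def __init__(self, px, py, pz, e):
            self.px, self.py, self.pz, self.e = px, py, pz, e

        def __add__(self, other):
            return Vec(self.px + other.px, self.py + other.py, self.pz + other.pz, self.e + other.e)

    lvs = [Vec(1.0, 0.0, 0.0, 1.0), Vec(0.0, 2.0, 0.0, 2.0), Vec(0.0, 0.0, 3.0, 3.0)]

    # sum(lvs) would raise TypeError: it evaluates 0 + lvs[0], and int + Vec is undefined.
    lv = sum(lvs[1:], lvs[0])
    print(lv.px, lv.py, lv.pz, lv.e)  # 1.0 2.0 3.0 6.0
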
42 changes: 33 additions & 9 deletions mlpf/adv_training.py
@@ -9,7 +9,10 @@
 # A deep sets conditional discriminator
 def make_disc_model(config, reco_features):
     input_elems = tf.keras.layers.Input(
-        shape=(config["dataset"]["padded_num_elem_size"], config["dataset"]["num_input_features"])
+        shape=(
+            config["dataset"]["padded_num_elem_size"],
+            config["dataset"]["num_input_features"],
+        )
     )
     input_reco = tf.keras.layers.Input(shape=(config["dataset"]["padded_num_elem_size"], reco_features))

@@ -79,14 +82,21 @@ def main(config):
     tb.set_model(model_pf)

     cp_callback = tf.keras.callbacks.ModelCheckpoint(
-        filepath="logs/weights-{epoch:02d}.hdf5", save_weights_only=True, verbose=0
+        filepath="logs/weights-{epoch:02d}.hdf5",
+        save_weights_only=True,
+        verbose=0,
     )
     cp_callback.set_model(model_pf)

-    x = np.random.randn(1, config["dataset"]["padded_num_elem_size"], config["dataset"]["num_input_features"])
+    x = np.random.randn(
+        1,
+        config["dataset"]["padded_num_elem_size"],
+        config["dataset"]["num_input_features"],
+    )
     ypred = concat_pf([model_pf(x), x])
     model_pf.load_weights(
-        "experiments/cms_20210909_132136_111774.gpu0.local/weights/weights-100-1.280379.hdf5", by_name=True
+        "experiments/cms_20210909_132136_111774.gpu0.local/weights/weights-100-1.280379.hdf5",
+        by_name=True,
     )
     # model_pf.load_weights("./logs/weights-02.hdf5", by_name=True)

@@ -105,18 +115,26 @@ def main(config):
     cb.set_model(model_pf)

     input_elems = tf.keras.layers.Input(
-        shape=(config["dataset"]["padded_num_elem_size"], config["dataset"]["num_input_features"]),
+        shape=(
+            config["dataset"]["padded_num_elem_size"],
+            config["dataset"]["num_input_features"],
+        ),
         batch_size=2 * batch_size,
         name="input_detector_elements",
     )
     input_reco = tf.keras.layers.Input(
-        shape=(config["dataset"]["padded_num_elem_size"], ypred.shape[-1]), name="input_reco_particles"
+        shape=(config["dataset"]["padded_num_elem_size"], ypred.shape[-1]),
+        name="input_reco_particles",
     )
     pf_out = tf.keras.layers.Lambda(concat_pf)([model_pf(input_elems), input_elems])
     disc_out1 = model_disc([input_elems, pf_out])
     disc_out2 = model_disc([input_elems, input_reco])
     m1 = tf.keras.models.Model(inputs=[input_elems], outputs=[disc_out1], name="model_mlpf_disc")
-    m2 = tf.keras.models.Model(inputs=[input_elems, input_reco], outputs=[disc_out2], name="model_reco_disc")
+    m2 = tf.keras.models.Model(
+        inputs=[input_elems, input_reco],
+        outputs=[disc_out2],
+        name="model_reco_disc",
+    )

     def loss(x, y):
         return tf.keras.losses.binary_crossentropy(x, y, from_logits=True)
@@ -159,7 +177,10 @@ def loss(x, y):

         mlpf_train_outputs = tf.concat([yb, yp], axis=0)
         mlpf_train_disc_targets = tf.concat([batch_size * [0.99], batch_size * [0.01]], axis=0)
-        loss2 = m2.train_on_batch([mlpf_train_inputs, mlpf_train_outputs], mlpf_train_disc_targets)
+        loss2 = m2.train_on_batch(
+            [mlpf_train_inputs, mlpf_train_outputs],
+            mlpf_train_disc_targets,
+        )

         # Train the MLPF reconstruction (generative) model with an inverted target
         disc_train_disc_targets = tf.concat([batch_size * [1.0]], axis=0)
@@ -189,7 +210,10 @@ def loss(x, y):
         mlpf_train_inputs = tf.concat([xb, xb], axis=0)
         mlpf_train_outputs = tf.concat([yb, yp], axis=0)
         mlpf_train_disc_targets = tf.concat([batch_size * [0.99], batch_size * [0.01]], axis=0)
-        loss2 = m2.test_on_batch([mlpf_train_inputs, mlpf_train_outputs], mlpf_train_disc_targets)
+        loss2 = m2.test_on_batch(
+            [mlpf_train_inputs, mlpf_train_outputs],
+            mlpf_train_disc_targets,
+        )

         # Train the MLPF reconstruction (generative) model with an inverted target
         disc_train_disc_targets = tf.concat([batch_size * [1.0]], axis=0)
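
The training loop above follows the standard GAN alternation: m2 updates the discriminator on smoothed real/fake targets (0.99/0.01), and m1 updates the MLPF model through a discriminator frozen at compile time, with the target inverted to 1.0. A self-contained sketch of the same pattern with generic stand-in models (not the MLPF architecture):

    import numpy as np
    import tensorflow as tf

    batch_size = 32
    latent_dim, data_dim = 8, 4

    # Generic stand-ins for the reconstruction model and the discriminator.
    gen = tf.keras.Sequential([tf.keras.layers.Dense(16, activation="elu"), tf.keras.layers.Dense(data_dim)])
    disc = tf.keras.Sequential([tf.keras.layers.Dense(16, activation="elu"), tf.keras.layers.Dense(1)])

    def loss(x, y):
        return tf.keras.losses.binary_crossentropy(x, y, from_logits=True)

    # Analogue of m2: trains the discriminator to separate real from generated samples.
    x_in = tf.keras.layers.Input(shape=(data_dim,))
    m2 = tf.keras.models.Model(inputs=[x_in], outputs=[disc(x_in)])
    m2.compile(loss=loss, optimizer="adam")

    # Analogue of m1: the discriminator is frozen before compiling, so only the generator updates.
    disc.trainable = False
    z_in = tf.keras.layers.Input(shape=(latent_dim,))
    m1 = tf.keras.models.Model(inputs=[z_in], outputs=[disc(gen(z_in))])
    m1.compile(loss=loss, optimizer="adam")

    for step in range(100):
        z = np.random.randn(batch_size, latent_dim).astype("float32")
        real = np.random.randn(batch_size, data_dim).astype("float32") + 3.0
        fake = gen.predict(z, verbose=0)

        # Discriminator step with smoothed targets (0.99 real, 0.01 fake).
        x = np.concatenate([real, fake], axis=0)
        y = np.concatenate([0.99 * np.ones((batch_size, 1)), 0.01 * np.ones((batch_size, 1))])
        loss2 = m2.train_on_batch(x, y)

        # Generator step with the inverted target 1.0: fool the frozen discriminator.
        loss1 = m1.train_on_batch(z, np.ones((batch_size, 1)))
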
42 changes: 37 additions & 5 deletions mlpf/data_clic/postprocessing.py
@@ -54,15 +54,32 @@ def track_as_array(df_tr, itr):
 def cluster_as_array(df_cl, icl):
     row = df_cl[icl]
     return np.array(
-        [2, row["x"], row["y"], row["z"], row["nhits_ecal"], row["nhits_hcal"], row["energy"]]  # clusters are type 2
+        [
+            2,
+            row["x"],
+            row["y"],
+            row["z"],
+            row["nhits_ecal"],
+            row["nhits_hcal"],
+            row["energy"],
+        ]  # clusters are type 2
     )


 # this defines the genparticle features
 def gen_as_array(df_gen, igen):
     if igen:
         row = df_gen[igen]
-        return np.array([abs(row["pdgid"]), row["charge"], row["px"], row["py"], row["pz"], row["energy"]])
+        return np.array(
+            [
+                abs(row["pdgid"]),
+                row["charge"],
+                row["px"],
+                row["py"],
+                row["pz"],
+                row["energy"],
+            ]
+        )
     else:
         return np.zeros(6)

@@ -71,7 +88,16 @@ def gen_as_array(df_gen, igen):
 def pf_as_array(df_pfs, igen):
     if igen:
         row = df_pfs[igen]
-        return np.array([abs(row["type"]), row["charge"], row["px"], row["py"], row["pz"], row["energy"]])
+        return np.array(
+            [
+                abs(row["type"]),
+                row["charge"],
+                row["px"],
+                row["py"],
+                row["pz"],
+                row["energy"],
+            ]
+        )
     else:
         return np.zeros(6)

@@ -145,9 +171,15 @@ def flatten_event(df_tr, df_cl, df_gen, df_pfs, pairs):

     # Here we pad the tracks and clusters to the same shape along the feature dimension
     if Xs_tracks.shape[1] > Xs_clusters.shape[-1]:
-        Xs_clusters = np.pad(Xs_clusters, [(0, 0), (0, Xs_tracks.shape[1] - Xs_clusters.shape[-1])])
+        Xs_clusters = np.pad(
+            Xs_clusters,
+            [(0, 0), (0, Xs_tracks.shape[1] - Xs_clusters.shape[-1])],
+        )
     elif Xs_tracks.shape[1] < Xs_clusters.shape[-1]:
-        Xs_clusters = np.pad(Xs_clusters, [(0, 0), (0, Xs_clusters.shape[-1] - Xs_tracks.shape[1])])
+        Xs_clusters = np.pad(
+            Xs_clusters,
+            [(0, 0), (0, Xs_clusters.shape[-1] - Xs_tracks.shape[1])],
+        )

     Xs = np.concatenate([Xs_tracks, Xs_clusters], axis=0)  # [Ntracks+Nclusters, max(Nfeat_cluster, Nfeat_track)]
     ys_gen = np.stack(ys_gen, axis=-1).T
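
The padding in flatten_event above uses np.pad's per-axis (before, after) widths: [(0, 0), (0, k)] leaves the object axis untouched and appends k zero-valued features, after which tracks and clusters can be concatenated along the object axis. A minimal sketch with hypothetical shapes:

    import numpy as np

    # Hypothetical feature matrices: 3 tracks x 5 features, 4 clusters x 7 features.
    tracks = np.ones((3, 5))
    clusters = np.ones((4, 7))

    # Zero-pad the narrower array along the feature axis only.
    if tracks.shape[1] < clusters.shape[1]:
        tracks = np.pad(tracks, [(0, 0), (0, clusters.shape[1] - tracks.shape[1])])

    X = np.concatenate([tracks, clusters], axis=0)
    print(X.shape)  # (7, 7)
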
25 changes: 20 additions & 5 deletions mlpf/data_cms/multicrab.py
@@ -7,15 +7,27 @@
 def submit(config):
     crabCommand("submit", config=config)
     # save crab config for the future
-    with open(config.General.workArea + "/crab_" + config.General.requestName + "/crab_config.py", "w") as fi:
+    with open(
+        config.General.workArea + "/crab_" + config.General.requestName + "/crab_config.py",
+        "w",
+    ) as fi:
         fi.write(config.pythonise_())


 # https://cmsweb.cern.ch/das/request?view=plain&limit=50&instance=prod%2Fglobal&input=%2FRelVal*%2FCMSSW_11_0_0_pre4*%2FGEN-SIM-DIGI-RAW
 samples = [
-    ("/RelValQCD_FlatPt_15_3000HS_14/CMSSW_11_0_0_pre12-PU_110X_mcRun3_2021_realistic_v5-v1/GEN-SIM-DIGI-RAW", "QCD_run3"),
-    ("/RelValNuGun/CMSSW_11_0_0_pre12-PU_110X_mcRun3_2021_realistic_v5-v1/GEN-SIM-DIGI-RAW", "NuGun_run3"),
-    ("/RelValTTbar_14TeV/CMSSW_11_0_0_pre12-PU_110X_mcRun3_2021_realistic_v5-v1/GEN-SIM-DIGI-RAW", "TTbar_run3"),
+    (
+        "/RelValQCD_FlatPt_15_3000HS_14/CMSSW_11_0_0_pre12-PU_110X_mcRun3_2021_realistic_v5-v1/GEN-SIM-DIGI-RAW",
+        "QCD_run3",
+    ),
+    (
+        "/RelValNuGun/CMSSW_11_0_0_pre12-PU_110X_mcRun3_2021_realistic_v5-v1/GEN-SIM-DIGI-RAW",
+        "NuGun_run3",
+    ),
+    (
+        "/RelValTTbar_14TeV/CMSSW_11_0_0_pre12-PU_110X_mcRun3_2021_realistic_v5-v1/GEN-SIM-DIGI-RAW",
+        "TTbar_run3",
+    ),
     # ("/RelValTTbar_14TeV/CMSSW_11_0_0_pre12-PU25ns_110X_mcRun4_realistic_v2_2026D41PU140-v1/GEN-SIM-DIGI-RAW",
     # "TTbar_run4_pu140"),
     # ("/RelValTTbar_14TeV/CMSSW_11_0_0_pre12-PU25ns_110X_mcRun4_realistic_v2_2026D41PU200-v1/GEN-SIM-DIGI-RAW",
@@ -37,7 +49,10 @@ def submit(config):
     conf.JobType.psetName = "step3_dump.py"
     conf.JobType.maxJobRuntimeMin = 8 * 60
     conf.JobType.allowUndistributedCMSSW = True
-    conf.JobType.outputFiles = ["step3_inMINIAODSIM.root", "step3_AOD.root"]
+    conf.JobType.outputFiles = [
+        "step3_inMINIAODSIM.root",
+        "step3_AOD.root",
+    ]
    conf.JobType.maxMemoryMB = 6000
    conf.JobType.numCores = 2

