Skip to content

Commit

Permalink
CLIC new samples with 1M events (#181)
Browse files Browse the repository at this point in the history
* clic new samples

* fix torch_geometric to 2.2.0 to avoid issues with the dataset abstract method len

Former-commit-id: e422e35
  • Loading branch information
jpata authored Mar 24, 2023
1 parent cad6997 commit f033066
Show file tree
Hide file tree
Showing 14 changed files with 56 additions and 44 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
cache: 'pip'
- run: pip install -r requirements.txt
- run: pip3 install torch==1.13.0 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
- run: pip install pyg-lib torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.13.0+cpu.html
- run: pip install pyg-lib torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric==2.2.0 -f https://data.pyg.org/whl/torch-1.13.0+cpu.html

tf-clic-pipeline:
runs-on: ubuntu-20.04
Expand Down Expand Up @@ -78,7 +78,7 @@ jobs:
cache: 'pip'
- run: pip install -r requirements.txt
- run: pip3 install torch==1.13.0 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
- run: pip install pyg-lib torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.13.0+cpu.html
- run: pip install pyg-lib torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric==2.2.0 -f https://data.pyg.org/whl/torch-1.13.0+cpu.html
- run: ./scripts/local_test_pyg_cms.sh

pyg-delphes-pipeline:
Expand All @@ -92,7 +92,7 @@ jobs:
cache: 'pip'
- run: pip install -r requirements.txt
- run: pip3 install torch==1.13.0 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
- run: pip install pyg-lib torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.13.0+cpu.html
- run: pip install pyg-lib torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric==2.2.0 -f https://data.pyg.org/whl/torch-1.13.0+cpu.html
- run: ./scripts/local_test_pyg_delphes.sh

pyg-clic-pipeline:
Expand All @@ -106,7 +106,7 @@ jobs:
cache: 'pip'
- run: pip install -r requirements.txt
- run: pip3 install torch==1.13.0 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
- run: pip install pyg-lib torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.13.0+cpu.html
- run: pip install pyg-lib torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric==2.2.0 -f https://data.pyg.org/whl/torch-1.13.0+cpu.html
- run: ./scripts/local_test_pyg_clic.sh

pyg-ssl-pipeline:
Expand All @@ -120,5 +120,5 @@ jobs:
cache: 'pip'
- run: pip install -r requirements.txt
- run: pip3 install torch==1.13.0 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
- run: pip install pyg-lib torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.13.0+cpu.html
- run: pip install pyg-lib torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric==2.2.0 -f https://data.pyg.org/whl/torch-1.13.0+cpu.html
- run: ./scripts/local_test_pyg_ssl.sh
2 changes: 1 addition & 1 deletion fcc/check_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
samples = [
("p8_ee_tt_ecm380", 1, 10011),
("p8_ee_qq_ecm380", 100001, 110011),
("p8_ee_ZH_Htautau_ecm380", 200001, 202011),
("p8_ee_ZH_Htautau_ecm380", 200001, 210011),
]

samples_pu = [
Expand Down
6 changes: 5 additions & 1 deletion fcc/postprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,10 @@ def get_feature_matrix(feature_dict, features):

def process_one_file(fn, ofn):

#output exists, do not recreate
if os.path.isfile(ofn):
return

fi = uproot.open(fn)

arrs = fi["events"]
Expand Down Expand Up @@ -773,7 +777,7 @@ def process_all_files():
samps = [
"p8_ee_qq_ecm380",
"p8_ee_tt_ecm380",
"p8_ee_ZH_Htautau_ecm380"
#"p8_ee_ZH_Htautau_ecm380"
]

pool = multiprocessing.Pool(12)
Expand Down
7 changes: 1 addition & 6 deletions fcc/run_sim.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
#SBATCH -p main
#SBATCH -x comp-d-[001-128],comp-u-[001-128],comp-r-003
#SBATCH -x comp-u-[001-128],comp-r-003
#SBATCH --mem-per-cpu=4G
#SBATCH --cpus-per-task=1
#SBATCH -o logs/slurm-%x-%j-%N.out
Expand Down Expand Up @@ -41,11 +41,6 @@ cat card.cmd
source /cvmfs/sw.hsf.org/spackages6/key4hep-stack/2023-01-15/x86_64-centos7-gcc11.2.0-opt/csapx/setup.sh
k4run $PFDIR/fcc/pythia.py -n $NEV --Dumper.Filename out.hepmc --Pythia8.PythiaInterface.pythiacard card.cmd

#with PU (needs double checking)
#LD_LIBRARY_PATH=/home/joosep/HepMC3/hepmc3-install/lib/:/home/joosep/pythia8308/lib/ ./main $NUM
#mv pythia.hepmc out.hepmc
#source /cvmfs/sw.hsf.org/spackages6/key4hep-stack/2022-12-23/x86_64-centos7-gcc11.2.0-opt/ll3gi/setup.sh

ddsim --compactFile $LCGEO/CLIC/compact/CLIC_o3_v14/CLIC_o3_v14.xml \
--outputFile out_sim_edm4hep.root \
--steeringFile clic_steer.py \
Expand Down
2 changes: 1 addition & 1 deletion fcc/run_sim_pu.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
#SBATCH -p main
#SBATCH -x comp-d-[001-128],comp-u-[001-128],comp-r-003
#SBATCH -x comp-u-[001-128],comp-r-003
#SBATCH --mem-per-cpu=4G
#SBATCH --cpus-per-task=1
#SBATCH -o logs/slurm-%x-%j-%N.out
Expand Down
1 change: 1 addition & 0 deletions mlpf/customizations.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def customize_pipeline_test(config):
config["train_test_datasets"] = {"physical": config["train_test_datasets"]["physical"]}
config["train_test_datasets"]["physical"]["batch_per_gpu"] = 50
config["validation_dataset"] = "clic_edm_ttbar_pf"
config["validation_batch_size"] = 50
config["evaluation_datasets"] = {"clic_edm_ttbar_pf": {"batch_size": 50, "num_events": -1}}

# validate only on a small number of events
Expand Down
3 changes: 2 additions & 1 deletion mlpf/heptfds/clic_pf_edm4hep/qq.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@


class ClicEdmQqPf(tfds.core.GeneratorBasedBuilder):
VERSION = tfds.core.Version("1.2.0")
VERSION = tfds.core.Version("1.3.0")
RELEASE_NOTES = {
"1.0.0": "Initial release.",
"1.1.0": "update stats, move to 380 GeV",
"1.2.0": "sin cos as separate features",
"1.3.0": "Update stats to ~1M events",
}
MANUAL_DOWNLOAD_INSTRUCTIONS = """
rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/clic_edm4hep_2023_02_27/ ./
Expand Down
3 changes: 2 additions & 1 deletion mlpf/heptfds/clic_pf_edm4hep/ttbar.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@


class ClicEdmTtbarPf(tfds.core.GeneratorBasedBuilder):
VERSION = tfds.core.Version("1.2.0")
VERSION = tfds.core.Version("1.3.0")
RELEASE_NOTES = {
"1.0.0": "Initial release.",
"1.1.0": "update stats, move to 380 GeV",
"1.2.0": "sin/cos phi separately",
"1.3.0": "Update stats to ~1M events",
}
MANUAL_DOWNLOAD_INSTRUCTIONS = """
rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/clic_edm4hep_2023_02_27/ ./
Expand Down
4 changes: 2 additions & 2 deletions mlpf/heptfds/clic_pf_edm4hep/utils_edm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import numpy as np
import vector

jetdef = fastjet.JetDefinition(fastjet.antikt_algorithm, 0.4)
min_jet_pt = 1.0 # GeV
jetdef = fastjet.JetDefinition(fastjet.ee_genkt_algorithm, 0.7, -1.0)
min_jet_pt = 5.0 # GeV

# from fcc/postprocessing.py
X_FEATURES_TRK = [
Expand Down
10 changes: 10 additions & 0 deletions mlpf/tallinn/mlpf-evaluate-tf.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
# Run `mlpf/pipeline.py evaluate` on a training directory inside the
# tf-2.11.0 singularity image.
# Usage: mlpf-evaluate-tf.sh <train-dir>

# fail early with a usage message when the training directory is not given
TRAIN_DIR=${1:?usage: $0 <train-dir>}

IMG=/home/software/singularity/tf-2.11.0.simg
# abort instead of running from the wrong directory if the checkout is missing
cd ~/particleflow || exit 1

#TF evaluation
singularity exec -B /scratch-persistent --nv \
    --env PYTHONPATH=hep_tfds \
    --env TFDS_DATA_DIR=/scratch-persistent/joosep/tensorflow_datasets \
    "$IMG" python mlpf/pipeline.py evaluate --train-dir "$TRAIN_DIR"
2 changes: 1 addition & 1 deletion mlpf/tallinn/mlpf-train.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
#SBATCH -p gpu
#SBATCH --gpus 2
#SBATCH --gpus 1
#SBATCH --mem-per-gpu=8G
#SBATCH -o logs/slurm-%x-%j-%N.out

Expand Down
10 changes: 5 additions & 5 deletions parameters/clic-fp16.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,12 @@ setup:
train: yes
weights:
weights_config:
lr: 0.0001
num_epochs: 500
lr: 0.001
num_epochs: 100
dtype: float16
trainable:
classification_loss_type: sigmoid_focal_crossentropy
lr_schedule: none # cosinedecay, exponentialdecay, onecycle, none
lr_schedule: onecycle # cosinedecay, exponentialdecay, onecycle, none
optimizer: adam # adam, adamw, sgd
horovod_enabled: no
cls_output_as_logits: yes
Expand Down Expand Up @@ -248,10 +248,10 @@ evaluation_jet_algo: ee_genkt_algorithm

datasets:
clic_edm_ttbar_pf:
version: 1.2.0
version: 1.3.0
data_dir:
manual_dir:
clic_edm_qq_pf:
version: 1.2.0
version: 1.3.0
data_dir:
manual_dir:
12 changes: 6 additions & 6 deletions parameters/clic.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@ setup:
weights:
weights_config:
lr: 0.0001
num_epochs: 500
num_epochs: 50
dtype: float32
trainable:
classification_loss_type: sigmoid_focal_crossentropy
lr_schedule: none # cosinedecay, exponentialdecay, onecycle, none
lr_schedule: cosinedecay # cosinedecay, exponentialdecay, onecycle, none
optimizer: adam # adam, adamw, sgd
horovod_enabled: no
cls_output_as_logits: yes
Expand Down Expand Up @@ -236,8 +236,8 @@ train_test_datasets:
- clic_edm_qq_pf

validation_dataset: clic_edm_ttbar_pf
validation_batch_size: 50
validation_num_events: 500
validation_batch_size: 100
validation_num_events: 2000

evaluation_datasets:
clic_edm_ttbar_pf:
Expand All @@ -248,10 +248,10 @@ evaluation_jet_algo: ee_genkt_algorithm

datasets:
clic_edm_ttbar_pf:
version: 1.2.0
version: 1.3.0
data_dir:
manual_dir:
clic_edm_qq_pf:
version: 1.2.0
version: 1.3.0
data_dir:
manual_dir:
28 changes: 14 additions & 14 deletions scripts/generate_tfds.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,25 @@ export CMD="singularity exec -B /local -B /scratch-persistent --env PYTHONPATH=$
# CMD="singularity exec --env PYTHONPATH=$PYTHONPATH $IMG tfds build "

# CMS
$CMD mlpf/heptfds/cms_pf/ttbar --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_ttbar.log &
$CMD mlpf/heptfds/cms_pf/qcd --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_qcd.log &
$CMD mlpf/heptfds/cms_pf/ztt --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_ztt.log &
$CMD mlpf/heptfds/cms_pf/qcd_high_pt --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_qcd_high_pt.log &
$CMD mlpf/heptfds/cms_pf/singlepi0 --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_singlepi0.log &
$CMD mlpf/heptfds/cms_pf/singleneutron --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_singleneutron.log &
$CMD mlpf/heptfds/cms_pf/singleele --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_singleele.log &
$CMD mlpf/heptfds/cms_pf/singlegamma --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_singlegamma.log &
$CMD mlpf/heptfds/cms_pf/singlemu --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_singlemu.log &
$CMD mlpf/heptfds/cms_pf/singlepi --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_singlepi.log &
$CMD mlpf/heptfds/cms_pf/singleproton --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_singleproton.log &
$CMD mlpf/heptfds/cms_pf/singletau --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_singletau.log &
wait
# $CMD mlpf/heptfds/cms_pf/ttbar --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_ttbar.log &
# $CMD mlpf/heptfds/cms_pf/qcd --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_qcd.log &
# $CMD mlpf/heptfds/cms_pf/ztt --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_ztt.log &
# $CMD mlpf/heptfds/cms_pf/qcd_high_pt --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_qcd_high_pt.log &
# $CMD mlpf/heptfds/cms_pf/singlepi0 --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_singlepi0.log &
# $CMD mlpf/heptfds/cms_pf/singleneutron --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_singleneutron.log &
# $CMD mlpf/heptfds/cms_pf/singleele --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_singleele.log &
# $CMD mlpf/heptfds/cms_pf/singlegamma --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_singlegamma.log &
# $CMD mlpf/heptfds/cms_pf/singlemu --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_singlemu.log &
# $CMD mlpf/heptfds/cms_pf/singlepi --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_singlepi.log &
# $CMD mlpf/heptfds/cms_pf/singleproton --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_singleproton.log &
# $CMD mlpf/heptfds/cms_pf/singletau --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_singletau.log &
# wait

# CLIC
# export MANUAL_DIR=/local/joosep/mlpf/clic_edm4hep_2023_02_27
# $CMD mlpf/heptfds/clic_pf_edm4hep/qq --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_qq.log &
# $CMD mlpf/heptfds/clic_pf_edm4hep/ttbar --data_dir $DATA_DIR --manual_dir $MANUAL_DIR --overwrite &> logs/tfds_ttbar.log &
# wait
wait

# Delphes
# $CMD mlpf/heptfds/delphes_pf/delphes_pf &> logs/tfds_delphes.log &
Expand Down

0 comments on commit f033066

Please sign in to comment.